mirror of
https://github.com/QuantumNous/new-api.git
synced 2026-04-18 20:17:28 +00:00
Merge pull request #1957 from seefs001/pr/custom-currency-1923
💱 feat(settings): introduce site-wide quota display type
This commit is contained in:
@@ -18,7 +18,9 @@ import (
|
||||
type Adaptor struct {
|
||||
}
|
||||
|
||||
func (a *Adaptor) ConvertGeminiRequest(*gin.Context, *relaycommon.RelayInfo, *dto.GeminiChatRequest) (any, error) { return nil, errors.New("not implemented") }
|
||||
func (a *Adaptor) ConvertGeminiRequest(*gin.Context, *relaycommon.RelayInfo, *dto.GeminiChatRequest) (any, error) {
|
||||
return nil, errors.New("not implemented")
|
||||
}
|
||||
|
||||
func (a *Adaptor) ConvertClaudeRequest(c *gin.Context, info *relaycommon.RelayInfo, request *dto.ClaudeRequest) (any, error) {
|
||||
openaiAdaptor := openai.Adaptor{}
|
||||
@@ -33,17 +35,25 @@ func (a *Adaptor) ConvertClaudeRequest(c *gin.Context, info *relaycommon.RelayIn
|
||||
return openAIChatToOllamaChat(c, openaiRequest.(*dto.GeneralOpenAIRequest))
|
||||
}
|
||||
|
||||
func (a *Adaptor) ConvertAudioRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.AudioRequest) (io.Reader, error) { return nil, errors.New("not implemented") }
|
||||
func (a *Adaptor) ConvertAudioRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.AudioRequest) (io.Reader, error) {
|
||||
return nil, errors.New("not implemented")
|
||||
}
|
||||
|
||||
func (a *Adaptor) ConvertImageRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.ImageRequest) (any, error) { return nil, errors.New("not implemented") }
|
||||
func (a *Adaptor) ConvertImageRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.ImageRequest) (any, error) {
|
||||
return nil, errors.New("not implemented")
|
||||
}
|
||||
|
||||
func (a *Adaptor) Init(info *relaycommon.RelayInfo) {
|
||||
}
|
||||
|
||||
func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
|
||||
if info.RelayMode == relayconstant.RelayModeEmbeddings { return info.ChannelBaseUrl + "/api/embed", nil }
|
||||
if strings.Contains(info.RequestURLPath, "/v1/completions") || info.RelayMode == relayconstant.RelayModeCompletions { return info.ChannelBaseUrl + "/api/generate", nil }
|
||||
return info.ChannelBaseUrl + "/api/chat", nil
|
||||
if info.RelayMode == relayconstant.RelayModeEmbeddings {
|
||||
return info.ChannelBaseUrl + "/api/embed", nil
|
||||
}
|
||||
if strings.Contains(info.RequestURLPath, "/v1/completions") || info.RelayMode == relayconstant.RelayModeCompletions {
|
||||
return info.ChannelBaseUrl + "/api/generate", nil
|
||||
}
|
||||
return info.ChannelBaseUrl + "/api/chat", nil
|
||||
}
|
||||
|
||||
func (a *Adaptor) SetupRequestHeader(c *gin.Context, req *http.Header, info *relaycommon.RelayInfo) error {
|
||||
@@ -53,7 +63,9 @@ func (a *Adaptor) SetupRequestHeader(c *gin.Context, req *http.Header, info *rel
|
||||
}
|
||||
|
||||
func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayInfo, request *dto.GeneralOpenAIRequest) (any, error) {
|
||||
if request == nil { return nil, errors.New("request is nil") }
|
||||
if request == nil {
|
||||
return nil, errors.New("request is nil")
|
||||
}
|
||||
// decide generate or chat
|
||||
if strings.Contains(info.RequestURLPath, "/v1/completions") || info.RelayMode == relayconstant.RelayModeCompletions {
|
||||
return openAIToGenerate(c, request)
|
||||
@@ -69,7 +81,9 @@ func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.Rela
|
||||
return requestOpenAI2Embeddings(request), nil
|
||||
}
|
||||
|
||||
func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) { return nil, errors.New("not implemented") }
|
||||
func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
|
||||
return nil, errors.New("not implemented")
|
||||
}
|
||||
|
||||
func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error) {
|
||||
return channel.DoApiRequest(a, c, info, requestBody)
|
||||
|
||||
@@ -5,12 +5,12 @@ import (
|
||||
)
|
||||
|
||||
type OllamaChatMessage struct {
|
||||
Role string `json:"role"`
|
||||
Content string `json:"content,omitempty"`
|
||||
Images []string `json:"images,omitempty"`
|
||||
ToolCalls []OllamaToolCall `json:"tool_calls,omitempty"`
|
||||
ToolName string `json:"tool_name,omitempty"`
|
||||
Thinking json.RawMessage `json:"thinking,omitempty"`
|
||||
Role string `json:"role"`
|
||||
Content string `json:"content,omitempty"`
|
||||
Images []string `json:"images,omitempty"`
|
||||
ToolCalls []OllamaToolCall `json:"tool_calls,omitempty"`
|
||||
ToolName string `json:"tool_name,omitempty"`
|
||||
Thinking json.RawMessage `json:"thinking,omitempty"`
|
||||
}
|
||||
|
||||
type OllamaToolFunction struct {
|
||||
@@ -20,7 +20,7 @@ type OllamaToolFunction struct {
|
||||
}
|
||||
|
||||
type OllamaTool struct {
|
||||
Type string `json:"type"`
|
||||
Type string `json:"type"`
|
||||
Function OllamaToolFunction `json:"function"`
|
||||
}
|
||||
|
||||
@@ -43,28 +43,27 @@ type OllamaChatRequest struct {
|
||||
}
|
||||
|
||||
type OllamaGenerateRequest struct {
|
||||
Model string `json:"model"`
|
||||
Prompt string `json:"prompt,omitempty"`
|
||||
Suffix string `json:"suffix,omitempty"`
|
||||
Images []string `json:"images,omitempty"`
|
||||
Format interface{} `json:"format,omitempty"`
|
||||
Stream bool `json:"stream,omitempty"`
|
||||
Options map[string]any `json:"options,omitempty"`
|
||||
KeepAlive interface{} `json:"keep_alive,omitempty"`
|
||||
Model string `json:"model"`
|
||||
Prompt string `json:"prompt,omitempty"`
|
||||
Suffix string `json:"suffix,omitempty"`
|
||||
Images []string `json:"images,omitempty"`
|
||||
Format interface{} `json:"format,omitempty"`
|
||||
Stream bool `json:"stream,omitempty"`
|
||||
Options map[string]any `json:"options,omitempty"`
|
||||
KeepAlive interface{} `json:"keep_alive,omitempty"`
|
||||
Think json.RawMessage `json:"think,omitempty"`
|
||||
}
|
||||
|
||||
type OllamaEmbeddingRequest struct {
|
||||
Model string `json:"model"`
|
||||
Input interface{} `json:"input"`
|
||||
Options map[string]any `json:"options,omitempty"`
|
||||
Model string `json:"model"`
|
||||
Input interface{} `json:"input"`
|
||||
Options map[string]any `json:"options,omitempty"`
|
||||
Dimensions int `json:"dimensions,omitempty"`
|
||||
}
|
||||
|
||||
type OllamaEmbeddingResponse struct {
|
||||
Error string `json:"error,omitempty"`
|
||||
Model string `json:"model"`
|
||||
Embeddings [][]float64 `json:"embeddings"`
|
||||
PromptEvalCount int `json:"prompt_eval_count,omitempty"`
|
||||
Error string `json:"error,omitempty"`
|
||||
Model string `json:"model"`
|
||||
Embeddings [][]float64 `json:"embeddings"`
|
||||
PromptEvalCount int `json:"prompt_eval_count,omitempty"`
|
||||
}
|
||||
|
||||
|
||||
@@ -35,13 +35,27 @@ func openAIChatToOllamaChat(c *gin.Context, r *dto.GeneralOpenAIRequest) (*Ollam
|
||||
}
|
||||
|
||||
// options mapping
|
||||
if r.Temperature != nil { chatReq.Options["temperature"] = r.Temperature }
|
||||
if r.TopP != 0 { chatReq.Options["top_p"] = r.TopP }
|
||||
if r.TopK != 0 { chatReq.Options["top_k"] = r.TopK }
|
||||
if r.FrequencyPenalty != 0 { chatReq.Options["frequency_penalty"] = r.FrequencyPenalty }
|
||||
if r.PresencePenalty != 0 { chatReq.Options["presence_penalty"] = r.PresencePenalty }
|
||||
if r.Seed != 0 { chatReq.Options["seed"] = int(r.Seed) }
|
||||
if mt := r.GetMaxTokens(); mt != 0 { chatReq.Options["num_predict"] = int(mt) }
|
||||
if r.Temperature != nil {
|
||||
chatReq.Options["temperature"] = r.Temperature
|
||||
}
|
||||
if r.TopP != 0 {
|
||||
chatReq.Options["top_p"] = r.TopP
|
||||
}
|
||||
if r.TopK != 0 {
|
||||
chatReq.Options["top_k"] = r.TopK
|
||||
}
|
||||
if r.FrequencyPenalty != 0 {
|
||||
chatReq.Options["frequency_penalty"] = r.FrequencyPenalty
|
||||
}
|
||||
if r.PresencePenalty != 0 {
|
||||
chatReq.Options["presence_penalty"] = r.PresencePenalty
|
||||
}
|
||||
if r.Seed != 0 {
|
||||
chatReq.Options["seed"] = int(r.Seed)
|
||||
}
|
||||
if mt := r.GetMaxTokens(); mt != 0 {
|
||||
chatReq.Options["num_predict"] = int(mt)
|
||||
}
|
||||
|
||||
if r.Stop != nil {
|
||||
switch v := r.Stop.(type) {
|
||||
@@ -50,21 +64,27 @@ func openAIChatToOllamaChat(c *gin.Context, r *dto.GeneralOpenAIRequest) (*Ollam
|
||||
case []string:
|
||||
chatReq.Options["stop"] = v
|
||||
case []any:
|
||||
arr := make([]string,0,len(v))
|
||||
for _, i := range v { if s,ok:=i.(string); ok { arr = append(arr,s) } }
|
||||
if len(arr)>0 { chatReq.Options["stop"] = arr }
|
||||
arr := make([]string, 0, len(v))
|
||||
for _, i := range v {
|
||||
if s, ok := i.(string); ok {
|
||||
arr = append(arr, s)
|
||||
}
|
||||
}
|
||||
if len(arr) > 0 {
|
||||
chatReq.Options["stop"] = arr
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(r.Tools) > 0 {
|
||||
tools := make([]OllamaTool,0,len(r.Tools))
|
||||
tools := make([]OllamaTool, 0, len(r.Tools))
|
||||
for _, t := range r.Tools {
|
||||
tools = append(tools, OllamaTool{Type: "function", Function: OllamaToolFunction{Name: t.Function.Name, Description: t.Function.Description, Parameters: t.Function.Parameters}})
|
||||
}
|
||||
chatReq.Tools = tools
|
||||
}
|
||||
|
||||
chatReq.Messages = make([]OllamaChatMessage,0,len(r.Messages))
|
||||
chatReq.Messages = make([]OllamaChatMessage, 0, len(r.Messages))
|
||||
for _, m := range r.Messages {
|
||||
var textBuilder strings.Builder
|
||||
var images []string
|
||||
@@ -79,14 +99,20 @@ func openAIChatToOllamaChat(c *gin.Context, r *dto.GeneralOpenAIRequest) (*Ollam
|
||||
var base64Data string
|
||||
if strings.HasPrefix(img.Url, "http") {
|
||||
fileData, err := service.GetFileBase64FromUrl(c, img.Url, "fetch image for ollama chat")
|
||||
if err != nil { return nil, err }
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
base64Data = fileData.Base64Data
|
||||
} else if strings.HasPrefix(img.Url, "data:") {
|
||||
if idx := strings.Index(img.Url, ","); idx != -1 && idx+1 < len(img.Url) { base64Data = img.Url[idx+1:] }
|
||||
if idx := strings.Index(img.Url, ","); idx != -1 && idx+1 < len(img.Url) {
|
||||
base64Data = img.Url[idx+1:]
|
||||
}
|
||||
} else {
|
||||
base64Data = img.Url
|
||||
}
|
||||
if base64Data != "" { images = append(images, base64Data) }
|
||||
if base64Data != "" {
|
||||
images = append(images, base64Data)
|
||||
}
|
||||
}
|
||||
} else if part.Type == dto.ContentTypeText {
|
||||
textBuilder.WriteString(part.Text)
|
||||
@@ -94,16 +120,24 @@ func openAIChatToOllamaChat(c *gin.Context, r *dto.GeneralOpenAIRequest) (*Ollam
|
||||
}
|
||||
}
|
||||
cm := OllamaChatMessage{Role: m.Role, Content: textBuilder.String()}
|
||||
if len(images)>0 { cm.Images = images }
|
||||
if m.Role == "tool" && m.Name != nil { cm.ToolName = *m.Name }
|
||||
if len(images) > 0 {
|
||||
cm.Images = images
|
||||
}
|
||||
if m.Role == "tool" && m.Name != nil {
|
||||
cm.ToolName = *m.Name
|
||||
}
|
||||
if m.ToolCalls != nil && len(m.ToolCalls) > 0 {
|
||||
parsed := m.ParseToolCalls()
|
||||
if len(parsed) > 0 {
|
||||
calls := make([]OllamaToolCall,0,len(parsed))
|
||||
calls := make([]OllamaToolCall, 0, len(parsed))
|
||||
for _, tc := range parsed {
|
||||
var args interface{}
|
||||
if tc.Function.Arguments != "" { _ = json.Unmarshal([]byte(tc.Function.Arguments), &args) }
|
||||
if args==nil { args = map[string]any{} }
|
||||
if tc.Function.Arguments != "" {
|
||||
_ = json.Unmarshal([]byte(tc.Function.Arguments), &args)
|
||||
}
|
||||
if args == nil {
|
||||
args = map[string]any{}
|
||||
}
|
||||
oc := OllamaToolCall{}
|
||||
oc.Function.Name = tc.Function.Name
|
||||
oc.Function.Arguments = args
|
||||
@@ -132,28 +166,67 @@ func openAIToGenerate(c *gin.Context, r *dto.GeneralOpenAIRequest) (*OllamaGener
|
||||
gen.Prompt = v
|
||||
case []any:
|
||||
var sb strings.Builder
|
||||
for _, it := range v { if s,ok:=it.(string); ok { sb.WriteString(s) } }
|
||||
for _, it := range v {
|
||||
if s, ok := it.(string); ok {
|
||||
sb.WriteString(s)
|
||||
}
|
||||
}
|
||||
gen.Prompt = sb.String()
|
||||
default:
|
||||
gen.Prompt = fmt.Sprintf("%v", r.Prompt)
|
||||
}
|
||||
}
|
||||
if r.Suffix != nil { if s,ok:=r.Suffix.(string); ok { gen.Suffix = s } }
|
||||
if r.ResponseFormat != nil {
|
||||
if r.ResponseFormat.Type == "json" { gen.Format = "json" } else if r.ResponseFormat.Type == "json_schema" { var schema any; _ = json.Unmarshal(r.ResponseFormat.JsonSchema,&schema); gen.Format=schema }
|
||||
if r.Suffix != nil {
|
||||
if s, ok := r.Suffix.(string); ok {
|
||||
gen.Suffix = s
|
||||
}
|
||||
}
|
||||
if r.ResponseFormat != nil {
|
||||
if r.ResponseFormat.Type == "json" {
|
||||
gen.Format = "json"
|
||||
} else if r.ResponseFormat.Type == "json_schema" {
|
||||
var schema any
|
||||
_ = json.Unmarshal(r.ResponseFormat.JsonSchema, &schema)
|
||||
gen.Format = schema
|
||||
}
|
||||
}
|
||||
if r.Temperature != nil {
|
||||
gen.Options["temperature"] = r.Temperature
|
||||
}
|
||||
if r.TopP != 0 {
|
||||
gen.Options["top_p"] = r.TopP
|
||||
}
|
||||
if r.TopK != 0 {
|
||||
gen.Options["top_k"] = r.TopK
|
||||
}
|
||||
if r.FrequencyPenalty != 0 {
|
||||
gen.Options["frequency_penalty"] = r.FrequencyPenalty
|
||||
}
|
||||
if r.PresencePenalty != 0 {
|
||||
gen.Options["presence_penalty"] = r.PresencePenalty
|
||||
}
|
||||
if r.Seed != 0 {
|
||||
gen.Options["seed"] = int(r.Seed)
|
||||
}
|
||||
if mt := r.GetMaxTokens(); mt != 0 {
|
||||
gen.Options["num_predict"] = int(mt)
|
||||
}
|
||||
if r.Temperature != nil { gen.Options["temperature"] = r.Temperature }
|
||||
if r.TopP != 0 { gen.Options["top_p"] = r.TopP }
|
||||
if r.TopK != 0 { gen.Options["top_k"] = r.TopK }
|
||||
if r.FrequencyPenalty != 0 { gen.Options["frequency_penalty"] = r.FrequencyPenalty }
|
||||
if r.PresencePenalty != 0 { gen.Options["presence_penalty"] = r.PresencePenalty }
|
||||
if r.Seed != 0 { gen.Options["seed"] = int(r.Seed) }
|
||||
if mt := r.GetMaxTokens(); mt != 0 { gen.Options["num_predict"] = int(mt) }
|
||||
if r.Stop != nil {
|
||||
switch v := r.Stop.(type) {
|
||||
case string: gen.Options["stop"] = []string{v}
|
||||
case []string: gen.Options["stop"] = v
|
||||
case []any: arr:=make([]string,0,len(v)); for _,i:= range v { if s,ok:=i.(string); ok { arr=append(arr,s) } }; if len(arr)>0 { gen.Options["stop"]=arr }
|
||||
case string:
|
||||
gen.Options["stop"] = []string{v}
|
||||
case []string:
|
||||
gen.Options["stop"] = v
|
||||
case []any:
|
||||
arr := make([]string, 0, len(v))
|
||||
for _, i := range v {
|
||||
if s, ok := i.(string); ok {
|
||||
arr = append(arr, s)
|
||||
}
|
||||
}
|
||||
if len(arr) > 0 {
|
||||
gen.Options["stop"] = arr
|
||||
}
|
||||
}
|
||||
}
|
||||
return gen, nil
|
||||
@@ -161,30 +234,51 @@ func openAIToGenerate(c *gin.Context, r *dto.GeneralOpenAIRequest) (*OllamaGener
|
||||
|
||||
func requestOpenAI2Embeddings(r dto.EmbeddingRequest) *OllamaEmbeddingRequest {
|
||||
opts := map[string]any{}
|
||||
if r.Temperature != nil { opts["temperature"] = r.Temperature }
|
||||
if r.TopP != 0 { opts["top_p"] = r.TopP }
|
||||
if r.FrequencyPenalty != 0 { opts["frequency_penalty"] = r.FrequencyPenalty }
|
||||
if r.PresencePenalty != 0 { opts["presence_penalty"] = r.PresencePenalty }
|
||||
if r.Seed != 0 { opts["seed"] = int(r.Seed) }
|
||||
if r.Dimensions != 0 { opts["dimensions"] = r.Dimensions }
|
||||
if r.Temperature != nil {
|
||||
opts["temperature"] = r.Temperature
|
||||
}
|
||||
if r.TopP != 0 {
|
||||
opts["top_p"] = r.TopP
|
||||
}
|
||||
if r.FrequencyPenalty != 0 {
|
||||
opts["frequency_penalty"] = r.FrequencyPenalty
|
||||
}
|
||||
if r.PresencePenalty != 0 {
|
||||
opts["presence_penalty"] = r.PresencePenalty
|
||||
}
|
||||
if r.Seed != 0 {
|
||||
opts["seed"] = int(r.Seed)
|
||||
}
|
||||
if r.Dimensions != 0 {
|
||||
opts["dimensions"] = r.Dimensions
|
||||
}
|
||||
input := r.ParseInput()
|
||||
if len(input)==1 { return &OllamaEmbeddingRequest{Model:r.Model, Input: input[0], Options: opts, Dimensions:r.Dimensions} }
|
||||
return &OllamaEmbeddingRequest{Model:r.Model, Input: input, Options: opts, Dimensions:r.Dimensions}
|
||||
if len(input) == 1 {
|
||||
return &OllamaEmbeddingRequest{Model: r.Model, Input: input[0], Options: opts, Dimensions: r.Dimensions}
|
||||
}
|
||||
return &OllamaEmbeddingRequest{Model: r.Model, Input: input, Options: opts, Dimensions: r.Dimensions}
|
||||
}
|
||||
|
||||
func ollamaEmbeddingHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Response) (*dto.Usage, *types.NewAPIError) {
|
||||
var oResp OllamaEmbeddingResponse
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil { return nil, types.NewOpenAIError(err, types.ErrorCodeBadResponseBody, http.StatusInternalServerError) }
|
||||
if err != nil {
|
||||
return nil, types.NewOpenAIError(err, types.ErrorCodeBadResponseBody, http.StatusInternalServerError)
|
||||
}
|
||||
service.CloseResponseBodyGracefully(resp)
|
||||
if err = common.Unmarshal(body, &oResp); err != nil { return nil, types.NewOpenAIError(err, types.ErrorCodeBadResponseBody, http.StatusInternalServerError) }
|
||||
if oResp.Error != "" { return nil, types.NewOpenAIError(fmt.Errorf("ollama error: %s", oResp.Error), types.ErrorCodeBadResponseBody, http.StatusInternalServerError) }
|
||||
data := make([]dto.OpenAIEmbeddingResponseItem,0,len(oResp.Embeddings))
|
||||
for i, emb := range oResp.Embeddings { data = append(data, dto.OpenAIEmbeddingResponseItem{Index:i,Object:"embedding",Embedding:emb}) }
|
||||
usage := &dto.Usage{PromptTokens: oResp.PromptEvalCount, CompletionTokens:0, TotalTokens: oResp.PromptEvalCount}
|
||||
embResp := &dto.OpenAIEmbeddingResponse{Object:"list", Data:data, Model: info.UpstreamModelName, Usage:*usage}
|
||||
if err = common.Unmarshal(body, &oResp); err != nil {
|
||||
return nil, types.NewOpenAIError(err, types.ErrorCodeBadResponseBody, http.StatusInternalServerError)
|
||||
}
|
||||
if oResp.Error != "" {
|
||||
return nil, types.NewOpenAIError(fmt.Errorf("ollama error: %s", oResp.Error), types.ErrorCodeBadResponseBody, http.StatusInternalServerError)
|
||||
}
|
||||
data := make([]dto.OpenAIEmbeddingResponseItem, 0, len(oResp.Embeddings))
|
||||
for i, emb := range oResp.Embeddings {
|
||||
data = append(data, dto.OpenAIEmbeddingResponseItem{Index: i, Object: "embedding", Embedding: emb})
|
||||
}
|
||||
usage := &dto.Usage{PromptTokens: oResp.PromptEvalCount, CompletionTokens: 0, TotalTokens: oResp.PromptEvalCount}
|
||||
embResp := &dto.OpenAIEmbeddingResponse{Object: "list", Data: data, Model: info.UpstreamModelName, Usage: *usage}
|
||||
out, _ := common.Marshal(embResp)
|
||||
service.IOCopyBytesGracefully(c, resp, out)
|
||||
return usage, nil
|
||||
}
|
||||
|
||||
|
||||
@@ -1,210 +1,278 @@
|
||||
package ollama
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"one-api/common"
|
||||
"one-api/dto"
|
||||
"one-api/logger"
|
||||
relaycommon "one-api/relay/common"
|
||||
"one-api/relay/helper"
|
||||
"one-api/service"
|
||||
"one-api/types"
|
||||
"strings"
|
||||
"time"
|
||||
"bufio"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"one-api/common"
|
||||
"one-api/dto"
|
||||
"one-api/logger"
|
||||
relaycommon "one-api/relay/common"
|
||||
"one-api/relay/helper"
|
||||
"one-api/service"
|
||||
"one-api/types"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
type ollamaChatStreamChunk struct {
|
||||
Model string `json:"model"`
|
||||
CreatedAt string `json:"created_at"`
|
||||
// chat
|
||||
Message *struct {
|
||||
Role string `json:"role"`
|
||||
Content string `json:"content"`
|
||||
Thinking json.RawMessage `json:"thinking"`
|
||||
ToolCalls []struct {
|
||||
Function struct {
|
||||
Name string `json:"name"`
|
||||
Arguments interface{} `json:"arguments"`
|
||||
} `json:"function"`
|
||||
} `json:"tool_calls"`
|
||||
} `json:"message"`
|
||||
// generate
|
||||
Response string `json:"response"`
|
||||
Done bool `json:"done"`
|
||||
DoneReason string `json:"done_reason"`
|
||||
TotalDuration int64 `json:"total_duration"`
|
||||
LoadDuration int64 `json:"load_duration"`
|
||||
PromptEvalCount int `json:"prompt_eval_count"`
|
||||
EvalCount int `json:"eval_count"`
|
||||
PromptEvalDuration int64 `json:"prompt_eval_duration"`
|
||||
EvalDuration int64 `json:"eval_duration"`
|
||||
Model string `json:"model"`
|
||||
CreatedAt string `json:"created_at"`
|
||||
// chat
|
||||
Message *struct {
|
||||
Role string `json:"role"`
|
||||
Content string `json:"content"`
|
||||
Thinking json.RawMessage `json:"thinking"`
|
||||
ToolCalls []struct {
|
||||
Function struct {
|
||||
Name string `json:"name"`
|
||||
Arguments interface{} `json:"arguments"`
|
||||
} `json:"function"`
|
||||
} `json:"tool_calls"`
|
||||
} `json:"message"`
|
||||
// generate
|
||||
Response string `json:"response"`
|
||||
Done bool `json:"done"`
|
||||
DoneReason string `json:"done_reason"`
|
||||
TotalDuration int64 `json:"total_duration"`
|
||||
LoadDuration int64 `json:"load_duration"`
|
||||
PromptEvalCount int `json:"prompt_eval_count"`
|
||||
EvalCount int `json:"eval_count"`
|
||||
PromptEvalDuration int64 `json:"prompt_eval_duration"`
|
||||
EvalDuration int64 `json:"eval_duration"`
|
||||
}
|
||||
|
||||
func toUnix(ts string) int64 {
|
||||
if ts == "" { return time.Now().Unix() }
|
||||
// try time.RFC3339 or with nanoseconds
|
||||
t, err := time.Parse(time.RFC3339Nano, ts)
|
||||
if err != nil { t2, err2 := time.Parse(time.RFC3339, ts); if err2==nil { return t2.Unix() }; return time.Now().Unix() }
|
||||
return t.Unix()
|
||||
if ts == "" {
|
||||
return time.Now().Unix()
|
||||
}
|
||||
// try time.RFC3339 or with nanoseconds
|
||||
t, err := time.Parse(time.RFC3339Nano, ts)
|
||||
if err != nil {
|
||||
t2, err2 := time.Parse(time.RFC3339, ts)
|
||||
if err2 == nil {
|
||||
return t2.Unix()
|
||||
}
|
||||
return time.Now().Unix()
|
||||
}
|
||||
return t.Unix()
|
||||
}
|
||||
|
||||
func ollamaStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Response) (*dto.Usage, *types.NewAPIError) {
|
||||
if resp == nil || resp.Body == nil { return nil, types.NewOpenAIError(fmt.Errorf("empty response"), types.ErrorCodeBadResponse, http.StatusBadRequest) }
|
||||
defer service.CloseResponseBodyGracefully(resp)
|
||||
if resp == nil || resp.Body == nil {
|
||||
return nil, types.NewOpenAIError(fmt.Errorf("empty response"), types.ErrorCodeBadResponse, http.StatusBadRequest)
|
||||
}
|
||||
defer service.CloseResponseBodyGracefully(resp)
|
||||
|
||||
helper.SetEventStreamHeaders(c)
|
||||
scanner := bufio.NewScanner(resp.Body)
|
||||
usage := &dto.Usage{}
|
||||
var model = info.UpstreamModelName
|
||||
var responseId = common.GetUUID()
|
||||
var created = time.Now().Unix()
|
||||
var toolCallIndex int
|
||||
start := helper.GenerateStartEmptyResponse(responseId, created, model, nil)
|
||||
if data, err := common.Marshal(start); err == nil { _ = helper.StringData(c, string(data)) }
|
||||
helper.SetEventStreamHeaders(c)
|
||||
scanner := bufio.NewScanner(resp.Body)
|
||||
usage := &dto.Usage{}
|
||||
var model = info.UpstreamModelName
|
||||
var responseId = common.GetUUID()
|
||||
var created = time.Now().Unix()
|
||||
var toolCallIndex int
|
||||
start := helper.GenerateStartEmptyResponse(responseId, created, model, nil)
|
||||
if data, err := common.Marshal(start); err == nil {
|
||||
_ = helper.StringData(c, string(data))
|
||||
}
|
||||
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
line = strings.TrimSpace(line)
|
||||
if line == "" { continue }
|
||||
var chunk ollamaChatStreamChunk
|
||||
if err := json.Unmarshal([]byte(line), &chunk); err != nil {
|
||||
logger.LogError(c, "ollama stream json decode error: "+err.Error()+" line="+line)
|
||||
return usage, types.NewOpenAIError(err, types.ErrorCodeBadResponseBody, http.StatusInternalServerError)
|
||||
}
|
||||
if chunk.Model != "" { model = chunk.Model }
|
||||
created = toUnix(chunk.CreatedAt)
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
line = strings.TrimSpace(line)
|
||||
if line == "" {
|
||||
continue
|
||||
}
|
||||
var chunk ollamaChatStreamChunk
|
||||
if err := json.Unmarshal([]byte(line), &chunk); err != nil {
|
||||
logger.LogError(c, "ollama stream json decode error: "+err.Error()+" line="+line)
|
||||
return usage, types.NewOpenAIError(err, types.ErrorCodeBadResponseBody, http.StatusInternalServerError)
|
||||
}
|
||||
if chunk.Model != "" {
|
||||
model = chunk.Model
|
||||
}
|
||||
created = toUnix(chunk.CreatedAt)
|
||||
|
||||
if !chunk.Done {
|
||||
// delta content
|
||||
var content string
|
||||
if chunk.Message != nil { content = chunk.Message.Content } else { content = chunk.Response }
|
||||
delta := dto.ChatCompletionsStreamResponse{
|
||||
Id: responseId,
|
||||
Object: "chat.completion.chunk",
|
||||
Created: created,
|
||||
Model: model,
|
||||
Choices: []dto.ChatCompletionsStreamResponseChoice{ {
|
||||
Index: 0,
|
||||
Delta: dto.ChatCompletionsStreamResponseChoiceDelta{ Role: "assistant" },
|
||||
} },
|
||||
}
|
||||
if content != "" { delta.Choices[0].Delta.SetContentString(content) }
|
||||
if chunk.Message != nil && len(chunk.Message.Thinking) > 0 {
|
||||
raw := strings.TrimSpace(string(chunk.Message.Thinking))
|
||||
if raw != "" && raw != "null" { delta.Choices[0].Delta.SetReasoningContent(raw) }
|
||||
}
|
||||
// tool calls
|
||||
if chunk.Message != nil && len(chunk.Message.ToolCalls) > 0 {
|
||||
delta.Choices[0].Delta.ToolCalls = make([]dto.ToolCallResponse,0,len(chunk.Message.ToolCalls))
|
||||
for _, tc := range chunk.Message.ToolCalls {
|
||||
// arguments -> string
|
||||
argBytes, _ := json.Marshal(tc.Function.Arguments)
|
||||
toolId := fmt.Sprintf("call_%d", toolCallIndex)
|
||||
tr := dto.ToolCallResponse{ID:toolId, Type:"function", Function: dto.FunctionResponse{Name: tc.Function.Name, Arguments: string(argBytes)}}
|
||||
tr.SetIndex(toolCallIndex)
|
||||
toolCallIndex++
|
||||
delta.Choices[0].Delta.ToolCalls = append(delta.Choices[0].Delta.ToolCalls, tr)
|
||||
}
|
||||
}
|
||||
if data, err := common.Marshal(delta); err == nil { _ = helper.StringData(c, string(data)) }
|
||||
continue
|
||||
}
|
||||
// done frame
|
||||
// finalize once and break loop
|
||||
usage.PromptTokens = chunk.PromptEvalCount
|
||||
usage.CompletionTokens = chunk.EvalCount
|
||||
usage.TotalTokens = usage.PromptTokens + usage.CompletionTokens
|
||||
finishReason := chunk.DoneReason
|
||||
if finishReason == "" { finishReason = "stop" }
|
||||
// emit stop delta
|
||||
if stop := helper.GenerateStopResponse(responseId, created, model, finishReason); stop != nil {
|
||||
if data, err := common.Marshal(stop); err == nil { _ = helper.StringData(c, string(data)) }
|
||||
}
|
||||
// emit usage frame
|
||||
if final := helper.GenerateFinalUsageResponse(responseId, created, model, *usage); final != nil {
|
||||
if data, err := common.Marshal(final); err == nil { _ = helper.StringData(c, string(data)) }
|
||||
}
|
||||
// send [DONE]
|
||||
helper.Done(c)
|
||||
break
|
||||
}
|
||||
if err := scanner.Err(); err != nil && err != io.EOF { logger.LogError(c, "ollama stream scan error: "+err.Error()) }
|
||||
return usage, nil
|
||||
if !chunk.Done {
|
||||
// delta content
|
||||
var content string
|
||||
if chunk.Message != nil {
|
||||
content = chunk.Message.Content
|
||||
} else {
|
||||
content = chunk.Response
|
||||
}
|
||||
delta := dto.ChatCompletionsStreamResponse{
|
||||
Id: responseId,
|
||||
Object: "chat.completion.chunk",
|
||||
Created: created,
|
||||
Model: model,
|
||||
Choices: []dto.ChatCompletionsStreamResponseChoice{{
|
||||
Index: 0,
|
||||
Delta: dto.ChatCompletionsStreamResponseChoiceDelta{Role: "assistant"},
|
||||
}},
|
||||
}
|
||||
if content != "" {
|
||||
delta.Choices[0].Delta.SetContentString(content)
|
||||
}
|
||||
if chunk.Message != nil && len(chunk.Message.Thinking) > 0 {
|
||||
raw := strings.TrimSpace(string(chunk.Message.Thinking))
|
||||
if raw != "" && raw != "null" {
|
||||
delta.Choices[0].Delta.SetReasoningContent(raw)
|
||||
}
|
||||
}
|
||||
// tool calls
|
||||
if chunk.Message != nil && len(chunk.Message.ToolCalls) > 0 {
|
||||
delta.Choices[0].Delta.ToolCalls = make([]dto.ToolCallResponse, 0, len(chunk.Message.ToolCalls))
|
||||
for _, tc := range chunk.Message.ToolCalls {
|
||||
// arguments -> string
|
||||
argBytes, _ := json.Marshal(tc.Function.Arguments)
|
||||
toolId := fmt.Sprintf("call_%d", toolCallIndex)
|
||||
tr := dto.ToolCallResponse{ID: toolId, Type: "function", Function: dto.FunctionResponse{Name: tc.Function.Name, Arguments: string(argBytes)}}
|
||||
tr.SetIndex(toolCallIndex)
|
||||
toolCallIndex++
|
||||
delta.Choices[0].Delta.ToolCalls = append(delta.Choices[0].Delta.ToolCalls, tr)
|
||||
}
|
||||
}
|
||||
if data, err := common.Marshal(delta); err == nil {
|
||||
_ = helper.StringData(c, string(data))
|
||||
}
|
||||
continue
|
||||
}
|
||||
// done frame
|
||||
// finalize once and break loop
|
||||
usage.PromptTokens = chunk.PromptEvalCount
|
||||
usage.CompletionTokens = chunk.EvalCount
|
||||
usage.TotalTokens = usage.PromptTokens + usage.CompletionTokens
|
||||
finishReason := chunk.DoneReason
|
||||
if finishReason == "" {
|
||||
finishReason = "stop"
|
||||
}
|
||||
// emit stop delta
|
||||
if stop := helper.GenerateStopResponse(responseId, created, model, finishReason); stop != nil {
|
||||
if data, err := common.Marshal(stop); err == nil {
|
||||
_ = helper.StringData(c, string(data))
|
||||
}
|
||||
}
|
||||
// emit usage frame
|
||||
if final := helper.GenerateFinalUsageResponse(responseId, created, model, *usage); final != nil {
|
||||
if data, err := common.Marshal(final); err == nil {
|
||||
_ = helper.StringData(c, string(data))
|
||||
}
|
||||
}
|
||||
// send [DONE]
|
||||
helper.Done(c)
|
||||
break
|
||||
}
|
||||
if err := scanner.Err(); err != nil && err != io.EOF {
|
||||
logger.LogError(c, "ollama stream scan error: "+err.Error())
|
||||
}
|
||||
return usage, nil
|
||||
}
|
||||
|
||||
// non-stream handler for chat/generate
|
||||
func ollamaChatHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Response) (*dto.Usage, *types.NewAPIError) {
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil { return nil, types.NewOpenAIError(err, types.ErrorCodeReadResponseBodyFailed, http.StatusInternalServerError) }
|
||||
service.CloseResponseBodyGracefully(resp)
|
||||
raw := string(body)
|
||||
if common.DebugEnabled { println("ollama non-stream raw resp:", raw) }
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return nil, types.NewOpenAIError(err, types.ErrorCodeReadResponseBodyFailed, http.StatusInternalServerError)
|
||||
}
|
||||
service.CloseResponseBodyGracefully(resp)
|
||||
raw := string(body)
|
||||
if common.DebugEnabled {
|
||||
println("ollama non-stream raw resp:", raw)
|
||||
}
|
||||
|
||||
lines := strings.Split(raw, "\n")
|
||||
var (
|
||||
aggContent strings.Builder
|
||||
reasoningBuilder strings.Builder
|
||||
lastChunk ollamaChatStreamChunk
|
||||
parsedAny bool
|
||||
)
|
||||
for _, ln := range lines {
|
||||
ln = strings.TrimSpace(ln)
|
||||
if ln == "" { continue }
|
||||
var ck ollamaChatStreamChunk
|
||||
if err := json.Unmarshal([]byte(ln), &ck); err != nil {
|
||||
if len(lines) == 1 { return nil, types.NewOpenAIError(err, types.ErrorCodeBadResponseBody, http.StatusInternalServerError) }
|
||||
continue
|
||||
}
|
||||
parsedAny = true
|
||||
lastChunk = ck
|
||||
if ck.Message != nil && len(ck.Message.Thinking) > 0 {
|
||||
raw := strings.TrimSpace(string(ck.Message.Thinking))
|
||||
if raw != "" && raw != "null" { reasoningBuilder.WriteString(raw) }
|
||||
}
|
||||
if ck.Message != nil && ck.Message.Content != "" { aggContent.WriteString(ck.Message.Content) } else if ck.Response != "" { aggContent.WriteString(ck.Response) }
|
||||
}
|
||||
lines := strings.Split(raw, "\n")
|
||||
var (
|
||||
aggContent strings.Builder
|
||||
reasoningBuilder strings.Builder
|
||||
lastChunk ollamaChatStreamChunk
|
||||
parsedAny bool
|
||||
)
|
||||
for _, ln := range lines {
|
||||
ln = strings.TrimSpace(ln)
|
||||
if ln == "" {
|
||||
continue
|
||||
}
|
||||
var ck ollamaChatStreamChunk
|
||||
if err := json.Unmarshal([]byte(ln), &ck); err != nil {
|
||||
if len(lines) == 1 {
|
||||
return nil, types.NewOpenAIError(err, types.ErrorCodeBadResponseBody, http.StatusInternalServerError)
|
||||
}
|
||||
continue
|
||||
}
|
||||
parsedAny = true
|
||||
lastChunk = ck
|
||||
if ck.Message != nil && len(ck.Message.Thinking) > 0 {
|
||||
raw := strings.TrimSpace(string(ck.Message.Thinking))
|
||||
if raw != "" && raw != "null" {
|
||||
reasoningBuilder.WriteString(raw)
|
||||
}
|
||||
}
|
||||
if ck.Message != nil && ck.Message.Content != "" {
|
||||
aggContent.WriteString(ck.Message.Content)
|
||||
} else if ck.Response != "" {
|
||||
aggContent.WriteString(ck.Response)
|
||||
}
|
||||
}
|
||||
|
||||
if !parsedAny {
|
||||
var single ollamaChatStreamChunk
|
||||
if err := json.Unmarshal(body, &single); err != nil { return nil, types.NewOpenAIError(err, types.ErrorCodeBadResponseBody, http.StatusInternalServerError) }
|
||||
lastChunk = single
|
||||
if single.Message != nil {
|
||||
if len(single.Message.Thinking) > 0 { raw := strings.TrimSpace(string(single.Message.Thinking)); if raw != "" && raw != "null" { reasoningBuilder.WriteString(raw) } }
|
||||
aggContent.WriteString(single.Message.Content)
|
||||
} else { aggContent.WriteString(single.Response) }
|
||||
}
|
||||
if !parsedAny {
|
||||
var single ollamaChatStreamChunk
|
||||
if err := json.Unmarshal(body, &single); err != nil {
|
||||
return nil, types.NewOpenAIError(err, types.ErrorCodeBadResponseBody, http.StatusInternalServerError)
|
||||
}
|
||||
lastChunk = single
|
||||
if single.Message != nil {
|
||||
if len(single.Message.Thinking) > 0 {
|
||||
raw := strings.TrimSpace(string(single.Message.Thinking))
|
||||
if raw != "" && raw != "null" {
|
||||
reasoningBuilder.WriteString(raw)
|
||||
}
|
||||
}
|
||||
aggContent.WriteString(single.Message.Content)
|
||||
} else {
|
||||
aggContent.WriteString(single.Response)
|
||||
}
|
||||
}
|
||||
|
||||
model := lastChunk.Model
|
||||
if model == "" { model = info.UpstreamModelName }
|
||||
created := toUnix(lastChunk.CreatedAt)
|
||||
usage := &dto.Usage{PromptTokens: lastChunk.PromptEvalCount, CompletionTokens: lastChunk.EvalCount, TotalTokens: lastChunk.PromptEvalCount + lastChunk.EvalCount}
|
||||
content := aggContent.String()
|
||||
finishReason := lastChunk.DoneReason
|
||||
if finishReason == "" { finishReason = "stop" }
|
||||
model := lastChunk.Model
|
||||
if model == "" {
|
||||
model = info.UpstreamModelName
|
||||
}
|
||||
created := toUnix(lastChunk.CreatedAt)
|
||||
usage := &dto.Usage{PromptTokens: lastChunk.PromptEvalCount, CompletionTokens: lastChunk.EvalCount, TotalTokens: lastChunk.PromptEvalCount + lastChunk.EvalCount}
|
||||
content := aggContent.String()
|
||||
finishReason := lastChunk.DoneReason
|
||||
if finishReason == "" {
|
||||
finishReason = "stop"
|
||||
}
|
||||
|
||||
msg := dto.Message{Role: "assistant", Content: contentPtr(content)}
|
||||
if rc := reasoningBuilder.String(); rc != "" { msg.ReasoningContent = rc }
|
||||
full := dto.OpenAITextResponse{
|
||||
Id: common.GetUUID(),
|
||||
Model: model,
|
||||
Object: "chat.completion",
|
||||
Created: created,
|
||||
Choices: []dto.OpenAITextResponseChoice{ {
|
||||
Index: 0,
|
||||
Message: msg,
|
||||
FinishReason: finishReason,
|
||||
} },
|
||||
Usage: *usage,
|
||||
}
|
||||
out, _ := common.Marshal(full)
|
||||
service.IOCopyBytesGracefully(c, resp, out)
|
||||
return usage, nil
|
||||
msg := dto.Message{Role: "assistant", Content: contentPtr(content)}
|
||||
if rc := reasoningBuilder.String(); rc != "" {
|
||||
msg.ReasoningContent = rc
|
||||
}
|
||||
full := dto.OpenAITextResponse{
|
||||
Id: common.GetUUID(),
|
||||
Model: model,
|
||||
Object: "chat.completion",
|
||||
Created: created,
|
||||
Choices: []dto.OpenAITextResponseChoice{{
|
||||
Index: 0,
|
||||
Message: msg,
|
||||
FinishReason: finishReason,
|
||||
}},
|
||||
Usage: *usage,
|
||||
}
|
||||
out, _ := common.Marshal(full)
|
||||
service.IOCopyBytesGracefully(c, resp, out)
|
||||
return usage, nil
|
||||
}
|
||||
|
||||
func contentPtr(s string) *string { if s=="" { return nil }; return &s }
|
||||
func contentPtr(s string) *string {
|
||||
if s == "" {
|
||||
return nil
|
||||
}
|
||||
return &s
|
||||
}
|
||||
|
||||
@@ -13,4 +13,4 @@ var ModelList = []string{
|
||||
"deepseek-ai/DeepSeek-V3.1",
|
||||
}
|
||||
|
||||
const ChannelName = "submodel"
|
||||
const ChannelName = "submodel"
|
||||
|
||||
Reference in New Issue
Block a user