From 51c4cd9ab52a5b9e7263b3e3ed483594a0900cd3 Mon Sep 17 00:00:00 2001 From: somnifex <98788152+somnifex@users.noreply.github.com> Date: Mon, 15 Sep 2025 23:01:14 +0800 Subject: [PATCH 01/11] =?UTF-8?q?feat:=20=E9=87=8D=E6=9E=84ollama=E6=B8=A0?= =?UTF-8?q?=E9=81=93=E8=AF=B7=E6=B1=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- relay/channel/ollama/adaptor.go | 31 ++-- relay/channel/ollama/dto.go | 91 +++++---- relay/channel/ollama/relay-ollama.go | 263 ++++++++++++++++----------- relay/channel/ollama/stream.go | 165 +++++++++++++++++ 4 files changed, 400 insertions(+), 150 deletions(-) create mode 100644 relay/channel/ollama/stream.go diff --git a/relay/channel/ollama/adaptor.go b/relay/channel/ollama/adaptor.go index d6b5b697e..3732be91b 100644 --- a/relay/channel/ollama/adaptor.go +++ b/relay/channel/ollama/adaptor.go @@ -10,6 +10,7 @@ import ( relaycommon "one-api/relay/common" relayconstant "one-api/relay/constant" "one-api/types" + "strings" "github.com/gin-gonic/gin" ) @@ -48,15 +49,15 @@ func (a *Adaptor) Init(info *relaycommon.RelayInfo) { } func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) { - if info.RelayFormat == types.RelayFormatClaude { - return info.ChannelBaseUrl + "/v1/chat/completions", nil - } - switch info.RelayMode { - case relayconstant.RelayModeEmbeddings: + // embeddings fixed endpoint + if info.RelayMode == relayconstant.RelayModeEmbeddings { return info.ChannelBaseUrl + "/api/embed", nil - default: - return relaycommon.GetFullRequestURL(info.ChannelBaseUrl, info.RequestURLPath, info.ChannelType), nil } + // For chat vs generate: if original path contains "/v1/completions" map to generate; otherwise chat + if strings.Contains(info.RequestURLPath, "/v1/completions") || info.RelayMode == relayconstant.RelayModeCompletions { + return info.ChannelBaseUrl + "/api/generate", nil + } + return info.ChannelBaseUrl + "/api/chat", nil } func (a *Adaptor) SetupRequestHeader(c *gin.Context, req *http.Header, info *relaycommon.RelayInfo) error { @@ -66,10 +67,12 @@ func (a *Adaptor) SetupRequestHeader(c *gin.Context, req *http.Header, info *rel } func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayInfo, request *dto.GeneralOpenAIRequest) (any, error) { - if request == nil { - return nil, errors.New("request is nil") + if request == nil { return nil, errors.New("request is nil") } + // decide generate or chat + if strings.Contains(info.RequestURLPath, "/v1/completions") || info.RelayMode == relayconstant.RelayModeCompletions { + return openAIToGenerate(c, request) } - return requestOpenAI2Ollama(c, request) + return openAIChatToOllamaChat(c, request) } func (a *Adaptor) ConvertRerankRequest(c *gin.Context, relayMode int, request dto.RerankRequest) (any, error) { @@ -92,15 +95,13 @@ func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, request func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (usage any, err *types.NewAPIError) { switch info.RelayMode { case relayconstant.RelayModeEmbeddings: - usage, err = ollamaEmbeddingHandler(c, info, resp) + return ollamaEmbeddingHandler(c, info, resp) default: if info.IsStream { - usage, err = openai.OaiStreamHandler(c, info, resp) - } else { - usage, err = openai.OpenaiHandler(c, info, resp) + return ollamaStreamHandler(c, info, resp) } + return ollamaChatHandler(c, info, resp) } - return } func (a *Adaptor) GetModelList() []string { diff --git a/relay/channel/ollama/dto.go 
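The rewritten GetRequestURL above reduces endpoint selection to three cases: embeddings always hit Ollama's /api/embed, OpenAI-style text completions map to /api/generate, and everything else goes to /api/chat. A minimal standalone sketch of that decision, with plain parameters standing in for the project's RelayInfo (the helper name and signature here are illustrative, not part of the patch):

package main

import (
	"fmt"
	"strings"
)

// ollamaEndpoint mirrors the routing logic of the patched GetRequestURL:
// embeddings are pinned to /api/embed, OpenAI-style text completions map
// to /api/generate, and chat requests map to /api/chat.
func ollamaEndpoint(baseURL, requestPath string, isEmbeddings, isCompletions bool) string {
	if isEmbeddings {
		return baseURL + "/api/embed"
	}
	if isCompletions || strings.Contains(requestPath, "/v1/completions") {
		return baseURL + "/api/generate"
	}
	return baseURL + "/api/chat"
}

func main() {
	base := "http://localhost:11434"
	fmt.Println(ollamaEndpoint(base, "/v1/chat/completions", false, false)) // .../api/chat
	fmt.Println(ollamaEndpoint(base, "/v1/completions", false, true))       // .../api/generate
	fmt.Println(ollamaEndpoint(base, "/v1/embeddings", true, false))        // .../api/embed
}
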
b/relay/channel/ollama/dto.go index 317c2a4a1..b3d083dce 100644 --- a/relay/channel/ollama/dto.go +++ b/relay/channel/ollama/dto.go @@ -5,45 +5,70 @@ import ( "one-api/dto" ) -type OllamaRequest struct { - Model string `json:"model,omitempty"` - Messages []dto.Message `json:"messages,omitempty"` - Stream bool `json:"stream,omitempty"` - Temperature *float64 `json:"temperature,omitempty"` - Seed float64 `json:"seed,omitempty"` - Topp float64 `json:"top_p,omitempty"` - TopK int `json:"top_k,omitempty"` - Stop any `json:"stop,omitempty"` - MaxTokens uint `json:"max_tokens,omitempty"` - Tools []dto.ToolCallRequest `json:"tools,omitempty"` - ResponseFormat any `json:"response_format,omitempty"` - FrequencyPenalty float64 `json:"frequency_penalty,omitempty"` - PresencePenalty float64 `json:"presence_penalty,omitempty"` - Suffix any `json:"suffix,omitempty"` - StreamOptions *dto.StreamOptions `json:"stream_options,omitempty"` - Prompt any `json:"prompt,omitempty"` - Think json.RawMessage `json:"think,omitempty"` +// OllamaChatMessage represents a single chat message +type OllamaChatMessage struct { + Role string `json:"role"` + Content string `json:"content,omitempty"` + Images []string `json:"images,omitempty"` + ToolCalls []OllamaToolCall `json:"tool_calls,omitempty"` + ToolName string `json:"tool_name,omitempty"` + Thinking json.RawMessage `json:"thinking,omitempty"` } -type Options struct { - Seed int `json:"seed,omitempty"` - Temperature *float64 `json:"temperature,omitempty"` - TopK int `json:"top_k,omitempty"` - TopP float64 `json:"top_p,omitempty"` - FrequencyPenalty float64 `json:"frequency_penalty,omitempty"` - PresencePenalty float64 `json:"presence_penalty,omitempty"` - NumPredict int `json:"num_predict,omitempty"` - NumCtx int `json:"num_ctx,omitempty"` +type OllamaToolFunction struct { + Name string `json:"name"` + Description string `json:"description,omitempty"` + Parameters interface{} `json:"parameters,omitempty"` +} + +type OllamaTool struct { + Type string `json:"type"` + Function OllamaToolFunction `json:"function"` +} + +type OllamaToolCall struct { + Function struct { + Name string `json:"name"` + Arguments interface{} `json:"arguments"` + } `json:"function"` +} + +// OllamaChatRequest -> /api/chat +type OllamaChatRequest struct { + Model string `json:"model"` + Messages []OllamaChatMessage `json:"messages"` + Tools interface{} `json:"tools,omitempty"` + Format interface{} `json:"format,omitempty"` + Stream bool `json:"stream,omitempty"` + Options map[string]any `json:"options,omitempty"` + KeepAlive interface{} `json:"keep_alive,omitempty"` + Think json.RawMessage `json:"think,omitempty"` +} + +// OllamaGenerateRequest -> /api/generate +type OllamaGenerateRequest struct { + Model string `json:"model"` + Prompt string `json:"prompt,omitempty"` + Suffix string `json:"suffix,omitempty"` + Images []string `json:"images,omitempty"` + Format interface{} `json:"format,omitempty"` + Stream bool `json:"stream,omitempty"` + Options map[string]any `json:"options,omitempty"` + KeepAlive interface{} `json:"keep_alive,omitempty"` + Think json.RawMessage `json:"think,omitempty"` } type OllamaEmbeddingRequest struct { - Model string `json:"model,omitempty"` - Input []string `json:"input"` - Options *Options `json:"options,omitempty"` + Model string `json:"model"` + Input interface{} `json:"input"` + Options map[string]any `json:"options,omitempty"` + Dimensions int `json:"dimensions,omitempty"` } type OllamaEmbeddingResponse struct { - Error string `json:"error,omitempty"` - Model string 
`json:"model"` - Embedding [][]float64 `json:"embeddings,omitempty"` + Error string `json:"error,omitempty"` + Model string `json:"model"` + Embeddings [][]float64 `json:"embeddings"` + PromptEvalCount int `json:"prompt_eval_count,omitempty"` } + diff --git a/relay/channel/ollama/relay-ollama.go b/relay/channel/ollama/relay-ollama.go index 27c67b4ec..897e22cbd 100644 --- a/relay/channel/ollama/relay-ollama.go +++ b/relay/channel/ollama/relay-ollama.go @@ -1,6 +1,7 @@ package ollama import ( + "encoding/json" "fmt" "io" "net/http" @@ -14,121 +15,179 @@ import ( "github.com/gin-gonic/gin" ) -func requestOpenAI2Ollama(c *gin.Context, request *dto.GeneralOpenAIRequest) (*OllamaRequest, error) { - messages := make([]dto.Message, 0, len(request.Messages)) - for _, message := range request.Messages { - if !message.IsStringContent() { - mediaMessages := message.ParseContent() - for j, mediaMessage := range mediaMessages { - if mediaMessage.Type == dto.ContentTypeImageURL { - imageUrl := mediaMessage.GetImageMedia() - // check if not base64 - if strings.HasPrefix(imageUrl.Url, "http") { - fileData, err := service.GetFileBase64FromUrl(c, imageUrl.Url, "formatting image for Ollama") - if err != nil { - return nil, err +// openAIChatToOllamaChat converts OpenAI-style chat request to Ollama chat +func openAIChatToOllamaChat(c *gin.Context, r *dto.GeneralOpenAIRequest) (*OllamaChatRequest, error) { + chatReq := &OllamaChatRequest{ + Model: r.Model, + Stream: r.Stream, + Options: map[string]any{}, + Think: r.Think, + } + // format mapping + if r.ResponseFormat != nil { + if r.ResponseFormat.Type == "json" { + chatReq.Format = "json" + } else if r.ResponseFormat.Type == "json_schema" { + // supply schema object directly + if len(r.ResponseFormat.JsonSchema) > 0 { + var schema any + _ = json.Unmarshal(r.ResponseFormat.JsonSchema, &schema) + chatReq.Format = schema + } + } + } + + // options mapping + if r.Temperature != nil { chatReq.Options["temperature"] = r.Temperature } + if r.TopP != 0 { chatReq.Options["top_p"] = r.TopP } + if r.TopK != 0 { chatReq.Options["top_k"] = r.TopK } + if r.FrequencyPenalty != 0 { chatReq.Options["frequency_penalty"] = r.FrequencyPenalty } + if r.PresencePenalty != 0 { chatReq.Options["presence_penalty"] = r.PresencePenalty } + if r.Seed != 0 { chatReq.Options["seed"] = int(r.Seed) } + if mt := r.GetMaxTokens(); mt != 0 { chatReq.Options["num_predict"] = int(mt) } + + // Stop -> options.stop (array) + if r.Stop != nil { + switch v := r.Stop.(type) { + case string: + chatReq.Options["stop"] = []string{v} + case []string: + chatReq.Options["stop"] = v + case []any: + arr := make([]string,0,len(v)) + for _, i := range v { if s,ok:=i.(string); ok { arr = append(arr,s) } } + if len(arr)>0 { chatReq.Options["stop"] = arr } + } + } + + // tools + if len(r.Tools) > 0 { + tools := make([]OllamaTool,0,len(r.Tools)) + for _, t := range r.Tools { + tools = append(tools, OllamaTool{Type: "function", Function: OllamaToolFunction{Name: t.Function.Name, Description: t.Function.Description, Parameters: t.Function.Parameters}}) + } + chatReq.Tools = tools + } + + // messages + chatReq.Messages = make([]OllamaChatMessage,0,len(r.Messages)) + for _, m := range r.Messages { + // gather text parts & images + var textBuilder strings.Builder + var images []string + if m.IsStringContent() { + textBuilder.WriteString(m.StringContent()) + } else { + parts := m.ParseContent() + for _, part := range parts { + if part.Type == dto.ContentTypeImageURL { + img := part.GetImageMedia() + if img != nil && 
img.Url != "" { + // ensure base64 dataURL + if strings.HasPrefix(img.Url, "http") { + fileData, err := service.GetFileBase64FromUrl(c, img.Url, "fetch image for ollama chat") + if err != nil { return nil, err } + img.Url = fmt.Sprintf("data:%s;base64,%s", fileData.MimeType, fileData.Base64Data) } - imageUrl.Url = fmt.Sprintf("data:%s;base64,%s", fileData.MimeType, fileData.Base64Data) + images = append(images, img.Url) } - mediaMessage.ImageUrl = imageUrl - mediaMessages[j] = mediaMessage + } else if part.Type == dto.ContentTypeText { + textBuilder.WriteString(part.Text) } } - message.SetMediaContent(mediaMessages) } - messages = append(messages, dto.Message{ - Role: message.Role, - Content: message.Content, - ToolCalls: message.ToolCalls, - ToolCallId: message.ToolCallId, - }) + cm := OllamaChatMessage{Role: m.Role, Content: textBuilder.String()} + if len(images)>0 { cm.Images = images } + // history tool call result message + if m.Role == "tool" && m.Name != nil { cm.ToolName = *m.Name } + // tool calls from assistant previous message + if len(m.ToolCalls)>0 { + calls := make([]OllamaToolCall,0,len(m.ToolCalls)) + for _, tc := range m.ToolCalls { + var args interface{} + if tc.Function.Arguments != "" { _ = json.Unmarshal([]byte(tc.Function.Arguments), &args) } + oc := OllamaToolCall{} + oc.Function.Name = tc.Function.Name + if args==nil { args = map[string]any{} } + oc.Function.Arguments = args + calls = append(calls, oc) + } + cm.ToolCalls = calls + } + chatReq.Messages = append(chatReq.Messages, cm) } - str, ok := request.Stop.(string) - var Stop []string - if ok { - Stop = []string{str} - } else { - Stop, _ = request.Stop.([]string) - } - ollamaRequest := &OllamaRequest{ - Model: request.Model, - Messages: messages, - Stream: request.Stream, - Temperature: request.Temperature, - Seed: request.Seed, - Topp: request.TopP, - TopK: request.TopK, - Stop: Stop, - Tools: request.Tools, - MaxTokens: request.GetMaxTokens(), - ResponseFormat: request.ResponseFormat, - FrequencyPenalty: request.FrequencyPenalty, - PresencePenalty: request.PresencePenalty, - Prompt: request.Prompt, - StreamOptions: request.StreamOptions, - Suffix: request.Suffix, - } - ollamaRequest.Think = request.Think - return ollamaRequest, nil + return chatReq, nil } -func requestOpenAI2Embeddings(request dto.EmbeddingRequest) *OllamaEmbeddingRequest { - return &OllamaEmbeddingRequest{ - Model: request.Model, - Input: request.ParseInput(), - Options: &Options{ - Seed: int(request.Seed), - Temperature: request.Temperature, - TopP: request.TopP, - FrequencyPenalty: request.FrequencyPenalty, - PresencePenalty: request.PresencePenalty, - }, +// openAIToGenerate converts OpenAI completions request to Ollama generate +func openAIToGenerate(c *gin.Context, r *dto.GeneralOpenAIRequest) (*OllamaGenerateRequest, error) { + gen := &OllamaGenerateRequest{ + Model: r.Model, + Stream: r.Stream, + Options: map[string]any{}, + Think: r.Think, } + // Prompt may be in r.Prompt (string or []any) + if r.Prompt != nil { + switch v := r.Prompt.(type) { + case string: + gen.Prompt = v + case []any: + var sb strings.Builder + for _, it := range v { if s,ok:=it.(string); ok { sb.WriteString(s) } } + gen.Prompt = sb.String() + default: + gen.Prompt = fmt.Sprintf("%v", r.Prompt) + } + } + if r.Suffix != nil { if s,ok:=r.Suffix.(string); ok { gen.Suffix = s } } + if r.ResponseFormat != nil { + if r.ResponseFormat.Type == "json" { gen.Format = "json" } else if r.ResponseFormat.Type == "json_schema" { var schema any; _ = 
json.Unmarshal(r.ResponseFormat.JsonSchema,&schema); gen.Format=schema } + } + if r.Temperature != nil { gen.Options["temperature"] = r.Temperature } + if r.TopP != 0 { gen.Options["top_p"] = r.TopP } + if r.TopK != 0 { gen.Options["top_k"] = r.TopK } + if r.FrequencyPenalty != 0 { gen.Options["frequency_penalty"] = r.FrequencyPenalty } + if r.PresencePenalty != 0 { gen.Options["presence_penalty"] = r.PresencePenalty } + if r.Seed != 0 { gen.Options["seed"] = int(r.Seed) } + if mt := r.GetMaxTokens(); mt != 0 { gen.Options["num_predict"] = int(mt) } + if r.Stop != nil { + switch v := r.Stop.(type) { + case string: gen.Options["stop"] = []string{v} + case []string: gen.Options["stop"] = v + case []any: arr:=make([]string,0,len(v)); for _,i:= range v { if s,ok:=i.(string); ok { arr=append(arr,s) } }; if len(arr)>0 { gen.Options["stop"]=arr } + } + } + return gen, nil +} + +func requestOpenAI2Embeddings(r dto.EmbeddingRequest) *OllamaEmbeddingRequest { + opts := map[string]any{} + if r.Temperature != nil { opts["temperature"] = r.Temperature } + if r.TopP != 0 { opts["top_p"] = r.TopP } + if r.TopK != 0 { opts["top_k"] = r.TopK } + if r.FrequencyPenalty != 0 { opts["frequency_penalty"] = r.FrequencyPenalty } + if r.PresencePenalty != 0 { opts["presence_penalty"] = r.PresencePenalty } + if r.Seed != 0 { opts["seed"] = int(r.Seed) } + if r.Dimensions != 0 { opts["dimensions"] = r.Dimensions } + input := r.ParseInput() + if len(input)==1 { return &OllamaEmbeddingRequest{Model:r.Model, Input: input[0], Options: opts, Dimensions:r.Dimensions} } + return &OllamaEmbeddingRequest{Model:r.Model, Input: input, Options: opts, Dimensions:r.Dimensions} } func ollamaEmbeddingHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Response) (*dto.Usage, *types.NewAPIError) { - var ollamaEmbeddingResponse OllamaEmbeddingResponse - responseBody, err := io.ReadAll(resp.Body) - if err != nil { - return nil, types.NewOpenAIError(err, types.ErrorCodeBadResponseBody, http.StatusInternalServerError) - } + var oResp OllamaEmbeddingResponse + body, err := io.ReadAll(resp.Body) + if err != nil { return nil, types.NewOpenAIError(err, types.ErrorCodeBadResponseBody, http.StatusInternalServerError) } service.CloseResponseBodyGracefully(resp) - err = common.Unmarshal(responseBody, &ollamaEmbeddingResponse) - if err != nil { - return nil, types.NewOpenAIError(err, types.ErrorCodeBadResponseBody, http.StatusInternalServerError) - } - if ollamaEmbeddingResponse.Error != "" { - return nil, types.NewOpenAIError(fmt.Errorf("ollama error: %s", ollamaEmbeddingResponse.Error), types.ErrorCodeBadResponseBody, http.StatusInternalServerError) - } - flattenedEmbeddings := flattenEmbeddings(ollamaEmbeddingResponse.Embedding) - data := make([]dto.OpenAIEmbeddingResponseItem, 0, 1) - data = append(data, dto.OpenAIEmbeddingResponseItem{ - Embedding: flattenedEmbeddings, - Object: "embedding", - }) - usage := &dto.Usage{ - TotalTokens: info.PromptTokens, - CompletionTokens: 0, - PromptTokens: info.PromptTokens, - } - embeddingResponse := &dto.OpenAIEmbeddingResponse{ - Object: "list", - Data: data, - Model: info.UpstreamModelName, - Usage: *usage, - } - doResponseBody, err := common.Marshal(embeddingResponse) - if err != nil { - return nil, types.NewOpenAIError(err, types.ErrorCodeBadResponseBody, http.StatusInternalServerError) - } - service.IOCopyBytesGracefully(c, resp, doResponseBody) + if err = common.Unmarshal(body, &oResp); err != nil { return nil, types.NewOpenAIError(err, types.ErrorCodeBadResponseBody, 
http.StatusInternalServerError) } + if oResp.Error != "" { return nil, types.NewOpenAIError(fmt.Errorf("ollama error: %s", oResp.Error), types.ErrorCodeBadResponseBody, http.StatusInternalServerError) } + data := make([]dto.OpenAIEmbeddingResponseItem,0,len(oResp.Embeddings)) + for i, emb := range oResp.Embeddings { data = append(data, dto.OpenAIEmbeddingResponseItem{Index:i,Object:"embedding",Embedding:emb}) } + usage := &dto.Usage{PromptTokens: oResp.PromptEvalCount, CompletionTokens:0, TotalTokens: oResp.PromptEvalCount} + embResp := &dto.OpenAIEmbeddingResponse{Object:"list", Data:data, Model: info.UpstreamModelName, Usage:*usage} + out, _ := common.Marshal(embResp) + service.IOCopyBytesGracefully(c, resp, out) return usage, nil } -func flattenEmbeddings(embeddings [][]float64) []float64 { - flattened := []float64{} - for _, row := range embeddings { - flattened = append(flattened, row...) - } - return flattened -} diff --git a/relay/channel/ollama/stream.go b/relay/channel/ollama/stream.go new file mode 100644 index 000000000..3ae9c6d04 --- /dev/null +++ b/relay/channel/ollama/stream.go @@ -0,0 +1,165 @@ +package ollama + +import ( + "bufio" + "encoding/json" + "fmt" + "io" + "net/http" + "one-api/common" + "one-api/dto" + "one-api/logger" + relaycommon "one-api/relay/common" + "one-api/relay/helper" + "one-api/service" + "one-api/types" + "strings" + "time" + + "github.com/gin-gonic/gin" +) + +// Ollama streaming chunk (chat or generate) +type ollamaChatStreamChunk struct { + Model string `json:"model"` + CreatedAt string `json:"created_at"` + // chat + Message *struct { + Role string `json:"role"` + Content string `json:"content"` + ToolCalls []struct { `json:"tool_calls"` + Function struct { + Name string `json:"name"` + Arguments interface{} `json:"arguments"` + } `json:"function"` + } `json:"tool_calls"` + } `json:"message"` + // generate + Response string `json:"response"` + Done bool `json:"done"` + DoneReason string `json:"done_reason"` + TotalDuration int64 `json:"total_duration"` + LoadDuration int64 `json:"load_duration"` + PromptEvalCount int `json:"prompt_eval_count"` + EvalCount int `json:"eval_count"` + // generate mode may use these + PromptEvalDuration int64 `json:"prompt_eval_duration"` + EvalDuration int64 `json:"eval_duration"` +} + +func toUnix(ts string) int64 { // parse RFC3339 / variant; fallback time.Now + if ts == "" { return time.Now().Unix() } + // try time.RFC3339 or with nanoseconds + t, err := time.Parse(time.RFC3339Nano, ts) + if err != nil { t2, err2 := time.Parse(time.RFC3339, ts); if err2==nil { return t2.Unix() }; return time.Now().Unix() } + return t.Unix() +} + +// streaming handler: convert Ollama stream -> OpenAI SSE +func ollamaStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Response) (*dto.Usage, *types.NewAPIError) { + if resp == nil || resp.Body == nil { return nil, types.NewOpenAIError(fmt.Errorf("empty response"), types.ErrorCodeBadResponse, http.StatusBadRequest) } + defer service.CloseResponseBodyGracefully(resp) + + helper.SetEventStreamHeaders(c) + scanner := bufio.NewScanner(resp.Body) + usage := &dto.Usage{} + var model = info.UpstreamModelName + var responseId = common.GetUUID() + var created = time.Now().Unix() + var aggregatedText strings.Builder + var toolCallIndex int + // send start event + start := helper.GenerateStartEmptyResponse(responseId, created, model, nil) + if data, err := common.Marshal(start); err == nil { _ = helper.StringData(c, string(data)) } + + for scanner.Scan() { + line := 
scanner.Text() + line = strings.TrimSpace(line) + if line == "" { continue } + var chunk ollamaChatStreamChunk + if err := json.Unmarshal([]byte(line), &chunk); err != nil { + logger.LogError(c, "ollama stream json decode error: "+err.Error()+" line="+line) + return usage, types.NewOpenAIError(err, types.ErrorCodeBadResponseBody, http.StatusInternalServerError) + } + if chunk.Model != "" { model = chunk.Model } + created = toUnix(chunk.CreatedAt) + + if !chunk.Done { + // delta content + var content string + if chunk.Message != nil { content = chunk.Message.Content } else { content = chunk.Response } + if content != "" { aggregatedText.WriteString(content) } + delta := dto.ChatCompletionsStreamResponse{ + Id: responseId, + Object: "chat.completion.chunk", + Created: created, + Model: model, + Choices: []dto.ChatCompletionsStreamResponseChoice{ { + Index: 0, + Delta: dto.ChatCompletionsStreamResponseChoiceDelta{ Role: "assistant" }, + } }, + } + if content != "" { delta.Choices[0].Delta.SetContentString(content) } + // tool calls + if chunk.Message != nil && len(chunk.Message.ToolCalls) > 0 { + delta.Choices[0].Delta.ToolCalls = make([]dto.ToolCallResponse,0,len(chunk.Message.ToolCalls)) + for _, tc := range chunk.Message.ToolCalls { + // arguments -> string + argBytes, _ := json.Marshal(tc.Function.Arguments) + tr := dto.ToolCallResponse{ID:"", Type:nil, Function: dto.FunctionResponse{Name: tc.Function.Name, Arguments: string(argBytes)}} + tr.SetIndex(toolCallIndex) + toolCallIndex++ + delta.Choices[0].Delta.ToolCalls = append(delta.Choices[0].Delta.ToolCalls, tr) + } + } + if data, err := common.Marshal(delta); err == nil { _ = helper.StringData(c, string(data)) } + continue + } + // done frame + usage.PromptTokens = chunk.PromptEvalCount + usage.CompletionTokens = chunk.EvalCount + usage.TotalTokens = usage.PromptTokens + usage.CompletionTokens + finishReason := chunk.DoneReason + if finishReason == "" { finishReason = "stop" } + stop := helper.GenerateStopResponse(responseId, created, model, finishReason) + if data, err := common.Marshal(stop); err == nil { _ = helper.StringData(c, string(data)) } + final := helper.GenerateFinalUsageResponse(responseId, created, model, *usage) + if data, err := common.Marshal(final); err == nil { _ = helper.StringData(c, string(data)) } + } + if err := scanner.Err(); err != nil && err != io.EOF { logger.LogError(c, "ollama stream scan error: "+err.Error()) } + return usage, nil +} + +// non-stream handler for chat/generate +func ollamaChatHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Response) (*dto.Usage, *types.NewAPIError) { + body, err := io.ReadAll(resp.Body) + if err != nil { return nil, types.NewOpenAIError(err, types.ErrorCodeReadResponseBodyFailed, http.StatusInternalServerError) } + service.CloseResponseBodyGracefully(resp) + if common.DebugEnabled { println("ollama non-stream resp:", string(body)) } + var chunk ollamaChatStreamChunk + if err = json.Unmarshal(body, &chunk); err != nil { return nil, types.NewOpenAIError(err, types.ErrorCodeBadResponseBody, http.StatusInternalServerError) } + model := chunk.Model + if model == "" { model = info.UpstreamModelName } + created := toUnix(chunk.CreatedAt) + content := "" + if chunk.Message != nil { content = chunk.Message.Content } else { content = chunk.Response } + usage := &dto.Usage{PromptTokens: chunk.PromptEvalCount, CompletionTokens: chunk.EvalCount, TotalTokens: chunk.PromptEvalCount + chunk.EvalCount} + // Build OpenAI style response + full := dto.OpenAITextResponse{ + Id: 
common.GetUUID(), + Model: model, + Object: "chat.completion", + Created: created, + Choices: []dto.OpenAITextResponseChoice{ { + Index: 0, + Message: dto.Message{Role: "assistant", Content: contentPtr(content)}, + FinishReason: func() string { if chunk.DoneReason == "" { return "stop" } ; return chunk.DoneReason }(), + } }, + Usage: *usage, + } + out, _ := common.Marshal(full) + service.IOCopyBytesGracefully(c, resp, out) + return usage, nil +} + +func contentPtr(s string) *string { if s=="" { return nil }; return &s } From fc38c480a1fd764bc5b9d0d7c3fd07acd7bf0694 Mon Sep 17 00:00:00 2001 From: somnifex <98788152+somnifex@users.noreply.github.com> Date: Mon, 15 Sep 2025 23:09:10 +0800 Subject: [PATCH 02/11] =?UTF-8?q?fix:=20=E4=BC=98=E5=8C=96ollamaChatStream?= =?UTF-8?q?Chunk=E7=BB=93=E6=9E=84=E4=BD=93=E5=AD=97=E6=AE=B5=E6=A0=BC?= =?UTF-8?q?=E5=BC=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- relay/channel/ollama/stream.go | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/relay/channel/ollama/stream.go b/relay/channel/ollama/stream.go index 3ae9c6d04..db615e8bd 100644 --- a/relay/channel/ollama/stream.go +++ b/relay/channel/ollama/stream.go @@ -25,9 +25,9 @@ type ollamaChatStreamChunk struct { CreatedAt string `json:"created_at"` // chat Message *struct { - Role string `json:"role"` - Content string `json:"content"` - ToolCalls []struct { `json:"tool_calls"` + Role string `json:"role"` + Content string `json:"content"` + ToolCalls []struct { Function struct { Name string `json:"name"` Arguments interface{} `json:"arguments"` @@ -66,7 +66,6 @@ func ollamaStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http var model = info.UpstreamModelName var responseId = common.GetUUID() var created = time.Now().Unix() - var aggregatedText strings.Builder var toolCallIndex int // send start event start := helper.GenerateStartEmptyResponse(responseId, created, model, nil) From 7d6ba52d85fbe88244b1e8e5c4866b63da3cae6a Mon Sep 17 00:00:00 2001 From: somnifex <98788152+somnifex@users.noreply.github.com> Date: Mon, 15 Sep 2025 23:15:46 +0800 Subject: [PATCH 03/11] =?UTF-8?q?refactor:=20=E6=9B=B4=E6=96=B0=E8=AF=B7?= =?UTF-8?q?=E6=B1=82=E8=BD=AC=E6=8D=A2=E9=80=BB=E8=BE=91=EF=BC=8C=E4=BC=98?= =?UTF-8?q?=E5=8C=96=E5=B7=A5=E5=85=B7=E8=B0=83=E7=94=A8=E8=A7=A3=E6=9E=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- relay/channel/ollama/adaptor.go | 3 ++- relay/channel/ollama/dto.go | 1 - relay/channel/ollama/relay-ollama.go | 26 ++++++++++++++------------ relay/channel/ollama/stream.go | 1 - 4 files changed, 16 insertions(+), 15 deletions(-) diff --git a/relay/channel/ollama/adaptor.go b/relay/channel/ollama/adaptor.go index 3732be91b..d66839f7b 100644 --- a/relay/channel/ollama/adaptor.go +++ b/relay/channel/ollama/adaptor.go @@ -32,7 +32,8 @@ func (a *Adaptor) ConvertClaudeRequest(c *gin.Context, info *relaycommon.RelayIn openaiRequest.(*dto.GeneralOpenAIRequest).StreamOptions = &dto.StreamOptions{ IncludeUsage: true, } - return requestOpenAI2Ollama(c, openaiRequest.(*dto.GeneralOpenAIRequest)) + // map to ollama chat request (Claude -> OpenAI -> Ollama chat) + return openAIChatToOllamaChat(c, openaiRequest.(*dto.GeneralOpenAIRequest)) } func (a *Adaptor) ConvertAudioRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.AudioRequest) (io.Reader, error) { diff --git a/relay/channel/ollama/dto.go b/relay/channel/ollama/dto.go index b3d083dce..a3e325e2f 100644 --- 
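A detail worth noting around the tool-call structs above: Ollama returns function arguments as a JSON object, while the OpenAI wire format carries them as a serialized string, so the adaptor re-marshals the decoded value. A self-contained sketch of that round trip, using trimmed standalone types rather than the project's dto structs:

package main

import (
	"encoding/json"
	"fmt"
)

// ollamaMsg is the shape of one Ollama chat message carrying tool calls,
// trimmed to the fields used here.
type ollamaMsg struct {
	ToolCalls []struct {
		Function struct {
			Name      string      `json:"name"`
			Arguments interface{} `json:"arguments"` // a JSON object in Ollama
		} `json:"function"`
	} `json:"tool_calls"`
}

func main() {
	raw := `{"tool_calls":[{"function":{"name":"get_weather","arguments":{"city":"Berlin"}}}]}`
	var m ollamaMsg
	if err := json.Unmarshal([]byte(raw), &m); err != nil {
		panic(err)
	}
	for _, tc := range m.ToolCalls {
		// OpenAI-style clients expect arguments as a JSON *string*,
		// so re-marshal the decoded object.
		argBytes, _ := json.Marshal(tc.Function.Arguments)
		fmt.Printf("%s(%s)\n", tc.Function.Name, string(argBytes)) // get_weather({"city":"Berlin"})
	}
}
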
a/relay/channel/ollama/dto.go +++ b/relay/channel/ollama/dto.go @@ -2,7 +2,6 @@ package ollama import ( "encoding/json" - "one-api/dto" ) // OllamaChatMessage represents a single chat message diff --git a/relay/channel/ollama/relay-ollama.go b/relay/channel/ollama/relay-ollama.go index 897e22cbd..45424633c 100644 --- a/relay/channel/ollama/relay-ollama.go +++ b/relay/channel/ollama/relay-ollama.go @@ -101,18 +101,21 @@ func openAIChatToOllamaChat(c *gin.Context, r *dto.GeneralOpenAIRequest) (*Ollam // history tool call result message if m.Role == "tool" && m.Name != nil { cm.ToolName = *m.Name } // tool calls from assistant previous message - if len(m.ToolCalls)>0 { - calls := make([]OllamaToolCall,0,len(m.ToolCalls)) - for _, tc := range m.ToolCalls { - var args interface{} - if tc.Function.Arguments != "" { _ = json.Unmarshal([]byte(tc.Function.Arguments), &args) } - oc := OllamaToolCall{} - oc.Function.Name = tc.Function.Name - if args==nil { args = map[string]any{} } - oc.Function.Arguments = args - calls = append(calls, oc) + if m.ToolCalls != nil && len(m.ToolCalls) > 0 { + parsed := m.ParseToolCalls() + if len(parsed) > 0 { + calls := make([]OllamaToolCall,0,len(parsed)) + for _, tc := range parsed { + var args interface{} + if tc.Function.Arguments != "" { _ = json.Unmarshal([]byte(tc.Function.Arguments), &args) } + if args==nil { args = map[string]any{} } + oc := OllamaToolCall{} + oc.Function.Name = tc.Function.Name + oc.Function.Arguments = args + calls = append(calls, oc) + } + cm.ToolCalls = calls } - cm.ToolCalls = calls } chatReq.Messages = append(chatReq.Messages, cm) } @@ -165,7 +168,6 @@ func requestOpenAI2Embeddings(r dto.EmbeddingRequest) *OllamaEmbeddingRequest { opts := map[string]any{} if r.Temperature != nil { opts["temperature"] = r.Temperature } if r.TopP != 0 { opts["top_p"] = r.TopP } - if r.TopK != 0 { opts["top_k"] = r.TopK } if r.FrequencyPenalty != 0 { opts["frequency_penalty"] = r.FrequencyPenalty } if r.PresencePenalty != 0 { opts["presence_penalty"] = r.PresencePenalty } if r.Seed != 0 { opts["seed"] = int(r.Seed) } diff --git a/relay/channel/ollama/stream.go b/relay/channel/ollama/stream.go index db615e8bd..d5b104d6f 100644 --- a/relay/channel/ollama/stream.go +++ b/relay/channel/ollama/stream.go @@ -87,7 +87,6 @@ func ollamaStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http // delta content var content string if chunk.Message != nil { content = chunk.Message.Content } else { content = chunk.Response } - if content != "" { aggregatedText.WriteString(content) } delta := dto.ChatCompletionsStreamResponse{ Id: responseId, Object: "chat.completion.chunk", From 176fd6eda13537b57e6e1dc4c0f718a5ad523498 Mon Sep 17 00:00:00 2001 From: somnifex <98788152+somnifex@users.noreply.github.com> Date: Mon, 15 Sep 2025 23:23:53 +0800 Subject: [PATCH 04/11] =?UTF-8?q?fix:=20=E4=BC=98=E5=8C=96ollamaStreamHand?= =?UTF-8?q?ler=E4=B8=AD=E7=9A=84=E5=81=9C=E6=AD=A2=E5=92=8C=E6=9C=80?= =?UTF-8?q?=E7=BB=88=E4=BD=BF=E7=94=A8=E5=93=8D=E5=BA=94=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- relay/channel/ollama/stream.go | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/relay/channel/ollama/stream.go b/relay/channel/ollama/stream.go index d5b104d6f..4e17f12d7 100644 --- a/relay/channel/ollama/stream.go +++ b/relay/channel/ollama/stream.go @@ -114,15 +114,23 @@ func ollamaStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http continue } // done frame 
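The termination sequence this patch settles on emits three frames in order: a stop delta carrying finish_reason, a usage-only chunk, and the [DONE] sentinel. A runnable sketch of that frame order against any io.Writer; the chunk layout mirrors the OpenAI chat.completion.chunk shape, not the project's helper functions:

package main

import (
	"encoding/json"
	"fmt"
	"io"
	"os"
)

type chunk struct {
	ID      string   `json:"id"`
	Object  string   `json:"object"`
	Choices []choice `json:"choices"`
	Usage   *usage   `json:"usage,omitempty"`
}

type choice struct {
	Index        int            `json:"index"`
	Delta        map[string]any `json:"delta"`
	FinishReason *string        `json:"finish_reason,omitempty"`
}

type usage struct {
	PromptTokens     int `json:"prompt_tokens"`
	CompletionTokens int `json:"completion_tokens"`
	TotalTokens      int `json:"total_tokens"`
}

func writeSSE(w io.Writer, v any) {
	b, _ := json.Marshal(v)
	fmt.Fprintf(w, "data: %s\n\n", b)
}

func finishStream(w io.Writer, id, reason string, u usage) {
	// 1. stop delta carrying finish_reason
	writeSSE(w, chunk{ID: id, Object: "chat.completion.chunk",
		Choices: []choice{{Delta: map[string]any{}, FinishReason: &reason}}})
	// 2. usage-only frame with empty choices, as with stream_options.include_usage
	writeSSE(w, chunk{ID: id, Object: "chat.completion.chunk", Choices: []choice{}, Usage: &u})
	// 3. terminating sentinel
	fmt.Fprint(w, "data: [DONE]\n\n")
}

func main() {
	finishStream(os.Stdout, "chatcmpl-123", "stop",
		usage{PromptTokens: 12, CompletionTokens: 34, TotalTokens: 46})
}
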
+ // finalize once and break loop usage.PromptTokens = chunk.PromptEvalCount usage.CompletionTokens = chunk.EvalCount usage.TotalTokens = usage.PromptTokens + usage.CompletionTokens finishReason := chunk.DoneReason if finishReason == "" { finishReason = "stop" } - stop := helper.GenerateStopResponse(responseId, created, model, finishReason) - if data, err := common.Marshal(stop); err == nil { _ = helper.StringData(c, string(data)) } - final := helper.GenerateFinalUsageResponse(responseId, created, model, *usage) - if data, err := common.Marshal(final); err == nil { _ = helper.StringData(c, string(data)) } + // emit stop delta + if stop := helper.GenerateStopResponse(responseId, created, model, finishReason); stop != nil { + if data, err := common.Marshal(stop); err == nil { _ = helper.StringData(c, string(data)) } + } + // emit usage frame + if final := helper.GenerateFinalUsageResponse(responseId, created, model, *usage); final != nil { + if data, err := common.Marshal(final); err == nil { _ = helper.StringData(c, string(data)) } + } + // send [DONE] + helper.Done(c) + break } if err := scanner.Err(); err != nil && err != io.EOF { logger.LogError(c, "ollama stream scan error: "+err.Error()) } return usage, nil From f7d393fc721278774469b3e25eca2df35f25127f Mon Sep 17 00:00:00 2001 From: somnifex <98788152+somnifex@users.noreply.github.com> Date: Mon, 15 Sep 2025 23:41:09 +0800 Subject: [PATCH 05/11] =?UTF-8?q?refactor:=20=E7=AE=80=E5=8C=96=E8=AF=B7?= =?UTF-8?q?=E6=B1=82=E8=BD=AC=E6=8D=A2=E5=87=BD=E6=95=B0=E5=92=8C=E6=B5=81?= =?UTF-8?q?=E5=A4=84=E7=90=86=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- relay/channel/ollama/adaptor.go | 32 ++++------------ relay/channel/ollama/dto.go | 3 -- relay/channel/ollama/relay-ollama.go | 9 ----- relay/channel/ollama/stream.go | 56 +++++++++++++++++++++------- 4 files changed, 49 insertions(+), 51 deletions(-) diff --git a/relay/channel/ollama/adaptor.go b/relay/channel/ollama/adaptor.go index d66839f7b..bafe73b92 100644 --- a/relay/channel/ollama/adaptor.go +++ b/relay/channel/ollama/adaptor.go @@ -18,10 +18,7 @@ import ( type Adaptor struct { } -func (a *Adaptor) ConvertGeminiRequest(*gin.Context, *relaycommon.RelayInfo, *dto.GeminiChatRequest) (any, error) { - //TODO implement me - return nil, errors.New("not implemented") -} +func (a *Adaptor) ConvertGeminiRequest(*gin.Context, *relaycommon.RelayInfo, *dto.GeminiChatRequest) (any, error) { return nil, errors.New("not implemented") } func (a *Adaptor) ConvertClaudeRequest(c *gin.Context, info *relaycommon.RelayInfo, request *dto.ClaudeRequest) (any, error) { openaiAdaptor := openai.Adaptor{} @@ -36,29 +33,17 @@ func (a *Adaptor) ConvertClaudeRequest(c *gin.Context, info *relaycommon.RelayIn return openAIChatToOllamaChat(c, openaiRequest.(*dto.GeneralOpenAIRequest)) } -func (a *Adaptor) ConvertAudioRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.AudioRequest) (io.Reader, error) { - //TODO implement me - return nil, errors.New("not implemented") -} +func (a *Adaptor) ConvertAudioRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.AudioRequest) (io.Reader, error) { return nil, errors.New("not implemented") } -func (a *Adaptor) ConvertImageRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.ImageRequest) (any, error) { - //TODO implement me - return nil, errors.New("not implemented") -} +func (a *Adaptor) ConvertImageRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.ImageRequest) 
(any, error) { return nil, errors.New("not implemented") } func (a *Adaptor) Init(info *relaycommon.RelayInfo) { } func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) { - // embeddings fixed endpoint - if info.RelayMode == relayconstant.RelayModeEmbeddings { - return info.ChannelBaseUrl + "/api/embed", nil - } - // For chat vs generate: if original path contains "/v1/completions" map to generate; otherwise chat - if strings.Contains(info.RequestURLPath, "/v1/completions") || info.RelayMode == relayconstant.RelayModeCompletions { - return info.ChannelBaseUrl + "/api/generate", nil - } - return info.ChannelBaseUrl + "/api/chat", nil + if info.RelayMode == relayconstant.RelayModeEmbeddings { return info.ChannelBaseUrl + "/api/embed", nil } + if strings.Contains(info.RequestURLPath, "/v1/completions") || info.RelayMode == relayconstant.RelayModeCompletions { return info.ChannelBaseUrl + "/api/generate", nil } + return info.ChannelBaseUrl + "/api/chat", nil } func (a *Adaptor) SetupRequestHeader(c *gin.Context, req *http.Header, info *relaycommon.RelayInfo) error { @@ -84,10 +69,7 @@ func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.Rela return requestOpenAI2Embeddings(request), nil } -func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) { - // TODO implement me - return nil, errors.New("not implemented") -} +func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) { return nil, errors.New("not implemented") } func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error) { return channel.DoApiRequest(a, c, info, requestBody) diff --git a/relay/channel/ollama/dto.go b/relay/channel/ollama/dto.go index a3e325e2f..45e49ab43 100644 --- a/relay/channel/ollama/dto.go +++ b/relay/channel/ollama/dto.go @@ -4,7 +4,6 @@ import ( "encoding/json" ) -// OllamaChatMessage represents a single chat message type OllamaChatMessage struct { Role string `json:"role"` Content string `json:"content,omitempty"` @@ -32,7 +31,6 @@ type OllamaToolCall struct { } `json:"function"` } -// OllamaChatRequest -> /api/chat type OllamaChatRequest struct { Model string `json:"model"` Messages []OllamaChatMessage `json:"messages"` @@ -44,7 +42,6 @@ type OllamaChatRequest struct { Think json.RawMessage `json:"think,omitempty"` } -// OllamaGenerateRequest -> /api/generate type OllamaGenerateRequest struct { Model string `json:"model"` Prompt string `json:"prompt,omitempty"` diff --git a/relay/channel/ollama/relay-ollama.go b/relay/channel/ollama/relay-ollama.go index 45424633c..c79f98760 100644 --- a/relay/channel/ollama/relay-ollama.go +++ b/relay/channel/ollama/relay-ollama.go @@ -15,7 +15,6 @@ import ( "github.com/gin-gonic/gin" ) -// openAIChatToOllamaChat converts OpenAI-style chat request to Ollama chat func openAIChatToOllamaChat(c *gin.Context, r *dto.GeneralOpenAIRequest) (*OllamaChatRequest, error) { chatReq := &OllamaChatRequest{ Model: r.Model, @@ -23,12 +22,10 @@ func openAIChatToOllamaChat(c *gin.Context, r *dto.GeneralOpenAIRequest) (*Ollam Options: map[string]any{}, Think: r.Think, } - // format mapping if r.ResponseFormat != nil { if r.ResponseFormat.Type == "json" { chatReq.Format = "json" } else if r.ResponseFormat.Type == "json_schema" { - // supply schema object directly if len(r.ResponseFormat.JsonSchema) > 0 { var schema any _ = 
json.Unmarshal(r.ResponseFormat.JsonSchema, &schema) @@ -46,7 +43,6 @@ func openAIChatToOllamaChat(c *gin.Context, r *dto.GeneralOpenAIRequest) (*Ollam if r.Seed != 0 { chatReq.Options["seed"] = int(r.Seed) } if mt := r.GetMaxTokens(); mt != 0 { chatReq.Options["num_predict"] = int(mt) } - // Stop -> options.stop (array) if r.Stop != nil { switch v := r.Stop.(type) { case string: @@ -60,7 +56,6 @@ func openAIChatToOllamaChat(c *gin.Context, r *dto.GeneralOpenAIRequest) (*Ollam } } - // tools if len(r.Tools) > 0 { tools := make([]OllamaTool,0,len(r.Tools)) for _, t := range r.Tools { @@ -69,10 +64,8 @@ func openAIChatToOllamaChat(c *gin.Context, r *dto.GeneralOpenAIRequest) (*Ollam chatReq.Tools = tools } - // messages chatReq.Messages = make([]OllamaChatMessage,0,len(r.Messages)) for _, m := range r.Messages { - // gather text parts & images var textBuilder strings.Builder var images []string if m.IsStringContent() { @@ -98,9 +91,7 @@ func openAIChatToOllamaChat(c *gin.Context, r *dto.GeneralOpenAIRequest) (*Ollam } cm := OllamaChatMessage{Role: m.Role, Content: textBuilder.String()} if len(images)>0 { cm.Images = images } - // history tool call result message if m.Role == "tool" && m.Name != nil { cm.ToolName = *m.Name } - // tool calls from assistant previous message if m.ToolCalls != nil && len(m.ToolCalls) > 0 { parsed := m.ParseToolCalls() if len(parsed) > 0 { diff --git a/relay/channel/ollama/stream.go b/relay/channel/ollama/stream.go index 4e17f12d7..167c676d6 100644 --- a/relay/channel/ollama/stream.go +++ b/relay/channel/ollama/stream.go @@ -19,7 +19,6 @@ import ( "github.com/gin-gonic/gin" ) -// Ollama streaming chunk (chat or generate) type ollamaChatStreamChunk struct { Model string `json:"model"` CreatedAt string `json:"created_at"` @@ -47,7 +46,7 @@ type ollamaChatStreamChunk struct { EvalDuration int64 `json:"eval_duration"` } -func toUnix(ts string) int64 { // parse RFC3339 / variant; fallback time.Now +func toUnix(ts string) int64 { if ts == "" { return time.Now().Unix() } // try time.RFC3339 or with nanoseconds t, err := time.Parse(time.RFC3339Nano, ts) @@ -55,7 +54,6 @@ func toUnix(ts string) int64 { // parse RFC3339 / variant; fallback time.Now return t.Unix() } -// streaming handler: convert Ollama stream -> OpenAI SSE func ollamaStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Response) (*dto.Usage, *types.NewAPIError) { if resp == nil || resp.Body == nil { return nil, types.NewOpenAIError(fmt.Errorf("empty response"), types.ErrorCodeBadResponse, http.StatusBadRequest) } defer service.CloseResponseBodyGracefully(resp) @@ -67,7 +65,6 @@ func ollamaStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http var responseId = common.GetUUID() var created = time.Now().Unix() var toolCallIndex int - // send start event start := helper.GenerateStartEmptyResponse(responseId, created, model, nil) if data, err := common.Marshal(start); err == nil { _ = helper.StringData(c, string(data)) } @@ -141,16 +138,47 @@ func ollamaChatHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.R body, err := io.ReadAll(resp.Body) if err != nil { return nil, types.NewOpenAIError(err, types.ErrorCodeReadResponseBodyFailed, http.StatusInternalServerError) } service.CloseResponseBodyGracefully(resp) - if common.DebugEnabled { println("ollama non-stream resp:", string(body)) } - var chunk ollamaChatStreamChunk - if err = json.Unmarshal(body, &chunk); err != nil { return nil, types.NewOpenAIError(err, types.ErrorCodeBadResponseBody, 
http.StatusInternalServerError) } - model := chunk.Model + raw := string(body) + if common.DebugEnabled { println("ollama non-stream raw resp:", raw) } + + lines := strings.Split(raw, "\n") + var ( + aggContent strings.Builder + lastChunk ollamaChatStreamChunk + parsedAny bool + ) + for _, ln := range lines { + ln = strings.TrimSpace(ln) + if ln == "" { continue } + var ck ollamaChatStreamChunk + if err := json.Unmarshal([]byte(ln), &ck); err != nil { + if len(lines) == 1 { return nil, types.NewOpenAIError(err, types.ErrorCodeBadResponseBody, http.StatusInternalServerError) } + continue + } + parsedAny = true + lastChunk = ck + if !ck.Done { + if ck.Message != nil && ck.Message.Content != "" { aggContent.WriteString(ck.Message.Content) } else if ck.Response != "" { aggContent.WriteString(ck.Response) } + } else { + if ck.Message != nil && ck.Message.Content != "" { aggContent.WriteString(ck.Message.Content) } else if ck.Response != "" { aggContent.WriteString(ck.Response) } + } + } + + if !parsedAny { + var single ollamaChatStreamChunk + if err := json.Unmarshal(body, &single); err != nil { return nil, types.NewOpenAIError(err, types.ErrorCodeBadResponseBody, http.StatusInternalServerError) } + lastChunk = single + if single.Message != nil { aggContent.WriteString(single.Message.Content) } else { aggContent.WriteString(single.Response) } + } + + model := lastChunk.Model if model == "" { model = info.UpstreamModelName } - created := toUnix(chunk.CreatedAt) - content := "" - if chunk.Message != nil { content = chunk.Message.Content } else { content = chunk.Response } - usage := &dto.Usage{PromptTokens: chunk.PromptEvalCount, CompletionTokens: chunk.EvalCount, TotalTokens: chunk.PromptEvalCount + chunk.EvalCount} - // Build OpenAI style response + created := toUnix(lastChunk.CreatedAt) + usage := &dto.Usage{PromptTokens: lastChunk.PromptEvalCount, CompletionTokens: lastChunk.EvalCount, TotalTokens: lastChunk.PromptEvalCount + lastChunk.EvalCount} + content := aggContent.String() + finishReason := lastChunk.DoneReason + if finishReason == "" { finishReason = "stop" } + full := dto.OpenAITextResponse{ Id: common.GetUUID(), Model: model, @@ -159,7 +187,7 @@ func ollamaChatHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.R Choices: []dto.OpenAITextResponseChoice{ { Index: 0, Message: dto.Message{Role: "assistant", Content: contentPtr(content)}, - FinishReason: func() string { if chunk.DoneReason == "" { return "stop" } ; return chunk.DoneReason }(), + FinishReason: &finishReason, } }, Usage: *usage, } From 9d952e0d78569bee80446c4b6ff2881d04159eb6 Mon Sep 17 00:00:00 2001 From: somnifex <98788152+somnifex@users.noreply.github.com> Date: Mon, 15 Sep 2025 23:43:39 +0800 Subject: [PATCH 06/11] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8DollamaChatHandle?= =?UTF-8?q?r=E4=B8=AD=E7=9A=84FinishReason=E5=AD=97=E6=AE=B5=E8=B5=8B?= =?UTF-8?q?=E5=80=BC=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- relay/channel/ollama/stream.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/relay/channel/ollama/stream.go b/relay/channel/ollama/stream.go index 167c676d6..ad12e7f83 100644 --- a/relay/channel/ollama/stream.go +++ b/relay/channel/ollama/stream.go @@ -187,7 +187,7 @@ func ollamaChatHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.R Choices: []dto.OpenAITextResponseChoice{ { Index: 0, Message: dto.Message{Role: "assistant", Content: contentPtr(content)}, - FinishReason: &finishReason, + FinishReason: 
finishReason, } }, Usage: *usage, } From 4eeca081fee46faaeb5551c7af355c1ef03de47b Mon Sep 17 00:00:00 2001 From: somnifex <98788152+somnifex@users.noreply.github.com> Date: Tue, 16 Sep 2025 08:13:28 +0800 Subject: [PATCH 07/11] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=E5=9B=BE?= =?UTF-8?q?=E5=83=8FURL=E5=A4=84=E7=90=86=E9=80=BB=E8=BE=91=EF=BC=8C?= =?UTF-8?q?=E7=A1=AE=E4=BF=9D=E6=AD=A3=E7=A1=AE=E7=94=9F=E6=88=90base64?= =?UTF-8?q?=E6=95=B0=E6=8D=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- relay/channel/ollama/relay-ollama.go | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/relay/channel/ollama/relay-ollama.go b/relay/channel/ollama/relay-ollama.go index c79f98760..3b67f9525 100644 --- a/relay/channel/ollama/relay-ollama.go +++ b/relay/channel/ollama/relay-ollama.go @@ -76,13 +76,17 @@ func openAIChatToOllamaChat(c *gin.Context, r *dto.GeneralOpenAIRequest) (*Ollam if part.Type == dto.ContentTypeImageURL { img := part.GetImageMedia() if img != nil && img.Url != "" { - // ensure base64 dataURL + var base64Data string if strings.HasPrefix(img.Url, "http") { fileData, err := service.GetFileBase64FromUrl(c, img.Url, "fetch image for ollama chat") if err != nil { return nil, err } - img.Url = fmt.Sprintf("data:%s;base64,%s", fileData.MimeType, fileData.Base64Data) + base64Data = fileData.Base64Data + } else if strings.HasPrefix(img.Url, "data:") { + if idx := strings.Index(img.Url, ","); idx != -1 && idx+1 < len(img.Url) { base64Data = img.Url[idx+1:] } + } else { + base64Data = img.Url } - images = append(images, img.Url) + if base64Data != "" { images = append(images, base64Data) } } } else if part.Type == dto.ContentTypeText { textBuilder.WriteString(part.Text) From 62549717e0652a9914c83ad5e025360e63c0f9c3 Mon Sep 17 00:00:00 2001 From: somnifex <98788152+somnifex@users.noreply.github.com> Date: Tue, 16 Sep 2025 08:51:29 +0800 Subject: [PATCH 08/11] =?UTF-8?q?fix:=20=E6=B7=BB=E5=8A=A0=E5=AF=B9Thinkin?= =?UTF-8?q?g=E5=AD=97=E6=AE=B5=E7=9A=84=E5=A4=84=E7=90=86=E9=80=BB?= =?UTF-8?q?=E8=BE=91=EF=BC=8C=E7=A1=AE=E4=BF=9D=E6=8E=A8=E7=90=86=E5=86=85?= =?UTF-8?q?=E5=AE=B9=E6=AD=A3=E7=A1=AE=E4=BC=A0=E9=80=92?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- relay/channel/ollama/stream.go | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/relay/channel/ollama/stream.go b/relay/channel/ollama/stream.go index ad12e7f83..cea458444 100644 --- a/relay/channel/ollama/stream.go +++ b/relay/channel/ollama/stream.go @@ -26,6 +26,7 @@ type ollamaChatStreamChunk struct { Message *struct { Role string `json:"role"` Content string `json:"content"` + Thinking json.RawMessage `json:"thinking"` ToolCalls []struct { Function struct { Name string `json:"name"` @@ -41,7 +42,6 @@ type ollamaChatStreamChunk struct { LoadDuration int64 `json:"load_duration"` PromptEvalCount int `json:"prompt_eval_count"` EvalCount int `json:"eval_count"` - // generate mode may use these PromptEvalDuration int64 `json:"prompt_eval_duration"` EvalDuration int64 `json:"eval_duration"` } @@ -95,13 +95,18 @@ func ollamaStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http } }, } if content != "" { delta.Choices[0].Delta.SetContentString(content) } + if chunk.Message != nil && len(chunk.Message.Thinking) > 0 { + raw := strings.TrimSpace(string(chunk.Message.Thinking)) + if raw != "" && raw != "null" { delta.Choices[0].Delta.SetReasoningContent(raw) } + } // tool 
calls if chunk.Message != nil && len(chunk.Message.ToolCalls) > 0 { delta.Choices[0].Delta.ToolCalls = make([]dto.ToolCallResponse,0,len(chunk.Message.ToolCalls)) for _, tc := range chunk.Message.ToolCalls { // arguments -> string argBytes, _ := json.Marshal(tc.Function.Arguments) - tr := dto.ToolCallResponse{ID:"", Type:nil, Function: dto.FunctionResponse{Name: tc.Function.Name, Arguments: string(argBytes)}} + toolId := fmt.Sprintf("call_%d", toolCallIndex) + tr := dto.ToolCallResponse{ID:toolId, Type:"function", Function: dto.FunctionResponse{Name: tc.Function.Name, Arguments: string(argBytes)}} tr.SetIndex(toolCallIndex) toolCallIndex++ delta.Choices[0].Delta.ToolCalls = append(delta.Choices[0].Delta.ToolCalls, tr) @@ -115,8 +120,8 @@ func ollamaStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http usage.PromptTokens = chunk.PromptEvalCount usage.CompletionTokens = chunk.EvalCount usage.TotalTokens = usage.PromptTokens + usage.CompletionTokens - finishReason := chunk.DoneReason - if finishReason == "" { finishReason = "stop" } + finishReason := chunk.DoneReason + if finishReason == "" { finishReason = "stop" } // emit stop delta if stop := helper.GenerateStopResponse(responseId, created, model, finishReason); stop != nil { if data, err := common.Marshal(stop); err == nil { _ = helper.StringData(c, string(data)) } @@ -144,6 +149,7 @@ func ollamaChatHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.R lines := strings.Split(raw, "\n") var ( aggContent strings.Builder + reasoningBuilder strings.Builder lastChunk ollamaChatStreamChunk parsedAny bool ) @@ -157,18 +163,21 @@ func ollamaChatHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.R } parsedAny = true lastChunk = ck - if !ck.Done { - if ck.Message != nil && ck.Message.Content != "" { aggContent.WriteString(ck.Message.Content) } else if ck.Response != "" { aggContent.WriteString(ck.Response) } - } else { - if ck.Message != nil && ck.Message.Content != "" { aggContent.WriteString(ck.Message.Content) } else if ck.Response != "" { aggContent.WriteString(ck.Response) } + if ck.Message != nil && len(ck.Message.Thinking) > 0 { + raw := strings.TrimSpace(string(ck.Message.Thinking)) + if raw != "" && raw != "null" { reasoningBuilder.WriteString(raw) } } + if ck.Message != nil && ck.Message.Content != "" { aggContent.WriteString(ck.Message.Content) } else if ck.Response != "" { aggContent.WriteString(ck.Response) } } if !parsedAny { var single ollamaChatStreamChunk if err := json.Unmarshal(body, &single); err != nil { return nil, types.NewOpenAIError(err, types.ErrorCodeBadResponseBody, http.StatusInternalServerError) } lastChunk = single - if single.Message != nil { aggContent.WriteString(single.Message.Content) } else { aggContent.WriteString(single.Response) } + if single.Message != nil { + if len(single.Message.Thinking) > 0 { raw := strings.TrimSpace(string(single.Message.Thinking)); if raw != "" && raw != "null" { reasoningBuilder.WriteString(raw) } } + aggContent.WriteString(single.Message.Content) + } else { aggContent.WriteString(single.Response) } } model := lastChunk.Model @@ -179,6 +188,8 @@ func ollamaChatHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.R finishReason := lastChunk.DoneReason if finishReason == "" { finishReason = "stop" } + msg := dto.Message{Role: "assistant", Content: contentPtr(content)} + if rc := reasoningBuilder.String(); rc != "" { msg.ReasoningContent = &rc } full := dto.OpenAITextResponse{ Id: common.GetUUID(), Model: model, @@ -186,7 +197,7 @@ 
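Patch 08 threads Ollama's thinking field through as raw JSON and skips values that are empty or the literal null before exposing them as reasoning content. A standalone sketch of that filter; it additionally decodes a JSON string value so the surrounding quotes do not leak into the output (that decode step is this sketch's own refinement, not taken from the patch):

package main

import (
	"encoding/json"
	"fmt"
	"strings"
)

type msg struct {
	Content  string          `json:"content"`
	Thinking json.RawMessage `json:"thinking"` // may be absent, null, or a JSON string
}

// reasoningFrom extracts usable reasoning text, mirroring the
// trim-and-skip-"null" checks in the patch.
func reasoningFrom(m msg) (string, bool) {
	raw := strings.TrimSpace(string(m.Thinking))
	if raw == "" || raw == "null" {
		return "", false
	}
	// thinking is usually a JSON string; fall back to the raw bytes otherwise
	var s string
	if err := json.Unmarshal(m.Thinking, &s); err == nil {
		return s, true
	}
	return raw, true
}

func main() {
	for _, line := range []string{
		`{"content":"4","thinking":"2+2 is basic arithmetic"}`,
		`{"content":"hi","thinking":null}`,
		`{"content":"hi"}`,
	} {
		var m msg
		_ = json.Unmarshal([]byte(line), &m)
		if rc, ok := reasoningFrom(m); ok {
			fmt.Println("reasoning:", rc)
		} else {
			fmt.Println("no reasoning")
		}
	}
}
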
func ollamaChatHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.R Created: created, Choices: []dto.OpenAITextResponseChoice{ { Index: 0, - Message: dto.Message{Role: "assistant", Content: contentPtr(content)}, + Message: msg, FinishReason: finishReason, } }, Usage: *usage, From 1dd78b83b7d80e408062f9b18b3db8f860c0a0c9 Mon Sep 17 00:00:00 2001 From: somnifex <98788152+somnifex@users.noreply.github.com> Date: Tue, 16 Sep 2025 08:54:34 +0800 Subject: [PATCH 09/11] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8DollamaChatHandle?= =?UTF-8?q?r=E4=B8=ADReasoningContent=E5=AD=97=E6=AE=B5=E7=9A=84=E8=B5=8B?= =?UTF-8?q?=E5=80=BC=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- relay/channel/ollama/stream.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/relay/channel/ollama/stream.go b/relay/channel/ollama/stream.go index cea458444..964f11d90 100644 --- a/relay/channel/ollama/stream.go +++ b/relay/channel/ollama/stream.go @@ -189,7 +189,7 @@ func ollamaChatHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.R if finishReason == "" { finishReason = "stop" } msg := dto.Message{Role: "assistant", Content: contentPtr(content)} - if rc := reasoningBuilder.String(); rc != "" { msg.ReasoningContent = &rc } + if rc := reasoningBuilder.String(); rc != "" { msg.ReasoningContent = rc } full := dto.OpenAITextResponse{ Id: common.GetUUID(), Model: model, From 69a88a0563932d62b1d9bb8f9c310ab119909d51 Mon Sep 17 00:00:00 2001 From: somnifex <98788152+somnifex@users.noreply.github.com> Date: Tue, 16 Sep 2025 08:58:06 +0800 Subject: [PATCH 10/11] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0ollama=E8=81=8A?= =?UTF-8?q?=E5=A4=A9=E6=B5=81=E5=A4=84=E7=90=86=E5=92=8C=E9=9D=9E=E6=B5=81?= =?UTF-8?q?=E5=A4=84=E7=90=86=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../channel/ollama/stream_20250916085416.go | 210 ++++++++++++++++++ .../channel/ollama/stream_20250916085435.go | 210 ++++++++++++++++++ 2 files changed, 420 insertions(+) create mode 100644 .history/relay/channel/ollama/stream_20250916085416.go create mode 100644 .history/relay/channel/ollama/stream_20250916085435.go diff --git a/.history/relay/channel/ollama/stream_20250916085416.go b/.history/relay/channel/ollama/stream_20250916085416.go new file mode 100644 index 000000000..964f11d90 --- /dev/null +++ b/.history/relay/channel/ollama/stream_20250916085416.go @@ -0,0 +1,210 @@ +package ollama + +import ( + "bufio" + "encoding/json" + "fmt" + "io" + "net/http" + "one-api/common" + "one-api/dto" + "one-api/logger" + relaycommon "one-api/relay/common" + "one-api/relay/helper" + "one-api/service" + "one-api/types" + "strings" + "time" + + "github.com/gin-gonic/gin" +) + +type ollamaChatStreamChunk struct { + Model string `json:"model"` + CreatedAt string `json:"created_at"` + // chat + Message *struct { + Role string `json:"role"` + Content string `json:"content"` + Thinking json.RawMessage `json:"thinking"` + ToolCalls []struct { + Function struct { + Name string `json:"name"` + Arguments interface{} `json:"arguments"` + } `json:"function"` + } `json:"tool_calls"` + } `json:"message"` + // generate + Response string `json:"response"` + Done bool `json:"done"` + DoneReason string `json:"done_reason"` + TotalDuration int64 `json:"total_duration"` + LoadDuration int64 `json:"load_duration"` + PromptEvalCount int `json:"prompt_eval_count"` + EvalCount int `json:"eval_count"` + PromptEvalDuration int64 
`json:"prompt_eval_duration"` + EvalDuration int64 `json:"eval_duration"` +} + +func toUnix(ts string) int64 { + if ts == "" { return time.Now().Unix() } + // try time.RFC3339 or with nanoseconds + t, err := time.Parse(time.RFC3339Nano, ts) + if err != nil { t2, err2 := time.Parse(time.RFC3339, ts); if err2==nil { return t2.Unix() }; return time.Now().Unix() } + return t.Unix() +} + +func ollamaStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Response) (*dto.Usage, *types.NewAPIError) { + if resp == nil || resp.Body == nil { return nil, types.NewOpenAIError(fmt.Errorf("empty response"), types.ErrorCodeBadResponse, http.StatusBadRequest) } + defer service.CloseResponseBodyGracefully(resp) + + helper.SetEventStreamHeaders(c) + scanner := bufio.NewScanner(resp.Body) + usage := &dto.Usage{} + var model = info.UpstreamModelName + var responseId = common.GetUUID() + var created = time.Now().Unix() + var toolCallIndex int + start := helper.GenerateStartEmptyResponse(responseId, created, model, nil) + if data, err := common.Marshal(start); err == nil { _ = helper.StringData(c, string(data)) } + + for scanner.Scan() { + line := scanner.Text() + line = strings.TrimSpace(line) + if line == "" { continue } + var chunk ollamaChatStreamChunk + if err := json.Unmarshal([]byte(line), &chunk); err != nil { + logger.LogError(c, "ollama stream json decode error: "+err.Error()+" line="+line) + return usage, types.NewOpenAIError(err, types.ErrorCodeBadResponseBody, http.StatusInternalServerError) + } + if chunk.Model != "" { model = chunk.Model } + created = toUnix(chunk.CreatedAt) + + if !chunk.Done { + // delta content + var content string + if chunk.Message != nil { content = chunk.Message.Content } else { content = chunk.Response } + delta := dto.ChatCompletionsStreamResponse{ + Id: responseId, + Object: "chat.completion.chunk", + Created: created, + Model: model, + Choices: []dto.ChatCompletionsStreamResponseChoice{ { + Index: 0, + Delta: dto.ChatCompletionsStreamResponseChoiceDelta{ Role: "assistant" }, + } }, + } + if content != "" { delta.Choices[0].Delta.SetContentString(content) } + if chunk.Message != nil && len(chunk.Message.Thinking) > 0 { + raw := strings.TrimSpace(string(chunk.Message.Thinking)) + if raw != "" && raw != "null" { delta.Choices[0].Delta.SetReasoningContent(raw) } + } + // tool calls + if chunk.Message != nil && len(chunk.Message.ToolCalls) > 0 { + delta.Choices[0].Delta.ToolCalls = make([]dto.ToolCallResponse,0,len(chunk.Message.ToolCalls)) + for _, tc := range chunk.Message.ToolCalls { + // arguments -> string + argBytes, _ := json.Marshal(tc.Function.Arguments) + toolId := fmt.Sprintf("call_%d", toolCallIndex) + tr := dto.ToolCallResponse{ID:toolId, Type:"function", Function: dto.FunctionResponse{Name: tc.Function.Name, Arguments: string(argBytes)}} + tr.SetIndex(toolCallIndex) + toolCallIndex++ + delta.Choices[0].Delta.ToolCalls = append(delta.Choices[0].Delta.ToolCalls, tr) + } + } + if data, err := common.Marshal(delta); err == nil { _ = helper.StringData(c, string(data)) } + continue + } + // done frame + // finalize once and break loop + usage.PromptTokens = chunk.PromptEvalCount + usage.CompletionTokens = chunk.EvalCount + usage.TotalTokens = usage.PromptTokens + usage.CompletionTokens + finishReason := chunk.DoneReason + if finishReason == "" { finishReason = "stop" } + // emit stop delta + if stop := helper.GenerateStopResponse(responseId, created, model, finishReason); stop != nil { + if data, err := common.Marshal(stop); err == nil { _ = 
+
+func ollamaStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Response) (*dto.Usage, *types.NewAPIError) {
+	if resp == nil || resp.Body == nil { return nil, types.NewOpenAIError(fmt.Errorf("empty response"), types.ErrorCodeBadResponse, http.StatusBadRequest) }
+	defer service.CloseResponseBodyGracefully(resp)
+
+	helper.SetEventStreamHeaders(c)
+	scanner := bufio.NewScanner(resp.Body)
+	usage := &dto.Usage{}
+	var model = info.UpstreamModelName
+	var responseId = common.GetUUID()
+	var created = time.Now().Unix()
+	var toolCallIndex int
+	start := helper.GenerateStartEmptyResponse(responseId, created, model, nil)
+	if data, err := common.Marshal(start); err == nil { _ = helper.StringData(c, string(data)) }
+
+	for scanner.Scan() {
+		line := scanner.Text()
+		line = strings.TrimSpace(line)
+		if line == "" { continue }
+		var chunk ollamaChatStreamChunk
+		if err := json.Unmarshal([]byte(line), &chunk); err != nil {
+			logger.LogError(c, "ollama stream json decode error: "+err.Error()+" line="+line)
+			return usage, types.NewOpenAIError(err, types.ErrorCodeBadResponseBody, http.StatusInternalServerError)
+		}
+		if chunk.Model != "" { model = chunk.Model }
+		created = toUnix(chunk.CreatedAt)
+
+		if !chunk.Done {
+			// delta content
+			var content string
+			if chunk.Message != nil { content = chunk.Message.Content } else { content = chunk.Response }
+			delta := dto.ChatCompletionsStreamResponse{
+				Id: responseId,
+				Object: "chat.completion.chunk",
+				Created: created,
+				Model: model,
+				Choices: []dto.ChatCompletionsStreamResponseChoice{ {
+					Index: 0,
+					Delta: dto.ChatCompletionsStreamResponseChoiceDelta{ Role: "assistant" },
+				} },
+			}
+			if content != "" { delta.Choices[0].Delta.SetContentString(content) }
+			if chunk.Message != nil && len(chunk.Message.Thinking) > 0 {
+				raw := strings.TrimSpace(string(chunk.Message.Thinking))
+				if raw != "" && raw != "null" { delta.Choices[0].Delta.SetReasoningContent(raw) }
+			}
+			// tool calls
+			if chunk.Message != nil && len(chunk.Message.ToolCalls) > 0 {
+				delta.Choices[0].Delta.ToolCalls = make([]dto.ToolCallResponse, 0, len(chunk.Message.ToolCalls))
+				for _, tc := range chunk.Message.ToolCalls {
+					// arguments -> string
+					argBytes, _ := json.Marshal(tc.Function.Arguments)
+					toolId := fmt.Sprintf("call_%d", toolCallIndex)
+					tr := dto.ToolCallResponse{ID: toolId, Type: "function", Function: dto.FunctionResponse{Name: tc.Function.Name, Arguments: string(argBytes)}}
+					tr.SetIndex(toolCallIndex)
+					toolCallIndex++
+					delta.Choices[0].Delta.ToolCalls = append(delta.Choices[0].Delta.ToolCalls, tr)
+				}
+			}
+			if data, err := common.Marshal(delta); err == nil { _ = helper.StringData(c, string(data)) }
+			continue
+		}
+		// done frame
+		// finalize once and break loop
+		usage.PromptTokens = chunk.PromptEvalCount
+		usage.CompletionTokens = chunk.EvalCount
+		usage.TotalTokens = usage.PromptTokens + usage.CompletionTokens
+		finishReason := chunk.DoneReason
+		if finishReason == "" { finishReason = "stop" }
+		// emit stop delta
+		if stop := helper.GenerateStopResponse(responseId, created, model, finishReason); stop != nil {
+			if data, err := common.Marshal(stop); err == nil { _ = helper.StringData(c, string(data)) }
+		}
+		// emit usage frame
+		if final := helper.GenerateFinalUsageResponse(responseId, created, model, *usage); final != nil {
+			if data, err := common.Marshal(final); err == nil { _ = helper.StringData(c, string(data)) }
+		}
+		// send [DONE]
+		helper.Done(c)
+		break
+	}
+	if err := scanner.Err(); err != nil && err != io.EOF { logger.LogError(c, "ollama stream scan error: "+err.Error()) }
+	return usage, nil
+}
+
+// non-stream handler for chat/generate
+func ollamaChatHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Response) (*dto.Usage, *types.NewAPIError) {
+	body, err := io.ReadAll(resp.Body)
+	if err != nil { return nil, types.NewOpenAIError(err, types.ErrorCodeReadResponseBodyFailed, http.StatusInternalServerError) }
+	service.CloseResponseBodyGracefully(resp)
+	raw := string(body)
+	if common.DebugEnabled { println("ollama non-stream raw resp:", raw) }
+
+	lines := strings.Split(raw, "\n")
+	var (
+		aggContent strings.Builder
+		reasoningBuilder strings.Builder
+		lastChunk ollamaChatStreamChunk
+		parsedAny bool
+	)
+	for _, ln := range lines {
+		ln = strings.TrimSpace(ln)
+		if ln == "" { continue }
+		var ck ollamaChatStreamChunk
+		if err := json.Unmarshal([]byte(ln), &ck); err != nil {
+			if len(lines) == 1 { return nil, types.NewOpenAIError(err, types.ErrorCodeBadResponseBody, http.StatusInternalServerError) }
+			continue
+		}
+		parsedAny = true
+		lastChunk = ck
+		if ck.Message != nil && len(ck.Message.Thinking) > 0 {
+			raw := strings.TrimSpace(string(ck.Message.Thinking))
+			if raw != "" && raw != "null" { reasoningBuilder.WriteString(raw) }
+		}
+		if ck.Message != nil && ck.Message.Content != "" { aggContent.WriteString(ck.Message.Content) } else if ck.Response != "" { aggContent.WriteString(ck.Response) }
+	}
+
+	if !parsedAny {
+		var single ollamaChatStreamChunk
+		if err := json.Unmarshal(body, &single); err != nil { return nil, types.NewOpenAIError(err, types.ErrorCodeBadResponseBody, http.StatusInternalServerError) }
+		lastChunk = single
+		if single.Message != nil {
+			if len(single.Message.Thinking) > 0 { raw := strings.TrimSpace(string(single.Message.Thinking)); if raw != "" && raw != "null" { reasoningBuilder.WriteString(raw) } }
+			aggContent.WriteString(single.Message.Content)
+		} else { aggContent.WriteString(single.Response) }
+	}
+
+	model := lastChunk.Model
+	if model == "" { model = info.UpstreamModelName }
+	created := toUnix(lastChunk.CreatedAt)
+	usage := &dto.Usage{PromptTokens: lastChunk.PromptEvalCount, CompletionTokens: lastChunk.EvalCount, TotalTokens: lastChunk.PromptEvalCount + lastChunk.EvalCount}
+	content := aggContent.String()
+	finishReason := lastChunk.DoneReason
+	if finishReason == "" { finishReason = "stop" }
+
+	msg := dto.Message{Role: "assistant", Content: contentPtr(content)}
+	if rc := reasoningBuilder.String(); rc != "" { msg.ReasoningContent = rc }
+	full := dto.OpenAITextResponse{
+		Id: common.GetUUID(),
+		Model: model,
+		Object: "chat.completion",
+		Created: created,
+		Choices: []dto.OpenAITextResponseChoice{ {
+			Index: 0,
+			Message: msg,
+			FinishReason: finishReason,
+		} },
+		Usage: *usage,
+	}
+	out, _ := common.Marshal(full)
+	service.IOCopyBytesGracefully(c, resp, out)
+	return usage, nil
+}
+
+func contentPtr(s string) *string { if s == "" { return nil }; return &s }
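Ollama reports tool-call arguments as a structured JSON object, while the OpenAI chunk format above expects them as a JSON-encoded string; the handler bridges the two with a plain json.Marshal. A standalone sketch of that conversion (names and values illustrative):

package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	// What tool_calls[].function.arguments decodes into on the Ollama side.
	args := map[string]any{"city": "Paris", "unit": "celsius"}
	// What the OpenAI-style delta carries: the same object, serialized.
	b, err := json.Marshal(args)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(b)) // {"city":"Paris","unit":"celsius"}
}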
diff --git a/.history/relay/channel/ollama/stream_20250916085435.go b/.history/relay/channel/ollama/stream_20250916085435.go
new file mode 100644
index 000000000..964f11d90
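The non-stream ollamaChatHandler above accepts both a single JSON object and an NDJSON body (one frame per line), concatenating message content across frames and taking the usage counters from the last frame. A self-contained sketch of that aggregation strategy over hand-written frames:

package main

import (
	"encoding/json"
	"fmt"
	"strings"
)

func main() {
	raw := `{"message":{"role":"assistant","content":"Hel"},"done":false}
{"message":{"role":"assistant","content":"lo"},"done":true,"done_reason":"stop"}`
	var sb strings.Builder
	for _, ln := range strings.Split(raw, "\n") {
		ln = strings.TrimSpace(ln)
		if ln == "" {
			continue
		}
		var ck struct {
			Message struct {
				Content string `json:"content"`
			} `json:"message"`
		}
		if err := json.Unmarshal([]byte(ln), &ck); err != nil {
			continue
		}
		sb.WriteString(ck.Message.Content)
	}
	fmt.Println(sb.String()) // Hello
}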
From f19b5b8680f5078e2516e856670ecba463c010b5 Mon Sep 17 00:00:00 2001
From: somnifex <98788152+somnifex@users.noreply.github.com>
Date: Tue, 16 Sep 2025 08:58:19 +0800
Subject: [PATCH 11/11] chore: delete history files

---
 .../channel/ollama/stream_20250916085416.go | 210 ------------------
 .../channel/ollama/stream_20250916085435.go | 210 ------------------
 2 files changed, 420 deletions(-)
 delete mode 100644 .history/relay/channel/ollama/stream_20250916085416.go
 delete mode 100644 .history/relay/channel/ollama/stream_20250916085435.go

diff --git a/.history/relay/channel/ollama/stream_20250916085416.go b/.history/relay/channel/ollama/stream_20250916085416.go
deleted file mode 100644
index 964f11d90..000000000
diff --git a/.history/relay/channel/ollama/stream_20250916085435.go b/.history/relay/channel/ollama/stream_20250916085435.go
deleted file mode 100644
index 964f11d90..000000000
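The deleted .history/ directory is the kind of snapshot folder written by editor local-history extensions; assuming that is its origin here, a one-line ignore rule would keep such files out of future commits (a suggestion, not part of this series):

.history/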