fix: consolidate chat-completions compatibility fixes

- apply default mapped model only when scheduling fallback is actually used
- preserve reasoning in OpenAI-compatible output via reasoning_content and avoid invalid input function_call ids
2026-05-14 12:38:34 +00:00 · 2026-03-14 12:12:08 +08:00
parent 4588258d80
commit ece0606fed
8 changed files with 162 additions and 33 deletions
--- a/backend/internal/handler/openai_chat_completions.go
+++ b/backend/internal/handler/openai_chat_completions.go
@@ -181,13 +181,7 @@ func (h *OpenAIGatewayHandler) ChatCompletions(c *gin.Context) {
 		service.SetOpsLatencyMs(c, service.OpsRoutingLatencyMsKey, time.Since(routingStart).Milliseconds())
 		forwardStart := time.Now()

-		defaultMappedModel := ""
-		if apiKey.Group != nil {
-			defaultMappedModel = apiKey.Group.DefaultMappedModel
-		}
-		if fallbackModel := c.GetString("openai_chat_completions_fallback_model"); fallbackModel != "" {
-			defaultMappedModel = fallbackModel
-		}
+		defaultMappedModel := c.GetString("openai_chat_completions_fallback_model")
 		result, err := h.gatewayService.ForwardAsChatCompletions(c.Request.Context(), c, account, body, promptCacheKey, defaultMappedModel)

 		forwardDurationMs := time.Since(forwardStart).Milliseconds()
--- a/backend/internal/handler/openai_gateway_handler.go
+++ b/backend/internal/handler/openai_gateway_handler.go
@@ -655,14 +655,9 @@ func (h *OpenAIGatewayHandler) Messages(c *gin.Context) {
 		service.SetOpsLatencyMs(c, service.OpsRoutingLatencyMsKey, time.Since(routingStart).Milliseconds())
 		forwardStart := time.Now()

-		defaultMappedModel := ""
-		if apiKey.Group != nil {
-			defaultMappedModel = apiKey.Group.DefaultMappedModel
-		}
-		// 如果使用了降级模型调度，强制使用降级模型
-		if fallbackModel := c.GetString("openai_messages_fallback_model"); fallbackModel != "" {
-			defaultMappedModel = fallbackModel
-		}
+		// 仅在调度时实际触发了降级（原模型无可用账号、改用默认模型重试成功）时，
+		// 才将降级模型传给 Forward 层做模型替换；否则保持用户请求的原始模型。
+		defaultMappedModel := c.GetString("openai_messages_fallback_model")
 		result, err := h.gatewayService.ForwardAsAnthropic(c.Request.Context(), c, account, body, promptCacheKey, defaultMappedModel)

 		forwardDurationMs := time.Since(forwardStart).Milliseconds()
--- a/backend/internal/pkg/apicompat/anthropic_responses_test.go
+++ b/backend/internal/pkg/apicompat/anthropic_responses_test.go
@@ -105,6 +105,7 @@ func TestAnthropicToResponses_ToolUse(t *testing.T) {
 	assert.Equal(t, "assistant", items[1].Role)
 	assert.Equal(t, "function_call", items[2].Type)
 	assert.Equal(t, "fc_call_1", items[2].CallID)
+	assert.Empty(t, items[2].ID)
 	assert.Equal(t, "function_call_output", items[3].Type)
 	assert.Equal(t, "fc_call_1", items[3].CallID)
 	assert.Equal(t, "Sunny, 72°F", items[3].Output)
--- a/backend/internal/pkg/apicompat/anthropic_to_responses.go
+++ b/backend/internal/pkg/apicompat/anthropic_to_responses.go
@@ -277,7 +277,6 @@ func anthropicAssistantToResponses(raw json.RawMessage) ([]ResponsesInputItem, e
 			CallID:    fcID,
 			Name:      b.Name,
 			Arguments: args,
-			ID:        fcID,
 		})
 	}

--- a/backend/internal/pkg/apicompat/chatcompletions_responses_test.go
+++ b/backend/internal/pkg/apicompat/chatcompletions_responses_test.go
@@ -99,6 +99,7 @@ func TestChatCompletionsToResponses_ToolCalls(t *testing.T) {
 	// Check function_call item
 	assert.Equal(t, "function_call", items[1].Type)
 	assert.Equal(t, "call_1", items[1].CallID)
+	assert.Empty(t, items[1].ID)
 	assert.Equal(t, "ping", items[1].Name)

 	// Check function_call_output item
@@ -252,6 +253,55 @@ func TestChatCompletionsToResponses_AssistantWithTextAndToolCalls(t *testing.T)
 	assert.Equal(t, "user", items[0].Role)
 	assert.Equal(t, "assistant", items[1].Role)
 	assert.Equal(t, "function_call", items[2].Type)
+	assert.Empty(t, items[2].ID)
+}
+
+func TestChatCompletionsToResponses_AssistantArrayContentPreserved(t *testing.T) {
+	req := &ChatCompletionsRequest{
+		Model: "gpt-4o",
+		Messages: []ChatMessage{
+			{Role: "user", Content: json.RawMessage(`"Hi"`)},
+			{Role: "assistant", Content: json.RawMessage(`[{"type":"text","text":"A"},{"type":"text","text":"B"}]`)},
+		},
+	}
+
+	resp, err := ChatCompletionsToResponses(req)
+	require.NoError(t, err)
+
+	var items []ResponsesInputItem
+	require.NoError(t, json.Unmarshal(resp.Input, &items))
+	require.Len(t, items, 2)
+	assert.Equal(t, "assistant", items[1].Role)
+
+	var parts []ResponsesContentPart
+	require.NoError(t, json.Unmarshal(items[1].Content, &parts))
+	require.Len(t, parts, 1)
+	assert.Equal(t, "output_text", parts[0].Type)
+	assert.Equal(t, "AB", parts[0].Text)
+}
+
+func TestChatCompletionsToResponses_AssistantThinkingTagPreserved(t *testing.T) {
+	req := &ChatCompletionsRequest{
+		Model: "gpt-4o",
+		Messages: []ChatMessage{
+			{Role: "user", Content: json.RawMessage(`"Hi"`)},
+			{Role: "assistant", Content: json.RawMessage(`[{"type":"thinking","thinking":"internal plan"},{"type":"text","text":"final answer"}]`)},
+		},
+	}
+
+	resp, err := ChatCompletionsToResponses(req)
+	require.NoError(t, err)
+
+	var items []ResponsesInputItem
+	require.NoError(t, json.Unmarshal(resp.Input, &items))
+	require.Len(t, items, 2)
+
+	var parts []ResponsesContentPart
+	require.NoError(t, json.Unmarshal(items[1].Content, &parts))
+	require.Len(t, parts, 1)
+	assert.Equal(t, "output_text", parts[0].Type)
+	assert.Contains(t, parts[0].Text, "<thinking>internal plan</thinking>")
+	assert.Contains(t, parts[0].Text, "final answer")
 }

 // ---------------------------------------------------------------------------
@@ -344,8 +394,8 @@ func TestResponsesToChatCompletions_Reasoning(t *testing.T) {

 	var content string
 	require.NoError(t, json.Unmarshal(chat.Choices[0].Message.Content, &content))
-	// Reasoning summary is prepended to text
-	assert.Equal(t, "I thought about it.The answer is 42.", content)
+	assert.Equal(t, "The answer is 42.", content)
+	assert.Equal(t, "I thought about it.", chat.Choices[0].Message.ReasoningContent)
 }

 func TestResponsesToChatCompletions_Incomplete(t *testing.T) {
@@ -582,8 +632,35 @@ func TestResponsesEventToChatChunks_ReasoningDelta(t *testing.T) {
 		Delta: "Thinking...",
 	}, state)
 	require.Len(t, chunks, 1)
+	require.NotNil(t, chunks[0].Choices[0].Delta.ReasoningContent)
+	assert.Equal(t, "Thinking...", *chunks[0].Choices[0].Delta.ReasoningContent)
+
+	chunks = ResponsesEventToChatChunks(&ResponsesStreamEvent{
+		Type: "response.reasoning_summary_text.done",
+	}, state)
+	require.Len(t, chunks, 0)
+}
+
+func TestResponsesEventToChatChunks_ReasoningThenTextAutoCloseTag(t *testing.T) {
+	state := NewResponsesEventToChatState()
+	state.Model = "gpt-4o"
+	state.SentRole = true
+
+	chunks := ResponsesEventToChatChunks(&ResponsesStreamEvent{
+		Type:  "response.reasoning_summary_text.delta",
+		Delta: "plan",
+	}, state)
+	require.Len(t, chunks, 1)
+	require.NotNil(t, chunks[0].Choices[0].Delta.ReasoningContent)
+	assert.Equal(t, "plan", *chunks[0].Choices[0].Delta.ReasoningContent)
+
+	chunks = ResponsesEventToChatChunks(&ResponsesStreamEvent{
+		Type:  "response.output_text.delta",
+		Delta: "answer",
+	}, state)
+	require.Len(t, chunks, 1)
 	require.NotNil(t, chunks[0].Choices[0].Delta.Content)
-	assert.Equal(t, "Thinking...", *chunks[0].Choices[0].Delta.Content)
+	assert.Equal(t, "answer", *chunks[0].Choices[0].Delta.Content)
 }

 func TestFinalizeResponsesChatStream(t *testing.T) {
--- a/backend/internal/pkg/apicompat/chatcompletions_to_responses.go
+++ b/backend/internal/pkg/apicompat/chatcompletions_to_responses.go
@@ -3,6 +3,7 @@ package apicompat
 import (
 	"encoding/json"
 	"fmt"
+	"strings"
 )

 // ChatCompletionsToResponses converts a Chat Completions request into a
@@ -174,8 +175,11 @@ func chatAssistantToResponses(m ChatMessage) ([]ResponsesInputItem, error) {

 	// Emit assistant message with output_text if content is non-empty.
 	if len(m.Content) > 0 {
-		var s string
-		if err := json.Unmarshal(m.Content, &s); err == nil && s != "" {
+		s, err := parseAssistantContent(m.Content)
+		if err != nil {
+			return nil, err
+		}
+		if s != "" {
 			parts := []ResponsesContentPart{{Type: "output_text", Text: s}}
 			partsJSON, err := json.Marshal(parts)
 			if err != nil {
@@ -196,13 +200,64 @@ func chatAssistantToResponses(m ChatMessage) ([]ResponsesInputItem, error) {
 			CallID:    tc.ID,
 			Name:      tc.Function.Name,
 			Arguments: args,
-			ID:        tc.ID,
 		})
 	}

 	return items, nil
 }

+// parseAssistantContent returns assistant content as plain text.
+//
+// Supported formats:
+// - JSON string
+// - JSON array of typed parts (e.g. [{"type":"text","text":"..."}])
+//
+// For structured thinking/reasoning parts, it preserves semantics by wrapping
+// the text in explicit tags so downstream can still distinguish it from normal text.
+func parseAssistantContent(raw json.RawMessage) (string, error) {
+	if len(raw) == 0 {
+		return "", nil
+	}
+
+	var s string
+	if err := json.Unmarshal(raw, &s); err == nil {
+		return s, nil
+	}
+
+	var parts []map[string]any
+	if err := json.Unmarshal(raw, &parts); err != nil {
+		// Keep compatibility with prior behavior: unsupported assistant content
+		// formats are ignored instead of failing the whole request conversion.
+		return "", nil
+	}
+
+	var b strings.Builder
+	for _, p := range parts {
+		typ, _ := p["type"].(string)
+		text, _ := p["text"].(string)
+		thinking, _ := p["thinking"].(string)
+
+		switch typ {
+		case "thinking", "reasoning":
+			if thinking != "" {
+				b.WriteString("<thinking>")
+				b.WriteString(thinking)
+				b.WriteString("</thinking>")
+			} else if text != "" {
+				b.WriteString("<thinking>")
+				b.WriteString(text)
+				b.WriteString("</thinking>")
+			}
+		default:
+			if text != "" {
+				b.WriteString(text)
+			}
+		}
+	}
+
+	return b.String(), nil
+}
+
 // chatToolToResponses converts a tool result message (role=tool) into a
 // function_call_output item.
 func chatToolToResponses(m ChatMessage) ([]ResponsesInputItem, error) {
--- a/backend/internal/pkg/apicompat/responses_to_chatcompletions.go
+++ b/backend/internal/pkg/apicompat/responses_to_chatcompletions.go
@@ -29,6 +29,7 @@ func ResponsesToChatCompletions(resp *ResponsesResponse, model string) *ChatComp
 	}

 	var contentText string
+	var reasoningText string
 	var toolCalls []ChatToolCall

 	for _, item := range resp.Output {
@@ -51,7 +52,7 @@ func ResponsesToChatCompletions(resp *ResponsesResponse, model string) *ChatComp
 		case "reasoning":
 			for _, s := range item.Summary {
 				if s.Type == "summary_text" && s.Text != "" {
-					contentText += s.Text
+					reasoningText += s.Text
 				}
 			}
 		case "web_search_call":
@@ -67,6 +68,9 @@ func ResponsesToChatCompletions(resp *ResponsesResponse, model string) *ChatComp
 		raw, _ := json.Marshal(contentText)
 		msg.Content = raw
 	}
+	if reasoningText != "" {
+		msg.ReasoningContent = reasoningText
+	}

 	finishReason := responsesStatusToChatFinishReason(resp.Status, resp.IncompleteDetails, toolCalls)

@@ -153,6 +157,8 @@ func ResponsesEventToChatChunks(evt *ResponsesStreamEvent, state *ResponsesEvent
 		return resToChatHandleFuncArgsDelta(evt, state)
 	case "response.reasoning_summary_text.delta":
 		return resToChatHandleReasoningDelta(evt, state)
+	case "response.reasoning_summary_text.done":
+		return nil
 	case "response.completed", "response.incomplete", "response.failed":
 		return resToChatHandleCompleted(evt, state)
 	default:
@@ -276,8 +282,8 @@ func resToChatHandleReasoningDelta(evt *ResponsesStreamEvent, state *ResponsesEv
 	if evt.Delta == "" {
 		return nil
 	}
-	content := evt.Delta
-	return []ChatCompletionsChunk{makeChatDeltaChunk(state, ChatDelta{Content: &content})}
+	reasoning := evt.Delta
+	return []ChatCompletionsChunk{makeChatDeltaChunk(state, ChatDelta{ReasoningContent: &reasoning})}
 }

 func resToChatHandleCompleted(evt *ResponsesStreamEvent, state *ResponsesEventToChatState) []ChatCompletionsChunk {
--- a/backend/internal/pkg/apicompat/types.go
+++ b/backend/internal/pkg/apicompat/types.go
@@ -361,11 +361,12 @@ type ChatStreamOptions struct {

 // ChatMessage is a single message in the Chat Completions conversation.
 type ChatMessage struct {
-	Role       string          `json:"role"` // "system" | "user" | "assistant" | "tool" | "function"
-	Content    json.RawMessage `json:"content,omitempty"`
-	Name       string          `json:"name,omitempty"`
-	ToolCalls  []ChatToolCall  `json:"tool_calls,omitempty"`
-	ToolCallID string          `json:"tool_call_id,omitempty"`
+	Role             string          `json:"role"` // "system" | "user" | "assistant" | "tool" | "function"
+	Content          json.RawMessage `json:"content,omitempty"`
+	ReasoningContent string          `json:"reasoning_content,omitempty"`
+	Name             string          `json:"name,omitempty"`
+	ToolCalls        []ChatToolCall  `json:"tool_calls,omitempty"`
+	ToolCallID       string          `json:"tool_call_id,omitempty"`

 	// Legacy function calling
 	FunctionCall *ChatFunctionCall `json:"function_call,omitempty"`
@@ -466,9 +467,10 @@ type ChatChunkChoice struct {

 // ChatDelta carries incremental content in a streaming chunk.
 type ChatDelta struct {
-	Role      string         `json:"role,omitempty"`
-	Content   *string        `json:"content,omitempty"` // pointer: omit when not present, null vs "" matters
-	ToolCalls []ChatToolCall `json:"tool_calls,omitempty"`
+	Role             string         `json:"role,omitempty"`
+	Content          *string        `json:"content,omitempty"` // pointer: omit when not present, null vs "" matters
+	ReasoningContent *string        `json:"reasoning_content,omitempty"`
+	ToolCalls        []ChatToolCall `json:"tool_calls,omitempty"`
 }

 // ---------------------------------------------------------------------------