fix: consolidate chat-completions compatibility fixes

- apply default mapped model only when scheduling fallback is actually used
- preserve reasoning in OpenAI-compatible output via reasoning_content and avoid invalid input function_call ids
2026-05-23 10:28:11 +00:00 · 2026-03-14 12:12:08 +08:00
parent 4588258d80
commit ece0606fed
8 changed files with 162 additions and 33 deletions
--- a/backend/internal/handler/openai_chat_completions.go
+++ b/backend/internal/handler/openai_chat_completions.go
@@ -181,13 +181,7 @@ func (h *OpenAIGatewayHandler) ChatCompletions(c *gin.Context) {
 		service.SetOpsLatencyMs(c, service.OpsRoutingLatencyMsKey, time.Since(routingStart).Milliseconds())
 		forwardStart := time.Now()
-		defaultMappedModel := ""
-		if apiKey.Group != nil {
-			defaultMappedModel = apiKey.Group.DefaultMappedModel
-		}
-		if fallbackModel := c.GetString("openai_chat_completions_fallback_model"); fallbackModel != "" {
-			defaultMappedModel = fallbackModel
-		}
+		defaultMappedModel := c.GetString("openai_chat_completions_fallback_model")
 		result, err := h.gatewayService.ForwardAsChatCompletions(c.Request.Context(), c, account, body, promptCacheKey, defaultMappedModel)
 		forwardDurationMs := time.Since(forwardStart).Milliseconds()
--- a/backend/internal/handler/openai_gateway_handler.go
+++ b/backend/internal/handler/openai_gateway_handler.go
@@ -655,14 +655,9 @@ func (h *OpenAIGatewayHandler) Messages(c *gin.Context) {
 		service.SetOpsLatencyMs(c, service.OpsRoutingLatencyMsKey, time.Since(routingStart).Milliseconds())
 		forwardStart := time.Now()
-		defaultMappedModel := ""
-		if apiKey.Group != nil {
-			defaultMappedModel = apiKey.Group.DefaultMappedModel
-		}
-		// 如果使用了降级模型调度，强制使用降级模型
-		if fallbackModel := c.GetString("openai_messages_fallback_model"); fallbackModel != "" {
-			defaultMappedModel = fallbackModel
-		}
+		// 仅在调度时实际触发了降级（原模型无可用账号、改用默认模型重试成功）时，
+		// 才将降级模型传给 Forward 层做模型替换；否则保持用户请求的原始模型。
+		defaultMappedModel := c.GetString("openai_messages_fallback_model")
 		result, err := h.gatewayService.ForwardAsAnthropic(c.Request.Context(), c, account, body, promptCacheKey, defaultMappedModel)
 		forwardDurationMs := time.Since(forwardStart).Milliseconds()
--- a/backend/internal/pkg/apicompat/anthropic_responses_test.go
+++ b/backend/internal/pkg/apicompat/anthropic_responses_test.go
@@ -105,6 +105,7 @@ func TestAnthropicToResponses_ToolUse(t *testing.T) {
 	assert.Equal(t, "assistant", items[1].Role)
 	assert.Equal(t, "function_call", items[2].Type)
 	assert.Equal(t, "fc_call_1", items[2].CallID)
 	assert.Empty(t, items[2].ID)
 	assert.Equal(t, "function_call_output", items[3].Type)
 	assert.Equal(t, "fc_call_1", items[3].CallID)
 	assert.Equal(t, "Sunny, 72°F", items[3].Output)
--- a/backend/internal/pkg/apicompat/anthropic_to_responses.go
+++ b/backend/internal/pkg/apicompat/anthropic_to_responses.go
@@ -277,7 +277,6 @@ func anthropicAssistantToResponses(raw json.RawMessage) ([]ResponsesInputItem, e
 			CallID:    fcID,
 			Name:      b.Name,
 			Arguments: args,
-			ID:        fcID,
 		})
 	}
--- a/backend/internal/pkg/apicompat/chatcompletions_responses_test.go
+++ b/backend/internal/pkg/apicompat/chatcompletions_responses_test.go
@@ -99,6 +99,7 @@ func TestChatCompletionsToResponses_ToolCalls(t *testing.T) {
 	// Check function_call item
 	assert.Equal(t, "function_call", items[1].Type)
 	assert.Equal(t, "call_1", items[1].CallID)
 	assert.Empty(t, items[1].ID)
 	assert.Equal(t, "ping", items[1].Name)
 	// Check function_call_output item
@@ -252,6 +253,55 @@ func TestChatCompletionsToResponses_AssistantWithTextAndToolCalls(t *testing.T)
 	assert.Equal(t, "user", items[0].Role)
 	assert.Equal(t, "assistant", items[1].Role)
 	assert.Equal(t, "function_call", items[2].Type)
 	assert.Empty(t, items[2].ID)
 }
 // TestChatCompletionsToResponses_AssistantArrayContentPreserved checks that an
 // assistant message whose content is a JSON array of text parts is converted
 // into a single output_text part holding the concatenated text ("A"+"B").
 func TestChatCompletionsToResponses_AssistantArrayContentPreserved(t *testing.T) {
 	req := &ChatCompletionsRequest{
 		Model: "gpt-4o",
 		Messages: []ChatMessage{
 			{Role: "user", Content: json.RawMessage(`"Hi"`)},
 			{Role: "assistant", Content: json.RawMessage(`[{"type":"text","text":"A"},{"type":"text","text":"B"}]`)},
 		},
 	}
 	resp, err := ChatCompletionsToResponses(req)
 	require.NoError(t, err)
 	var items []ResponsesInputItem
 	require.NoError(t, json.Unmarshal(resp.Input, &items))
 	// One item per input message: the user turn and the assistant turn.
 	require.Len(t, items, 2)
 	assert.Equal(t, "assistant", items[1].Role)
 	var parts []ResponsesContentPart
 	require.NoError(t, json.Unmarshal(items[1].Content, &parts))
 	// The two text parts are merged into one output_text part.
 	require.Len(t, parts, 1)
 	assert.Equal(t, "output_text", parts[0].Type)
 	assert.Equal(t, "AB", parts[0].Text)
 }
 // TestChatCompletionsToResponses_AssistantThinkingTagPreserved checks that a
 // "thinking" content part in an assistant message survives conversion as
 // <thinking>...</thinking>-wrapped text inside the single output_text part,
 // alongside the ordinary text part.
 func TestChatCompletionsToResponses_AssistantThinkingTagPreserved(t *testing.T) {
 	req := &ChatCompletionsRequest{
 		Model: "gpt-4o",
 		Messages: []ChatMessage{
 			{Role: "user", Content: json.RawMessage(`"Hi"`)},
 			{Role: "assistant", Content: json.RawMessage(`[{"type":"thinking","thinking":"internal plan"},{"type":"text","text":"final answer"}]`)},
 		},
 	}
 	resp, err := ChatCompletionsToResponses(req)
 	require.NoError(t, err)
 	var items []ResponsesInputItem
 	require.NoError(t, json.Unmarshal(resp.Input, &items))
 	require.Len(t, items, 2)
 	var parts []ResponsesContentPart
 	require.NoError(t, json.Unmarshal(items[1].Content, &parts))
 	// Thinking and text parts are flattened into one output_text part.
 	require.Len(t, parts, 1)
 	assert.Equal(t, "output_text", parts[0].Type)
 	assert.Contains(t, parts[0].Text, "<thinking>internal plan</thinking>")
 	assert.Contains(t, parts[0].Text, "final answer")
 }
 // ---------------------------------------------------------------------------
@@ -344,8 +394,8 @@ func TestResponsesToChatCompletions_Reasoning(t *testing.T) {
 	var content string
 	require.NoError(t, json.Unmarshal(chat.Choices[0].Message.Content, &content))
-	// Reasoning summary is prepended to text
+	assert.Equal(t, "The answer is 42.", content)
-	assert.Equal(t, "I thought about it.The answer is 42.", content)
+	assert.Equal(t, "I thought about it.", chat.Choices[0].Message.ReasoningContent)
 }
 func TestResponsesToChatCompletions_Incomplete(t *testing.T) {
@@ -582,8 +632,35 @@ func TestResponsesEventToChatChunks_ReasoningDelta(t *testing.T) {
 		Delta: "Thinking...",
 	}, state)
 	require.Len(t, chunks, 1)
 	require.NotNil(t, chunks[0].Choices[0].Delta.ReasoningContent)
 	assert.Equal(t, "Thinking...", *chunks[0].Choices[0].Delta.ReasoningContent)
 	chunks = ResponsesEventToChatChunks(&ResponsesStreamEvent{
 		Type: "response.reasoning_summary_text.done",
 	}, state)
 	require.Len(t, chunks, 0)
 }
 func TestResponsesEventToChatChunks_ReasoningThenTextAutoCloseTag(t *testing.T) {
 	state := NewResponsesEventToChatState()
 	state.Model = "gpt-4o"
 	state.SentRole = true
 	chunks := ResponsesEventToChatChunks(&ResponsesStreamEvent{
 		Type:  "response.reasoning_summary_text.delta",
 		Delta: "plan",
 	}, state)
 	require.Len(t, chunks, 1)
 	require.NotNil(t, chunks[0].Choices[0].Delta.ReasoningContent)
 	assert.Equal(t, "plan", *chunks[0].Choices[0].Delta.ReasoningContent)
 	chunks = ResponsesEventToChatChunks(&ResponsesStreamEvent{
 		Type:  "response.output_text.delta",
 		Delta: "answer",
 	}, state)
 	require.Len(t, chunks, 1)
 	require.NotNil(t, chunks[0].Choices[0].Delta.Content)
-	assert.Equal(t, "Thinking...", *chunks[0].Choices[0].Delta.Content)
+	assert.Equal(t, "answer", *chunks[0].Choices[0].Delta.Content)
 }
 func TestFinalizeResponsesChatStream(t *testing.T) {
--- a/backend/internal/pkg/apicompat/chatcompletions_to_responses.go
+++ b/backend/internal/pkg/apicompat/chatcompletions_to_responses.go
@@ -3,6 +3,7 @@ package apicompat
 import (
 	"encoding/json"
 	"fmt"
 	"strings"
 )
 // ChatCompletionsToResponses converts a Chat Completions request into a
@@ -174,8 +175,11 @@ func chatAssistantToResponses(m ChatMessage) ([]ResponsesInputItem, error) {
 	// Emit assistant message with output_text if content is non-empty.
 	if len(m.Content) > 0 {
-		var s string
-		if err := json.Unmarshal(m.Content, &s); err == nil && s != "" {
+		s, err := parseAssistantContent(m.Content)
+		if err != nil {
+			return nil, err
+		}
+		if s != "" {
 			parts := []ResponsesContentPart{{Type: "output_text", Text: s}}
 			partsJSON, err := json.Marshal(parts)
 			if err != nil {
@@ -196,13 +200,64 @@ func chatAssistantToResponses(m ChatMessage) ([]ResponsesInputItem, error) {
 			CallID:    tc.ID,
 			Name:      tc.Function.Name,
 			Arguments: args,
-			ID:        tc.ID,
 		})
 	}
 	return items, nil
 }
 // parseAssistantContent flattens an assistant message's content into one
 // plain-text string.
 //
 // Accepted encodings:
 //   - a JSON string, returned unchanged
 //   - a JSON array of typed parts, e.g. [{"type":"text","text":"..."}]
 //
 // Parts typed "thinking" or "reasoning" are wrapped in <thinking>...</thinking>
 // (preferring the "thinking" field, falling back to "text") so downstream
 // consumers can still tell them apart from ordinary text. Any other encoding
 // yields an empty string with a nil error: unsupported assistant content is
 // ignored rather than failing the whole request conversion.
 func parseAssistantContent(raw json.RawMessage) (string, error) {
 	if len(raw) == 0 {
 		return "", nil
 	}
 	var plain string
 	if json.Unmarshal(raw, &plain) == nil {
 		return plain, nil
 	}
 	var segments []map[string]any
 	if json.Unmarshal(raw, &segments) != nil {
 		// Neither a string nor an array of objects: treat as no content.
 		return "", nil
 	}
 	var out strings.Builder
 	for _, seg := range segments {
 		kind, _ := seg["type"].(string)
 		body, _ := seg["text"].(string)
 		if kind != "thinking" && kind != "reasoning" {
 			out.WriteString(body)
 			continue
 		}
 		// A non-empty "thinking" field takes precedence over "text".
 		if t, _ := seg["thinking"].(string); t != "" {
 			body = t
 		}
 		if body != "" {
 			out.WriteString("<thinking>")
 			out.WriteString(body)
 			out.WriteString("</thinking>")
 		}
 	}
 	return out.String(), nil
 }
 // chatToolToResponses converts a tool result message (role=tool) into a
 // function_call_output item.
 func chatToolToResponses(m ChatMessage) ([]ResponsesInputItem, error) {
--- a/backend/internal/pkg/apicompat/responses_to_chatcompletions.go
+++ b/backend/internal/pkg/apicompat/responses_to_chatcompletions.go
@@ -29,6 +29,7 @@ func ResponsesToChatCompletions(resp *ResponsesResponse, model string) *ChatComp
 	}
 	var contentText string
 	var reasoningText string
 	var toolCalls []ChatToolCall
 	for _, item := range resp.Output {
@@ -51,7 +52,7 @@ func ResponsesToChatCompletions(resp *ResponsesResponse, model string) *ChatComp
 		case "reasoning":
 			for _, s := range item.Summary {
 				if s.Type == "summary_text" && s.Text != "" {
-					contentText += s.Text
+					reasoningText += s.Text
 				}
 			}
 		case "web_search_call":
@@ -67,6 +68,9 @@ func ResponsesToChatCompletions(resp *ResponsesResponse, model string) *ChatComp
 		raw, _ := json.Marshal(contentText)
 		msg.Content = raw
 	}
 	if reasoningText != "" {
 		msg.ReasoningContent = reasoningText
 	}
 	finishReason := responsesStatusToChatFinishReason(resp.Status, resp.IncompleteDetails, toolCalls)
@@ -153,6 +157,8 @@ func ResponsesEventToChatChunks(evt *ResponsesStreamEvent, state *ResponsesEvent
 		return resToChatHandleFuncArgsDelta(evt, state)
 	case "response.reasoning_summary_text.delta":
 		return resToChatHandleReasoningDelta(evt, state)
 	case "response.reasoning_summary_text.done":
 		return nil
 	case "response.completed", "response.incomplete", "response.failed":
 		return resToChatHandleCompleted(evt, state)
 	default:
@@ -276,8 +282,8 @@ func resToChatHandleReasoningDelta(evt *ResponsesStreamEvent, state *ResponsesEv
 	if evt.Delta == "" {
 		return nil
 	}
-	content := evt.Delta
+	reasoning := evt.Delta
-	return []ChatCompletionsChunk{makeChatDeltaChunk(state, ChatDelta{Content: &content})}
+	return []ChatCompletionsChunk{makeChatDeltaChunk(state, ChatDelta{ReasoningContent: &reasoning})}
 }
 func resToChatHandleCompleted(evt *ResponsesStreamEvent, state *ResponsesEventToChatState) []ChatCompletionsChunk {
--- a/backend/internal/pkg/apicompat/types.go
+++ b/backend/internal/pkg/apicompat/types.go
@@ -361,11 +361,12 @@ type ChatStreamOptions struct {
 // ChatMessage is a single message in the Chat Completions conversation.
 type ChatMessage struct {
-	Role       string          `json:"role"` // "system" | "user" | "assistant" | "tool" | "function"
-	Content    json.RawMessage `json:"content,omitempty"`
-	Name       string          `json:"name,omitempty"`
-	ToolCalls  []ChatToolCall  `json:"tool_calls,omitempty"`
-	ToolCallID string          `json:"tool_call_id,omitempty"`
+	Role             string          `json:"role"` // "system" | "user" | "assistant" | "tool" | "function"
+	Content          json.RawMessage `json:"content,omitempty"`
+	ReasoningContent string          `json:"reasoning_content,omitempty"`
+	Name             string          `json:"name,omitempty"`
+	ToolCalls        []ChatToolCall  `json:"tool_calls,omitempty"`
+	ToolCallID       string          `json:"tool_call_id,omitempty"`
 	// Legacy function calling
 	FunctionCall *ChatFunctionCall `json:"function_call,omitempty"`
@@ -466,9 +467,10 @@ type ChatChunkChoice struct {
 // ChatDelta carries incremental content in a streaming chunk.
 type ChatDelta struct {
-	Role      string         `json:"role,omitempty"`
-	Content   *string        `json:"content,omitempty"` // pointer: omit when not present, null vs "" matters
-	ToolCalls []ChatToolCall `json:"tool_calls,omitempty"`
+	Role             string         `json:"role,omitempty"`
+	Content          *string        `json:"content,omitempty"` // pointer: omit when not present, null vs "" matters
+	ReasoningContent *string        `json:"reasoning_content,omitempty"`
+	ToolCalls        []ChatToolCall `json:"tool_calls,omitempty"`
 }
 // ---------------------------------------------------------------------------