mirror of
https://github.com/Wei-Shaw/sub2api.git
synced 2026-03-30 04:05:29 +00:00
fix: consolidate chat-completions compatibility fixes
- apply default mapped model only when scheduling fallback is actually used
- preserve reasoning in OpenAI-compatible output via reasoning_content and avoid invalid input function_call ids
This commit is contained in:
@@ -181,13 +181,7 @@ func (h *OpenAIGatewayHandler) ChatCompletions(c *gin.Context) {
|
|||||||
service.SetOpsLatencyMs(c, service.OpsRoutingLatencyMsKey, time.Since(routingStart).Milliseconds())
|
service.SetOpsLatencyMs(c, service.OpsRoutingLatencyMsKey, time.Since(routingStart).Milliseconds())
|
||||||
forwardStart := time.Now()
|
forwardStart := time.Now()
|
||||||
|
|
||||||
defaultMappedModel := ""
|
defaultMappedModel := c.GetString("openai_chat_completions_fallback_model")
|
||||||
if apiKey.Group != nil {
|
|
||||||
defaultMappedModel = apiKey.Group.DefaultMappedModel
|
|
||||||
}
|
|
||||||
if fallbackModel := c.GetString("openai_chat_completions_fallback_model"); fallbackModel != "" {
|
|
||||||
defaultMappedModel = fallbackModel
|
|
||||||
}
|
|
||||||
result, err := h.gatewayService.ForwardAsChatCompletions(c.Request.Context(), c, account, body, promptCacheKey, defaultMappedModel)
|
result, err := h.gatewayService.ForwardAsChatCompletions(c.Request.Context(), c, account, body, promptCacheKey, defaultMappedModel)
|
||||||
|
|
||||||
forwardDurationMs := time.Since(forwardStart).Milliseconds()
|
forwardDurationMs := time.Since(forwardStart).Milliseconds()
|
||||||
|
|||||||
@@ -655,14 +655,9 @@ func (h *OpenAIGatewayHandler) Messages(c *gin.Context) {
|
|||||||
service.SetOpsLatencyMs(c, service.OpsRoutingLatencyMsKey, time.Since(routingStart).Milliseconds())
|
service.SetOpsLatencyMs(c, service.OpsRoutingLatencyMsKey, time.Since(routingStart).Milliseconds())
|
||||||
forwardStart := time.Now()
|
forwardStart := time.Now()
|
||||||
|
|
||||||
defaultMappedModel := ""
|
// 仅在调度时实际触发了降级(原模型无可用账号、改用默认模型重试成功)时,
|
||||||
if apiKey.Group != nil {
|
// 才将降级模型传给 Forward 层做模型替换;否则保持用户请求的原始模型。
|
||||||
defaultMappedModel = apiKey.Group.DefaultMappedModel
|
defaultMappedModel := c.GetString("openai_messages_fallback_model")
|
||||||
}
|
|
||||||
// 如果使用了降级模型调度,强制使用降级模型
|
|
||||||
if fallbackModel := c.GetString("openai_messages_fallback_model"); fallbackModel != "" {
|
|
||||||
defaultMappedModel = fallbackModel
|
|
||||||
}
|
|
||||||
result, err := h.gatewayService.ForwardAsAnthropic(c.Request.Context(), c, account, body, promptCacheKey, defaultMappedModel)
|
result, err := h.gatewayService.ForwardAsAnthropic(c.Request.Context(), c, account, body, promptCacheKey, defaultMappedModel)
|
||||||
|
|
||||||
forwardDurationMs := time.Since(forwardStart).Milliseconds()
|
forwardDurationMs := time.Since(forwardStart).Milliseconds()
|
||||||
|
|||||||
@@ -105,6 +105,7 @@ func TestAnthropicToResponses_ToolUse(t *testing.T) {
|
|||||||
assert.Equal(t, "assistant", items[1].Role)
|
assert.Equal(t, "assistant", items[1].Role)
|
||||||
assert.Equal(t, "function_call", items[2].Type)
|
assert.Equal(t, "function_call", items[2].Type)
|
||||||
assert.Equal(t, "fc_call_1", items[2].CallID)
|
assert.Equal(t, "fc_call_1", items[2].CallID)
|
||||||
|
assert.Empty(t, items[2].ID)
|
||||||
assert.Equal(t, "function_call_output", items[3].Type)
|
assert.Equal(t, "function_call_output", items[3].Type)
|
||||||
assert.Equal(t, "fc_call_1", items[3].CallID)
|
assert.Equal(t, "fc_call_1", items[3].CallID)
|
||||||
assert.Equal(t, "Sunny, 72°F", items[3].Output)
|
assert.Equal(t, "Sunny, 72°F", items[3].Output)
|
||||||
|
|||||||
@@ -277,7 +277,6 @@ func anthropicAssistantToResponses(raw json.RawMessage) ([]ResponsesInputItem, e
|
|||||||
CallID: fcID,
|
CallID: fcID,
|
||||||
Name: b.Name,
|
Name: b.Name,
|
||||||
Arguments: args,
|
Arguments: args,
|
||||||
ID: fcID,
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -99,6 +99,7 @@ func TestChatCompletionsToResponses_ToolCalls(t *testing.T) {
|
|||||||
// Check function_call item
|
// Check function_call item
|
||||||
assert.Equal(t, "function_call", items[1].Type)
|
assert.Equal(t, "function_call", items[1].Type)
|
||||||
assert.Equal(t, "call_1", items[1].CallID)
|
assert.Equal(t, "call_1", items[1].CallID)
|
||||||
|
assert.Empty(t, items[1].ID)
|
||||||
assert.Equal(t, "ping", items[1].Name)
|
assert.Equal(t, "ping", items[1].Name)
|
||||||
|
|
||||||
// Check function_call_output item
|
// Check function_call_output item
|
||||||
@@ -252,6 +253,55 @@ func TestChatCompletionsToResponses_AssistantWithTextAndToolCalls(t *testing.T)
|
|||||||
assert.Equal(t, "user", items[0].Role)
|
assert.Equal(t, "user", items[0].Role)
|
||||||
assert.Equal(t, "assistant", items[1].Role)
|
assert.Equal(t, "assistant", items[1].Role)
|
||||||
assert.Equal(t, "function_call", items[2].Type)
|
assert.Equal(t, "function_call", items[2].Type)
|
||||||
|
assert.Empty(t, items[2].ID)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestChatCompletionsToResponses_AssistantArrayContentPreserved(t *testing.T) {
|
||||||
|
req := &ChatCompletionsRequest{
|
||||||
|
Model: "gpt-4o",
|
||||||
|
Messages: []ChatMessage{
|
||||||
|
{Role: "user", Content: json.RawMessage(`"Hi"`)},
|
||||||
|
{Role: "assistant", Content: json.RawMessage(`[{"type":"text","text":"A"},{"type":"text","text":"B"}]`)},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := ChatCompletionsToResponses(req)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
var items []ResponsesInputItem
|
||||||
|
require.NoError(t, json.Unmarshal(resp.Input, &items))
|
||||||
|
require.Len(t, items, 2)
|
||||||
|
assert.Equal(t, "assistant", items[1].Role)
|
||||||
|
|
||||||
|
var parts []ResponsesContentPart
|
||||||
|
require.NoError(t, json.Unmarshal(items[1].Content, &parts))
|
||||||
|
require.Len(t, parts, 1)
|
||||||
|
assert.Equal(t, "output_text", parts[0].Type)
|
||||||
|
assert.Equal(t, "AB", parts[0].Text)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestChatCompletionsToResponses_AssistantThinkingTagPreserved(t *testing.T) {
|
||||||
|
req := &ChatCompletionsRequest{
|
||||||
|
Model: "gpt-4o",
|
||||||
|
Messages: []ChatMessage{
|
||||||
|
{Role: "user", Content: json.RawMessage(`"Hi"`)},
|
||||||
|
{Role: "assistant", Content: json.RawMessage(`[{"type":"thinking","thinking":"internal plan"},{"type":"text","text":"final answer"}]`)},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := ChatCompletionsToResponses(req)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
var items []ResponsesInputItem
|
||||||
|
require.NoError(t, json.Unmarshal(resp.Input, &items))
|
||||||
|
require.Len(t, items, 2)
|
||||||
|
|
||||||
|
var parts []ResponsesContentPart
|
||||||
|
require.NoError(t, json.Unmarshal(items[1].Content, &parts))
|
||||||
|
require.Len(t, parts, 1)
|
||||||
|
assert.Equal(t, "output_text", parts[0].Type)
|
||||||
|
assert.Contains(t, parts[0].Text, "<thinking>internal plan</thinking>")
|
||||||
|
assert.Contains(t, parts[0].Text, "final answer")
|
||||||
}
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
@@ -344,8 +394,8 @@ func TestResponsesToChatCompletions_Reasoning(t *testing.T) {
|
|||||||
|
|
||||||
var content string
|
var content string
|
||||||
require.NoError(t, json.Unmarshal(chat.Choices[0].Message.Content, &content))
|
require.NoError(t, json.Unmarshal(chat.Choices[0].Message.Content, &content))
|
||||||
// Reasoning summary is prepended to text
|
assert.Equal(t, "The answer is 42.", content)
|
||||||
assert.Equal(t, "I thought about it.The answer is 42.", content)
|
assert.Equal(t, "I thought about it.", chat.Choices[0].Message.ReasoningContent)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestResponsesToChatCompletions_Incomplete(t *testing.T) {
|
func TestResponsesToChatCompletions_Incomplete(t *testing.T) {
|
||||||
@@ -582,8 +632,35 @@ func TestResponsesEventToChatChunks_ReasoningDelta(t *testing.T) {
|
|||||||
Delta: "Thinking...",
|
Delta: "Thinking...",
|
||||||
}, state)
|
}, state)
|
||||||
require.Len(t, chunks, 1)
|
require.Len(t, chunks, 1)
|
||||||
|
require.NotNil(t, chunks[0].Choices[0].Delta.ReasoningContent)
|
||||||
|
assert.Equal(t, "Thinking...", *chunks[0].Choices[0].Delta.ReasoningContent)
|
||||||
|
|
||||||
|
chunks = ResponsesEventToChatChunks(&ResponsesStreamEvent{
|
||||||
|
Type: "response.reasoning_summary_text.done",
|
||||||
|
}, state)
|
||||||
|
require.Len(t, chunks, 0)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestResponsesEventToChatChunks_ReasoningThenTextAutoCloseTag(t *testing.T) {
|
||||||
|
state := NewResponsesEventToChatState()
|
||||||
|
state.Model = "gpt-4o"
|
||||||
|
state.SentRole = true
|
||||||
|
|
||||||
|
chunks := ResponsesEventToChatChunks(&ResponsesStreamEvent{
|
||||||
|
Type: "response.reasoning_summary_text.delta",
|
||||||
|
Delta: "plan",
|
||||||
|
}, state)
|
||||||
|
require.Len(t, chunks, 1)
|
||||||
|
require.NotNil(t, chunks[0].Choices[0].Delta.ReasoningContent)
|
||||||
|
assert.Equal(t, "plan", *chunks[0].Choices[0].Delta.ReasoningContent)
|
||||||
|
|
||||||
|
chunks = ResponsesEventToChatChunks(&ResponsesStreamEvent{
|
||||||
|
Type: "response.output_text.delta",
|
||||||
|
Delta: "answer",
|
||||||
|
}, state)
|
||||||
|
require.Len(t, chunks, 1)
|
||||||
require.NotNil(t, chunks[0].Choices[0].Delta.Content)
|
require.NotNil(t, chunks[0].Choices[0].Delta.Content)
|
||||||
assert.Equal(t, "Thinking...", *chunks[0].Choices[0].Delta.Content)
|
assert.Equal(t, "answer", *chunks[0].Choices[0].Delta.Content)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestFinalizeResponsesChatStream(t *testing.T) {
|
func TestFinalizeResponsesChatStream(t *testing.T) {
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ package apicompat
|
|||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
// ChatCompletionsToResponses converts a Chat Completions request into a
|
// ChatCompletionsToResponses converts a Chat Completions request into a
|
||||||
@@ -174,8 +175,11 @@ func chatAssistantToResponses(m ChatMessage) ([]ResponsesInputItem, error) {
|
|||||||
|
|
||||||
// Emit assistant message with output_text if content is non-empty.
|
// Emit assistant message with output_text if content is non-empty.
|
||||||
if len(m.Content) > 0 {
|
if len(m.Content) > 0 {
|
||||||
var s string
|
s, err := parseAssistantContent(m.Content)
|
||||||
if err := json.Unmarshal(m.Content, &s); err == nil && s != "" {
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if s != "" {
|
||||||
parts := []ResponsesContentPart{{Type: "output_text", Text: s}}
|
parts := []ResponsesContentPart{{Type: "output_text", Text: s}}
|
||||||
partsJSON, err := json.Marshal(parts)
|
partsJSON, err := json.Marshal(parts)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -196,13 +200,64 @@ func chatAssistantToResponses(m ChatMessage) ([]ResponsesInputItem, error) {
|
|||||||
CallID: tc.ID,
|
CallID: tc.ID,
|
||||||
Name: tc.Function.Name,
|
Name: tc.Function.Name,
|
||||||
Arguments: args,
|
Arguments: args,
|
||||||
ID: tc.ID,
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
return items, nil
|
return items, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// parseAssistantContent returns assistant content as plain text.
|
||||||
|
//
|
||||||
|
// Supported formats:
|
||||||
|
// - JSON string
|
||||||
|
// - JSON array of typed parts (e.g. [{"type":"text","text":"..."}])
|
||||||
|
//
|
||||||
|
// For structured thinking/reasoning parts, it preserves semantics by wrapping
|
||||||
|
// the text in explicit tags so downstream can still distinguish it from normal text.
|
||||||
|
func parseAssistantContent(raw json.RawMessage) (string, error) {
|
||||||
|
if len(raw) == 0 {
|
||||||
|
return "", nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var s string
|
||||||
|
if err := json.Unmarshal(raw, &s); err == nil {
|
||||||
|
return s, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var parts []map[string]any
|
||||||
|
if err := json.Unmarshal(raw, &parts); err != nil {
|
||||||
|
// Keep compatibility with prior behavior: unsupported assistant content
|
||||||
|
// formats are ignored instead of failing the whole request conversion.
|
||||||
|
return "", nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var b strings.Builder
|
||||||
|
for _, p := range parts {
|
||||||
|
typ, _ := p["type"].(string)
|
||||||
|
text, _ := p["text"].(string)
|
||||||
|
thinking, _ := p["thinking"].(string)
|
||||||
|
|
||||||
|
switch typ {
|
||||||
|
case "thinking", "reasoning":
|
||||||
|
if thinking != "" {
|
||||||
|
b.WriteString("<thinking>")
|
||||||
|
b.WriteString(thinking)
|
||||||
|
b.WriteString("</thinking>")
|
||||||
|
} else if text != "" {
|
||||||
|
b.WriteString("<thinking>")
|
||||||
|
b.WriteString(text)
|
||||||
|
b.WriteString("</thinking>")
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
if text != "" {
|
||||||
|
b.WriteString(text)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return b.String(), nil
|
||||||
|
}
|
||||||
|
|
||||||
// chatToolToResponses converts a tool result message (role=tool) into a
|
// chatToolToResponses converts a tool result message (role=tool) into a
|
||||||
// function_call_output item.
|
// function_call_output item.
|
||||||
func chatToolToResponses(m ChatMessage) ([]ResponsesInputItem, error) {
|
func chatToolToResponses(m ChatMessage) ([]ResponsesInputItem, error) {
|
||||||
|
|||||||
@@ -29,6 +29,7 @@ func ResponsesToChatCompletions(resp *ResponsesResponse, model string) *ChatComp
|
|||||||
}
|
}
|
||||||
|
|
||||||
var contentText string
|
var contentText string
|
||||||
|
var reasoningText string
|
||||||
var toolCalls []ChatToolCall
|
var toolCalls []ChatToolCall
|
||||||
|
|
||||||
for _, item := range resp.Output {
|
for _, item := range resp.Output {
|
||||||
@@ -51,7 +52,7 @@ func ResponsesToChatCompletions(resp *ResponsesResponse, model string) *ChatComp
|
|||||||
case "reasoning":
|
case "reasoning":
|
||||||
for _, s := range item.Summary {
|
for _, s := range item.Summary {
|
||||||
if s.Type == "summary_text" && s.Text != "" {
|
if s.Type == "summary_text" && s.Text != "" {
|
||||||
contentText += s.Text
|
reasoningText += s.Text
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
case "web_search_call":
|
case "web_search_call":
|
||||||
@@ -67,6 +68,9 @@ func ResponsesToChatCompletions(resp *ResponsesResponse, model string) *ChatComp
|
|||||||
raw, _ := json.Marshal(contentText)
|
raw, _ := json.Marshal(contentText)
|
||||||
msg.Content = raw
|
msg.Content = raw
|
||||||
}
|
}
|
||||||
|
if reasoningText != "" {
|
||||||
|
msg.ReasoningContent = reasoningText
|
||||||
|
}
|
||||||
|
|
||||||
finishReason := responsesStatusToChatFinishReason(resp.Status, resp.IncompleteDetails, toolCalls)
|
finishReason := responsesStatusToChatFinishReason(resp.Status, resp.IncompleteDetails, toolCalls)
|
||||||
|
|
||||||
@@ -153,6 +157,8 @@ func ResponsesEventToChatChunks(evt *ResponsesStreamEvent, state *ResponsesEvent
|
|||||||
return resToChatHandleFuncArgsDelta(evt, state)
|
return resToChatHandleFuncArgsDelta(evt, state)
|
||||||
case "response.reasoning_summary_text.delta":
|
case "response.reasoning_summary_text.delta":
|
||||||
return resToChatHandleReasoningDelta(evt, state)
|
return resToChatHandleReasoningDelta(evt, state)
|
||||||
|
case "response.reasoning_summary_text.done":
|
||||||
|
return nil
|
||||||
case "response.completed", "response.incomplete", "response.failed":
|
case "response.completed", "response.incomplete", "response.failed":
|
||||||
return resToChatHandleCompleted(evt, state)
|
return resToChatHandleCompleted(evt, state)
|
||||||
default:
|
default:
|
||||||
@@ -276,8 +282,8 @@ func resToChatHandleReasoningDelta(evt *ResponsesStreamEvent, state *ResponsesEv
|
|||||||
if evt.Delta == "" {
|
if evt.Delta == "" {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
content := evt.Delta
|
reasoning := evt.Delta
|
||||||
return []ChatCompletionsChunk{makeChatDeltaChunk(state, ChatDelta{Content: &content})}
|
return []ChatCompletionsChunk{makeChatDeltaChunk(state, ChatDelta{ReasoningContent: &reasoning})}
|
||||||
}
|
}
|
||||||
|
|
||||||
func resToChatHandleCompleted(evt *ResponsesStreamEvent, state *ResponsesEventToChatState) []ChatCompletionsChunk {
|
func resToChatHandleCompleted(evt *ResponsesStreamEvent, state *ResponsesEventToChatState) []ChatCompletionsChunk {
|
||||||
|
|||||||
@@ -361,11 +361,12 @@ type ChatStreamOptions struct {
|
|||||||
|
|
||||||
// ChatMessage is a single message in the Chat Completions conversation.
|
// ChatMessage is a single message in the Chat Completions conversation.
|
||||||
type ChatMessage struct {
|
type ChatMessage struct {
|
||||||
Role string `json:"role"` // "system" | "user" | "assistant" | "tool" | "function"
|
Role string `json:"role"` // "system" | "user" | "assistant" | "tool" | "function"
|
||||||
Content json.RawMessage `json:"content,omitempty"`
|
Content json.RawMessage `json:"content,omitempty"`
|
||||||
Name string `json:"name,omitempty"`
|
ReasoningContent string `json:"reasoning_content,omitempty"`
|
||||||
ToolCalls []ChatToolCall `json:"tool_calls,omitempty"`
|
Name string `json:"name,omitempty"`
|
||||||
ToolCallID string `json:"tool_call_id,omitempty"`
|
ToolCalls []ChatToolCall `json:"tool_calls,omitempty"`
|
||||||
|
ToolCallID string `json:"tool_call_id,omitempty"`
|
||||||
|
|
||||||
// Legacy function calling
|
// Legacy function calling
|
||||||
FunctionCall *ChatFunctionCall `json:"function_call,omitempty"`
|
FunctionCall *ChatFunctionCall `json:"function_call,omitempty"`
|
||||||
@@ -466,9 +467,10 @@ type ChatChunkChoice struct {
|
|||||||
|
|
||||||
// ChatDelta carries incremental content in a streaming chunk.
|
// ChatDelta carries incremental content in a streaming chunk.
|
||||||
type ChatDelta struct {
|
type ChatDelta struct {
|
||||||
Role string `json:"role,omitempty"`
|
Role string `json:"role,omitempty"`
|
||||||
Content *string `json:"content,omitempty"` // pointer: omit when not present, null vs "" matters
|
Content *string `json:"content,omitempty"` // pointer: omit when not present, null vs "" matters
|
||||||
ToolCalls []ChatToolCall `json:"tool_calls,omitempty"`
|
ReasoningContent *string `json:"reasoning_content,omitempty"`
|
||||||
|
ToolCalls []ChatToolCall `json:"tool_calls,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
|
|||||||
Reference in New Issue
Block a user