feat(user): simplify user response structure in JSON output

Merge pull request #2524 from seefs001/fix/revert-model-ratio
fix: revert model ratio
2026-04-01 01:42:43 +00:00 · 2025-12-25 15:39:58 +08:00 · 2025-12-25 15:38:36 +08:00 · 2025-12-25 15:37:54 +08:00 · 2025-12-24 16:59:23 +08:00 · 2025-12-24 16:52:24 +08:00
9 changed files with 53 additions and 93 deletions
--- a/controller/channel-test.go
+++ b/controller/channel-test.go
@@ -40,13 +40,6 @@ type testResult struct {
 	newAPIError *types.NewAPIError
 }

-// testChannel executes a test request against the given channel using the provided testModel and optional endpointType,
-// and returns a testResult containing the test context and any encountered error information.
-// It selects or derives a model when testModel is empty, auto-detects the request endpoint (chat, responses, embeddings, images, rerank) when endpointType is not specified,
-// converts and relays the request to the upstream adapter, and parses the upstream response to collect usage and pricing information.
-// On upstream responses that indicate the chat/completions `messages` parameter is unsupported and endpointType was not specified, it will retry the test using the Responses API.
-// The function records consumption logs and returns a testResult with a populated context on success, or with localErr/newAPIError set on failure;
-// for channel types that are not supported for testing it returns a localErr explaining that the channel test is not supported.
 func testChannel(channel *model.Channel, testModel string, endpointType string) testResult {
 	tik := time.Now()
 	var unsupportedTestChannelTypes = []int{
@@ -82,8 +75,6 @@ func testChannel(channel *model.Channel, testModel string, endpointType string)
 		}
 	}

-	originTestModel := testModel
-
 	requestPath := "/v1/chat/completions"

 	// 如果指定了端点类型，使用指定的端点类型
@@ -93,10 +84,6 @@ func testChannel(channel *model.Channel, testModel string, endpointType string)
 		}
 	} else {
 		// 如果没有指定端点类型，使用原有的自动检测逻辑
-		if common.IsOpenAIResponseOnlyModel(testModel) {
-			requestPath = "/v1/responses"
-		}
-
 		// 先判断是否为 Embedding 模型
 		if strings.Contains(strings.ToLower(testModel), "embedding") ||
 			strings.HasPrefix(testModel, "m3e") || // m3e 系列模型
@@ -332,13 +319,6 @@ func testChannel(channel *model.Channel, testModel string, endpointType string)
 		httpResp = resp.(*http.Response)
 		if httpResp.StatusCode != http.StatusOK {
 			err := service.RelayErrorHandler(c.Request.Context(), httpResp, true)
-			// 自动检测模式下，如果上游不支持 chat.completions 的 messages 参数，尝试切换到 Responses API 再测一次。
-			if endpointType == "" && requestPath == "/v1/chat/completions" && err != nil {
-				lowerErr := strings.ToLower(err.Error())
-				if strings.Contains(lowerErr, "unsupported parameter") && strings.Contains(lowerErr, "messages") {
-					return testChannel(channel, originTestModel, string(constant.EndpointTypeOpenAIResponse))
-				}
-			}
 			return testResult{
 				context:     c,
 				localErr:    err,
@@ -409,7 +389,6 @@ func testChannel(channel *model.Channel, testModel string, endpointType string)
 	}
 }

-// for embedding models, and otherwise a chat/completion request with model-specific token limit heuristics.
 func buildTestRequest(model string, endpointType string) dto.Request {
 	// 根据端点类型构建不同的测试请求
 	if endpointType != "" {
@@ -438,12 +417,9 @@ func buildTestRequest(model string, endpointType string) dto.Request {
 			}
 		case constant.EndpointTypeOpenAIResponse:
 			// 返回 OpenAIResponsesRequest
-			maxOutputTokens := uint(10)
 			return &dto.OpenAIResponsesRequest{
-				Model:           model,
-				Input:           json.RawMessage(`[{"role":"user","content":"hi"}]`),
-				MaxOutputTokens: maxOutputTokens,
-				Stream:          true,
+				Model: model,
+				Input: json.RawMessage("\"hi\""),
 			}
 		case constant.EndpointTypeAnthropic, constant.EndpointTypeGemini, constant.EndpointTypeOpenAI:
 			// 返回 GeneralOpenAIRequest
@@ -466,16 +442,6 @@ func buildTestRequest(model string, endpointType string) dto.Request {
 	}

 	// 自动检测逻辑（保持原有行为）
-	if common.IsOpenAIResponseOnlyModel(model) {
-		maxOutputTokens := uint(10)
-		return &dto.OpenAIResponsesRequest{
-			Model:           model,
-			Input:           json.RawMessage(`[{"role":"user","content":"hi"}]`),
-			MaxOutputTokens: maxOutputTokens,
-			Stream:          true,
-		}
-	}
-
 	// 先判断是否为 Embedding 模型
 	if strings.Contains(strings.ToLower(model), "embedding") ||
 		strings.HasPrefix(model, "m3e") ||
@@ -674,4 +640,4 @@ func AutomaticallyTestChannels() {
 			}
 		}
 	})
-}
+}
--- a/controller/user.go
+++ b/controller/user.go
@@ -110,18 +110,17 @@ func setupLogin(user *model.User, c *gin.Context) {
 		})
 		return
 	}
-	cleanUser := model.User{
-		Id:          user.Id,
-		Username:    user.Username,
-		DisplayName: user.DisplayName,
-		Role:        user.Role,
-		Status:      user.Status,
-		Group:       user.Group,
-	}
 	c.JSON(http.StatusOK, gin.H{
 		"message": "",
 		"success": true,
-		"data":    cleanUser,
+		"data": map[string]any{
+			"id":           user.Id,
+			"username":     user.Username,
+			"display_name": user.DisplayName,
+			"role":         user.Role,
+			"status":       user.Status,
+			"group":        user.Group,
+		},
 	})
 }

--- a/relay/channel/claude/relay-claude.go
+++ b/relay/channel/claude/relay-claude.go
@@ -483,9 +483,11 @@ func StreamResponseClaude2OpenAI(reqMode int, claudeResponse *dto.ClaudeResponse
 				}
 			}
 		} else if claudeResponse.Type == "message_delta" {
-			finishReason := stopReasonClaude2OpenAI(*claudeResponse.Delta.StopReason)
-			if finishReason != "null" {
-				choice.FinishReason = &finishReason
+			if claudeResponse.Delta != nil && claudeResponse.Delta.StopReason != nil {
+				finishReason := stopReasonClaude2OpenAI(*claudeResponse.Delta.StopReason)
+				if finishReason != "null" {
+					choice.FinishReason = &finishReason
+				}
 			}
 			//claudeUsage = &claudeResponse.Usage
 		} else if claudeResponse.Type == "message_stop" {
--- a/relay/channel/openai/relay-openai.go
+++ b/relay/channel/openai/relay-openai.go
@@ -596,7 +596,7 @@ func applyUsagePostProcessing(info *relaycommon.RelayInfo, usage *dto.Usage, res
 		if usage.PromptTokensDetails.CachedTokens == 0 && usage.PromptCacheHitTokens != 0 {
 			usage.PromptTokensDetails.CachedTokens = usage.PromptCacheHitTokens
 		}
-	case constant.ChannelTypeZhipu_v4:
+	case constant.ChannelTypeZhipu_v4, constant.ChannelTypeMoonshot:
 		if usage.PromptTokensDetails.CachedTokens == 0 {
 			if usage.InputTokensDetails != nil && usage.InputTokensDetails.CachedTokens > 0 {
 				usage.PromptTokensDetails.CachedTokens = usage.InputTokensDetails.CachedTokens
--- a/relay/compatible_handler.go
+++ b/relay/compatible_handler.go
@@ -300,14 +300,20 @@ func postConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage
 	if !relayInfo.PriceData.UsePrice {
 		baseTokens := dPromptTokens
 		// 减去 cached tokens
+		// Anthropic API 的 input_tokens 已经不包含缓存 tokens，不需要减去
+		// OpenAI/OpenRouter 等 API 的 prompt_tokens 包含缓存 tokens，需要减去
 		var cachedTokensWithRatio decimal.Decimal
 		if !dCacheTokens.IsZero() {
-			baseTokens = baseTokens.Sub(dCacheTokens)
+			if relayInfo.ChannelType != constant.ChannelTypeAnthropic {
+				baseTokens = baseTokens.Sub(dCacheTokens)
+			}
 			cachedTokensWithRatio = dCacheTokens.Mul(dCacheRatio)
 		}
 		var dCachedCreationTokensWithRatio decimal.Decimal
 		if !dCachedCreationTokens.IsZero() {
-			baseTokens = baseTokens.Sub(dCachedCreationTokens)
+			if relayInfo.ChannelType != constant.ChannelTypeAnthropic {
+				baseTokens = baseTokens.Sub(dCachedCreationTokens)
+			}
 			dCachedCreationTokensWithRatio = dCachedCreationTokens.Mul(dCachedCreationRatio)
 		}

--- a/relay/helper/valid_request.go
+++ b/relay/helper/valid_request.go
@@ -110,8 +110,6 @@ func GetAndValidateEmbeddingRequest(c *gin.Context, relayMode int) (*dto.Embeddi
 	return embeddingRequest, nil
 }

-// GetAndValidateResponsesRequest parses the HTTP request body into an OpenAIResponsesRequest and ensures the Model field is provided.
-// It returns the parsed request, or an error if the body cannot be parsed or the Model is empty.
 func GetAndValidateResponsesRequest(c *gin.Context) (*dto.OpenAIResponsesRequest, error) {
 	request := &dto.OpenAIResponsesRequest{}
 	err := common.UnmarshalBodyReusable(c, request)
@@ -121,6 +119,9 @@ func GetAndValidateResponsesRequest(c *gin.Context) (*dto.OpenAIResponsesRequest
 	if request.Model == "" {
 		return nil, errors.New("model is required")
 	}
+	if request.Input == nil {
+		return nil, errors.New("input is required")
+	}
 	return request, nil
 }

@@ -323,4 +324,4 @@ func GetAndValidateGeminiBatchEmbeddingRequest(c *gin.Context) (*dto.GeminiBatch
 		return nil, err
 	}
 	return request, nil
-}
+}
--- a/service/convert.go
+++ b/service/convert.go
@@ -389,25 +389,29 @@ func StreamResponseOpenAI2Claude(openAIResponse *dto.ChatCompletionsStreamRespon
 				}

 				idx := blockIndex
-				claudeResponses = append(claudeResponses, &dto.ClaudeResponse{
-					Index: &idx,
-					Type:  "content_block_start",
-					ContentBlock: &dto.ClaudeMediaMessage{
-						Id:    toolCall.ID,
-						Type:  "tool_use",
-						Name:  toolCall.Function.Name,
-						Input: map[string]interface{}{},
-					},
-				})
-
-				claudeResponses = append(claudeResponses, &dto.ClaudeResponse{
-					Index: &idx,
-					Type:  "content_block_delta",
-					Delta: &dto.ClaudeMediaMessage{
-						Type:        "input_json_delta",
-						PartialJson: &toolCall.Function.Arguments,
-					},
-				})
+				if toolCall.Function.Name != "" {
+					claudeResponses = append(claudeResponses, &dto.ClaudeResponse{
+						Index: &idx,
+						Type:  "content_block_start",
+						ContentBlock: &dto.ClaudeMediaMessage{
+							Id:    toolCall.ID,
+							Type:  "tool_use",
+							Name:  toolCall.Function.Name,
+							Input: map[string]interface{}{},
+						},
+					})
+				}
+				
+				if len(toolCall.Function.Arguments) > 0 {
+					claudeResponses = append(claudeResponses, &dto.ClaudeResponse{
+						Index: &idx,
+						Type:  "content_block_delta",
+						Delta: &dto.ClaudeMediaMessage{
+							Type:        "input_json_delta",
+							PartialJson: &toolCall.Function.Arguments,
+						},
+					})
+				}

 				info.ClaudeConvertInfo.Index = blockIndex
 			}
--- a/service/error.go
+++ b/service/error.go
@@ -81,24 +81,11 @@ func ClaudeErrorWrapperLocal(err error, code string, statusCode int) *dto.Claude
 	return claudeErr
 }

-// RelayErrorHandler converts an HTTP error response into a structured types.NewAPIError.
-// It returns a NewAPIError initialized with the response status code and one of:
-// - an Err describing an absent or unreadable body,
-// - an Err containing the unmarshaled error message (or status + raw body when showBodyWhenFail is true), or
-// - an embedded OpenAI-style error when the response body contains a compatible error object.
-// The returned NewAPIError's status code reflects resp.StatusCode.
 func RelayErrorHandler(ctx context.Context, resp *http.Response, showBodyWhenFail bool) (newApiErr *types.NewAPIError) {
 	newApiErr = types.InitOpenAIError(types.ErrorCodeBadResponseStatusCode, resp.StatusCode)

-	if resp.Body == nil {
-		newApiErr.Err = errors.New("response body is nil")
-		return
-	}
-
 	responseBody, err := io.ReadAll(resp.Body)
 	if err != nil {
-		CloseResponseBodyGracefully(resp)
-		newApiErr.Err = fmt.Errorf("read response body failed: %w", err)
 		return
 	}
 	CloseResponseBodyGracefully(resp)
@@ -169,4 +156,4 @@ func TaskErrorWrapper(err error, code string, statusCode int) *dto.TaskError {
 	}

 	return taskError
-}
+}
--- a/setting/ratio_setting/model_ratio.go
+++ b/setting/ratio_setting/model_ratio.go
@@ -7,7 +7,6 @@ import (

 	"github.com/QuantumNous/new-api/common"
 	"github.com/QuantumNous/new-api/setting/operation_setting"
-	"github.com/QuantumNous/new-api/setting/reasoning"
 )

 // from songquanpeng/one-api
@@ -829,10 +828,6 @@ func FormatMatchingModelName(name string) string {
 		name = handleThinkingBudgetModel(name, "gemini-2.5-pro", "gemini-2.5-pro-thinking-*")
 	}

-	if base, _, ok := reasoning.TrimEffortSuffix(name); ok {
-		name = base
-	}
-
 	if strings.HasPrefix(name, "gpt-4-gizmo") {
 		name = "gpt-4-gizmo-*"
 	}
Author	SHA1	Message	Date
CaIon	f17b3810d6	feat(user): simplify user response structure in JSON output	2025-12-25 15:39:58 +08:00
Calcium-Ion	8206084a77	Merge pull request #2524 from seefs001/fix/revert-model-ratio fix: revert model ratio	2025-12-25 15:38:36 +08:00
Seefs	559da6362a	fix: revert model ratio	2025-12-25 15:37:54 +08:00
Calcium-Ion	0b1a562df9	Merge pull request #2477 from 1420970597/fix/anthropic-cache-billing fix: 修复 Anthropic 渠道缓存计费错误	2025-12-24 16:59:23 +08:00
Seefs	a0c3d37d66	Merge pull request #2493 from shikaiwei1/patch-1	2025-12-24 16:52:24 +08:00
Seefs	347f2326f3	Merge pull request #2511 from JerryKwan/issue2499	2025-12-24 16:51:51 +08:00
Jerry	31a79620ba	Resolving event mismatch in OpenAI2Claude add stricter validation for content_block_start corresponding to tool call and fix the crash issue when Claude Code is processing tool call	2025-12-24 14:52:39 +08:00
Calcium-Ion	12555a37d3	Merge pull request #2510 from feitianbubu/pr/0e7050dc89c1b761069f5e528d8ecf786e7008ae 修复claudeResponse流式请求空指针Panic	2025-12-24 14:15:51 +08:00
feitianbubu	3652dfdbd5	fix: check claudeResponse delta StopReason nil point	2025-12-24 11:54:23 +08:00
John Chen	dbaba87c39	为Moonshot添加缓存tokens读取逻辑为Moonshot添加缓存tokens读取逻辑。其与智普V4的逻辑相同，所以共用逻辑	2025-12-22 17:05:16 +08:00
长安	0a2f12c04e	fix: 修复 Anthropic 渠道缓存计费错误 ## 问题描述当使用 Anthropic 渠道通过 `/v1/chat/completions` 端点调用且启用缓存功能时，计费逻辑错误地减去了缓存 tokens，导致严重的收入损失（94.5%）。 ## 根本原因不同 API 的 `prompt_tokens` 定义不同： - Anthropic API: `input_tokens` 字段已经是纯输入 tokens（不包含缓存） - OpenAI API: `prompt_tokens` 字段包含所有 tokens（包含缓存） - OpenRouter API: `prompt_tokens` 字段包含所有 tokens（包含缓存）当前 `postConsumeQuota` 函数对所有渠道都减去缓存 tokens，这对 Anthropic 渠道是错误的，因为其 `input_tokens` 已经不包含缓存。 ## 修复方案在 `relay/compatible_handler.go` 的 `postConsumeQuota` 函数中，添加渠道类型判断： ```go if relayInfo.ChannelType != constant.ChannelTypeAnthropic { baseTokens = baseTokens.Sub(dCacheTokens) } ``` 只对非 Anthropic 渠道减去缓存 tokens。 ## 影响分析 ### ✅ 不受影响的场景 1. 无缓存调用（所有渠道） - cache_tokens = 0 - 减去 0 = 不减去 - 结果：完全一致 2. OpenAI/OpenRouter 渠道 + 缓存 - 继续减去缓存（因为 ChannelType != Anthropic） - 结果：完全一致 3. Anthropic 渠道 + /v1/messages 端点 - 使用 PostClaudeConsumeQuota（不修改） - 结果：完全不受影响 ### ✅ 修复的场景 4. Anthropic 渠道 + /v1/chat/completions + 缓存 - 修复前：错误地减去缓存，导致 94.5% 收入损失 - 修复后：不减去缓存，计费正确 ## 验证数据以实际记录 143509 为例： \| 项目 \| 修复前 \| 修复后 \| 差异 \| \|------\|--------\|--------\|------\| \| Quota \| 10,489 \| 191,330 \| +180,841 \| \| 费用 \| ¥0.020978 \| ¥0.382660 \| +¥0.361682 \| \| 收入恢复 \| - \| - \| +1724.1% \| ## 测试建议 1. 测试 Anthropic 渠道 + 缓存场景 2. 测试 OpenAI 渠道 + 缓存场景（确保不受影响） 3. 测试无缓存场景（确保不受影响） ## 相关 Issue 修复 Anthropic 渠道使用 prompt caching 时的计费错误。	2025-12-20 14:17:12 +08:00