Compare commits

..

11 Commits

Author SHA1 Message Date
CaIon
f17b3810d6 feat(user): simplify user response structure in JSON output 2025-12-25 15:39:58 +08:00
Calcium-Ion
8206084a77 Merge pull request #2524 from seefs001/fix/revert-model-ratio
fix: revert model ratio
2025-12-25 15:38:36 +08:00
Seefs
559da6362a fix: revert model ratio 2025-12-25 15:37:54 +08:00
Calcium-Ion
0b1a562df9 Merge pull request #2477 from 1420970597/fix/anthropic-cache-billing
fix: 修复 Anthropic 渠道缓存计费错误
2025-12-24 16:59:23 +08:00
Seefs
a0c3d37d66 Merge pull request #2493 from shikaiwei1/patch-1 2025-12-24 16:52:24 +08:00
Seefs
347f2326f3 Merge pull request #2511 from JerryKwan/issue2499 2025-12-24 16:51:51 +08:00
Jerry
31a79620ba Resolving event mismatch in OpenAI2Claude
add stricter validation for content_block_start corresponding to
tool call
and fix the crash issue when Claude Code is processing tool call
2025-12-24 14:52:39 +08:00
Calcium-Ion
12555a37d3 Merge pull request #2510 from feitianbubu/pr/0e7050dc89c1b761069f5e528d8ecf786e7008ae
修复claudeResponse流式请求空指针Panic
2025-12-24 14:15:51 +08:00
feitianbubu
3652dfdbd5 fix: check claudeResponse delta StopReason nil point 2025-12-24 11:54:23 +08:00
John Chen
dbaba87c39 为Moonshot添加缓存tokens读取逻辑
为Moonshot添加缓存tokens读取逻辑。其与智普V4的逻辑相同,所以共用逻辑
2025-12-22 17:05:16 +08:00
长安
0a2f12c04e fix: 修复 Anthropic 渠道缓存计费错误
## 问题描述

当使用 Anthropic 渠道通过 `/v1/chat/completions` 端点调用且启用缓存功能时,
计费逻辑错误地减去了缓存 tokens,导致严重的收入损失(94.5%)。

## 根本原因

不同 API 的 `prompt_tokens` 定义不同:

- **Anthropic API**: `input_tokens` 字段已经是纯输入 tokens(不包含缓存)
- **OpenAI API**: `prompt_tokens` 字段包含所有 tokens(包含缓存)
- **OpenRouter API**: `prompt_tokens` 字段包含所有 tokens(包含缓存)

当前 `postConsumeQuota` 函数对所有渠道都减去缓存 tokens,这对 Anthropic
渠道是错误的,因为其 `input_tokens` 已经不包含缓存。

## 修复方案

在 `relay/compatible_handler.go` 的 `postConsumeQuota` 函数中,添加渠道类型判断:

```go
if relayInfo.ChannelType != constant.ChannelTypeAnthropic {
    baseTokens = baseTokens.Sub(dCacheTokens)
}
```

只对非 Anthropic 渠道减去缓存 tokens。

## 影响分析

###  不受影响的场景

1. **无缓存调用**(所有渠道)
   - cache_tokens = 0
   - 减去 0 = 不减去
   - 结果:完全一致

2. **OpenAI/OpenRouter 渠道 + 缓存**
   - 继续减去缓存(因为 ChannelType != Anthropic)
   - 结果:完全一致

3. **Anthropic 渠道 + /v1/messages 端点**
   - 使用 PostClaudeConsumeQuota(不修改)
   - 结果:完全不受影响

###  修复的场景

4. **Anthropic 渠道 + /v1/chat/completions + 缓存**
   - 修复前:错误地减去缓存,导致 94.5% 收入损失
   - 修复后:不减去缓存,计费正确

## 验证数据

以实际记录 143509 为例:

| 项目 | 修复前 | 修复后 | 差异 |
|------|--------|--------|------|
| Quota | 10,489 | 191,330 | +180,841 |
| 费用 | ¥0.020978 | ¥0.382660 | +¥0.361682 |
| 收入恢复 | - | - | **+1724.1%** |

## 测试建议

1. 测试 Anthropic 渠道 + 缓存场景
2. 测试 OpenAI 渠道 + 缓存场景(确保不受影响)
3. 测试无缓存场景(确保不受影响)

## 相关 Issue

修复 Anthropic 渠道使用 prompt caching 时的计费错误。
2025-12-20 14:17:12 +08:00
9 changed files with 53 additions and 93 deletions

View File

@@ -40,13 +40,6 @@ type testResult struct {
newAPIError *types.NewAPIError
}
// testChannel executes a test request against the given channel using the provided testModel and optional endpointType,
// and returns a testResult containing the test context and any encountered error information.
// It selects or derives a model when testModel is empty, auto-detects the request endpoint (chat, responses, embeddings, images, rerank) when endpointType is not specified,
// converts and relays the request to the upstream adapter, and parses the upstream response to collect usage and pricing information.
// On upstream responses that indicate the chat/completions `messages` parameter is unsupported and endpointType was not specified, it will retry the test using the Responses API.
// The function records consumption logs and returns a testResult with a populated context on success, or with localErr/newAPIError set on failure;
// for channel types that are not supported for testing it returns a localErr explaining that the channel test is not supported.
func testChannel(channel *model.Channel, testModel string, endpointType string) testResult {
tik := time.Now()
var unsupportedTestChannelTypes = []int{
@@ -82,8 +75,6 @@ func testChannel(channel *model.Channel, testModel string, endpointType string)
}
}
originTestModel := testModel
requestPath := "/v1/chat/completions"
// 如果指定了端点类型,使用指定的端点类型
@@ -93,10 +84,6 @@ func testChannel(channel *model.Channel, testModel string, endpointType string)
}
} else {
// 如果没有指定端点类型,使用原有的自动检测逻辑
if common.IsOpenAIResponseOnlyModel(testModel) {
requestPath = "/v1/responses"
}
// 先判断是否为 Embedding 模型
if strings.Contains(strings.ToLower(testModel), "embedding") ||
strings.HasPrefix(testModel, "m3e") || // m3e 系列模型
@@ -332,13 +319,6 @@ func testChannel(channel *model.Channel, testModel string, endpointType string)
httpResp = resp.(*http.Response)
if httpResp.StatusCode != http.StatusOK {
err := service.RelayErrorHandler(c.Request.Context(), httpResp, true)
// 自动检测模式下,如果上游不支持 chat.completions 的 messages 参数,尝试切换到 Responses API 再测一次。
if endpointType == "" && requestPath == "/v1/chat/completions" && err != nil {
lowerErr := strings.ToLower(err.Error())
if strings.Contains(lowerErr, "unsupported parameter") && strings.Contains(lowerErr, "messages") {
return testChannel(channel, originTestModel, string(constant.EndpointTypeOpenAIResponse))
}
}
return testResult{
context: c,
localErr: err,
@@ -409,7 +389,6 @@ func testChannel(channel *model.Channel, testModel string, endpointType string)
}
}
// for embedding models, and otherwise a chat/completion request with model-specific token limit heuristics.
func buildTestRequest(model string, endpointType string) dto.Request {
// 根据端点类型构建不同的测试请求
if endpointType != "" {
@@ -438,12 +417,9 @@ func buildTestRequest(model string, endpointType string) dto.Request {
}
case constant.EndpointTypeOpenAIResponse:
// 返回 OpenAIResponsesRequest
maxOutputTokens := uint(10)
return &dto.OpenAIResponsesRequest{
Model: model,
Input: json.RawMessage(`[{"role":"user","content":"hi"}]`),
MaxOutputTokens: maxOutputTokens,
Stream: true,
Model: model,
Input: json.RawMessage("\"hi\""),
}
case constant.EndpointTypeAnthropic, constant.EndpointTypeGemini, constant.EndpointTypeOpenAI:
// 返回 GeneralOpenAIRequest
@@ -466,16 +442,6 @@ func buildTestRequest(model string, endpointType string) dto.Request {
}
// 自动检测逻辑(保持原有行为)
if common.IsOpenAIResponseOnlyModel(model) {
maxOutputTokens := uint(10)
return &dto.OpenAIResponsesRequest{
Model: model,
Input: json.RawMessage(`[{"role":"user","content":"hi"}]`),
MaxOutputTokens: maxOutputTokens,
Stream: true,
}
}
// 先判断是否为 Embedding 模型
if strings.Contains(strings.ToLower(model), "embedding") ||
strings.HasPrefix(model, "m3e") ||
@@ -674,4 +640,4 @@ func AutomaticallyTestChannels() {
}
}
})
}
}

View File

@@ -110,18 +110,17 @@ func setupLogin(user *model.User, c *gin.Context) {
})
return
}
cleanUser := model.User{
Id: user.Id,
Username: user.Username,
DisplayName: user.DisplayName,
Role: user.Role,
Status: user.Status,
Group: user.Group,
}
c.JSON(http.StatusOK, gin.H{
"message": "",
"success": true,
"data": cleanUser,
"data": map[string]any{
"id": user.Id,
"username": user.Username,
"display_name": user.DisplayName,
"role": user.Role,
"status": user.Status,
"group": user.Group,
},
})
}

View File

@@ -483,9 +483,11 @@ func StreamResponseClaude2OpenAI(reqMode int, claudeResponse *dto.ClaudeResponse
}
}
} else if claudeResponse.Type == "message_delta" {
finishReason := stopReasonClaude2OpenAI(*claudeResponse.Delta.StopReason)
if finishReason != "null" {
choice.FinishReason = &finishReason
if claudeResponse.Delta != nil && claudeResponse.Delta.StopReason != nil {
finishReason := stopReasonClaude2OpenAI(*claudeResponse.Delta.StopReason)
if finishReason != "null" {
choice.FinishReason = &finishReason
}
}
//claudeUsage = &claudeResponse.Usage
} else if claudeResponse.Type == "message_stop" {

View File

@@ -596,7 +596,7 @@ func applyUsagePostProcessing(info *relaycommon.RelayInfo, usage *dto.Usage, res
if usage.PromptTokensDetails.CachedTokens == 0 && usage.PromptCacheHitTokens != 0 {
usage.PromptTokensDetails.CachedTokens = usage.PromptCacheHitTokens
}
case constant.ChannelTypeZhipu_v4:
case constant.ChannelTypeZhipu_v4, constant.ChannelTypeMoonshot:
if usage.PromptTokensDetails.CachedTokens == 0 {
if usage.InputTokensDetails != nil && usage.InputTokensDetails.CachedTokens > 0 {
usage.PromptTokensDetails.CachedTokens = usage.InputTokensDetails.CachedTokens

View File

@@ -300,14 +300,20 @@ func postConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage
if !relayInfo.PriceData.UsePrice {
baseTokens := dPromptTokens
// 减去 cached tokens
// Anthropic API 的 input_tokens 已经不包含缓存 tokens不需要减去
// OpenAI/OpenRouter 等 API 的 prompt_tokens 包含缓存 tokens需要减去
var cachedTokensWithRatio decimal.Decimal
if !dCacheTokens.IsZero() {
baseTokens = baseTokens.Sub(dCacheTokens)
if relayInfo.ChannelType != constant.ChannelTypeAnthropic {
baseTokens = baseTokens.Sub(dCacheTokens)
}
cachedTokensWithRatio = dCacheTokens.Mul(dCacheRatio)
}
var dCachedCreationTokensWithRatio decimal.Decimal
if !dCachedCreationTokens.IsZero() {
baseTokens = baseTokens.Sub(dCachedCreationTokens)
if relayInfo.ChannelType != constant.ChannelTypeAnthropic {
baseTokens = baseTokens.Sub(dCachedCreationTokens)
}
dCachedCreationTokensWithRatio = dCachedCreationTokens.Mul(dCachedCreationRatio)
}

View File

@@ -110,8 +110,6 @@ func GetAndValidateEmbeddingRequest(c *gin.Context, relayMode int) (*dto.Embeddi
return embeddingRequest, nil
}
// GetAndValidateResponsesRequest parses the HTTP request body into an OpenAIResponsesRequest and ensures the Model field is provided.
// It returns the parsed request, or an error if the body cannot be parsed or the Model is empty.
func GetAndValidateResponsesRequest(c *gin.Context) (*dto.OpenAIResponsesRequest, error) {
request := &dto.OpenAIResponsesRequest{}
err := common.UnmarshalBodyReusable(c, request)
@@ -121,6 +119,9 @@ func GetAndValidateResponsesRequest(c *gin.Context) (*dto.OpenAIResponsesRequest
if request.Model == "" {
return nil, errors.New("model is required")
}
if request.Input == nil {
return nil, errors.New("input is required")
}
return request, nil
}
@@ -323,4 +324,4 @@ func GetAndValidateGeminiBatchEmbeddingRequest(c *gin.Context) (*dto.GeminiBatch
return nil, err
}
return request, nil
}
}

View File

@@ -389,25 +389,29 @@ func StreamResponseOpenAI2Claude(openAIResponse *dto.ChatCompletionsStreamRespon
}
idx := blockIndex
claudeResponses = append(claudeResponses, &dto.ClaudeResponse{
Index: &idx,
Type: "content_block_start",
ContentBlock: &dto.ClaudeMediaMessage{
Id: toolCall.ID,
Type: "tool_use",
Name: toolCall.Function.Name,
Input: map[string]interface{}{},
},
})
claudeResponses = append(claudeResponses, &dto.ClaudeResponse{
Index: &idx,
Type: "content_block_delta",
Delta: &dto.ClaudeMediaMessage{
Type: "input_json_delta",
PartialJson: &toolCall.Function.Arguments,
},
})
if toolCall.Function.Name != "" {
claudeResponses = append(claudeResponses, &dto.ClaudeResponse{
Index: &idx,
Type: "content_block_start",
ContentBlock: &dto.ClaudeMediaMessage{
Id: toolCall.ID,
Type: "tool_use",
Name: toolCall.Function.Name,
Input: map[string]interface{}{},
},
})
}
if len(toolCall.Function.Arguments) > 0 {
claudeResponses = append(claudeResponses, &dto.ClaudeResponse{
Index: &idx,
Type: "content_block_delta",
Delta: &dto.ClaudeMediaMessage{
Type: "input_json_delta",
PartialJson: &toolCall.Function.Arguments,
},
})
}
info.ClaudeConvertInfo.Index = blockIndex
}

View File

@@ -81,24 +81,11 @@ func ClaudeErrorWrapperLocal(err error, code string, statusCode int) *dto.Claude
return claudeErr
}
// RelayErrorHandler converts an HTTP error response into a structured types.NewAPIError.
// It returns a NewAPIError initialized with the response status code and one of:
// - an Err describing an absent or unreadable body,
// - an Err containing the unmarshaled error message (or status + raw body when showBodyWhenFail is true), or
// - an embedded OpenAI-style error when the response body contains a compatible error object.
// The returned NewAPIError's status code reflects resp.StatusCode.
func RelayErrorHandler(ctx context.Context, resp *http.Response, showBodyWhenFail bool) (newApiErr *types.NewAPIError) {
newApiErr = types.InitOpenAIError(types.ErrorCodeBadResponseStatusCode, resp.StatusCode)
if resp.Body == nil {
newApiErr.Err = errors.New("response body is nil")
return
}
responseBody, err := io.ReadAll(resp.Body)
if err != nil {
CloseResponseBodyGracefully(resp)
newApiErr.Err = fmt.Errorf("read response body failed: %w", err)
return
}
CloseResponseBodyGracefully(resp)
@@ -169,4 +156,4 @@ func TaskErrorWrapper(err error, code string, statusCode int) *dto.TaskError {
}
return taskError
}
}

View File

@@ -7,7 +7,6 @@ import (
"github.com/QuantumNous/new-api/common"
"github.com/QuantumNous/new-api/setting/operation_setting"
"github.com/QuantumNous/new-api/setting/reasoning"
)
// from songquanpeng/one-api
@@ -829,10 +828,6 @@ func FormatMatchingModelName(name string) string {
name = handleThinkingBudgetModel(name, "gemini-2.5-pro", "gemini-2.5-pro-thinking-*")
}
if base, _, ok := reasoning.TrimEffortSuffix(name); ok {
name = base
}
if strings.HasPrefix(name, "gpt-4-gizmo") {
name = "gpt-4-gizmo-*"
}