mirror of
https://github.com/QuantumNous/new-api.git
synced 2026-03-30 05:02:17 +00:00
Merge pull request #2887 from seefs001/fix/claude
fix: 补全 streaming message_delta 事件缺失的 input_tokens 和 cache 相关字段
This commit is contained in:
111
relay/channel/claude/message_delta_usage_patch_test.go
Normal file
111
relay/channel/claude/message_delta_usage_patch_test.go
Normal file
@@ -0,0 +1,111 @@
|
||||
package claude
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/QuantumNous/new-api/dto"
|
||||
relaycommon "github.com/QuantumNous/new-api/relay/common"
|
||||
"github.com/QuantumNous/new-api/setting/model_setting"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"github.com/tidwall/gjson"
|
||||
)
|
||||
|
||||
func TestPatchClaudeMessageDeltaUsageDataPreserveUnknownFields(t *testing.T) {
|
||||
originalData := `{"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"output_tokens":53},"vendor_meta":{"trace_id":"trace_001"}}`
|
||||
usage := &dto.ClaudeUsage{
|
||||
InputTokens: 100,
|
||||
CacheReadInputTokens: 30,
|
||||
CacheCreationInputTokens: 50,
|
||||
}
|
||||
|
||||
patchedData := patchClaudeMessageDeltaUsageData(originalData, usage)
|
||||
|
||||
require.Equal(t, "message_delta", gjson.Get(patchedData, "type").String())
|
||||
require.Equal(t, "end_turn", gjson.Get(patchedData, "delta.stop_reason").String())
|
||||
require.Equal(t, "trace_001", gjson.Get(patchedData, "vendor_meta.trace_id").String())
|
||||
require.EqualValues(t, 53, gjson.Get(patchedData, "usage.output_tokens").Int())
|
||||
require.EqualValues(t, 100, gjson.Get(patchedData, "usage.input_tokens").Int())
|
||||
require.EqualValues(t, 30, gjson.Get(patchedData, "usage.cache_read_input_tokens").Int())
|
||||
require.EqualValues(t, 50, gjson.Get(patchedData, "usage.cache_creation_input_tokens").Int())
|
||||
}
|
||||
|
||||
func TestPatchClaudeMessageDeltaUsageDataZeroValueChecks(t *testing.T) {
|
||||
originalData := `{"type":"message_delta","usage":{"output_tokens":53,"input_tokens":9,"cache_read_input_tokens":0}}`
|
||||
usage := &dto.ClaudeUsage{
|
||||
InputTokens: 100,
|
||||
CacheReadInputTokens: 30,
|
||||
CacheCreationInputTokens: 0,
|
||||
}
|
||||
|
||||
patchedData := patchClaudeMessageDeltaUsageData(originalData, usage)
|
||||
|
||||
require.EqualValues(t, 9, gjson.Get(patchedData, "usage.input_tokens").Int())
|
||||
require.EqualValues(t, 30, gjson.Get(patchedData, "usage.cache_read_input_tokens").Int())
|
||||
assert.False(t, gjson.Get(patchedData, "usage.cache_creation_input_tokens").Exists())
|
||||
}
|
||||
|
||||
func TestShouldSkipClaudeMessageDeltaUsagePatch(t *testing.T) {
|
||||
originGlobalPassThrough := model_setting.GetGlobalSettings().PassThroughRequestEnabled
|
||||
t.Cleanup(func() {
|
||||
model_setting.GetGlobalSettings().PassThroughRequestEnabled = originGlobalPassThrough
|
||||
})
|
||||
|
||||
model_setting.GetGlobalSettings().PassThroughRequestEnabled = true
|
||||
assert.True(t, shouldSkipClaudeMessageDeltaUsagePatch(&relaycommon.RelayInfo{}))
|
||||
|
||||
model_setting.GetGlobalSettings().PassThroughRequestEnabled = false
|
||||
assert.True(t, shouldSkipClaudeMessageDeltaUsagePatch(&relaycommon.RelayInfo{
|
||||
ChannelMeta: &relaycommon.ChannelMeta{ChannelSetting: dto.ChannelSettings{PassThroughBodyEnabled: true}},
|
||||
}))
|
||||
assert.False(t, shouldSkipClaudeMessageDeltaUsagePatch(&relaycommon.RelayInfo{
|
||||
ChannelMeta: &relaycommon.ChannelMeta{ChannelSetting: dto.ChannelSettings{PassThroughBodyEnabled: false}},
|
||||
}))
|
||||
}
|
||||
|
||||
func TestBuildMessageDeltaPatchUsage(t *testing.T) {
|
||||
t.Run("merge missing fields from claudeInfo", func(t *testing.T) {
|
||||
claudeResponse := &dto.ClaudeResponse{Usage: &dto.ClaudeUsage{OutputTokens: 53}}
|
||||
claudeInfo := &ClaudeResponseInfo{
|
||||
Usage: &dto.Usage{
|
||||
PromptTokens: 100,
|
||||
PromptTokensDetails: dto.InputTokenDetails{
|
||||
CachedTokens: 30,
|
||||
CachedCreationTokens: 50,
|
||||
},
|
||||
ClaudeCacheCreation5mTokens: 10,
|
||||
ClaudeCacheCreation1hTokens: 20,
|
||||
},
|
||||
}
|
||||
|
||||
usage := buildMessageDeltaPatchUsage(claudeResponse, claudeInfo)
|
||||
require.NotNil(t, usage)
|
||||
require.EqualValues(t, 100, usage.InputTokens)
|
||||
require.EqualValues(t, 30, usage.CacheReadInputTokens)
|
||||
require.EqualValues(t, 50, usage.CacheCreationInputTokens)
|
||||
require.EqualValues(t, 53, usage.OutputTokens)
|
||||
require.NotNil(t, usage.CacheCreation)
|
||||
require.EqualValues(t, 10, usage.CacheCreation.Ephemeral5mInputTokens)
|
||||
require.EqualValues(t, 20, usage.CacheCreation.Ephemeral1hInputTokens)
|
||||
})
|
||||
|
||||
t.Run("keep upstream non-zero values", func(t *testing.T) {
|
||||
claudeResponse := &dto.ClaudeResponse{Usage: &dto.ClaudeUsage{
|
||||
InputTokens: 9,
|
||||
CacheReadInputTokens: 7,
|
||||
CacheCreationInputTokens: 6,
|
||||
}}
|
||||
claudeInfo := &ClaudeResponseInfo{Usage: &dto.Usage{
|
||||
PromptTokens: 100,
|
||||
PromptTokensDetails: dto.InputTokenDetails{
|
||||
CachedTokens: 30,
|
||||
CachedCreationTokens: 50,
|
||||
},
|
||||
}}
|
||||
|
||||
usage := buildMessageDeltaPatchUsage(claudeResponse, claudeInfo)
|
||||
require.EqualValues(t, 9, usage.InputTokens)
|
||||
require.EqualValues(t, 7, usage.CacheReadInputTokens)
|
||||
require.EqualValues(t, 6, usage.CacheCreationInputTokens)
|
||||
})
|
||||
}
|
||||
@@ -21,6 +21,8 @@ import (
|
||||
"github.com/QuantumNous/new-api/types"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/tidwall/sjson"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -544,6 +546,78 @@ type ClaudeResponseInfo struct {
|
||||
Done bool
|
||||
}
|
||||
|
||||
func buildMessageDeltaPatchUsage(claudeResponse *dto.ClaudeResponse, claudeInfo *ClaudeResponseInfo) *dto.ClaudeUsage {
|
||||
usage := &dto.ClaudeUsage{}
|
||||
if claudeResponse != nil && claudeResponse.Usage != nil {
|
||||
*usage = *claudeResponse.Usage
|
||||
}
|
||||
|
||||
if claudeInfo == nil || claudeInfo.Usage == nil {
|
||||
return usage
|
||||
}
|
||||
|
||||
if usage.InputTokens == 0 && claudeInfo.Usage.PromptTokens > 0 {
|
||||
usage.InputTokens = claudeInfo.Usage.PromptTokens
|
||||
}
|
||||
if usage.CacheReadInputTokens == 0 && claudeInfo.Usage.PromptTokensDetails.CachedTokens > 0 {
|
||||
usage.CacheReadInputTokens = claudeInfo.Usage.PromptTokensDetails.CachedTokens
|
||||
}
|
||||
if usage.CacheCreationInputTokens == 0 && claudeInfo.Usage.PromptTokensDetails.CachedCreationTokens > 0 {
|
||||
usage.CacheCreationInputTokens = claudeInfo.Usage.PromptTokensDetails.CachedCreationTokens
|
||||
}
|
||||
if usage.CacheCreation == nil && (claudeInfo.Usage.ClaudeCacheCreation5mTokens > 0 || claudeInfo.Usage.ClaudeCacheCreation1hTokens > 0) {
|
||||
usage.CacheCreation = &dto.ClaudeCacheCreationUsage{
|
||||
Ephemeral5mInputTokens: claudeInfo.Usage.ClaudeCacheCreation5mTokens,
|
||||
Ephemeral1hInputTokens: claudeInfo.Usage.ClaudeCacheCreation1hTokens,
|
||||
}
|
||||
}
|
||||
return usage
|
||||
}
|
||||
|
||||
func shouldSkipClaudeMessageDeltaUsagePatch(info *relaycommon.RelayInfo) bool {
|
||||
if model_setting.GetGlobalSettings().PassThroughRequestEnabled {
|
||||
return true
|
||||
}
|
||||
if info == nil {
|
||||
return false
|
||||
}
|
||||
return info.ChannelSetting.PassThroughBodyEnabled
|
||||
}
|
||||
|
||||
func patchClaudeMessageDeltaUsageData(data string, usage *dto.ClaudeUsage) string {
|
||||
if data == "" || usage == nil {
|
||||
return data
|
||||
}
|
||||
|
||||
data = setMessageDeltaUsageInt(data, "usage.input_tokens", usage.InputTokens)
|
||||
data = setMessageDeltaUsageInt(data, "usage.cache_read_input_tokens", usage.CacheReadInputTokens)
|
||||
data = setMessageDeltaUsageInt(data, "usage.cache_creation_input_tokens", usage.CacheCreationInputTokens)
|
||||
|
||||
if usage.CacheCreation != nil {
|
||||
data = setMessageDeltaUsageInt(data, "usage.cache_creation.ephemeral_5m_input_tokens", usage.CacheCreation.Ephemeral5mInputTokens)
|
||||
data = setMessageDeltaUsageInt(data, "usage.cache_creation.ephemeral_1h_input_tokens", usage.CacheCreation.Ephemeral1hInputTokens)
|
||||
}
|
||||
|
||||
return data
|
||||
}
|
||||
|
||||
func setMessageDeltaUsageInt(data string, path string, localValue int) string {
|
||||
if localValue <= 0 {
|
||||
return data
|
||||
}
|
||||
|
||||
upstreamValue := gjson.Get(data, path)
|
||||
if upstreamValue.Exists() && upstreamValue.Int() > 0 {
|
||||
return data
|
||||
}
|
||||
|
||||
patchedData, err := sjson.Set(data, path, localValue)
|
||||
if err != nil {
|
||||
return data
|
||||
}
|
||||
return patchedData
|
||||
}
|
||||
|
||||
func FormatClaudeResponseInfo(claudeResponse *dto.ClaudeResponse, oaiResponse *dto.ChatCompletionsStreamResponse, claudeInfo *ClaudeResponseInfo) bool {
|
||||
if claudeInfo == nil {
|
||||
return false
|
||||
@@ -638,6 +712,12 @@ func HandleStreamResponseData(c *gin.Context, info *relaycommon.RelayInfo, claud
|
||||
if claudeResponse.Message != nil {
|
||||
info.UpstreamModelName = claudeResponse.Message.Model
|
||||
}
|
||||
} else if claudeResponse.Type == "message_delta" {
|
||||
// 确保 message_delta 的 usage 包含完整的 input_tokens 和 cache 相关字段
|
||||
// 解决 AWS Bedrock 等上游返回的 message_delta 缺少这些字段的问题
|
||||
if !shouldSkipClaudeMessageDeltaUsagePatch(info) {
|
||||
data = patchClaudeMessageDeltaUsageData(data, buildMessageDeltaPatchUsage(&claudeResponse, claudeInfo))
|
||||
}
|
||||
}
|
||||
helper.ClaudeChunkData(c, claudeResponse, data)
|
||||
} else if info.RelayFormat == types.RelayFormatOpenAI {
|
||||
|
||||
175
relay/channel/claude/relay_claude_test.go
Normal file
175
relay/channel/claude/relay_claude_test.go
Normal file
@@ -0,0 +1,175 @@
|
||||
package claude
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/QuantumNous/new-api/dto"
|
||||
)
|
||||
|
||||
func TestFormatClaudeResponseInfo_MessageStart(t *testing.T) {
|
||||
claudeInfo := &ClaudeResponseInfo{
|
||||
Usage: &dto.Usage{},
|
||||
}
|
||||
claudeResponse := &dto.ClaudeResponse{
|
||||
Type: "message_start",
|
||||
Message: &dto.ClaudeMediaMessage{
|
||||
Id: "msg_123",
|
||||
Model: "claude-3-5-sonnet",
|
||||
Usage: &dto.ClaudeUsage{
|
||||
InputTokens: 100,
|
||||
OutputTokens: 1,
|
||||
CacheCreationInputTokens: 50,
|
||||
CacheReadInputTokens: 30,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
ok := FormatClaudeResponseInfo(claudeResponse, nil, claudeInfo)
|
||||
if !ok {
|
||||
t.Fatal("expected true")
|
||||
}
|
||||
if claudeInfo.Usage.PromptTokens != 100 {
|
||||
t.Errorf("PromptTokens = %d, want 100", claudeInfo.Usage.PromptTokens)
|
||||
}
|
||||
if claudeInfo.Usage.PromptTokensDetails.CachedTokens != 30 {
|
||||
t.Errorf("CachedTokens = %d, want 30", claudeInfo.Usage.PromptTokensDetails.CachedTokens)
|
||||
}
|
||||
if claudeInfo.Usage.PromptTokensDetails.CachedCreationTokens != 50 {
|
||||
t.Errorf("CachedCreationTokens = %d, want 50", claudeInfo.Usage.PromptTokensDetails.CachedCreationTokens)
|
||||
}
|
||||
if claudeInfo.ResponseId != "msg_123" {
|
||||
t.Errorf("ResponseId = %s, want msg_123", claudeInfo.ResponseId)
|
||||
}
|
||||
if claudeInfo.Model != "claude-3-5-sonnet" {
|
||||
t.Errorf("Model = %s, want claude-3-5-sonnet", claudeInfo.Model)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFormatClaudeResponseInfo_MessageDelta_FullUsage(t *testing.T) {
|
||||
// message_start 先积累 usage
|
||||
claudeInfo := &ClaudeResponseInfo{
|
||||
Usage: &dto.Usage{
|
||||
PromptTokens: 100,
|
||||
PromptTokensDetails: dto.InputTokenDetails{
|
||||
CachedTokens: 30,
|
||||
CachedCreationTokens: 50,
|
||||
},
|
||||
CompletionTokens: 1,
|
||||
},
|
||||
}
|
||||
|
||||
// message_delta 带完整 usage(原生 Anthropic 场景)
|
||||
claudeResponse := &dto.ClaudeResponse{
|
||||
Type: "message_delta",
|
||||
Usage: &dto.ClaudeUsage{
|
||||
InputTokens: 100,
|
||||
OutputTokens: 200,
|
||||
CacheCreationInputTokens: 50,
|
||||
CacheReadInputTokens: 30,
|
||||
},
|
||||
}
|
||||
|
||||
ok := FormatClaudeResponseInfo(claudeResponse, nil, claudeInfo)
|
||||
if !ok {
|
||||
t.Fatal("expected true")
|
||||
}
|
||||
if claudeInfo.Usage.PromptTokens != 100 {
|
||||
t.Errorf("PromptTokens = %d, want 100", claudeInfo.Usage.PromptTokens)
|
||||
}
|
||||
if claudeInfo.Usage.CompletionTokens != 200 {
|
||||
t.Errorf("CompletionTokens = %d, want 200", claudeInfo.Usage.CompletionTokens)
|
||||
}
|
||||
if claudeInfo.Usage.TotalTokens != 300 {
|
||||
t.Errorf("TotalTokens = %d, want 300", claudeInfo.Usage.TotalTokens)
|
||||
}
|
||||
if !claudeInfo.Done {
|
||||
t.Error("expected Done = true")
|
||||
}
|
||||
}
|
||||
|
||||
func TestFormatClaudeResponseInfo_MessageDelta_OnlyOutputTokens(t *testing.T) {
|
||||
// 模拟 Bedrock: message_start 已积累 usage
|
||||
claudeInfo := &ClaudeResponseInfo{
|
||||
Usage: &dto.Usage{
|
||||
PromptTokens: 100,
|
||||
PromptTokensDetails: dto.InputTokenDetails{
|
||||
CachedTokens: 30,
|
||||
CachedCreationTokens: 50,
|
||||
},
|
||||
CompletionTokens: 1,
|
||||
ClaudeCacheCreation5mTokens: 10,
|
||||
ClaudeCacheCreation1hTokens: 20,
|
||||
},
|
||||
}
|
||||
|
||||
// Bedrock 的 message_delta 只有 output_tokens,缺少 input_tokens 和 cache 字段
|
||||
claudeResponse := &dto.ClaudeResponse{
|
||||
Type: "message_delta",
|
||||
Usage: &dto.ClaudeUsage{
|
||||
OutputTokens: 200,
|
||||
// InputTokens, CacheCreationInputTokens, CacheReadInputTokens 都是 0
|
||||
},
|
||||
}
|
||||
|
||||
ok := FormatClaudeResponseInfo(claudeResponse, nil, claudeInfo)
|
||||
if !ok {
|
||||
t.Fatal("expected true")
|
||||
}
|
||||
// PromptTokens 应保持 message_start 的值(因为 message_delta 的 InputTokens=0,不更新)
|
||||
if claudeInfo.Usage.PromptTokens != 100 {
|
||||
t.Errorf("PromptTokens = %d, want 100", claudeInfo.Usage.PromptTokens)
|
||||
}
|
||||
if claudeInfo.Usage.CompletionTokens != 200 {
|
||||
t.Errorf("CompletionTokens = %d, want 200", claudeInfo.Usage.CompletionTokens)
|
||||
}
|
||||
if claudeInfo.Usage.TotalTokens != 300 {
|
||||
t.Errorf("TotalTokens = %d, want 300", claudeInfo.Usage.TotalTokens)
|
||||
}
|
||||
// cache 字段应保持 message_start 的值
|
||||
if claudeInfo.Usage.PromptTokensDetails.CachedTokens != 30 {
|
||||
t.Errorf("CachedTokens = %d, want 30", claudeInfo.Usage.PromptTokensDetails.CachedTokens)
|
||||
}
|
||||
if claudeInfo.Usage.PromptTokensDetails.CachedCreationTokens != 50 {
|
||||
t.Errorf("CachedCreationTokens = %d, want 50", claudeInfo.Usage.PromptTokensDetails.CachedCreationTokens)
|
||||
}
|
||||
if claudeInfo.Usage.ClaudeCacheCreation5mTokens != 10 {
|
||||
t.Errorf("ClaudeCacheCreation5mTokens = %d, want 10", claudeInfo.Usage.ClaudeCacheCreation5mTokens)
|
||||
}
|
||||
if claudeInfo.Usage.ClaudeCacheCreation1hTokens != 20 {
|
||||
t.Errorf("ClaudeCacheCreation1hTokens = %d, want 20", claudeInfo.Usage.ClaudeCacheCreation1hTokens)
|
||||
}
|
||||
if !claudeInfo.Done {
|
||||
t.Error("expected Done = true")
|
||||
}
|
||||
}
|
||||
|
||||
func TestFormatClaudeResponseInfo_NilClaudeInfo(t *testing.T) {
|
||||
claudeResponse := &dto.ClaudeResponse{Type: "message_start"}
|
||||
ok := FormatClaudeResponseInfo(claudeResponse, nil, nil)
|
||||
if ok {
|
||||
t.Error("expected false for nil claudeInfo")
|
||||
}
|
||||
}
|
||||
|
||||
func TestFormatClaudeResponseInfo_ContentBlockDelta(t *testing.T) {
|
||||
text := "hello"
|
||||
claudeInfo := &ClaudeResponseInfo{
|
||||
Usage: &dto.Usage{},
|
||||
ResponseText: strings.Builder{},
|
||||
}
|
||||
claudeResponse := &dto.ClaudeResponse{
|
||||
Type: "content_block_delta",
|
||||
Delta: &dto.ClaudeMediaMessage{
|
||||
Text: &text,
|
||||
},
|
||||
}
|
||||
|
||||
ok := FormatClaudeResponseInfo(claudeResponse, nil, claudeInfo)
|
||||
if !ok {
|
||||
t.Fatal("expected true")
|
||||
}
|
||||
if claudeInfo.ResponseText.String() != "hello" {
|
||||
t.Errorf("ResponseText = %q, want %q", claudeInfo.ResponseText.String(), "hello")
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user