From a01a77fc6f0e467b815b2f89a8bedb561f32117b Mon Sep 17 00:00:00 2001 From: Seefs <40468931+seefs001@users.noreply.github.com> Date: Sun, 22 Feb 2026 23:30:02 +0800 Subject: [PATCH] fix: claude affinity cache counter (#2980) * fix: claude affinity cache counter * fix: claude affinity cache counter * fix: stabilize cache usage stats format and simplify modal rendering --- relay/common/relay_info.go | 16 ++- relay/common/relay_info_test.go | 40 ++++++ relay/compatible_handler.go | 4 +- service/channel_affinity.go | 60 +++++++- service/channel_affinity_usage_cache_test.go | 105 ++++++++++++++ service/quota.go | 3 + .../modals/ChannelAffinityUsageCacheModal.jsx | 129 ++++++++++++------ 7 files changed, 304 insertions(+), 53 deletions(-) create mode 100644 relay/common/relay_info_test.go create mode 100644 service/channel_affinity_usage_cache_test.go diff --git a/relay/common/relay_info.go b/relay/common/relay_info.go index 541f1b9f8..e88f4e51f 100644 --- a/relay/common/relay_info.go +++ b/relay/common/relay_info.go @@ -152,7 +152,8 @@ type RelayInfo struct { // RequestConversionChain records request format conversions in order, e.g. // ["openai", "openai_responses"] or ["openai", "claude"]. RequestConversionChain []types.RelayFormat - // 最终请求到上游的格式 TODO: 当前仅设置了Claude + // 最终请求到上游的格式。可由 adaptor 显式设置; + // 若为空,调用 GetFinalRequestRelayFormat 会回退到 RequestConversionChain 的最后一项或 RelayFormat。 FinalRequestRelayFormat types.RelayFormat ThinkingContentInfo @@ -579,6 +580,19 @@ func (info *RelayInfo) AppendRequestConversion(format types.RelayFormat) { info.RequestConversionChain = append(info.RequestConversionChain, format) } +func (info *RelayInfo) GetFinalRequestRelayFormat() types.RelayFormat { + if info == nil { + return "" + } + if info.FinalRequestRelayFormat != "" { + return info.FinalRequestRelayFormat + } + if n := len(info.RequestConversionChain); n > 0 { + return info.RequestConversionChain[n-1] + } + return info.RelayFormat +} + func GenRelayInfoResponsesCompaction(c *gin.Context, request *dto.OpenAIResponsesCompactionRequest) *RelayInfo { info := genBaseRelayInfo(c, request) if info.RelayMode == relayconstant.RelayModeUnknown { diff --git a/relay/common/relay_info_test.go b/relay/common/relay_info_test.go new file mode 100644 index 000000000..e53ec804c --- /dev/null +++ b/relay/common/relay_info_test.go @@ -0,0 +1,40 @@ +package common + +import ( + "testing" + + "github.com/QuantumNous/new-api/types" + "github.com/stretchr/testify/require" +) + +func TestRelayInfoGetFinalRequestRelayFormatPrefersExplicitFinal(t *testing.T) { + info := &RelayInfo{ + RelayFormat: types.RelayFormatOpenAI, + RequestConversionChain: []types.RelayFormat{types.RelayFormatOpenAI, types.RelayFormatClaude}, + FinalRequestRelayFormat: types.RelayFormatOpenAIResponses, + } + + require.Equal(t, types.RelayFormat(types.RelayFormatOpenAIResponses), info.GetFinalRequestRelayFormat()) +} + +func TestRelayInfoGetFinalRequestRelayFormatFallsBackToConversionChain(t *testing.T) { + info := &RelayInfo{ + RelayFormat: types.RelayFormatOpenAI, + RequestConversionChain: []types.RelayFormat{types.RelayFormatOpenAI, types.RelayFormatClaude}, + } + + require.Equal(t, types.RelayFormat(types.RelayFormatClaude), info.GetFinalRequestRelayFormat()) +} + +func TestRelayInfoGetFinalRequestRelayFormatFallsBackToRelayFormat(t *testing.T) { + info := &RelayInfo{ + RelayFormat: types.RelayFormatGemini, + } + + require.Equal(t, types.RelayFormat(types.RelayFormatGemini), info.GetFinalRequestRelayFormat()) +} + +func TestRelayInfoGetFinalRequestRelayFormatNilReceiver(t *testing.T) { + var info *RelayInfo + require.Equal(t, types.RelayFormat(""), info.GetFinalRequestRelayFormat()) +} diff --git a/relay/compatible_handler.go b/relay/compatible_handler.go index e7adddbbf..cb25da0b3 100644 --- a/relay/compatible_handler.go +++ b/relay/compatible_handler.go @@ -232,7 +232,7 @@ func postConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage } if originUsage != nil { - service.ObserveChannelAffinityUsageCacheFromContext(ctx, usage) + service.ObserveChannelAffinityUsageCacheByRelayFormat(ctx, usage, relayInfo.GetFinalRequestRelayFormat()) } adminRejectReason := common.GetContextKeyString(ctx, constant.ContextKeyAdminRejectReason) @@ -336,7 +336,7 @@ func postConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage var audioInputQuota decimal.Decimal var audioInputPrice float64 - isClaudeUsageSemantic := relayInfo.FinalRequestRelayFormat == types.RelayFormatClaude + isClaudeUsageSemantic := relayInfo.GetFinalRequestRelayFormat() == types.RelayFormatClaude if !relayInfo.PriceData.UsePrice { baseTokens := dPromptTokens // 减去 cached tokens diff --git a/service/channel_affinity.go b/service/channel_affinity.go index fe1524c59..524c6574a 100644 --- a/service/channel_affinity.go +++ b/service/channel_affinity.go @@ -13,6 +13,7 @@ import ( "github.com/QuantumNous/new-api/dto" "github.com/QuantumNous/new-api/pkg/cachex" "github.com/QuantumNous/new-api/setting/operation_setting" + "github.com/QuantumNous/new-api/types" "github.com/gin-gonic/gin" "github.com/samber/hot" "github.com/tidwall/gjson" @@ -61,6 +62,12 @@ type ChannelAffinityStatsContext struct { TTLSeconds int64 } +const ( + cacheTokenRateModeCachedOverPrompt = "cached_over_prompt" + cacheTokenRateModeCachedOverPromptPlusCached = "cached_over_prompt_plus_cached" + cacheTokenRateModeMixed = "mixed" +) + type ChannelAffinityCacheStats struct { Enabled bool `json:"enabled"` Total int `json:"total"` @@ -565,9 +572,10 @@ func RecordChannelAffinity(c *gin.Context, channelID int) { } type ChannelAffinityUsageCacheStats struct { - RuleName string `json:"rule_name"` - UsingGroup string `json:"using_group"` - KeyFingerprint string `json:"key_fp"` + RuleName string `json:"rule_name"` + UsingGroup string `json:"using_group"` + KeyFingerprint string `json:"key_fp"` + CachedTokenRateMode string `json:"cached_token_rate_mode"` Hit int64 `json:"hit"` Total int64 `json:"total"` @@ -582,6 +590,8 @@ type ChannelAffinityUsageCacheStats struct { } type ChannelAffinityUsageCacheCounters struct { + CachedTokenRateMode string `json:"cached_token_rate_mode"` + Hit int64 `json:"hit"` Total int64 `json:"total"` WindowSeconds int64 `json:"window_seconds"` @@ -596,12 +606,17 @@ type ChannelAffinityUsageCacheCounters struct { var channelAffinityUsageCacheStatsLocks [64]sync.Mutex -func ObserveChannelAffinityUsageCacheFromContext(c *gin.Context, usage *dto.Usage) { +// ObserveChannelAffinityUsageCacheByRelayFormat records usage cache stats with a stable rate mode derived from relay format. +func ObserveChannelAffinityUsageCacheByRelayFormat(c *gin.Context, usage *dto.Usage, relayFormat types.RelayFormat) { + ObserveChannelAffinityUsageCacheFromContext(c, usage, cachedTokenRateModeByRelayFormat(relayFormat)) +} + +func ObserveChannelAffinityUsageCacheFromContext(c *gin.Context, usage *dto.Usage, cachedTokenRateMode string) { statsCtx, ok := GetChannelAffinityStatsContext(c) if !ok { return } - observeChannelAffinityUsageCache(statsCtx, usage) + observeChannelAffinityUsageCache(statsCtx, usage, cachedTokenRateMode) } func GetChannelAffinityUsageCacheStats(ruleName, usingGroup, keyFp string) ChannelAffinityUsageCacheStats { @@ -628,6 +643,7 @@ func GetChannelAffinityUsageCacheStats(ruleName, usingGroup, keyFp string) Chann } } return ChannelAffinityUsageCacheStats{ + CachedTokenRateMode: v.CachedTokenRateMode, RuleName: ruleName, UsingGroup: usingGroup, KeyFingerprint: keyFp, @@ -643,7 +659,7 @@ func GetChannelAffinityUsageCacheStats(ruleName, usingGroup, keyFp string) Chann } } -func observeChannelAffinityUsageCache(statsCtx ChannelAffinityStatsContext, usage *dto.Usage) { +func observeChannelAffinityUsageCache(statsCtx ChannelAffinityStatsContext, usage *dto.Usage, cachedTokenRateMode string) { entryKey := channelAffinityUsageCacheEntryKey(statsCtx.RuleName, statsCtx.UsingGroup, statsCtx.KeyFingerprint) if entryKey == "" { return @@ -669,6 +685,14 @@ func observeChannelAffinityUsageCache(statsCtx ChannelAffinityStatsContext, usag if !found { next = ChannelAffinityUsageCacheCounters{} } + currentMode := normalizeCachedTokenRateMode(cachedTokenRateMode) + if currentMode != "" { + if next.CachedTokenRateMode == "" { + next.CachedTokenRateMode = currentMode + } else if next.CachedTokenRateMode != currentMode && next.CachedTokenRateMode != cacheTokenRateModeMixed { + next.CachedTokenRateMode = cacheTokenRateModeMixed + } + } next.Total++ hit, cachedTokens, promptCacheHitTokens := usageCacheSignals(usage) if hit { @@ -684,6 +708,30 @@ func observeChannelAffinityUsageCache(statsCtx ChannelAffinityStatsContext, usag _ = cache.SetWithTTL(entryKey, next, ttl) } +func normalizeCachedTokenRateMode(mode string) string { + switch mode { + case cacheTokenRateModeCachedOverPrompt: + return cacheTokenRateModeCachedOverPrompt + case cacheTokenRateModeCachedOverPromptPlusCached: + return cacheTokenRateModeCachedOverPromptPlusCached + case cacheTokenRateModeMixed: + return cacheTokenRateModeMixed + default: + return "" + } +} + +func cachedTokenRateModeByRelayFormat(relayFormat types.RelayFormat) string { + switch relayFormat { + case types.RelayFormatOpenAI, types.RelayFormatOpenAIResponses, types.RelayFormatOpenAIResponsesCompaction: + return cacheTokenRateModeCachedOverPrompt + case types.RelayFormatClaude: + return cacheTokenRateModeCachedOverPromptPlusCached + default: + return "" + } +} + func channelAffinityUsageCacheEntryKey(ruleName, usingGroup, keyFp string) string { ruleName = strings.TrimSpace(ruleName) usingGroup = strings.TrimSpace(usingGroup) diff --git a/service/channel_affinity_usage_cache_test.go b/service/channel_affinity_usage_cache_test.go new file mode 100644 index 000000000..64d3d715b --- /dev/null +++ b/service/channel_affinity_usage_cache_test.go @@ -0,0 +1,105 @@ +package service + +import ( + "fmt" + "net/http/httptest" + "testing" + "time" + + "github.com/QuantumNous/new-api/dto" + "github.com/QuantumNous/new-api/types" + "github.com/gin-gonic/gin" + "github.com/stretchr/testify/require" +) + +func buildChannelAffinityStatsContextForTest(ruleName, usingGroup, keyFP string) *gin.Context { + rec := httptest.NewRecorder() + ctx, _ := gin.CreateTestContext(rec) + setChannelAffinityContext(ctx, channelAffinityMeta{ + CacheKey: fmt.Sprintf("test:%s:%s:%s", ruleName, usingGroup, keyFP), + TTLSeconds: 600, + RuleName: ruleName, + UsingGroup: usingGroup, + KeyFingerprint: keyFP, + }) + return ctx +} + +func TestObserveChannelAffinityUsageCacheByRelayFormat_ClaudeMode(t *testing.T) { + ruleName := fmt.Sprintf("rule_%d", time.Now().UnixNano()) + usingGroup := "default" + keyFP := fmt.Sprintf("fp_%d", time.Now().UnixNano()) + ctx := buildChannelAffinityStatsContextForTest(ruleName, usingGroup, keyFP) + + usage := &dto.Usage{ + PromptTokens: 100, + CompletionTokens: 40, + TotalTokens: 140, + PromptTokensDetails: dto.InputTokenDetails{ + CachedTokens: 30, + }, + } + + ObserveChannelAffinityUsageCacheByRelayFormat(ctx, usage, types.RelayFormatClaude) + stats := GetChannelAffinityUsageCacheStats(ruleName, usingGroup, keyFP) + + require.EqualValues(t, 1, stats.Total) + require.EqualValues(t, 1, stats.Hit) + require.EqualValues(t, 100, stats.PromptTokens) + require.EqualValues(t, 40, stats.CompletionTokens) + require.EqualValues(t, 140, stats.TotalTokens) + require.EqualValues(t, 30, stats.CachedTokens) + require.Equal(t, cacheTokenRateModeCachedOverPromptPlusCached, stats.CachedTokenRateMode) +} + +func TestObserveChannelAffinityUsageCacheByRelayFormat_MixedMode(t *testing.T) { + ruleName := fmt.Sprintf("rule_%d", time.Now().UnixNano()) + usingGroup := "default" + keyFP := fmt.Sprintf("fp_%d", time.Now().UnixNano()) + ctx := buildChannelAffinityStatsContextForTest(ruleName, usingGroup, keyFP) + + openAIUsage := &dto.Usage{ + PromptTokens: 100, + PromptTokensDetails: dto.InputTokenDetails{ + CachedTokens: 10, + }, + } + claudeUsage := &dto.Usage{ + PromptTokens: 80, + PromptTokensDetails: dto.InputTokenDetails{ + CachedTokens: 20, + }, + } + + ObserveChannelAffinityUsageCacheByRelayFormat(ctx, openAIUsage, types.RelayFormatOpenAI) + ObserveChannelAffinityUsageCacheByRelayFormat(ctx, claudeUsage, types.RelayFormatClaude) + stats := GetChannelAffinityUsageCacheStats(ruleName, usingGroup, keyFP) + + require.EqualValues(t, 2, stats.Total) + require.EqualValues(t, 2, stats.Hit) + require.EqualValues(t, 180, stats.PromptTokens) + require.EqualValues(t, 30, stats.CachedTokens) + require.Equal(t, cacheTokenRateModeMixed, stats.CachedTokenRateMode) +} + +func TestObserveChannelAffinityUsageCacheByRelayFormat_UnsupportedModeKeepsEmpty(t *testing.T) { + ruleName := fmt.Sprintf("rule_%d", time.Now().UnixNano()) + usingGroup := "default" + keyFP := fmt.Sprintf("fp_%d", time.Now().UnixNano()) + ctx := buildChannelAffinityStatsContextForTest(ruleName, usingGroup, keyFP) + + usage := &dto.Usage{ + PromptTokens: 100, + PromptTokensDetails: dto.InputTokenDetails{ + CachedTokens: 25, + }, + } + + ObserveChannelAffinityUsageCacheByRelayFormat(ctx, usage, types.RelayFormatGemini) + stats := GetChannelAffinityUsageCacheStats(ruleName, usingGroup, keyFP) + + require.EqualValues(t, 1, stats.Total) + require.EqualValues(t, 1, stats.Hit) + require.EqualValues(t, 25, stats.CachedTokens) + require.Equal(t, "", stats.CachedTokenRateMode) +} diff --git a/service/quota.go b/service/quota.go index 50421017e..7ee70edd5 100644 --- a/service/quota.go +++ b/service/quota.go @@ -236,6 +236,9 @@ func PostWssConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, mod } func PostClaudeConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.Usage) { + if usage != nil { + ObserveChannelAffinityUsageCacheByRelayFormat(ctx, usage, relayInfo.GetFinalRequestRelayFormat()) + } useTimeSeconds := time.Now().Unix() - relayInfo.StartTime.Unix() promptTokens := usage.PromptTokens diff --git a/web/src/components/table/usage-logs/modals/ChannelAffinityUsageCacheModal.jsx b/web/src/components/table/usage-logs/modals/ChannelAffinityUsageCacheModal.jsx index ea1a5c7fb..383ebabc1 100644 --- a/web/src/components/table/usage-logs/modals/ChannelAffinityUsageCacheModal.jsx +++ b/web/src/components/table/usage-logs/modals/ChannelAffinityUsageCacheModal.jsx @@ -39,6 +39,21 @@ function formatTokenRate(n, d) { return `${r.toFixed(2)}%`; } +function formatCachedTokenRate(cachedTokens, promptTokens, mode) { + if (mode === 'cached_over_prompt_plus_cached') { + const denominator = Number(promptTokens || 0) + Number(cachedTokens || 0); + return formatTokenRate(cachedTokens, denominator); + } + if (mode === 'cached_over_prompt') { + return formatTokenRate(cachedTokens, promptTokens); + } + return '-'; +} + +function hasTextValue(value) { + return typeof value === 'string' && value.trim() !== ''; +} + const ChannelAffinityUsageCacheModal = ({ t, showChannelAffinityUsageCacheModal, @@ -107,7 +122,7 @@ const ChannelAffinityUsageCacheModal = ({ t, ]); - const rows = useMemo(() => { + const { rows, supportsTokenStats } = useMemo(() => { const s = stats || {}; const hit = Number(s.hit || 0); const total = Number(s.total || 0); @@ -118,48 +133,62 @@ const ChannelAffinityUsageCacheModal = ({ const totalTokens = Number(s.total_tokens || 0); const cachedTokens = Number(s.cached_tokens || 0); const promptCacheHitTokens = Number(s.prompt_cache_hit_tokens || 0); + const cachedTokenRateMode = String(s.cached_token_rate_mode || '').trim(); + const supportsTokenStats = + cachedTokenRateMode === 'cached_over_prompt' || + cachedTokenRateMode === 'cached_over_prompt_plus_cached' || + cachedTokenRateMode === 'mixed'; - return [ - { key: t('规则'), value: s.rule_name || params.rule_name || '-' }, - { key: t('分组'), value: s.using_group || params.using_group || '-' }, - { - key: t('Key 摘要'), - value: params.key_hint || '-', - }, - { - key: t('Key 指纹'), - value: s.key_fp || params.key_fp || '-', - }, - { key: t('TTL(秒)'), value: windowSeconds > 0 ? windowSeconds : '-' }, - { - key: t('命中率'), - value: `${hit}/${total} (${formatRate(hit, total)})`, - }, - { - key: t('Prompt tokens'), - value: promptTokens, - }, - { - key: t('Cached tokens'), - value: `${cachedTokens} (${formatTokenRate(cachedTokens, promptTokens)})`, - }, - { - key: t('Prompt cache hit tokens'), - value: promptCacheHitTokens, - }, - { - key: t('Completion tokens'), - value: completionTokens, - }, - { - key: t('Total tokens'), - value: totalTokens, - }, - { - key: t('最近一次'), - value: lastSeenAt > 0 ? timestamp2string(lastSeenAt) : '-', - }, - ]; + const data = []; + const ruleName = String(s.rule_name || params.rule_name || '').trim(); + const usingGroup = String(s.using_group || params.using_group || '').trim(); + const keyHint = String(params.key_hint || '').trim(); + const keyFp = String(s.key_fp || params.key_fp || '').trim(); + + if (hasTextValue(ruleName)) { + data.push({ key: t('规则'), value: ruleName }); + } + if (hasTextValue(usingGroup)) { + data.push({ key: t('分组'), value: usingGroup }); + } + if (hasTextValue(keyHint)) { + data.push({ key: t('Key 摘要'), value: keyHint }); + } + if (hasTextValue(keyFp)) { + data.push({ key: t('Key 指纹'), value: keyFp }); + } + if (windowSeconds > 0) { + data.push({ key: t('TTL(秒)'), value: windowSeconds }); + } + if (total > 0) { + data.push({ key: t('命中率'), value: `${hit}/${total} (${formatRate(hit, total)})` }); + } + if (lastSeenAt > 0) { + data.push({ key: t('最近一次'), value: timestamp2string(lastSeenAt) }); + } + + if (supportsTokenStats) { + if (promptTokens > 0) { + data.push({ key: t('Prompt tokens'), value: promptTokens }); + } + if (promptTokens > 0 || cachedTokens > 0) { + data.push({ + key: t('Cached tokens'), + value: `${cachedTokens} (${formatCachedTokenRate(cachedTokens, promptTokens, cachedTokenRateMode)})`, + }); + } + if (promptCacheHitTokens > 0) { + data.push({ key: t('Prompt cache hit tokens'), value: promptCacheHitTokens }); + } + if (completionTokens > 0) { + data.push({ key: t('Completion tokens'), value: completionTokens }); + } + if (totalTokens > 0) { + data.push({ key: t('Total tokens'), value: totalTokens }); + } + } + + return { rows: data, supportsTokenStats }; }, [stats, params, t]); return ( @@ -179,15 +208,27 @@ const ChannelAffinityUsageCacheModal = ({ {t( '命中判定:usage 中存在 cached tokens(例如 cached_tokens/prompt_cache_hit_tokens)即视为命中。', )} + {' '} + {t( + 'Cached tokens 占比口径由后端返回:Claude 语义按 cached/(prompt+cached),其余按 cached/prompt。', + )} + {' '} + {t('当前仅 OpenAI / Claude 语义支持缓存 token 统计,其他通道将隐藏 token 相关字段。')} + {stats && !supportsTokenStats ? ( + <> + {' '} + {t('该记录不包含可用的 token 统计口径。')} + + ) : null} - {stats ? ( + {stats && rows.length > 0 ? ( ) : (
- {loading ? t('加载中...') : t('暂无数据')} + {loading ? t('加载中...') : t('暂无可展示数据')}
)}