From ab81d6e444b6627ff88d4f0a56a8122b79196de6 Mon Sep 17 00:00:00 2001 From: John Chen Date: Tue, 30 Dec 2025 17:38:32 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=E6=99=BA=E8=B0=B1?= =?UTF-8?q?=E3=80=81Moonshot=E6=B8=A0=E9=81=93=E5=9C=A8stream=3Dtrue?= =?UTF-8?q?=E6=97=B6=E6=97=A0=E6=B3=95=E6=8B=BF=E5=88=B0cachePrompt?= =?UTF-8?q?=E7=9A=84=E7=BB=9F=E8=AE=A1=E6=95=B0=E6=8D=AE=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 根本原因: 1. 在OaiStreamHandler流式处理函数中,调用applyUsagePostProcessing(info, usage, nil)时传入的responseBody为nil,导致无法从响应体中提取缓存tokens。 2. 两个渠道的cached_tokens位置不同: - 智谱:标准位置 usage.prompt_tokens_details.cached_tokens - Moonshot:非标准位置 choices[].usage.cached_tokens 处理方案: 1. 传递body信息到applyUsagePostProcessing中。 2. 拆分智谱和Moonshot的解析,并为Moonshot单独写一个解析方法。 --- relay/channel/openai/relay-openai.go | 47 ++++++++++++++++++++++++++-- 1 file changed, 45 insertions(+), 2 deletions(-) diff --git a/relay/channel/openai/relay-openai.go b/relay/channel/openai/relay-openai.go index ac44312eb..a4c6ef605 100644 --- a/relay/channel/openai/relay-openai.go +++ b/relay/channel/openai/relay-openai.go @@ -186,7 +186,7 @@ func OaiStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Re usage.CompletionTokens += toolCount * 7 } - applyUsagePostProcessing(info, usage, nil) + applyUsagePostProcessing(info, usage, common.StringToByteSlice(lastStreamData)) HandleFinalResponse(c, info, lastStreamData, responseId, createAt, model, systemFingerprint, usage, containStreamUsage) @@ -596,7 +596,8 @@ func applyUsagePostProcessing(info *relaycommon.RelayInfo, usage *dto.Usage, res if usage.PromptTokensDetails.CachedTokens == 0 && usage.PromptCacheHitTokens != 0 { usage.PromptTokensDetails.CachedTokens = usage.PromptCacheHitTokens } - case constant.ChannelTypeZhipu_v4, constant.ChannelTypeMoonshot: + case constant.ChannelTypeZhipu_v4: + // 智谱的cached_tokens在标准位置: usage.prompt_tokens_details.cached_tokens if 
usage.PromptTokensDetails.CachedTokens == 0 { if usage.InputTokensDetails != nil && usage.InputTokensDetails.CachedTokens > 0 { usage.PromptTokensDetails.CachedTokens = usage.InputTokensDetails.CachedTokens @@ -606,6 +607,19 @@ func applyUsagePostProcessing(info *relaycommon.RelayInfo, usage *dto.Usage, res usage.PromptTokensDetails.CachedTokens = usage.PromptCacheHitTokens } } + case constant.ChannelTypeMoonshot: + // Moonshot reports cached_tokens at a non-standard location: choices[].usage.cached_tokens + if usage.PromptTokensDetails.CachedTokens == 0 { + if usage.InputTokensDetails != nil && usage.InputTokensDetails.CachedTokens > 0 { + usage.PromptTokensDetails.CachedTokens = usage.InputTokensDetails.CachedTokens + } else if cachedTokens, ok := extractMoonshotCachedTokensFromBody(responseBody); ok { + usage.PromptTokensDetails.CachedTokens = cachedTokens + } else if cachedTokens, ok := extractCachedTokensFromBody(responseBody); ok { + usage.PromptTokensDetails.CachedTokens = cachedTokens + } else if usage.PromptCacheHitTokens > 0 { + usage.PromptTokensDetails.CachedTokens = usage.PromptCacheHitTokens + } + } } } @@ -639,3 +653,32 @@ func extractCachedTokensFromBody(body []byte) (int, bool) { } return 0, false } + +// extractMoonshotCachedTokensFromBody extracts cached_tokens from Moonshot's non-standard location. +// Moonshot streaming response format: {"choices":[{"usage":{"cached_tokens":111}}]} +func extractMoonshotCachedTokensFromBody(body []byte) (int, bool) { + if len(body) == 0 { + return 0, false + } + + var payload struct { + Choices []struct { + Usage struct { + CachedTokens *int `json:"cached_tokens"` + } `json:"usage"` + } `json:"choices"` + } + + if err := common.Unmarshal(body, &payload); err != nil { + return 0, false + } + + // Walk the choices and return the first positive cached_tokens value. + for _, choice := range payload.Choices { + if choice.Usage.CachedTokens != nil && *choice.Usage.CachedTokens > 0 { + return *choice.Usage.CachedTokens, true + } + } + + return 0, false +}