feat(gemini): support cached token billing

This commit is contained in:
RedwindA
2026-02-01 22:50:47 +08:00
parent 1c983a04d3
commit e3f96120bc
4 changed files with 12 additions and 5 deletions

View File

@@ -449,11 +449,12 @@ type GeminiChatResponse struct {
} }
type GeminiUsageMetadata struct { type GeminiUsageMetadata struct {
PromptTokenCount int `json:"promptTokenCount"` PromptTokenCount int `json:"promptTokenCount"`
CandidatesTokenCount int `json:"candidatesTokenCount"` CandidatesTokenCount int `json:"candidatesTokenCount"`
TotalTokenCount int `json:"totalTokenCount"` TotalTokenCount int `json:"totalTokenCount"`
ThoughtsTokenCount int `json:"thoughtsTokenCount"` ThoughtsTokenCount int `json:"thoughtsTokenCount"`
PromptTokensDetails []GeminiPromptTokensDetails `json:"promptTokensDetails"` CachedContentTokenCount int `json:"cachedContentTokenCount"`
PromptTokensDetails []GeminiPromptTokensDetails `json:"promptTokensDetails"`
} }
type GeminiPromptTokensDetails struct { type GeminiPromptTokensDetails struct {

View File

@@ -49,6 +49,7 @@ func GeminiTextGenerationHandler(c *gin.Context, info *relaycommon.RelayInfo, re
} }
usage.CompletionTokenDetails.ReasoningTokens = geminiResponse.UsageMetadata.ThoughtsTokenCount usage.CompletionTokenDetails.ReasoningTokens = geminiResponse.UsageMetadata.ThoughtsTokenCount
usage.PromptTokensDetails.CachedTokens = geminiResponse.UsageMetadata.CachedContentTokenCount
for _, detail := range geminiResponse.UsageMetadata.PromptTokensDetails { for _, detail := range geminiResponse.UsageMetadata.PromptTokensDetails {
if detail.Modality == "AUDIO" { if detail.Modality == "AUDIO" {

View File

@@ -1251,6 +1251,7 @@ func geminiStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http
usage.CompletionTokens = geminiResponse.UsageMetadata.CandidatesTokenCount + geminiResponse.UsageMetadata.ThoughtsTokenCount usage.CompletionTokens = geminiResponse.UsageMetadata.CandidatesTokenCount + geminiResponse.UsageMetadata.ThoughtsTokenCount
usage.CompletionTokenDetails.ReasoningTokens = geminiResponse.UsageMetadata.ThoughtsTokenCount usage.CompletionTokenDetails.ReasoningTokens = geminiResponse.UsageMetadata.ThoughtsTokenCount
usage.TotalTokens = geminiResponse.UsageMetadata.TotalTokenCount usage.TotalTokens = geminiResponse.UsageMetadata.TotalTokenCount
usage.PromptTokensDetails.CachedTokens = geminiResponse.UsageMetadata.CachedContentTokenCount
for _, detail := range geminiResponse.UsageMetadata.PromptTokensDetails { for _, detail := range geminiResponse.UsageMetadata.PromptTokensDetails {
if detail.Modality == "AUDIO" { if detail.Modality == "AUDIO" {
usage.PromptTokensDetails.AudioTokens = detail.TokenCount usage.PromptTokensDetails.AudioTokens = detail.TokenCount
@@ -1395,6 +1396,7 @@ func GeminiChatHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.R
PromptTokens: geminiResponse.UsageMetadata.PromptTokenCount, PromptTokens: geminiResponse.UsageMetadata.PromptTokenCount,
} }
usage.CompletionTokenDetails.ReasoningTokens = geminiResponse.UsageMetadata.ThoughtsTokenCount usage.CompletionTokenDetails.ReasoningTokens = geminiResponse.UsageMetadata.ThoughtsTokenCount
usage.PromptTokensDetails.CachedTokens = geminiResponse.UsageMetadata.CachedContentTokenCount
for _, detail := range geminiResponse.UsageMetadata.PromptTokensDetails { for _, detail := range geminiResponse.UsageMetadata.PromptTokensDetails {
if detail.Modality == "AUDIO" { if detail.Modality == "AUDIO" {
usage.PromptTokensDetails.AudioTokens = detail.TokenCount usage.PromptTokensDetails.AudioTokens = detail.TokenCount
@@ -1447,6 +1449,7 @@ func GeminiChatHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.R
} }
usage.CompletionTokenDetails.ReasoningTokens = geminiResponse.UsageMetadata.ThoughtsTokenCount usage.CompletionTokenDetails.ReasoningTokens = geminiResponse.UsageMetadata.ThoughtsTokenCount
usage.PromptTokensDetails.CachedTokens = geminiResponse.UsageMetadata.CachedContentTokenCount
usage.CompletionTokens = usage.TotalTokens - usage.PromptTokens usage.CompletionTokens = usage.TotalTokens - usage.PromptTokens
for _, detail := range geminiResponse.UsageMetadata.PromptTokensDetails { for _, detail := range geminiResponse.UsageMetadata.PromptTokensDetails {

View File

@@ -8,6 +8,8 @@ import (
) )
var defaultCacheRatio = map[string]float64{ var defaultCacheRatio = map[string]float64{
"gemini-3-flash-preview": 0.25,
"gemini-3-pro-preview": 0.25,
"gpt-4": 0.5, "gpt-4": 0.5,
"o1": 0.5, "o1": 0.5,
"o1-2024-12-17": 0.5, "o1-2024-12-17": 0.5,