mirror of
https://github.com/QuantumNous/new-api.git
synced 2026-03-30 02:25:00 +00:00
feat(gemini): support cached token billing
This commit is contained in:
@@ -449,11 +449,12 @@ type GeminiChatResponse struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type GeminiUsageMetadata struct {
|
type GeminiUsageMetadata struct {
|
||||||
PromptTokenCount int `json:"promptTokenCount"`
|
PromptTokenCount int `json:"promptTokenCount"`
|
||||||
CandidatesTokenCount int `json:"candidatesTokenCount"`
|
CandidatesTokenCount int `json:"candidatesTokenCount"`
|
||||||
TotalTokenCount int `json:"totalTokenCount"`
|
TotalTokenCount int `json:"totalTokenCount"`
|
||||||
ThoughtsTokenCount int `json:"thoughtsTokenCount"`
|
ThoughtsTokenCount int `json:"thoughtsTokenCount"`
|
||||||
PromptTokensDetails []GeminiPromptTokensDetails `json:"promptTokensDetails"`
|
CachedContentTokenCount int `json:"cachedContentTokenCount"`
|
||||||
|
PromptTokensDetails []GeminiPromptTokensDetails `json:"promptTokensDetails"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type GeminiPromptTokensDetails struct {
|
type GeminiPromptTokensDetails struct {
|
||||||
|
|||||||
@@ -49,6 +49,7 @@ func GeminiTextGenerationHandler(c *gin.Context, info *relaycommon.RelayInfo, re
|
|||||||
}
|
}
|
||||||
|
|
||||||
usage.CompletionTokenDetails.ReasoningTokens = geminiResponse.UsageMetadata.ThoughtsTokenCount
|
usage.CompletionTokenDetails.ReasoningTokens = geminiResponse.UsageMetadata.ThoughtsTokenCount
|
||||||
|
usage.PromptTokensDetails.CachedTokens = geminiResponse.UsageMetadata.CachedContentTokenCount
|
||||||
|
|
||||||
for _, detail := range geminiResponse.UsageMetadata.PromptTokensDetails {
|
for _, detail := range geminiResponse.UsageMetadata.PromptTokensDetails {
|
||||||
if detail.Modality == "AUDIO" {
|
if detail.Modality == "AUDIO" {
|
||||||
|
|||||||
@@ -1251,6 +1251,7 @@ func geminiStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http
|
|||||||
usage.CompletionTokens = geminiResponse.UsageMetadata.CandidatesTokenCount + geminiResponse.UsageMetadata.ThoughtsTokenCount
|
usage.CompletionTokens = geminiResponse.UsageMetadata.CandidatesTokenCount + geminiResponse.UsageMetadata.ThoughtsTokenCount
|
||||||
usage.CompletionTokenDetails.ReasoningTokens = geminiResponse.UsageMetadata.ThoughtsTokenCount
|
usage.CompletionTokenDetails.ReasoningTokens = geminiResponse.UsageMetadata.ThoughtsTokenCount
|
||||||
usage.TotalTokens = geminiResponse.UsageMetadata.TotalTokenCount
|
usage.TotalTokens = geminiResponse.UsageMetadata.TotalTokenCount
|
||||||
|
usage.PromptTokensDetails.CachedTokens = geminiResponse.UsageMetadata.CachedContentTokenCount
|
||||||
for _, detail := range geminiResponse.UsageMetadata.PromptTokensDetails {
|
for _, detail := range geminiResponse.UsageMetadata.PromptTokensDetails {
|
||||||
if detail.Modality == "AUDIO" {
|
if detail.Modality == "AUDIO" {
|
||||||
usage.PromptTokensDetails.AudioTokens = detail.TokenCount
|
usage.PromptTokensDetails.AudioTokens = detail.TokenCount
|
||||||
@@ -1395,6 +1396,7 @@ func GeminiChatHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.R
|
|||||||
PromptTokens: geminiResponse.UsageMetadata.PromptTokenCount,
|
PromptTokens: geminiResponse.UsageMetadata.PromptTokenCount,
|
||||||
}
|
}
|
||||||
usage.CompletionTokenDetails.ReasoningTokens = geminiResponse.UsageMetadata.ThoughtsTokenCount
|
usage.CompletionTokenDetails.ReasoningTokens = geminiResponse.UsageMetadata.ThoughtsTokenCount
|
||||||
|
usage.PromptTokensDetails.CachedTokens = geminiResponse.UsageMetadata.CachedContentTokenCount
|
||||||
for _, detail := range geminiResponse.UsageMetadata.PromptTokensDetails {
|
for _, detail := range geminiResponse.UsageMetadata.PromptTokensDetails {
|
||||||
if detail.Modality == "AUDIO" {
|
if detail.Modality == "AUDIO" {
|
||||||
usage.PromptTokensDetails.AudioTokens = detail.TokenCount
|
usage.PromptTokensDetails.AudioTokens = detail.TokenCount
|
||||||
@@ -1447,6 +1449,7 @@ func GeminiChatHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.R
|
|||||||
}
|
}
|
||||||
|
|
||||||
usage.CompletionTokenDetails.ReasoningTokens = geminiResponse.UsageMetadata.ThoughtsTokenCount
|
usage.CompletionTokenDetails.ReasoningTokens = geminiResponse.UsageMetadata.ThoughtsTokenCount
|
||||||
|
usage.PromptTokensDetails.CachedTokens = geminiResponse.UsageMetadata.CachedContentTokenCount
|
||||||
usage.CompletionTokens = usage.TotalTokens - usage.PromptTokens
|
usage.CompletionTokens = usage.TotalTokens - usage.PromptTokens
|
||||||
|
|
||||||
for _, detail := range geminiResponse.UsageMetadata.PromptTokensDetails {
|
for _, detail := range geminiResponse.UsageMetadata.PromptTokensDetails {
|
||||||
|
|||||||
@@ -8,6 +8,8 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
var defaultCacheRatio = map[string]float64{
|
var defaultCacheRatio = map[string]float64{
|
||||||
|
"gemini-3-flash-preview": 0.25,
|
||||||
|
"gemini-3-pro-preview": 0.25,
|
||||||
"gpt-4": 0.5,
|
"gpt-4": 0.5,
|
||||||
"o1": 0.5,
|
"o1": 0.5,
|
||||||
"o1-2024-12-17": 0.5,
|
"o1-2024-12-17": 0.5,
|
||||||
|
|||||||
Reference in New Issue
Block a user