diff --git a/relay/channel/gemini/adaptor.go b/relay/channel/gemini/adaptor.go index e8b8212d6..d8616d2d9 100644 --- a/relay/channel/gemini/adaptor.go +++ b/relay/channel/gemini/adaptor.go @@ -13,6 +13,7 @@ import ( relaycommon "github.com/QuantumNous/new-api/relay/common" "github.com/QuantumNous/new-api/relay/constant" "github.com/QuantumNous/new-api/setting/model_setting" + "github.com/QuantumNous/new-api/setting/reasoning" "github.com/QuantumNous/new-api/types" "github.com/gin-gonic/gin" @@ -137,7 +138,7 @@ func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) { info.UpstreamModelName = strings.TrimSuffix(info.UpstreamModelName, "-thinking") } else if strings.HasSuffix(info.UpstreamModelName, "-nothinking") { info.UpstreamModelName = strings.TrimSuffix(info.UpstreamModelName, "-nothinking") - } else if baseModel, level := parseThinkingLevelSuffix(info.UpstreamModelName); level != "" { + } else if baseModel, level, ok := reasoning.TrimEffortSuffix(info.UpstreamModelName); ok && level != "" { info.UpstreamModelName = baseModel } } diff --git a/relay/channel/gemini/relay-gemini.go b/relay/channel/gemini/relay-gemini.go index f75a92140..db5ea489c 100644 --- a/relay/channel/gemini/relay-gemini.go +++ b/relay/channel/gemini/relay-gemini.go @@ -98,6 +98,7 @@ func clampThinkingBudget(modelName string, budget int) int { // "effort": "high" - Allocates a large portion of tokens for reasoning (approximately 80% of max_tokens) // "effort": "medium" - Allocates a moderate portion of tokens (approximately 50% of max_tokens) // "effort": "low" - Allocates a smaller portion of tokens (approximately 20% of max_tokens) +// "effort": "minimal" - Allocates a minimal portion of tokens (approximately 5% of max_tokens) func clampThinkingBudgetByEffort(modelName string, effort string) int { isNew25Pro := isNew25ProModel(modelName) is25FlashLite := is25FlashLiteModel(modelName) @@ -118,18 +119,12 @@ func clampThinkingBudgetByEffort(modelName string, effort string) int { maxBudget = maxBudget * 50 / 100 case "low": maxBudget = maxBudget * 20 / 100 + case "minimal": + maxBudget = maxBudget * 5 / 100 } return clampThinkingBudget(modelName, maxBudget) } -func parseThinkingLevelSuffix(modelName string) (string, string) { - base, level, ok := reasoning.TrimEffortSuffix(modelName) - if !ok { - return modelName, "" - } - return base, level -} - func ThinkingAdaptor(geminiRequest *dto.GeminiChatRequest, info *relaycommon.RelayInfo, oaiRequest ...dto.GeneralOpenAIRequest) { if model_setting.GetGeminiSettings().ThinkingAdapterEnabled { modelName := info.UpstreamModelName @@ -186,7 +181,7 @@ func ThinkingAdaptor(geminiRequest *dto.GeminiChatRequest, info *relaycommon.Rel ThinkingBudget: common.GetPointer(0), } } - } else if _, level := parseThinkingLevelSuffix(modelName); level != "" { + } else if _, level, ok := reasoning.TrimEffortSuffix(info.UpstreamModelName); ok && level != "" { geminiRequest.GenerationConfig.ThinkingConfig = &dto.GeminiThinkingConfig{ IncludeThoughts: true, ThinkingLevel: level, diff --git a/setting/reasoning/suffix.go b/setting/reasoning/suffix.go index 4cc74b612..da3bdc7d3 100644 --- a/setting/reasoning/suffix.go +++ b/setting/reasoning/suffix.go @@ -6,7 +6,7 @@ import ( "github.com/samber/lo" ) -var EffortSuffixes = []string{"-high", "-medium", "-low"} +var EffortSuffixes = []string{"-high", "-medium", "-low", "-minimal"} // TrimEffortSuffix -> modelName level(low) exists func TrimEffortSuffix(modelName string) (string, string, bool) {