diff --git a/README.en.md b/README.en.md index a509f36c9..e71f5e623 100644 --- a/README.en.md +++ b/README.en.md @@ -238,6 +238,7 @@ docker run --name new-api -d --restart always \ - `gemini-2.5-flash-nothinking` - Disable thinking mode - `gemini-2.5-pro-thinking` - Enable thinking mode - `gemini-2.5-pro-thinking-128` - Enable thinking mode with thinking budget of 128 tokens +- You can also append `-low`, `-medium`, or `-high` to any Gemini model name to request the corresponding reasoning effort (no extra thinking-budget suffix needed). diff --git a/README.fr.md b/README.fr.md index ec4ac5f8a..35051223e 100644 --- a/README.fr.md +++ b/README.fr.md @@ -234,6 +234,7 @@ docker run --name new-api -d --restart always \ - `gemini-2.5-flash-nothinking` - Désactiver le mode de pensée - `gemini-2.5-pro-thinking` - Activer le mode de pensée - `gemini-2.5-pro-thinking-128` - Activer le mode de pensée avec budget de pensée de 128 tokens +- Vous pouvez également ajouter les suffixes `-low`, `-medium` ou `-high` aux modèles Gemini pour fixer le niveau d’effort de raisonnement (sans suffixe de budget supplémentaire). 
diff --git a/README.ja.md b/README.ja.md index cbd4fdc16..0c4b91f66 100644 --- a/README.ja.md +++ b/README.ja.md @@ -243,6 +243,7 @@ docker run --name new-api -d --restart always \ - `gemini-2.5-flash-nothinking` - 思考モードを無効にする - `gemini-2.5-pro-thinking` - 思考モードを有効にする - `gemini-2.5-pro-thinking-128` - 思考モードを有効にし、思考予算を128トークンに設定する +- Gemini モデル名の末尾に `-low` / `-medium` / `-high` を付けることで推論強度を直接指定できます(追加の思考予算サフィックスは不要です)。 diff --git a/README.md b/README.md index 86ba5852e..3d5b6923c 100644 --- a/README.md +++ b/README.md @@ -239,6 +239,7 @@ docker run --name new-api -d --restart always \ - `gemini-2.5-flash-nothinking` - 禁用思考模式 - `gemini-2.5-pro-thinking` - 启用思考模式 - `gemini-2.5-pro-thinking-128` - 启用思考模式,并设置思考预算为128tokens +- 也可以直接在 Gemini 模型名称后追加 `-low` / `-medium` / `-high` 来控制思考力度(无需再设置思考预算后缀) diff --git a/dto/gemini.go b/dto/gemini.go index fa869b6d1..1ee71a719 100644 --- a/dto/gemini.go +++ b/dto/gemini.go @@ -142,7 +142,7 @@ type GeminiThinkingConfig struct { IncludeThoughts bool `json:"includeThoughts,omitempty"` ThinkingBudget *int `json:"thinkingBudget,omitempty"` // TODO Conflict with thinkingbudget. - ThinkingLevel json.RawMessage `json:"thinkingLevel,omitempty"` + ThinkingLevel string `json:"thinkingLevel,omitempty"` } // UnmarshalJSON allows GeminiThinkingConfig to accept both snake_case and camelCase fields. 
@@ -150,9 +150,9 @@ func (c *GeminiThinkingConfig) UnmarshalJSON(data []byte) error { type Alias GeminiThinkingConfig var aux struct { Alias - IncludeThoughtsSnake *bool `json:"include_thoughts,omitempty"` - ThinkingBudgetSnake *int `json:"thinking_budget,omitempty"` - ThinkingLevelSnake json.RawMessage `json:"thinking_level,omitempty"` + IncludeThoughtsSnake *bool `json:"include_thoughts,omitempty"` + ThinkingBudgetSnake *int `json:"thinking_budget,omitempty"` + ThinkingLevelSnake string `json:"thinking_level,omitempty"` } if err := common.Unmarshal(data, &aux); err != nil { @@ -169,7 +169,7 @@ func (c *GeminiThinkingConfig) UnmarshalJSON(data []byte) error { c.ThinkingBudget = aux.ThinkingBudgetSnake } - if len(aux.ThinkingLevelSnake) > 0 { + if aux.ThinkingLevelSnake != "" { c.ThinkingLevel = aux.ThinkingLevelSnake } diff --git a/relay/channel/gemini/adaptor.go b/relay/channel/gemini/adaptor.go index b522ca1be..e8b8212d6 100644 --- a/relay/channel/gemini/adaptor.go +++ b/relay/channel/gemini/adaptor.go @@ -137,6 +137,8 @@ func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) { info.UpstreamModelName = strings.TrimSuffix(info.UpstreamModelName, "-thinking") } else if strings.HasSuffix(info.UpstreamModelName, "-nothinking") { info.UpstreamModelName = strings.TrimSuffix(info.UpstreamModelName, "-nothinking") + } else if baseModel, level := parseThinkingLevelSuffix(info.UpstreamModelName); level != "" { + info.UpstreamModelName = baseModel } } diff --git a/relay/channel/gemini/relay-gemini.go b/relay/channel/gemini/relay-gemini.go index ae892ed89..f75a92140 100644 --- a/relay/channel/gemini/relay-gemini.go +++ b/relay/channel/gemini/relay-gemini.go @@ -19,8 +19,8 @@ import ( "github.com/QuantumNous/new-api/relay/helper" "github.com/QuantumNous/new-api/service" "github.com/QuantumNous/new-api/setting/model_setting" + "github.com/QuantumNous/new-api/setting/reasoning" "github.com/QuantumNous/new-api/types" - "github.com/gin-gonic/gin" ) @@ 
-122,6 +122,14 @@ func clampThinkingBudgetByEffort(modelName string, effort string) int { return clampThinkingBudget(modelName, maxBudget) } +func parseThinkingLevelSuffix(modelName string) (string, string) { + base, level, ok := reasoning.TrimEffortSuffix(modelName) + if !ok { + return modelName, "" + } + return base, level +} + func ThinkingAdaptor(geminiRequest *dto.GeminiChatRequest, info *relaycommon.RelayInfo, oaiRequest ...dto.GeneralOpenAIRequest) { if model_setting.GetGeminiSettings().ThinkingAdapterEnabled { modelName := info.UpstreamModelName @@ -178,6 +186,12 @@ func ThinkingAdaptor(geminiRequest *dto.GeminiChatRequest, info *relaycommon.Rel ThinkingBudget: common.GetPointer(0), } } + } else if _, level := parseThinkingLevelSuffix(modelName); level != "" { + geminiRequest.GenerationConfig.ThinkingConfig = &dto.GeminiThinkingConfig{ + IncludeThoughts: true, + ThinkingLevel: level, + } + info.ReasoningEffort = level } } } diff --git a/setting/model_setting/global.go b/setting/model_setting/global.go index e8815c0e9..f51ebc89d 100644 --- a/setting/model_setting/global.go +++ b/setting/model_setting/global.go @@ -32,7 +32,7 @@ func GetGlobalSettings() *GlobalSettings { return &globalSettings } -// ShouldPreserveThinkingSuffix 判断模型是否配置为保留 thinking/-nothinking 后缀 +// ShouldPreserveThinkingSuffix 判断模型是否配置为保留 thinking/-nothinking/-low/-high/-medium 后缀 func ShouldPreserveThinkingSuffix(modelName string) bool { target := strings.TrimSpace(modelName) if target == "" { diff --git a/setting/ratio_setting/model_ratio.go b/setting/ratio_setting/model_ratio.go index 2354f3ec8..bd533db5c 100644 --- a/setting/ratio_setting/model_ratio.go +++ b/setting/ratio_setting/model_ratio.go @@ -7,6 +7,7 @@ import ( "github.com/QuantumNous/new-api/common" "github.com/QuantumNous/new-api/setting/operation_setting" + "github.com/QuantumNous/new-api/setting/reasoning" ) // from songquanpeng/one-api @@ -821,6 +822,10 @@ func FormatMatchingModelName(name string) string { name = 
handleThinkingBudgetModel(name, "gemini-2.5-pro", "gemini-2.5-pro-thinking-*") } + if base, _, ok := reasoning.TrimEffortSuffix(name); ok { + name = base + } + if strings.HasPrefix(name, "gpt-4-gizmo") { name = "gpt-4-gizmo-*" } diff --git a/setting/reasoning/suffix.go b/setting/reasoning/suffix.go new file mode 100644 index 000000000..4cc74b612 --- /dev/null +++ b/setting/reasoning/suffix.go @@ -0,0 +1,20 @@ +package reasoning + +import ( + "strings" + + "github.com/samber/lo" +) + +var EffortSuffixes = []string{"-high", "-medium", "-low"} + +// TrimEffortSuffix strips a trailing "-high", "-medium" or "-low" suffix from modelName and returns the base name, the effort level without its leading dash (e.g. "low"), and true; when no suffix matches it returns modelName unchanged, an empty level, and false. +func TrimEffortSuffix(modelName string) (string, string, bool) { + suffix, found := lo.Find(EffortSuffixes, func(s string) bool { + return strings.HasSuffix(modelName, s) + }) + if !found { + return modelName, "", false + } + return strings.TrimSuffix(modelName, suffix), strings.TrimPrefix(suffix, "-"), true +} diff --git a/web/src/i18n/locales/en.json b/web/src/i18n/locales/en.json index 82e58e24e..3f279e13a 100644 --- a/web/src/i18n/locales/en.json +++ b/web/src/i18n/locales/en.json @@ -1996,7 +1996,7 @@ "适用于个人使用的场景,不需要设置模型价格": "Suitable for personal use, no need to set model price.", "适用于为多个用户提供服务的场景": "Suitable for scenarios where multiple users are provided.", "适用于展示系统功能的场景,提供基础功能演示": "Suitable for scenarios where the system functions are displayed, providing basic feature demonstrations.", - "适配 -thinking、-thinking-预算数字 和 -nothinking 后缀": "Adapt to -thinking, -thinking-budget number, and -nothinking suffixes", + "适配 -thinking、-thinking-预算数字 和 -nothinking 后缀": "Adapt to -thinking, -thinking-budget number, -nothinking, and -low/-medium/-high suffixes", "选择充值套餐": "Choose a top-up package", "选择充值额度": "Select recharge amount", "选择分组": "Select group", @@ -2178,4 +2178,4 @@ "默认测试模型": "Default Test Model", "默认补全倍率": "Default completion ratio" } -} \ No newline at end of file +} diff --git a/web/src/i18n/locales/fr.json b/web/src/i18n/locales/fr.json index cec19094a..ed1df8a83
100644 --- a/web/src/i18n/locales/fr.json +++ b/web/src/i18n/locales/fr.json @@ -2006,7 +2006,7 @@ "适用于个人使用的场景,不需要设置模型价格": "Adapté à un usage personnel, pas besoin de définir le prix du modèle.", "适用于为多个用户提供服务的场景": "Adapté aux scénarios où plusieurs utilisateurs sont fournis.", "适用于展示系统功能的场景,提供基础功能演示": "Adapté aux scénarios où les fonctions du système sont affichées, fournissant des démonstrations de fonctionnalités de base.", - "适配 -thinking、-thinking-预算数字 和 -nothinking 后缀": "Adapter les suffixes -thinking, -thinking-budget et -nothinking", + "适配 -thinking、-thinking-预算数字 和 -nothinking 后缀": "Adapter les suffixes -thinking, -thinking-budget, -nothinking et -low/-medium/-high", "选择充值额度": "Sélectionner le montant de la recharge", "选择分组": "Sélectionner un groupe", "选择同步来源": "Sélectionner la source de synchronisation", @@ -2227,4 +2227,4 @@ "随机种子 (留空为随机)": "Graine aléatoire (laisser vide pour aléatoire)", "默认补全倍率": "Taux de complétion par défaut" } -} \ No newline at end of file +} diff --git a/web/src/i18n/locales/ja.json b/web/src/i18n/locales/ja.json index ba70cb475..0e4786c68 100644 --- a/web/src/i18n/locales/ja.json +++ b/web/src/i18n/locales/ja.json @@ -1903,7 +1903,7 @@ "适用于个人使用的场景,不需要设置模型价格": "個人利用のシナリオに適しており、モデル料金の設定は不要です", "适用于为多个用户提供服务的场景": "複数のユーザーにサービスを提供するシナリオに適しています", "适用于展示系统功能的场景,提供基础功能演示": "システムの機能を紹介するシナリオに適しており、基本的な機能のデモンストレーションを提供します", - "适配 -thinking、-thinking-预算数字 和 -nothinking 后缀": "-thinking、-thinking-予算数値、-nothinkingサフィックスに対応", + "适配 -thinking、-thinking-预算数字 和 -nothinking 后缀": "-thinking、-thinking-予算数値、-nothinking、および -low/-medium/-high サフィックスに対応", "选择充值额度": "チャージ額を選択", "选择分组": "グループを選択", "选择同步来源": "同期ソースを選択", @@ -2126,4 +2126,4 @@ "可选,用于复现结果": "オプション、結果の再現用", "随机种子 (留空为随机)": "ランダムシード(空欄でランダム)" } -} \ No newline at end of file +} diff --git a/web/src/i18n/locales/ru.json b/web/src/i18n/locales/ru.json index 18b4cb05a..92171a0c3 100644 --- a/web/src/i18n/locales/ru.json +++ b/web/src/i18n/locales/ru.json @@ -2017,7 +2017,7 @@ 
"适用于个人使用的场景,不需要设置模型价格": "Подходит для сценариев личного использования, не требует установки цен на модели", "适用于为多个用户提供服务的场景": "Подходит для сценариев предоставления услуг нескольким пользователям", "适用于展示系统功能的场景,提供基础功能演示": "Подходит для сценариев демонстрации системных функций, предоставляет демонстрацию базовых функций", - "适配 -thinking、-thinking-预算数字 和 -nothinking 后缀": "Адаптация суффиксов -thinking, -thinking-бюджетные-цифры и -nothinking", + "适配 -thinking、-thinking-预算数字 和 -nothinking 后缀": "Адаптация суффиксов -thinking, -thinking-бюджетные-цифры, -nothinking и -low/-medium/-high", "选择充值额度": "Выберите сумму пополнения", "选择分组": "Выберите группу", "选择同步来源": "Выберите источник синхронизации", @@ -2237,4 +2237,4 @@ "可选,用于复现结果": "Необязательно, для воспроизводимых результатов", "随机种子 (留空为随机)": "Случайное зерно (оставьте пустым для случайного)" } -} \ No newline at end of file +} diff --git a/web/src/i18n/locales/vi.json b/web/src/i18n/locales/vi.json index b170f43c6..8af562f7a 100644 --- a/web/src/i18n/locales/vi.json +++ b/web/src/i18n/locales/vi.json @@ -2197,7 +2197,7 @@ "适用于个人使用的场景,不需要设置模型价格": "Phù hợp cho mục đích sử dụng cá nhân, không cần đặt giá mô hình.", "适用于为多个用户提供服务的场景": "Phù hợp cho các kịch bản cung cấp dịch vụ cho nhiều người dùng.", "适用于展示系统功能的场景,提供基础功能演示": "Phù hợp cho các kịch bản hiển thị chức năng hệ thống, cung cấp bản demo chức năng cơ bản.", - "适配 -thinking、-thinking-预算数字 和 -nothinking 后缀": "Thích ứng với các hậu tố -thinking, -thinking-budget number và -nothinking", + "适配 -thinking、-thinking-预算数字 和 -nothinking 后缀": "Thích ứng với các hậu tố -thinking, -thinking-budget number, -nothinking và -low/-medium/-high", "选择充值额度": "Chọn hạn ngạch nạp tiền", "选择同步来源": "Chọn nguồn đồng bộ", "选择同步渠道": "Chọn kênh đồng bộ", @@ -2737,4 +2737,4 @@ "可选,用于复现结果": "Tùy chọn, để tái tạo kết quả", "随机种子 (留空为随机)": "Hạt giống ngẫu nhiên (để trống cho ngẫu nhiên)" } -} \ No newline at end of file +} diff --git a/web/src/i18n/locales/zh.json 
b/web/src/i18n/locales/zh.json index 10e78b93a..a07885638 100644 --- a/web/src/i18n/locales/zh.json +++ b/web/src/i18n/locales/zh.json @@ -1984,7 +1984,7 @@ "适用于个人使用的场景,不需要设置模型价格": "适用于个人使用的场景,不需要设置模型价格", "适用于为多个用户提供服务的场景": "适用于为多个用户提供服务的场景", "适用于展示系统功能的场景,提供基础功能演示": "适用于展示系统功能的场景,提供基础功能演示", - "适配 -thinking、-thinking-预算数字 和 -nothinking 后缀": "适配 -thinking、-thinking-预算数字 和 -nothinking 后缀", + "适配 -thinking、-thinking-预算数字 和 -nothinking 后缀": "适配 -thinking、-thinking-预算数字、-nothinking 以及 -low/-medium/-high 后缀", "选择充值额度": "选择充值额度", "选择分组": "选择分组", "选择同步来源": "选择同步来源", @@ -2204,4 +2204,4 @@ "可选,用于复现结果": "可选,用于复现结果", "随机种子 (留空为随机)": "随机种子 (留空为随机)" } -} \ No newline at end of file +}