From e082268533c53685c1b39a225270faa08fef4b31 Mon Sep 17 00:00:00 2001
From: Seefs <40468931+seefs001@users.noreply.github.com>
Date: Fri, 7 Nov 2025 17:43:33 +0800
Subject: [PATCH] feat: ShouldPreserveThinkingSuffix (#2189)

---
 relay/channel/claude/relay-claude.go          |  4 +-
 relay/channel/gemini/adaptor.go               |  3 +-
 relay/channel/openai/adaptor.go               |  4 +-
 relay/channel/vertex/adaptor.go               |  3 +-
 relay/channel/volcengine/adaptor.go           |  5 +-
 relay/claude_handler.go                       |  4 +-
 setting/model_setting/global.go               | 24 ++++-
 web/src/components/settings/ModelSetting.jsx  |  4 +-
 web/src/i18n/locales/en.json                  |  3 +
 web/src/i18n/locales/fr.json                  |  3 +
 web/src/i18n/locales/ja.json                  |  3 +
 web/src/i18n/locales/ru.json                  |  3 +
 web/src/i18n/locales/zh.json                  |  3 +
 .../Setting/Model/SettingGlobalModel.jsx      | 91 ++++++++++++++++---
 14 files changed, 138 insertions(+), 19 deletions(-)
diff --git a/relay/channel/claude/relay-claude.go b/relay/channel/claude/relay-claude.go
index 9c72270f6..69c22db8b 100644
--- a/relay/channel/claude/relay-claude.go
+++ b/relay/channel/claude/relay-claude.go
@@ -189,7 +189,9 @@ func RequestOpenAI2ClaudeMessage(c *gin.Context, textRequest dto.GeneralOpenAIRe
 		// https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#important-considerations-when-using-extended-thinking
 		claudeRequest.TopP = 0
 		claudeRequest.Temperature = common.GetPointer[float64](1.0)
-		claudeRequest.Model = strings.TrimSuffix(textRequest.Model, "-thinking")
+		if !model_setting.ShouldPreserveThinkingSuffix(textRequest.Model) {
+			claudeRequest.Model = strings.TrimSuffix(textRequest.Model, "-thinking")
+		}
 	}
 
 	if textRequest.ReasoningEffort != "" {
diff --git a/relay/channel/gemini/adaptor.go b/relay/channel/gemini/adaptor.go
index b2a8b186f..b1067bc20 100644
--- a/relay/channel/gemini/adaptor.go
+++ b/relay/channel/gemini/adaptor.go
@@ -127,7 +127,8 @@ func (a *Adaptor) Init(info *relaycommon.RelayInfo) {
 
 func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
 
-	if model_setting.GetGeminiSettings().ThinkingAdapterEnabled {
+	if model_setting.GetGeminiSettings().ThinkingAdapterEnabled &&
+		!model_setting.ShouldPreserveThinkingSuffix(info.OriginModelName) {
 		// 新增逻辑：处理 -thinking-<budget> 格式
 		if strings.Contains(info.UpstreamModelName, "-thinking-") {
 			parts := strings.Split(info.UpstreamModelName, "-thinking-")
diff --git a/relay/channel/openai/adaptor.go b/relay/channel/openai/adaptor.go
index a83082807..c2eae2488 100644
--- a/relay/channel/openai/adaptor.go
+++ b/relay/channel/openai/adaptor.go
@@ -27,6 +27,7 @@ import (
 	"github.com/QuantumNous/new-api/relay/common_handler"
 	relayconstant "github.com/QuantumNous/new-api/relay/constant"
 	"github.com/QuantumNous/new-api/service"
+	"github.com/QuantumNous/new-api/setting/model_setting"
 	"github.com/QuantumNous/new-api/types"
 
 	"github.com/gin-gonic/gin"
@@ -224,7 +225,8 @@ func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayIn
 			request.Usage = json.RawMessage(`{"include":true}`)
 		}
 		// 适配 OpenRouter 的 thinking 后缀
-		if strings.HasSuffix(info.UpstreamModelName, "-thinking") {
+		if !model_setting.ShouldPreserveThinkingSuffix(info.OriginModelName) &&
+			strings.HasSuffix(info.UpstreamModelName, "-thinking") {
 			info.UpstreamModelName = strings.TrimSuffix(info.UpstreamModelName, "-thinking")
 			request.Model = info.UpstreamModelName
 			if len(request.Reasoning) == 0 {
diff --git a/relay/channel/vertex/adaptor.go b/relay/channel/vertex/adaptor.go
index 94ca47657..3637931da 100644
--- a/relay/channel/vertex/adaptor.go
+++ b/relay/channel/vertex/adaptor.go
@@ -168,7 +168,8 @@ func (a *Adaptor) getRequestUrl(info *relaycommon.RelayInfo, modelName, suffix s
 func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
 	suffix := ""
 	if a.RequestMode == RequestModeGemini {
-		if model_setting.GetGeminiSettings().ThinkingAdapterEnabled {
+		if model_setting.GetGeminiSettings().ThinkingAdapterEnabled &&
+			!model_setting.ShouldPreserveThinkingSuffix(info.OriginModelName) {
 			// 新增逻辑：处理 -thinking-<budget> 格式
 			if strings.Contains(info.UpstreamModelName, "-thinking-") {
 				parts := strings.Split(info.UpstreamModelName, "-thinking-")
diff --git a/relay/channel/volcengine/adaptor.go b/relay/channel/volcengine/adaptor.go
index a377b1dde..6e2590591 100644
--- a/relay/channel/volcengine/adaptor.go
+++ b/relay/channel/volcengine/adaptor.go
@@ -16,6 +16,7 @@ import (
 	"github.com/QuantumNous/new-api/relay/channel/openai"
 	relaycommon "github.com/QuantumNous/new-api/relay/common"
 	"github.com/QuantumNous/new-api/relay/constant"
+	"github.com/QuantumNous/new-api/setting/model_setting"
 	"github.com/QuantumNous/new-api/types"
 
 	"github.com/gin-gonic/gin"
@@ -291,7 +292,9 @@ func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayIn
 		return nil, errors.New("request is nil")
 	}
 
-	if strings.HasSuffix(info.UpstreamModelName, "-thinking") && strings.HasPrefix(info.UpstreamModelName, "deepseek") {
+	if !model_setting.ShouldPreserveThinkingSuffix(info.OriginModelName) &&
+		strings.HasSuffix(info.UpstreamModelName, "-thinking") &&
+		strings.HasPrefix(info.UpstreamModelName, "deepseek") {
 		info.UpstreamModelName = strings.TrimSuffix(info.UpstreamModelName, "-thinking")
 		request.Model = info.UpstreamModelName
 		request.THINKING = json.RawMessage(`{"type": "enabled"}`)
diff --git a/relay/claude_handler.go b/relay/claude_handler.go
index c71a5ca2b..395d1e37b 100644
--- a/relay/claude_handler.go
+++ b/relay/claude_handler.go
@@ -67,7 +67,9 @@ func ClaudeHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *typ
 			request.TopP = 0
 			request.Temperature = common.GetPointer[float64](1.0)
 		}
-		request.Model = strings.TrimSuffix(request.Model, "-thinking")
+		if !model_setting.ShouldPreserveThinkingSuffix(info.OriginModelName) {
+			request.Model = strings.TrimSuffix(request.Model, "-thinking")
+		}
 		info.UpstreamModelName = request.Model
 	}
 
diff --git a/setting/model_setting/global.go b/setting/model_setting/global.go
index afde02144..e8815c0e9 100644
--- a/setting/model_setting/global.go
+++ b/setting/model_setting/global.go
@@ -1,16 +1,23 @@
 package model_setting
 
 import (
+	"strings"
+
 	"github.com/QuantumNous/new-api/setting/config"
 )
 
 type GlobalSettings struct {
-	PassThroughRequestEnabled bool `json:"pass_through_request_enabled"`
+	PassThroughRequestEnabled bool     `json:"pass_through_request_enabled"`
+	ThinkingModelBlacklist    []string `json:"thinking_model_blacklist"` // model names for which ShouldPreserveThinkingSuffix returns true
 }
 
 // 默认配置
 var defaultOpenaiSettings = GlobalSettings{
 	PassThroughRequestEnabled: false,
+	ThinkingModelBlacklist: []string{ // default entries are model names that themselves end in "-thinking"
+		"moonshotai/kimi-k2-thinking",
+		"kimi-k2-thinking",
+	},
 }
 
 // 全局实例
@@ -24,3 +31,18 @@ func init() {
 func GetGlobalSettings() *GlobalSettings {
 	return &globalSettings
 }
+
+// ShouldPreserveThinkingSuffix reports whether modelName matches an entry of globalSettings.ThinkingModelBlacklist; callers use it to leave the model's -thinking/-nothinking suffix untouched.
+func ShouldPreserveThinkingSuffix(modelName string) bool {
+	target := strings.TrimSpace(modelName)
+	if target == "" { // a blank name never matches
+		return false
+	}
+
+	for _, entry := range globalSettings.ThinkingModelBlacklist {
+		if strings.TrimSpace(entry) == target { // exact, case-sensitive comparison after trimming whitespace on both sides
+			return true
+		}
+	}
+	return false
+}
diff --git a/web/src/components/settings/ModelSetting.jsx b/web/src/components/settings/ModelSetting.jsx
index c87164846..768e10709 100644
--- a/web/src/components/settings/ModelSetting.jsx
+++ b/web/src/components/settings/ModelSetting.jsx
@@ -37,6 +37,7 @@ const ModelSetting = () => {
     'claude.default_max_tokens': '',
     'claude.thinking_adapter_budget_tokens_percentage': 0.8,
     'global.pass_through_request_enabled': false,
+    'global.thinking_model_blacklist': '[]',
     'general_setting.ping_interval_enabled': false,
     'general_setting.ping_interval_seconds': 60,
     'gemini.thinking_adapter_enabled': false,
@@ -56,7 +57,8 @@ const ModelSetting = () => {
           item.key === 'gemini.version_settings' ||
           item.key === 'claude.model_headers_settings' ||
           item.key === 'claude.default_max_tokens' ||
-          item.key === 'gemini.supported_imagine_models'
+          item.key === 'gemini.supported_imagine_models' ||
+          item.key === 'global.thinking_model_blacklist'
         ) {
           if (item.value !== '') {
             item.value = JSON.stringify(JSON.parse(item.value), null, 2);
diff --git a/web/src/i18n/locales/en.json b/web/src/i18n/locales/en.json
index ef233b6e0..4cbe216d1 100644
--- a/web/src/i18n/locales/en.json
+++ b/web/src/i18n/locales/en.json
@@ -561,6 +561,9 @@
     "启用绘图功能": "Enable drawing function",
     "启用请求体透传功能": "Enable request body pass-through functionality",
     "启用请求透传": "Enable request pass-through",
+    "禁用思考处理的模型列表": "Models excluded from thinking handling",
+    "列出的模型将不会自动添加或移除-thinking/-nothinking 后缀": "The -thinking/-nothinking suffix will not be automatically added or removed for models in this list.",
+    "请输入JSON数组，如 [\"model-a\",\"model-b\"]": "Enter a JSON array, e.g. [\"model-a\",\"model-b\"]",
     "启用额度消费日志记录": "Enable quota consumption logging",
     "启用验证": "Enable Authentication",
     "周": "week",
diff --git a/web/src/i18n/locales/fr.json b/web/src/i18n/locales/fr.json
index a810ce07d..ef593e0bd 100644
--- a/web/src/i18n/locales/fr.json
+++ b/web/src/i18n/locales/fr.json
@@ -564,6 +564,9 @@
     "启用绘图功能": "Activer la fonction de dessin",
     "启用请求体透传功能": "Activer la fonctionnalité de transmission du corps de la requête",
     "启用请求透传": "Activer la transmission de la requête",
+    "禁用思考处理的模型列表": "Liste noire des modèles pour le traitement thinking",
+    "列出的模型将不会自动添加或移除-thinking/-nothinking 后缀": "Les modèles listés ici n'ajouteront ni ne retireront automatiquement le suffixe -thinking/-nothinking.",
+    "请输入JSON数组，如 [\"model-a\",\"model-b\"]": "Saisissez un tableau JSON, par ex. [\"model-a\",\"model-b\"]",
     "启用额度消费日志记录": "Activer la journalisation de la consommation de quota",
     "启用验证": "Activer l'authentification",
     "周": "semaine",
diff --git a/web/src/i18n/locales/ja.json b/web/src/i18n/locales/ja.json
index 46456b157..67d3c79b6 100644
--- a/web/src/i18n/locales/ja.json
+++ b/web/src/i18n/locales/ja.json
@@ -561,6 +561,9 @@
     "启用绘图功能": "画像生成機能を有効にする",
     "启用请求体透传功能": "リクエストボディのパススルー機能を有効にします。",
     "启用请求透传": "リクエストパススルーを有効にする",
+    "禁用思考处理的模型列表": "Thinking処理を無効化するモデル一覧",
+    "列出的模型将不会自动添加或移除-thinking/-nothinking 后缀": "ここに含まれるモデルでは-thinking/-nothinkingサフィックスを自動的に追加・削除しません。",
+    "请输入JSON数组，如 [\"model-a\",\"model-b\"]": "JSON配列を入力してください（例：[\"model-a\",\"model-b\"]）",
     "启用额度消费日志记录": "クォータ消費のログ記録を有効にする",
     "启用验证": "認証を有効にする",
     "周": "週",
diff --git a/web/src/i18n/locales/ru.json b/web/src/i18n/locales/ru.json
index 35abe12e0..5adc1deb7 100644
--- a/web/src/i18n/locales/ru.json
+++ b/web/src/i18n/locales/ru.json
@@ -567,6 +567,9 @@
     "启用绘图功能": "Включить функцию рисования",
     "启用请求体透传功能": "Включить функцию прозрачной передачи тела запроса",
     "启用请求透传": "Включить прозрачную передачу запросов",
+    "禁用思考处理的模型列表": "Список моделей без обработки thinking",
+    "列出的模型将不会自动添加或移除-thinking/-nothinking 后缀": "Для этих моделей суффиксы -thinking/-nothinking не будут добавляться или удаляться автоматически.",
+    "请输入JSON数组，如 [\"model-a\",\"model-b\"]": "Введите JSON-массив, например [\"model-a\",\"model-b\"]",
     "启用额度消费日志记录": "Включить журналирование потребления квоты",
     "启用验证": "Включить проверку",
     "周": "Неделя",
diff --git a/web/src/i18n/locales/zh.json b/web/src/i18n/locales/zh.json
index 973f3c937..e1bde4b99 100644
--- a/web/src/i18n/locales/zh.json
+++ b/web/src/i18n/locales/zh.json
@@ -558,6 +558,9 @@
     "启用绘图功能": "启用绘图功能",
     "启用请求体透传功能": "启用请求体透传功能",
     "启用请求透传": "启用请求透传",
+    "禁用思考处理的模型列表": "禁用思考处理的模型列表",
+    "列出的模型将不会自动添加或移除-thinking/-nothinking 后缀": "列出的模型将不会自动添加或移除-thinking/-nothinking 后缀",
+    "请输入JSON数组，如 [\"model-a\",\"model-b\"]": "请输入JSON数组，如 [\"model-a\",\"model-b\"]",
     "启用额度消费日志记录": "启用额度消费日志记录",
     "启用验证": "启用验证",
     "周": "周",
diff --git a/web/src/pages/Setting/Model/SettingGlobalModel.jsx b/web/src/pages/Setting/Model/SettingGlobalModel.jsx
index a23125f90..2ed9d1bc4 100644
--- a/web/src/pages/Setting/Model/SettingGlobalModel.jsx
+++ b/web/src/pages/Setting/Model/SettingGlobalModel.jsx
@@ -29,23 +29,44 @@ import {
 } from '../../../helpers';
 import { useTranslation } from 'react-i18next';
 
+const thinkingExample = JSON.stringify(
+  ['moonshotai/kimi-k2-thinking', 'kimi-k2-thinking'],
+  null,
+  2,
+); // pretty-printed JSON sample appended to the blacklist textarea placeholder
+
+const defaultGlobalSettingInputs = {
+  'global.pass_through_request_enabled': false,
+  'global.thinking_model_blacklist': '[]', // JSON array kept as text; parsed and re-formatted when options load
+  'general_setting.ping_interval_enabled': false,
+  'general_setting.ping_interval_seconds': 60,
+};
+
 export default function SettingGlobalModel(props) {
   const { t } = useTranslation();
 
   const [loading, setLoading] = useState(false);
-  const [inputs, setInputs] = useState({
-    'global.pass_through_request_enabled': false,
-    'general_setting.ping_interval_enabled': false,
-    'general_setting.ping_interval_seconds': 60,
-  });
+  const [inputs, setInputs] = useState(defaultGlobalSettingInputs);
   const refForm = useRef();
-  const [inputsRow, setInputsRow] = useState(inputs);
+  const [inputsRow, setInputsRow] = useState(defaultGlobalSettingInputs);
+
+  const normalizeValueBeforeSave = (key, value) => {
+    if (key === 'global.thinking_model_blacklist') {
+      const text = typeof value === 'string' ? value.trim() : '';
+      return text === '' ? '[]' : value; // blank or non-string input is saved as an empty JSON array
+    }
+    return value; // every other key is saved unchanged
+  };
 
   function onSubmit() {
     const updateArray = compareObjects(inputs, inputsRow);
     if (!updateArray.length) return showWarning(t('你似乎并没有修改什么'));
     const requestQueue = updateArray.map((item) => {
-      let value = String(inputs[item.key]);
+      const normalizedValue = normalizeValueBeforeSave(
+        item.key,
+        inputs[item.key],
+      );
+      let value = String(normalizedValue);
 
       return API.put('/api/option/', {
         key: item.key,
@@ -74,14 +95,30 @@ export default function SettingGlobalModel(props) {
 
   useEffect(() => {
     const currentInputs = {};
-    for (let key in props.options) {
-      if (Object.keys(inputs).includes(key)) {
-        currentInputs[key] = props.options[key];
+    for (const key of Object.keys(defaultGlobalSettingInputs)) {
+      if (props.options[key] !== undefined) {
+        let value = props.options[key];
+        if (key === 'global.thinking_model_blacklist') {
+          try {
+            value =
+              value && String(value).trim() !== ''
+                ? JSON.stringify(JSON.parse(value), null, 2)
+                : defaultGlobalSettingInputs[key];
+          } catch (error) {
+            value = defaultGlobalSettingInputs[key];
+          }
+        }
+        currentInputs[key] = value;
+      } else {
+        currentInputs[key] = defaultGlobalSettingInputs[key];
       }
     }
+
     setInputs(currentInputs);
     setInputsRow(structuredClone(currentInputs));
-    refForm.current.setValues(currentInputs);
+    if (refForm.current) {
+      refForm.current.setValues(currentInputs);
+    }
   }, [props.options]);
 
   return (
@@ -110,6 +147,38 @@ export default function SettingGlobalModel(props) {
                 />
               </Col>
             </Row>
+            <Row>
+              <Col span={24}>
+                <Form.TextArea
+                  label={t('禁用思考处理的模型列表')}
+                  field={'global.thinking_model_blacklist'}
+                  placeholder={
+                    t('例如：') +
+                    '\n' +
+                    thinkingExample
+                  }
+                  rows={4}
+                  rules={[
+                    {
+                      validator: (rule, value) => {
+                        if (!value || value.trim() === '') return true;
+                        return verifyJSON(value);
+                      },
+                      message: t('不是合法的 JSON 字符串'),
+                    },
+                  ]}
+                  extraText={t(
+                    '列出的模型将不会自动添加或移除-thinking/-nothinking 后缀',
+                  )}
+                  onChange={(value) =>
+                    setInputs({
+                      ...inputs,
+                      'global.thinking_model_blacklist': value,
+                    })
+                  }
+                />
+              </Col>
+            </Row>
 
             <Form.Section text={t('连接保活设置')}>
               <Row style={{ marginTop: 10 }}>