From c9f5de7048d6399500d4c3e886400bcda06f3d52 Mon Sep 17 00:00:00 2001 From: Papersnake Date: Fri, 6 Feb 2026 11:01:23 +0800 Subject: [PATCH] feat: support adaptive thinking --- relay/channel/aws/dto.go | 1 + relay/channel/claude/constants.go | 5 ++++- relay/channel/claude/relay-claude.go | 12 +++++++++++- relay/channel/vertex/dto.go | 4 ++++ relay/claude_handler.go | 12 +++++++++++- setting/ratio_setting/cache_ratio.go | 8 ++++++++ setting/ratio_setting/model_ratio.go | 4 ++++ setting/reasoning/suffix.go | 2 +- 8 files changed, 44 insertions(+), 4 deletions(-) diff --git a/relay/channel/aws/dto.go b/relay/channel/aws/dto.go index b060a593b..4a942714d 100644 --- a/relay/channel/aws/dto.go +++ b/relay/channel/aws/dto.go @@ -26,6 +26,7 @@ type AwsClaudeRequest struct { Tools any `json:"tools,omitempty"` ToolChoice any `json:"tool_choice,omitempty"` Thinking *dto.Thinking `json:"thinking,omitempty"` + OutputConfig json.RawMessage `json:"output_config,omitempty"` } func formatRequest(requestBody io.Reader, requestHeader http.Header) (*AwsClaudeRequest, error) { diff --git a/relay/channel/claude/constants.go b/relay/channel/claude/constants.go index 797d1a90a..2da61d0c7 100644 --- a/relay/channel/claude/constants.go +++ b/relay/channel/claude/constants.go @@ -21,7 +21,10 @@ var ModelList = []string{ "claude-opus-4-5-20251101", "claude-opus-4-5-20251101-thinking", "claude-opus-4-6", - "claude-opus-4-6-thinking", + "claude-opus-4-6-max", + "claude-opus-4-6-high", + "claude-opus-4-6-medium", + "claude-opus-4-6-low", } var ChannelName = "claude" diff --git a/relay/channel/claude/relay-claude.go b/relay/channel/claude/relay-claude.go index 6d90daa08..bdb376edd 100644 --- a/relay/channel/claude/relay-claude.go +++ b/relay/channel/claude/relay-claude.go @@ -17,6 +17,7 @@ import ( "github.com/QuantumNous/new-api/relay/reasonmap" "github.com/QuantumNous/new-api/service" "github.com/QuantumNous/new-api/setting/model_setting" + "github.com/QuantumNous/new-api/setting/reasoning" "github.com/QuantumNous/new-api/types" "github.com/gin-gonic/gin" @@ -141,7 +142,16 @@ func RequestOpenAI2ClaudeMessage(c *gin.Context, textRequest dto.GeneralOpenAIRe claudeRequest.MaxTokens = uint(model_setting.GetClaudeSettings().GetDefaultMaxTokens(textRequest.Model)) } - if model_setting.GetClaudeSettings().ThinkingAdapterEnabled && + if baseModel, effortLevel, ok := reasoning.TrimEffortSuffix(textRequest.Model); ok && effortLevel != "" && + strings.HasPrefix(textRequest.Model, "claude-opus-4-6") { + claudeRequest.Model = baseModel + claudeRequest.Thinking = &dto.Thinking{ + Type: "adaptive", + } + claudeRequest.OutputConfig = json.RawMessage(fmt.Sprintf(`{"effort":"%s"}`, effortLevel)) + claudeRequest.TopP = 0 + claudeRequest.Temperature = common.GetPointer[float64](1.0) + } else if model_setting.GetClaudeSettings().ThinkingAdapterEnabled && strings.HasSuffix(textRequest.Model, "-thinking") { // 因为BudgetTokens 必须大于1024 diff --git a/relay/channel/vertex/dto.go b/relay/channel/vertex/dto.go index 68044ff32..2ddafa31b 100644 --- a/relay/channel/vertex/dto.go +++ b/relay/channel/vertex/dto.go @@ -1,6 +1,8 @@ package vertex import ( + "encoding/json" + "github.com/QuantumNous/new-api/dto" ) @@ -17,6 +19,7 @@ type VertexAIClaudeRequest struct { Tools any `json:"tools,omitempty"` ToolChoice any `json:"tool_choice,omitempty"` Thinking *dto.Thinking `json:"thinking,omitempty"` + OutputConfig json.RawMessage `json:"output_config,omitempty"` } func copyRequest(req *dto.ClaudeRequest, version string) *VertexAIClaudeRequest { @@ -33,5 +36,6 @@ func copyRequest(req *dto.ClaudeRequest, version string) *VertexAIClaudeRequest Tools: req.Tools, ToolChoice: req.ToolChoice, Thinking: req.Thinking, + OutputConfig: req.OutputConfig, } } diff --git a/relay/claude_handler.go b/relay/claude_handler.go index 7e05116da..fe63fcbd8 100644 --- a/relay/claude_handler.go +++ b/relay/claude_handler.go @@ -2,6 +2,7 @@ package relay import ( "bytes" + "encoding/json" "fmt" "io" "net/http" @@ -14,6 +15,7 @@ import ( "github.com/QuantumNous/new-api/relay/helper" "github.com/QuantumNous/new-api/service" "github.com/QuantumNous/new-api/setting/model_setting" + "github.com/QuantumNous/new-api/setting/reasoning" "github.com/QuantumNous/new-api/types" "github.com/gin-gonic/gin" @@ -49,7 +51,15 @@ func ClaudeHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *typ request.MaxTokens = uint(model_setting.GetClaudeSettings().GetDefaultMaxTokens(request.Model)) } - if model_setting.GetClaudeSettings().ThinkingAdapterEnabled && + if baseModel, effortLevel, ok := reasoning.TrimEffortSuffix(request.Model); ok && effortLevel != "" && + strings.HasPrefix(request.Model, "claude-opus-4-6") { + request.Model = baseModel + request.Thinking = &dto.Thinking{ + Type: "adaptive", + } + request.OutputConfig = json.RawMessage(fmt.Sprintf(`{"effort":"%s"}`, effortLevel)) + info.UpstreamModelName = request.Model + } else if model_setting.GetClaudeSettings().ThinkingAdapterEnabled && strings.HasSuffix(request.Model, "-thinking") { if request.Thinking == nil { // 因为BudgetTokens 必须大于1024 diff --git a/setting/ratio_setting/cache_ratio.go b/setting/ratio_setting/cache_ratio.go index ec85de11c..626267537 100644 --- a/setting/ratio_setting/cache_ratio.go +++ b/setting/ratio_setting/cache_ratio.go @@ -62,6 +62,10 @@ var defaultCacheRatio = map[string]float64{ "claude-opus-4-5-20251101-thinking": 0.1, "claude-opus-4-6": 0.1, "claude-opus-4-6-thinking": 0.1, + "claude-opus-4-6-max": 0.1, + "claude-opus-4-6-high": 0.1, + "claude-opus-4-6-medium": 0.1, + "claude-opus-4-6-low": 0.1, } var defaultCreateCacheRatio = map[string]float64{ @@ -86,6 +90,10 @@ var defaultCreateCacheRatio = map[string]float64{ "claude-opus-4-5-20251101-thinking": 1.25, "claude-opus-4-6": 1.25, "claude-opus-4-6-thinking": 1.25, + "claude-opus-4-6-max": 1.25, + "claude-opus-4-6-high": 1.25, + "claude-opus-4-6-medium": 1.25, + "claude-opus-4-6-low": 1.25, } //var defaultCreateCacheRatio = map[string]float64{} diff --git a/setting/ratio_setting/model_ratio.go b/setting/ratio_setting/model_ratio.go index e1a2b8ca0..6b7d70e77 100644 --- a/setting/ratio_setting/model_ratio.go +++ b/setting/ratio_setting/model_ratio.go @@ -143,6 +143,10 @@ var defaultModelRatio = map[string]float64{ "claude-sonnet-4-5-20250929": 1.5, "claude-opus-4-5-20251101": 2.5, "claude-opus-4-6": 2.5, + "claude-opus-4-6-max": 2.5, + "claude-opus-4-6-high": 2.5, + "claude-opus-4-6-medium": 2.5, + "claude-opus-4-6-low": 2.5, "claude-3-opus-20240229": 7.5, // $15 / 1M tokens "claude-opus-4-20250514": 7.5, "claude-opus-4-1-20250805": 7.5, diff --git a/setting/reasoning/suffix.go b/setting/reasoning/suffix.go index da3bdc7d3..fb66c6019 100644 --- a/setting/reasoning/suffix.go +++ b/setting/reasoning/suffix.go @@ -6,7 +6,7 @@ import ( "github.com/samber/lo" ) -var EffortSuffixes = []string{"-high", "-medium", "-low", "-minimal"} +var EffortSuffixes = []string{"-max", "-high", "-medium", "-low", "-minimal"} // TrimEffortSuffix -> modelName level(low) exists func TrimEffortSuffix(modelName string) (string, string, bool) {