feat: support adaptive thinking

This commit is contained in:
Papersnake
2026-02-06 11:01:23 +08:00
parent ff71786d8d
commit c9f5de7048
8 changed files with 44 additions and 4 deletions

View File

@@ -26,6 +26,7 @@ type AwsClaudeRequest struct {
Tools any `json:"tools,omitempty"`
ToolChoice any `json:"tool_choice,omitempty"`
Thinking *dto.Thinking `json:"thinking,omitempty"`
OutputConfig json.RawMessage `json:"output_config,omitempty"`
}
func formatRequest(requestBody io.Reader, requestHeader http.Header) (*AwsClaudeRequest, error) {

View File

@@ -21,7 +21,10 @@ var ModelList = []string{
"claude-opus-4-5-20251101",
"claude-opus-4-5-20251101-thinking",
"claude-opus-4-6",
"claude-opus-4-6-thinking",
"claude-opus-4-6-max",
"claude-opus-4-6-high",
"claude-opus-4-6-medium",
"claude-opus-4-6-low",
}
var ChannelName = "claude"

View File

@@ -17,6 +17,7 @@ import (
"github.com/QuantumNous/new-api/relay/reasonmap"
"github.com/QuantumNous/new-api/service"
"github.com/QuantumNous/new-api/setting/model_setting"
"github.com/QuantumNous/new-api/setting/reasoning"
"github.com/QuantumNous/new-api/types"
"github.com/gin-gonic/gin"
@@ -141,7 +142,16 @@ func RequestOpenAI2ClaudeMessage(c *gin.Context, textRequest dto.GeneralOpenAIRe
claudeRequest.MaxTokens = uint(model_setting.GetClaudeSettings().GetDefaultMaxTokens(textRequest.Model))
}
if model_setting.GetClaudeSettings().ThinkingAdapterEnabled &&
if baseModel, effortLevel, ok := reasoning.TrimEffortSuffix(textRequest.Model); ok && effortLevel != "" &&
strings.HasPrefix(textRequest.Model, "claude-opus-4-6") {
claudeRequest.Model = baseModel
claudeRequest.Thinking = &dto.Thinking{
Type: "adaptive",
}
claudeRequest.OutputConfig = json.RawMessage(fmt.Sprintf(`{"effort":"%s"}`, effortLevel))
claudeRequest.TopP = 0
claudeRequest.Temperature = common.GetPointer[float64](1.0)
} else if model_setting.GetClaudeSettings().ThinkingAdapterEnabled &&
strings.HasSuffix(textRequest.Model, "-thinking") {
// 因为BudgetTokens 必须大于1024

View File

@@ -1,6 +1,8 @@
package vertex
import (
"encoding/json"
"github.com/QuantumNous/new-api/dto"
)
@@ -17,6 +19,7 @@ type VertexAIClaudeRequest struct {
Tools any `json:"tools,omitempty"`
ToolChoice any `json:"tool_choice,omitempty"`
Thinking *dto.Thinking `json:"thinking,omitempty"`
OutputConfig json.RawMessage `json:"output_config,omitempty"`
}
func copyRequest(req *dto.ClaudeRequest, version string) *VertexAIClaudeRequest {
@@ -33,5 +36,6 @@ func copyRequest(req *dto.ClaudeRequest, version string) *VertexAIClaudeRequest
Tools: req.Tools,
ToolChoice: req.ToolChoice,
Thinking: req.Thinking,
OutputConfig: req.OutputConfig,
}
}

View File

@@ -2,6 +2,7 @@ package relay
import (
"bytes"
"encoding/json"
"fmt"
"io"
"net/http"
@@ -14,6 +15,7 @@ import (
"github.com/QuantumNous/new-api/relay/helper"
"github.com/QuantumNous/new-api/service"
"github.com/QuantumNous/new-api/setting/model_setting"
"github.com/QuantumNous/new-api/setting/reasoning"
"github.com/QuantumNous/new-api/types"
"github.com/gin-gonic/gin"
@@ -49,7 +51,15 @@ func ClaudeHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *typ
request.MaxTokens = uint(model_setting.GetClaudeSettings().GetDefaultMaxTokens(request.Model))
}
if model_setting.GetClaudeSettings().ThinkingAdapterEnabled &&
if baseModel, effortLevel, ok := reasoning.TrimEffortSuffix(request.Model); ok && effortLevel != "" &&
strings.HasPrefix(request.Model, "claude-opus-4-6") {
request.Model = baseModel
request.Thinking = &dto.Thinking{
Type: "adaptive",
}
request.OutputConfig = json.RawMessage(fmt.Sprintf(`{"effort":"%s"}`, effortLevel))
info.UpstreamModelName = request.Model
} else if model_setting.GetClaudeSettings().ThinkingAdapterEnabled &&
strings.HasSuffix(request.Model, "-thinking") {
if request.Thinking == nil {
// 因为BudgetTokens 必须大于1024

View File

@@ -62,6 +62,10 @@ var defaultCacheRatio = map[string]float64{
"claude-opus-4-5-20251101-thinking": 0.1,
"claude-opus-4-6": 0.1,
"claude-opus-4-6-thinking": 0.1,
"claude-opus-4-6-max": 0.1,
"claude-opus-4-6-high": 0.1,
"claude-opus-4-6-medium": 0.1,
"claude-opus-4-6-low": 0.1,
}
var defaultCreateCacheRatio = map[string]float64{
@@ -86,6 +90,10 @@ var defaultCreateCacheRatio = map[string]float64{
"claude-opus-4-5-20251101-thinking": 1.25,
"claude-opus-4-6": 1.25,
"claude-opus-4-6-thinking": 1.25,
"claude-opus-4-6-max": 1.25,
"claude-opus-4-6-high": 1.25,
"claude-opus-4-6-medium": 1.25,
"claude-opus-4-6-low": 1.25,
}
//var defaultCreateCacheRatio = map[string]float64{}

View File

@@ -143,6 +143,10 @@ var defaultModelRatio = map[string]float64{
"claude-sonnet-4-5-20250929": 1.5,
"claude-opus-4-5-20251101": 2.5,
"claude-opus-4-6": 2.5,
"claude-opus-4-6-max": 2.5,
"claude-opus-4-6-high": 2.5,
"claude-opus-4-6-medium": 2.5,
"claude-opus-4-6-low": 2.5,
"claude-3-opus-20240229": 7.5, // $15 / 1M tokens
"claude-opus-4-20250514": 7.5,
"claude-opus-4-1-20250805": 7.5,

View File

@@ -6,7 +6,7 @@ import (
"github.com/samber/lo"
)
var EffortSuffixes = []string{"-high", "-medium", "-low", "-minimal"}
var EffortSuffixes = []string{"-max", "-high", "-medium", "-low", "-minimal"}
// TrimEffortSuffix strips a recognized effort suffix (e.g. "-low") from modelName,
// returning the base model name, the effort level, and whether a suffix was found.
func TrimEffortSuffix(modelName string) (string, string, bool) {