feat: ShouldPreserveThinkingSuffix (#2189 )

Merge pull request #2188 from QuantumNous/fix-multikey-autodisable
fix(channel): 当没有可用密钥时返回错误而不是第一个密钥
2026-04-02 06:24:07 +00:00 · 2025-11-07 17:43:33 +08:00 · 2025-11-07 17:41:39 +08:00 · 2025-11-07 17:40:02 +08:00 · 2025-11-07 16:27:54 +08:00 · 2025-11-06 23:24:37 +08:00
46 changed files with 1056 additions and 140 deletions
--- a/README.md
+++ b/README.md
@@ -141,6 +141,7 @@ New API提供了丰富的功能，详细特性请参考[特性说明](https://do
 - `NOTIFICATION_LIMIT_DURATION_MINUTE`：邮件等通知限制持续时间，默认 `10`分钟
 - `NOTIFY_LIMIT_COUNT`：用户通知在指定持续时间内的最大数量，默认 `2`
 - `ERROR_LOG_ENABLED=true`: 是否记录并显示错误日志，默认`false`
+- `TASK_PRICE_PATCH=sora-2-all,sora-2-pro-all`: 异步任务设置某些模型按次计费，多个模型用逗号分隔，例如`sora-2-all,sora-2-pro-all`，表示sora-2-all和sora-2-pro-all模型异步任务仅按次计费，不按秒等计费。

 ## 部署

--- a/common/constants.go
+++ b/common/constants.go
@@ -159,14 +159,15 @@ var (
 	GlobalWebRateLimitNum      int
 	GlobalWebRateLimitDuration int64

+	CriticalRateLimitEnable   bool
+	CriticalRateLimitNum            = 20
+	CriticalRateLimitDuration int64 = 20 * 60
+
 	UploadRateLimitNum            = 10
 	UploadRateLimitDuration int64 = 60

 	DownloadRateLimitNum            = 10
 	DownloadRateLimitDuration int64 = 60
-
-	CriticalRateLimitNum            = 20
-	CriticalRateLimitDuration int64 = 20 * 60
 )

 var RateLimitKeyExpirationDuration = 20 * time.Minute
--- a/common/init.go
+++ b/common/init.go
@@ -99,6 +99,9 @@ func InitEnv() {
 	GlobalWebRateLimitNum = GetEnvOrDefault("GLOBAL_WEB_RATE_LIMIT", 60)
 	GlobalWebRateLimitDuration = int64(GetEnvOrDefault("GLOBAL_WEB_RATE_LIMIT_DURATION", 180))

+	CriticalRateLimitEnable = GetEnvOrDefaultBool("CRITICAL_RATE_LIMIT_ENABLE", true)
+	CriticalRateLimitNum = GetEnvOrDefault("CRITICAL_RATE_LIMIT", 20)
+	CriticalRateLimitDuration = int64(GetEnvOrDefault("CRITICAL_RATE_LIMIT_DURATION", 20*60))
 	initConstantEnv()
 }

--- a/controller/channel-test.go
+++ b/controller/channel-test.go
@@ -617,6 +617,10 @@ func TestAllChannels(c *gin.Context) {
 var autoTestChannelsOnce sync.Once

 func AutomaticallyTestChannels() {
+	// 只在Master节点定时测试渠道
+	if !common.IsMasterNode {
+		return
+	}
 	autoTestChannelsOnce.Do(func() {
 		for {
 			if !operation_setting.GetMonitorSetting().AutoTestChannelEnabled {
--- a/controller/channel.go
+++ b/controller/channel.go
@@ -649,13 +649,15 @@ func DeleteDisabledChannel(c *gin.Context) {
 }

 type ChannelTag struct {
-	Tag          string  `json:"tag"`
-	NewTag       *string `json:"new_tag"`
-	Priority     *int64  `json:"priority"`
-	Weight       *uint   `json:"weight"`
-	ModelMapping *string `json:"model_mapping"`
-	Models       *string `json:"models"`
-	Groups       *string `json:"groups"`
+	Tag            string  `json:"tag"`
+	NewTag         *string `json:"new_tag"`
+	Priority       *int64  `json:"priority"`
+	Weight         *uint   `json:"weight"`
+	ModelMapping   *string `json:"model_mapping"`
+	Models         *string `json:"models"`
+	Groups         *string `json:"groups"`
+	ParamOverride  *string `json:"param_override"`
+	HeaderOverride *string `json:"header_override"`
 }

 func DisableTagChannels(c *gin.Context) {
@@ -721,7 +723,29 @@ func EditTagChannels(c *gin.Context) {
 		})
 		return
 	}
-	err = model.EditChannelByTag(channelTag.Tag, channelTag.NewTag, channelTag.ModelMapping, channelTag.Models, channelTag.Groups, channelTag.Priority, channelTag.Weight)
+	if channelTag.ParamOverride != nil {
+		trimmed := strings.TrimSpace(*channelTag.ParamOverride)
+		if trimmed != "" && !json.Valid([]byte(trimmed)) {
+			c.JSON(http.StatusOK, gin.H{
+				"success": false,
+				"message": "参数覆盖必须是合法的 JSON 格式",
+			})
+			return
+		}
+		channelTag.ParamOverride = common.GetPointer[string](trimmed)
+	}
+	if channelTag.HeaderOverride != nil {
+		trimmed := strings.TrimSpace(*channelTag.HeaderOverride)
+		if trimmed != "" && !json.Valid([]byte(trimmed)) {
+			c.JSON(http.StatusOK, gin.H{
+				"success": false,
+				"message": "请求头覆盖必须是合法的 JSON 格式",
+			})
+			return
+		}
+		channelTag.HeaderOverride = common.GetPointer[string](trimmed)
+	}
+	err = model.EditChannelByTag(channelTag.Tag, channelTag.NewTag, channelTag.ModelMapping, channelTag.Models, channelTag.Groups, channelTag.Priority, channelTag.Weight, channelTag.ParamOverride, channelTag.HeaderOverride)
 	if err != nil {
 		common.ApiError(c, err)
 		return
--- a/dto/claude.go
+++ b/dto/claude.go
@@ -510,11 +510,44 @@ func (c *ClaudeResponse) GetClaudeError() *types.ClaudeError {
 }

 type ClaudeUsage struct {
-	InputTokens              int                  `json:"input_tokens"`
-	CacheCreationInputTokens int                  `json:"cache_creation_input_tokens"`
-	CacheReadInputTokens     int                  `json:"cache_read_input_tokens"`
-	OutputTokens             int                  `json:"output_tokens"`
-	ServerToolUse            *ClaudeServerToolUse `json:"server_tool_use,omitempty"`
+	InputTokens              int                       `json:"input_tokens"`
+	CacheCreationInputTokens int                       `json:"cache_creation_input_tokens"`
+	CacheReadInputTokens     int                       `json:"cache_read_input_tokens"`
+	OutputTokens             int                       `json:"output_tokens"`
+	CacheCreation            *ClaudeCacheCreationUsage `json:"cache_creation,omitempty"`
+	// claude cache 1h
+	ClaudeCacheCreation5mTokens int                  `json:"claude_cache_creation_5_m_tokens"`
+	ClaudeCacheCreation1hTokens int                  `json:"claude_cache_creation_1_h_tokens"`
+	ServerToolUse               *ClaudeServerToolUse `json:"server_tool_use,omitempty"`
+}
+
+type ClaudeCacheCreationUsage struct {
+	Ephemeral5mInputTokens int `json:"ephemeral_5m_input_tokens,omitempty"`
+	Ephemeral1hInputTokens int `json:"ephemeral_1h_input_tokens,omitempty"`
+}
+
+func (u *ClaudeUsage) GetCacheCreation5mTokens() int {
+	if u == nil || u.CacheCreation == nil {
+		return 0
+	}
+	return u.CacheCreation.Ephemeral5mInputTokens
+}
+
+func (u *ClaudeUsage) GetCacheCreation1hTokens() int {
+	if u == nil || u.CacheCreation == nil {
+		return 0
+	}
+	return u.CacheCreation.Ephemeral1hInputTokens
+}
+
+func (u *ClaudeUsage) GetCacheCreationTotalTokens() int {
+	if u == nil {
+		return 0
+	}
+	if u.CacheCreationInputTokens > 0 {
+		return u.CacheCreationInputTokens
+	}
+	return u.GetCacheCreation5mTokens() + u.GetCacheCreation1hTokens()
 }

 type ClaudeServerToolUse struct {
--- a/dto/openai_request.go
+++ b/dto/openai_request.go
@@ -232,10 +232,13 @@ func (r *GeneralOpenAIRequest) GetSystemRoleName() string {
 	return "system"
 }

+const CustomType = "custom"
+
 type ToolCallRequest struct {
 	ID       string          `json:"id,omitempty"`
 	Type     string          `json:"type"`
-	Function FunctionRequest `json:"function"`
+	Function FunctionRequest `json:"function,omitempty"`
+	Custom   json.RawMessage `json:"custom,omitempty"`
 }

 type FunctionRequest struct {
--- a/dto/openai_response.go
+++ b/dto/openai_response.go
@@ -230,6 +230,11 @@ type Usage struct {
 	InputTokens            int                `json:"input_tokens"`
 	OutputTokens           int                `json:"output_tokens"`
 	InputTokensDetails     *InputTokenDetails `json:"input_tokens_details"`
+
+	// claude cache 1h
+	ClaudeCacheCreation5mTokens int `json:"claude_cache_creation_5_m_tokens"`
+	ClaudeCacheCreation1hTokens int `json:"claude_cache_creation_1_h_tokens"`
+
 	// OpenRouter Params
 	Cost any `json:"cost,omitempty"`
 }
--- a/logger/logger.go
+++ b/logger/logger.go
@@ -67,8 +67,10 @@ func LogError(ctx context.Context, msg string) {
 }

 func LogDebug(ctx context.Context, msg string, args ...any) {
-	msg = fmt.Sprintf(msg, args...)
 	if common.DebugEnabled {
+		if len(args) > 0 {
+			msg = fmt.Sprintf(msg, args...)
+		}
 		logHelper(ctx, loggerDebug, msg)
 	}
 }
--- a/middleware/rate-limit.go
+++ b/middleware/rate-limit.go
@@ -102,7 +102,10 @@ func GlobalAPIRateLimit() func(c *gin.Context) {
 }

 func CriticalRateLimit() func(c *gin.Context) {
-	return rateLimitFactory(common.CriticalRateLimitNum, common.CriticalRateLimitDuration, "CT")
+	if common.CriticalRateLimitEnable {
+		return rateLimitFactory(common.CriticalRateLimitNum, common.CriticalRateLimitDuration, "CT")
+	}
+	return defNext
 }

 func DownloadRateLimit() func(c *gin.Context) {
--- a/model/channel.go
+++ b/model/channel.go
@@ -138,9 +138,11 @@ func (channel *Channel) GetNextEnabledKey() (string, int, *types.NewAPIError) {
 			enabledIdx = append(enabledIdx, i)
 		}
 	}
-	// If no specific status list or none enabled, fall back to first key
+	// If no specific status list or none enabled, return an explicit error so caller can
+	// properly handle a channel with no available keys (e.g. mark channel disabled).
+	// Returning the first key here caused requests to keep using an already-disabled key.
 	if len(enabledIdx) == 0 {
-		return keys[0], 0, nil
+		return "", 0, types.NewError(errors.New("no enabled keys"), types.ErrorCodeChannelNoAvailableKey)
 	}

 	switch channel.ChannelInfo.MultiKeyMode {
@@ -688,7 +690,7 @@ func DisableChannelByTag(tag string) error {
 	return err
 }

-func EditChannelByTag(tag string, newTag *string, modelMapping *string, models *string, group *string, priority *int64, weight *uint) error {
+func EditChannelByTag(tag string, newTag *string, modelMapping *string, models *string, group *string, priority *int64, weight *uint, paramOverride *string, headerOverride *string) error {
 	updateData := Channel{}
 	shouldReCreateAbilities := false
 	updatedTag := tag
@@ -714,6 +716,12 @@ func EditChannelByTag(tag string, newTag *string, modelMapping *string, models *
 	if weight != nil {
 		updateData.Weight = weight
 	}
+	if paramOverride != nil {
+		updateData.ParamOverride = paramOverride
+	}
+	if headerOverride != nil {
+		updateData.HeaderOverride = headerOverride
+	}

 	err := DB.Model(&Channel{}).Where("tag = ?", tag).Updates(updateData).Error
 	if err != nil {
--- a/relay/channel/ali/image.go
+++ b/relay/channel/ali/image.go
@@ -98,9 +98,9 @@ func oaiFormEdit2AliImageEdit(c *gin.Context, info *relaycommon.RelayInfo, reque
 		return nil, errors.New("image is required")
 	}

-	if len(imageFiles) > 1 {
-		return nil, errors.New("only one image is supported for qwen edit")
-	}
+	//if len(imageFiles) > 1 {
+	//	return nil, errors.New("only one image is supported for qwen edit")
+	//}

 	// 获取base64编码的图片
 	var imageBase64s []string
--- a/relay/channel/claude/relay-claude.go
+++ b/relay/channel/claude/relay-claude.go
@@ -189,7 +189,9 @@ func RequestOpenAI2ClaudeMessage(c *gin.Context, textRequest dto.GeneralOpenAIRe
 		// https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#important-considerations-when-using-extended-thinking
 		claudeRequest.TopP = 0
 		claudeRequest.Temperature = common.GetPointer[float64](1.0)
-		claudeRequest.Model = strings.TrimSuffix(textRequest.Model, "-thinking")
+		if !model_setting.ShouldPreserveThinkingSuffix(textRequest.Model) {
+			claudeRequest.Model = strings.TrimSuffix(textRequest.Model, "-thinking")
+		}
 	}

 	if textRequest.ReasoningEffort != "" {
@@ -596,6 +598,8 @@ func FormatClaudeResponseInfo(requestMode int, claudeResponse *dto.ClaudeRespons
 			claudeInfo.Usage.PromptTokens = claudeResponse.Message.Usage.InputTokens
 			claudeInfo.Usage.PromptTokensDetails.CachedTokens = claudeResponse.Message.Usage.CacheReadInputTokens
 			claudeInfo.Usage.PromptTokensDetails.CachedCreationTokens = claudeResponse.Message.Usage.CacheCreationInputTokens
+			claudeInfo.Usage.ClaudeCacheCreation5mTokens = claudeResponse.Message.Usage.GetCacheCreation5mTokens()
+			claudeInfo.Usage.ClaudeCacheCreation1hTokens = claudeResponse.Message.Usage.GetCacheCreation1hTokens()
 			claudeInfo.Usage.CompletionTokens = claudeResponse.Message.Usage.OutputTokens
 		} else if claudeResponse.Type == "content_block_delta" {
 			if claudeResponse.Delta.Text != nil {
@@ -740,6 +744,8 @@ func HandleClaudeResponseData(c *gin.Context, info *relaycommon.RelayInfo, claud
 		claudeInfo.Usage.TotalTokens = claudeResponse.Usage.InputTokens + claudeResponse.Usage.OutputTokens
 		claudeInfo.Usage.PromptTokensDetails.CachedTokens = claudeResponse.Usage.CacheReadInputTokens
 		claudeInfo.Usage.PromptTokensDetails.CachedCreationTokens = claudeResponse.Usage.CacheCreationInputTokens
+		claudeInfo.Usage.ClaudeCacheCreation5mTokens = claudeResponse.Usage.GetCacheCreation5mTokens()
+		claudeInfo.Usage.ClaudeCacheCreation1hTokens = claudeResponse.Usage.GetCacheCreation1hTokens()
 	}
 	var responseData []byte
 	switch info.RelayFormat {
--- a/relay/channel/gemini/adaptor.go
+++ b/relay/channel/gemini/adaptor.go
@@ -127,7 +127,8 @@ func (a *Adaptor) Init(info *relaycommon.RelayInfo) {

 func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {

-	if model_setting.GetGeminiSettings().ThinkingAdapterEnabled {
+	if model_setting.GetGeminiSettings().ThinkingAdapterEnabled &&
+		!model_setting.ShouldPreserveThinkingSuffix(info.OriginModelName) {
 		// 新增逻辑：处理 -thinking-<budget> 格式
 		if strings.Contains(info.UpstreamModelName, "-thinking-") {
 			parts := strings.Split(info.UpstreamModelName, "-thinking-")
--- a/relay/channel/openai/adaptor.go
+++ b/relay/channel/openai/adaptor.go
@@ -27,6 +27,7 @@ import (
 	"github.com/QuantumNous/new-api/relay/common_handler"
 	relayconstant "github.com/QuantumNous/new-api/relay/constant"
 	"github.com/QuantumNous/new-api/service"
+	"github.com/QuantumNous/new-api/setting/model_setting"
 	"github.com/QuantumNous/new-api/types"

 	"github.com/gin-gonic/gin"
@@ -224,7 +225,8 @@ func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayIn
 			request.Usage = json.RawMessage(`{"include":true}`)
 		}
 		// 适配 OpenRouter 的 thinking 后缀
-		if strings.HasSuffix(info.UpstreamModelName, "-thinking") {
+		if !model_setting.ShouldPreserveThinkingSuffix(info.OriginModelName) &&
+			strings.HasSuffix(info.UpstreamModelName, "-thinking") {
 			info.UpstreamModelName = strings.TrimSuffix(info.UpstreamModelName, "-thinking")
 			request.Model = info.UpstreamModelName
 			if len(request.Reasoning) == 0 {
--- a/relay/channel/openai/relay-openai.go
+++ b/relay/channel/openai/relay-openai.go
@@ -122,6 +122,10 @@ func OaiStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Re
 	var usage = &dto.Usage{}
 	var streamItems []string // store stream items
 	var lastStreamData string
+	var secondLastStreamData string // 存储倒数第二个stream data，用于音频模型
+
+	// 检查是否为音频模型
+	isAudioModel := strings.Contains(strings.ToLower(model), "audio")

 	helper.StreamScannerHandler(c, resp, info, func(data string) bool {
 		if lastStreamData != "" {
@@ -131,12 +135,35 @@ func OaiStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Re
 			}
 		}
 		if len(data) > 0 {
+			// 对音频模型，保存倒数第二个stream data
+			if isAudioModel && lastStreamData != "" {
+				secondLastStreamData = lastStreamData
+			}
+
 			lastStreamData = data
 			streamItems = append(streamItems, data)
 		}
 		return true
 	})

+	// 对音频模型，从倒数第二个stream data中提取usage信息
+	if isAudioModel && secondLastStreamData != "" {
+		var streamResp struct {
+			Usage *dto.Usage `json:"usage"`
+		}
+		err := json.Unmarshal([]byte(secondLastStreamData), &streamResp)
+		if err == nil && streamResp.Usage != nil && service.ValidUsage(streamResp.Usage) {
+			usage = streamResp.Usage
+			containStreamUsage = true
+
+			if common.DebugEnabled {
+				logger.LogDebug(c, fmt.Sprintf("Audio model usage extracted from second last SSE: PromptTokens=%d, CompletionTokens=%d, TotalTokens=%d, InputTokens=%d, OutputTokens=%d",
+					usage.PromptTokens, usage.CompletionTokens, usage.TotalTokens,
+					usage.InputTokens, usage.OutputTokens))
+			}
+		}
+	}
+
 	// 处理最后的响应
 	shouldSendLastResp := true
 	if err := handleLastResponse(lastStreamData, &responseId, &createAt, &systemFingerprint, &model, &usage,
--- a/relay/channel/task/ali/adaptor.go
+++ b/relay/channel/task/ali/adaptor.go
@@ -5,14 +5,17 @@ import (
 	"fmt"
 	"io"
 	"net/http"
+	"strconv"
 	"strings"

 	"github.com/QuantumNous/new-api/common"
 	"github.com/QuantumNous/new-api/dto"
+	"github.com/QuantumNous/new-api/logger"
 	"github.com/QuantumNous/new-api/model"
 	"github.com/QuantumNous/new-api/relay/channel"
 	relaycommon "github.com/QuantumNous/new-api/relay/common"
 	"github.com/QuantumNous/new-api/service"
+	"github.com/samber/lo"

 	"github.com/gin-gonic/gin"
 	"github.com/pkg/errors"
@@ -108,6 +111,7 @@ type TaskAdaptor struct {
 	ChannelType int
 	apiKey      string
 	baseURL     string
+	aliReq      *AliVideoRequest
 }

 func (a *TaskAdaptor) Init(info *relaycommon.RelayInfo) {
@@ -118,6 +122,16 @@ func (a *TaskAdaptor) Init(info *relaycommon.RelayInfo) {

 func (a *TaskAdaptor) ValidateRequestAndSetAction(c *gin.Context, info *relaycommon.RelayInfo) (taskErr *dto.TaskError) {
 	// 阿里通义万相支持 JSON 格式，不使用 multipart
+	var taskReq relaycommon.TaskSubmitReq
+	if err := common.UnmarshalBodyReusable(c, &taskReq); err != nil {
+		return service.TaskErrorWrapper(err, "unmarshal_task_request_failed", http.StatusBadRequest)
+	}
+	aliReq, err := a.convertToAliRequest(info, taskReq)
+	if err != nil {
+		return service.TaskErrorWrapper(err, "convert_to_ali_request_failed", http.StatusInternalServerError)
+	}
+	a.aliReq = aliReq
+	logger.LogJson(c, "ali video request body", aliReq)
 	return relaycommon.ValidateMultipartDirect(c, info)
 }

@@ -134,13 +148,7 @@ func (a *TaskAdaptor) BuildRequestHeader(c *gin.Context, req *http.Request, info
 }

 func (a *TaskAdaptor) BuildRequestBody(c *gin.Context, info *relaycommon.RelayInfo) (io.Reader, error) {
-	var taskReq relaycommon.TaskSubmitReq
-	if err := common.UnmarshalBodyReusable(c, &taskReq); err != nil {
-		return nil, errors.Wrap(err, "unmarshal_task_request_failed")
-	}
-	aliReq := a.convertToAliRequest(taskReq)
-
-	bodyBytes, err := common.Marshal(aliReq)
+	bodyBytes, err := common.Marshal(a.aliReq)
 	if err != nil {
 		return nil, errors.Wrap(err, "marshal_ali_request_failed")
 	}
@@ -148,7 +156,98 @@ func (a *TaskAdaptor) BuildRequestBody(c *gin.Context, info *relaycommon.RelayIn
 	return bytes.NewReader(bodyBytes), nil
 }

-func (a *TaskAdaptor) convertToAliRequest(req relaycommon.TaskSubmitReq) *AliVideoRequest {
+var (
+	size480p = []string{
+		"832*480",
+		"480*832",
+		"624*624",
+	}
+	size720p = []string{
+		"1280*720",
+		"720*1280",
+		"960*960",
+		"1088*832",
+		"832*1088",
+	}
+	size1080p = []string{
+		"1920*1080",
+		"1080*1920",
+		"1440*1440",
+		"1632*1248",
+		"1248*1632",
+	}
+)
+
+func sizeToResolution(size string) (string, error) {
+	if lo.Contains(size480p, size) {
+		return "480P", nil
+	} else if lo.Contains(size720p, size) {
+		return "720P", nil
+	} else if lo.Contains(size1080p, size) {
+		return "1080P", nil
+	}
+	return "", fmt.Errorf("invalid size: %s", size)
+}
+
+func ProcessAliOtherRatios(aliReq *AliVideoRequest) (map[string]float64, error) {
+	otherRatios := make(map[string]float64)
+	aliRatios := map[string]map[string]float64{
+		"wan2.5-t2v-preview": {
+			"480P":  1,
+			"720P":  2,
+			"1080P": 1 / 0.3,
+		},
+		"wan2.2-t2v-plus": {
+			"480P":  1,
+			"1080P": 0.7 / 0.14,
+		},
+		"wan2.5-i2v-preview": {
+			"480P":  1,
+			"720P":  2,
+			"1080P": 1 / 0.3,
+		},
+		"wan2.2-i2v-plus": {
+			"480P":  1,
+			"1080P": 0.7 / 0.14,
+		},
+		"wan2.2-kf2v-flash": {
+			"480P":  1,
+			"720P":  2,
+			"1080P": 4.8,
+		},
+		"wan2.2-i2v-flash": {
+			"480P": 1,
+			"720P": 2,
+		},
+		"wan2.2-s2v": {
+			"480P": 1,
+			"720P": 0.9 / 0.5,
+		},
+	}
+	var resolution string
+
+	// size match
+	if aliReq.Parameters.Size != "" {
+		toResolution, err := sizeToResolution(aliReq.Parameters.Size)
+		if err != nil {
+			return nil, err
+		}
+		resolution = toResolution
+	} else {
+		resolution = strings.ToUpper(aliReq.Parameters.Resolution)
+		if !strings.HasSuffix(resolution, "P") {
+			resolution = resolution + "P"
+		}
+	}
+	if otherRatio, ok := aliRatios[aliReq.Model]; ok {
+		if ratio, ok := otherRatio[resolution]; ok {
+			otherRatios[fmt.Sprintf("resolution-%s", resolution)] = ratio
+		}
+	}
+	return otherRatios, nil
+}
+
+func (a *TaskAdaptor) convertToAliRequest(info *relaycommon.RelayInfo, req relaycommon.TaskSubmitReq) (*AliVideoRequest, error) {
 	aliReq := &AliVideoRequest{
 		Model: req.Model,
 		Input: AliVideoInput{
@@ -163,28 +262,53 @@ func (a *TaskAdaptor) convertToAliRequest(req relaycommon.TaskSubmitReq) *AliVid

 	// 处理分辨率映射
 	if req.Size != "" {
-		resolution := strings.ToUpper(req.Size)
-		// 支持 480p, 720p, 1080p 或 480P, 720P, 1080P
-		if !strings.HasSuffix(resolution, "P") {
-			resolution = resolution + "P"
+		// text-to-video models require an explicit W*H size containing '*', e.g. 1920*1080
+		if strings.Contains(req.Model, "t2v") && !strings.Contains(req.Size, "*") {
+			return nil, fmt.Errorf("invalid size: %s, example: %s", req.Size, "1920*1080")
+		}
+		if strings.Contains(req.Size, "*") {
+			aliReq.Parameters.Size = req.Size
+		} else {
+			resolution := strings.ToUpper(req.Size)
+			// 支持 480p, 720p, 1080p 或 480P, 720P, 1080P
+			if !strings.HasSuffix(resolution, "P") {
+				resolution = resolution + "P"
+			}
+			aliReq.Parameters.Resolution = resolution
 		}
-		aliReq.Parameters.Resolution = resolution
 	} else {
 		// 根据模型设置默认分辨率
-		if strings.HasPrefix(req.Model, "wan2.5") {
-			aliReq.Parameters.Resolution = "1080P"
-		} else if strings.HasPrefix(req.Model, "wan2.2-i2v-flash") {
-			aliReq.Parameters.Resolution = "720P"
-		} else if strings.HasPrefix(req.Model, "wan2.2-i2v-plus") {
-			aliReq.Parameters.Resolution = "1080P"
+		if strings.Contains(req.Model, "t2v") { // text to video
+			if strings.HasPrefix(req.Model, "wan2.5") {
+				aliReq.Parameters.Size = "1920*1080"
+			} else if strings.HasPrefix(req.Model, "wan2.2") {
+				aliReq.Parameters.Size = "1920*1080"
+			} else {
+				aliReq.Parameters.Size = "1280*720"
+			}
 		} else {
-			aliReq.Parameters.Resolution = "720P"
+			if strings.HasPrefix(req.Model, "wan2.5") {
+				aliReq.Parameters.Resolution = "1080P"
+			} else if strings.HasPrefix(req.Model, "wan2.2-i2v-flash") {
+				aliReq.Parameters.Resolution = "720P"
+			} else if strings.HasPrefix(req.Model, "wan2.2-i2v-plus") {
+				aliReq.Parameters.Resolution = "1080P"
+			} else {
+				aliReq.Parameters.Resolution = "720P"
+			}
 		}
 	}

 	// 处理时长
 	if req.Duration > 0 {
 		aliReq.Parameters.Duration = req.Duration
+	} else if req.Seconds != "" {
+		seconds, err := strconv.Atoi(req.Seconds)
+		if err != nil {
+			return nil, errors.Wrap(err, "convert seconds to int failed")
+		} else {
+			aliReq.Parameters.Duration = seconds
+		}
 	} else {
 		aliReq.Parameters.Duration = 5 // 默认5秒
 	}
@@ -192,11 +316,32 @@ func (a *TaskAdaptor) convertToAliRequest(req relaycommon.TaskSubmitReq) *AliVid
 	// 从 metadata 中提取额外参数
 	if req.Metadata != nil {
 		if metadataBytes, err := common.Marshal(req.Metadata); err == nil {
-			_ = common.Unmarshal(metadataBytes, aliReq)
+			err = common.Unmarshal(metadataBytes, aliReq)
+			if err != nil {
+				return nil, errors.Wrap(err, "unmarshal metadata failed")
+			}
+		} else {
+			return nil, errors.Wrap(err, "marshal metadata failed")
 		}
 	}

-	return aliReq
+	if aliReq.Model != req.Model {
+		return nil, errors.New("can't change model with metadata")
+	}
+
+	info.PriceData.OtherRatios = map[string]float64{
+		"seconds": float64(aliReq.Parameters.Duration),
+	}
+
+	ratios, err := ProcessAliOtherRatios(aliReq)
+	if err != nil {
+		return nil, err
+	}
+	for s, f := range ratios {
+		info.PriceData.OtherRatios[s] = f
+	}
+
+	return aliReq, nil
 }

 // DoRequest delegates to common helper
--- a/relay/channel/task/jimeng/adaptor.go
+++ b/relay/channel/task/jimeng/adaptor.go
@@ -406,12 +406,15 @@ func (a *TaskAdaptor) convertToRequestPayload(req *relaycommon.TaskSubmitReq) (*
 	// 即梦视频3.0 ReqKey转换
 	// https://www.volcengine.com/docs/85621/1792707
 	if strings.Contains(r.ReqKey, "jimeng_v30") {
-		if len(req.Images) > 1 {
+		if r.ReqKey == "jimeng_v30_pro" {
+			// 3.0 pro只有固定的jimeng_ti2v_v30_pro
+			r.ReqKey = "jimeng_ti2v_v30_pro"
+		} else if len(req.Images) > 1 {
 			// 多张图片：首尾帧生成
-			r.ReqKey = strings.Replace(r.ReqKey, "jimeng_v30", "jimeng_i2v_first_tail_v30", 1)
+			r.ReqKey = strings.TrimSuffix(strings.Replace(r.ReqKey, "jimeng_v30", "jimeng_i2v_first_tail_v30", 1), "p")
 		} else if len(req.Images) == 1 {
 			// 单张图片：图生视频
-			r.ReqKey = strings.Replace(r.ReqKey, "jimeng_v30", "jimeng_i2v_first_v30", 1)
+			r.ReqKey = strings.TrimSuffix(strings.Replace(r.ReqKey, "jimeng_v30", "jimeng_i2v_first_v30", 1), "p")
 		} else {
 			// 无图片：文生视频
 			r.ReqKey = strings.Replace(r.ReqKey, "jimeng_v30", "jimeng_t2v_v30", 1)
--- a/relay/channel/vertex/adaptor.go
+++ b/relay/channel/vertex/adaptor.go
@@ -168,7 +168,8 @@ func (a *Adaptor) getRequestUrl(info *relaycommon.RelayInfo, modelName, suffix s
 func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
 	suffix := ""
 	if a.RequestMode == RequestModeGemini {
-		if model_setting.GetGeminiSettings().ThinkingAdapterEnabled {
+		if model_setting.GetGeminiSettings().ThinkingAdapterEnabled &&
+			!model_setting.ShouldPreserveThinkingSuffix(info.OriginModelName) {
 			// 新增逻辑：处理 -thinking-<budget> 格式
 			if strings.Contains(info.UpstreamModelName, "-thinking-") {
 				parts := strings.Split(info.UpstreamModelName, "-thinking-")
--- a/relay/channel/volcengine/adaptor.go
+++ b/relay/channel/volcengine/adaptor.go
@@ -16,6 +16,7 @@ import (
 	"github.com/QuantumNous/new-api/relay/channel/openai"
 	relaycommon "github.com/QuantumNous/new-api/relay/common"
 	"github.com/QuantumNous/new-api/relay/constant"
+	"github.com/QuantumNous/new-api/setting/model_setting"
 	"github.com/QuantumNous/new-api/types"

 	"github.com/gin-gonic/gin"
@@ -291,7 +292,9 @@ func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayIn
 		return nil, errors.New("request is nil")
 	}

-	if strings.HasSuffix(info.UpstreamModelName, "-thinking") && strings.HasPrefix(info.UpstreamModelName, "deepseek") {
+	if !model_setting.ShouldPreserveThinkingSuffix(info.OriginModelName) &&
+		strings.HasSuffix(info.UpstreamModelName, "-thinking") &&
+		strings.HasPrefix(info.UpstreamModelName, "deepseek") {
 		info.UpstreamModelName = strings.TrimSuffix(info.UpstreamModelName, "-thinking")
 		request.Model = info.UpstreamModelName
 		request.THINKING = json.RawMessage(`{"type": "enabled"}`)
--- a/relay/claude_handler.go
+++ b/relay/claude_handler.go
@@ -67,7 +67,9 @@ func ClaudeHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *typ
 			request.TopP = 0
 			request.Temperature = common.GetPointer[float64](1.0)
 		}
-		request.Model = strings.TrimSuffix(request.Model, "-thinking")
+		if !model_setting.ShouldPreserveThinkingSuffix(info.OriginModelName) {
+			request.Model = strings.TrimSuffix(request.Model, "-thinking")
+		}
 		info.UpstreamModelName = request.Model
 	}

--- a/relay/common/relay_utils.go
+++ b/relay/common/relay_utils.go
@@ -121,6 +121,7 @@ func ValidateMultipartDirect(c *gin.Context, info *RelayInfo) *dto.TaskError {

 	prompt = req.Prompt
 	model = req.Model
+	size = req.Size
 	seconds, _ = strconv.Atoi(req.Seconds)
 	if seconds == 0 {
 		seconds = req.Duration
--- a/relay/helper/price.go
+++ b/relay/helper/price.go
@@ -13,6 +13,9 @@ import (
 	"github.com/gin-gonic/gin"
 )

+// https://docs.claude.com/en/docs/build-with-claude/prompt-caching#1-hour-cache-duration
+const claudeCacheCreation1hMultiplier = 6 / 3.75
+
 // HandleGroupRatio checks for "auto_group" in the context and updates the group ratio and relayInfo.UsingGroup if present
 func HandleGroupRatio(ctx *gin.Context, relayInfo *relaycommon.RelayInfo) types.GroupRatioInfo {
 	groupRatioInfo := types.GroupRatioInfo{
@@ -53,6 +56,8 @@ func ModelPriceHelper(c *gin.Context, info *relaycommon.RelayInfo, promptTokens
 	var cacheRatio float64
 	var imageRatio float64
 	var cacheCreationRatio float64
+	var cacheCreationRatio5m float64
+	var cacheCreationRatio1h float64
 	var audioRatio float64
 	var audioCompletionRatio float64
 	var freeModel bool
@@ -76,6 +81,9 @@ func ModelPriceHelper(c *gin.Context, info *relaycommon.RelayInfo, promptTokens
 		completionRatio = ratio_setting.GetCompletionRatio(info.OriginModelName)
 		cacheRatio, _ = ratio_setting.GetCacheRatio(info.OriginModelName)
 		cacheCreationRatio, _ = ratio_setting.GetCreateCacheRatio(info.OriginModelName)
+		cacheCreationRatio5m = cacheCreationRatio
+		// 固定1h和5min缓存写入价格的比例
+		cacheCreationRatio1h = cacheCreationRatio * claudeCacheCreation1hMultiplier
 		imageRatio, _ = ratio_setting.GetImageRatio(info.OriginModelName)
 		audioRatio = ratio_setting.GetAudioRatio(info.OriginModelName)
 		audioCompletionRatio = ratio_setting.GetAudioCompletionRatio(info.OriginModelName)
@@ -116,6 +124,8 @@ func ModelPriceHelper(c *gin.Context, info *relaycommon.RelayInfo, promptTokens
 		AudioRatio:           audioRatio,
 		AudioCompletionRatio: audioCompletionRatio,
 		CacheCreationRatio:   cacheCreationRatio,
+		CacheCreation5mRatio: cacheCreationRatio5m,
+		CacheCreation1hRatio: cacheCreationRatio1h,
 		QuotaToPreConsume:    preConsumedQuota,
 	}

--- a/service/log_info_generate.go
+++ b/service/log_info_generate.go
@@ -92,11 +92,23 @@ func GenerateAudioOtherInfo(ctx *gin.Context, relayInfo *relaycommon.RelayInfo,
 }

 func GenerateClaudeOtherInfo(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, modelRatio, groupRatio, completionRatio float64,
-	cacheTokens int, cacheRatio float64, cacheCreationTokens int, cacheCreationRatio float64, modelPrice float64, userGroupRatio float64) map[string]interface{} {
+	cacheTokens int, cacheRatio float64,
+	cacheCreationTokens int, cacheCreationRatio float64,
+	cacheCreationTokens5m int, cacheCreationRatio5m float64,
+	cacheCreationTokens1h int, cacheCreationRatio1h float64,
+	modelPrice float64, userGroupRatio float64) map[string]interface{} {
 	info := GenerateTextOtherInfo(ctx, relayInfo, modelRatio, groupRatio, completionRatio, cacheTokens, cacheRatio, modelPrice, userGroupRatio)
 	info["claude"] = true
 	info["cache_creation_tokens"] = cacheCreationTokens
 	info["cache_creation_ratio"] = cacheCreationRatio
+	if cacheCreationTokens5m != 0 {
+		info["cache_creation_tokens_5m"] = cacheCreationTokens5m
+		info["cache_creation_ratio_5m"] = cacheCreationRatio5m
+	}
+	if cacheCreationTokens1h != 0 {
+		info["cache_creation_tokens_1h"] = cacheCreationTokens1h
+		info["cache_creation_ratio_1h"] = cacheCreationRatio1h
+	}
 	return info
 }

--- a/service/quota.go
+++ b/service/quota.go
@@ -251,7 +251,11 @@ func PostClaudeConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo,
 	cacheTokens := usage.PromptTokensDetails.CachedTokens

 	cacheCreationRatio := relayInfo.PriceData.CacheCreationRatio
+	cacheCreationRatio5m := relayInfo.PriceData.CacheCreation5mRatio
+	cacheCreationRatio1h := relayInfo.PriceData.CacheCreation1hRatio
 	cacheCreationTokens := usage.PromptTokensDetails.CachedCreationTokens
+	cacheCreationTokens5m := usage.ClaudeCacheCreation5mTokens
+	cacheCreationTokens1h := usage.ClaudeCacheCreation1hTokens

 	if relayInfo.ChannelType == constant.ChannelTypeOpenRouter {
 		promptTokens -= cacheTokens
@@ -269,7 +273,12 @@ func PostClaudeConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo,
 	if !relayInfo.PriceData.UsePrice {
 		calculateQuota = float64(promptTokens)
 		calculateQuota += float64(cacheTokens) * cacheRatio
-		calculateQuota += float64(cacheCreationTokens) * cacheCreationRatio
+		calculateQuota += float64(cacheCreationTokens5m) * cacheCreationRatio5m
+		calculateQuota += float64(cacheCreationTokens1h) * cacheCreationRatio1h
+		remainingCacheCreationTokens := cacheCreationTokens - cacheCreationTokens5m - cacheCreationTokens1h
+		if remainingCacheCreationTokens > 0 {
+			calculateQuota += float64(remainingCacheCreationTokens) * cacheCreationRatio
+		}
 		calculateQuota += float64(completionTokens) * completionRatio
 		calculateQuota = calculateQuota * groupRatio * modelRatio
 	} else {
@@ -322,7 +331,11 @@ func PostClaudeConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo,
 	}

 	other := GenerateClaudeOtherInfo(ctx, relayInfo, modelRatio, groupRatio, completionRatio,
-		cacheTokens, cacheRatio, cacheCreationTokens, cacheCreationRatio, modelPrice, relayInfo.PriceData.GroupRatioInfo.GroupSpecialRatio)
+		cacheTokens, cacheRatio,
+		cacheCreationTokens, cacheCreationRatio,
+		cacheCreationTokens5m, cacheCreationRatio5m,
+		cacheCreationTokens1h, cacheCreationRatio1h,
+		modelPrice, relayInfo.PriceData.GroupRatioInfo.GroupSpecialRatio)
 	model.RecordConsumeLog(ctx, relayInfo.UserId, model.RecordConsumeLogParams{
 		ChannelId:        relayInfo.ChannelId,
 		PromptTokens:     promptTokens,
--- a/setting/model_setting/global.go
+++ b/setting/model_setting/global.go
@@ -1,16 +1,23 @@
 package model_setting

 import (
+	"strings"
+
 	"github.com/QuantumNous/new-api/setting/config"
 )

 type GlobalSettings struct {
-	PassThroughRequestEnabled bool `json:"pass_through_request_enabled"`
+	PassThroughRequestEnabled bool     `json:"pass_through_request_enabled"`
+	ThinkingModelBlacklist    []string `json:"thinking_model_blacklist"` // model names that ShouldPreserveThinkingSuffix matches exactly after trimming whitespace
 }

 // 默认配置
 var defaultOpenaiSettings = GlobalSettings{
 	PassThroughRequestEnabled: false,
+	ThinkingModelBlacklist: []string{
+		"moonshotai/kimi-k2-thinking",
+		"kimi-k2-thinking",
+	},
 }

 // 全局实例
@@ -24,3 +31,18 @@ func init() {
 func GetGlobalSettings() *GlobalSettings {
 	return &globalSettings
 }
+
+// ShouldPreserveThinkingSuffix reports whether modelName, after trimming whitespace, exactly equals a trimmed ThinkingModelBlacklist entry, i.e. the model is configured to keep its thinking/-nothinking suffix; an empty name is never matched.
+func ShouldPreserveThinkingSuffix(modelName string) bool {
+	target := strings.TrimSpace(modelName)
+	if target == "" {
+		return false
+	}
+
+	for _, entry := range globalSettings.ThinkingModelBlacklist {
+		if strings.TrimSpace(entry) == target {
+			return true
+		}
+	}
+	return false
+}
--- a/types/price_data.go
+++ b/types/price_data.go
@@ -15,6 +15,8 @@ type PriceData struct {
 	CompletionRatio      float64
 	CacheRatio           float64
 	CacheCreationRatio   float64
+	CacheCreation5mRatio float64
+	CacheCreation1hRatio float64
 	ImageRatio           float64
 	AudioRatio           float64
 	AudioCompletionRatio float64
@@ -31,5 +33,5 @@ type PerCallPriceData struct {
 }

 func (p PriceData) ToSetting() string {
-	return fmt.Sprintf("ModelPrice: %f, ModelRatio: %f, CompletionRatio: %f, CacheRatio: %f, GroupRatio: %f, UsePrice: %t, CacheCreationRatio: %f, QuotaToPreConsume: %d, ImageRatio: %f, AudioRatio: %f, AudioCompletionRatio: %f", p.ModelPrice, p.ModelRatio, p.CompletionRatio, p.CacheRatio, p.GroupRatioInfo.GroupRatio, p.UsePrice, p.CacheCreationRatio, p.QuotaToPreConsume, p.ImageRatio, p.AudioRatio, p.AudioCompletionRatio)
+	return fmt.Sprintf("ModelPrice: %f, ModelRatio: %f, CompletionRatio: %f, CacheRatio: %f, GroupRatio: %f, UsePrice: %t, CacheCreationRatio: %f, CacheCreation5mRatio: %f, CacheCreation1hRatio: %f, QuotaToPreConsume: %d, ImageRatio: %f, AudioRatio: %f, AudioCompletionRatio: %f", p.ModelPrice, p.ModelRatio, p.CompletionRatio, p.CacheRatio, p.GroupRatioInfo.GroupRatio, p.UsePrice, p.CacheCreationRatio, p.CacheCreation5mRatio, p.CacheCreation1hRatio, p.QuotaToPreConsume, p.ImageRatio, p.AudioRatio, p.AudioCompletionRatio)
 }
--- a/web/src/components/settings/ModelSetting.jsx
+++ b/web/src/components/settings/ModelSetting.jsx
@@ -37,6 +37,7 @@ const ModelSetting = () => {
    'claude.default_max_tokens': '',
    'claude.thinking_adapter_budget_tokens_percentage': 0.8,
    'global.pass_through_request_enabled': false,
+    'global.thinking_model_blacklist': '[]',
    'general_setting.ping_interval_enabled': false,
    'general_setting.ping_interval_seconds': 60,
    'gemini.thinking_adapter_enabled': false,
@@ -56,7 +57,8 @@ const ModelSetting = () => {
          item.key === 'gemini.version_settings' ||
          item.key === 'claude.model_headers_settings' ||
          item.key === 'claude.default_max_tokens' ||
-          item.key === 'gemini.supported_imagine_models'
+          item.key === 'gemini.supported_imagine_models' ||
+          item.key === 'global.thinking_model_blacklist'
        ) {
          if (item.value !== '') {
            item.value = JSON.stringify(JSON.parse(item.value), null, 2);
--- a/web/src/components/table/channels/modals/EditTagModal.jsx
+++ b/web/src/components/table/channels/modals/EditTagModal.jsx
@@ -45,6 +45,7 @@ import {
  IconBookmark,
  IconUser,
  IconCode,
+  IconSetting,
 } from '@douyinfe/semi-icons';
 import { getChannelModels } from '../../../../helpers';
 import { useTranslation } from 'react-i18next';
@@ -69,6 +70,8 @@ const EditTagModal = (props) => {
    model_mapping: null,
    groups: [],
    models: [],
+    param_override: null,
+    header_override: null,
  };
  const [inputs, setInputs] = useState(originInputs);
  const formApiRef = useRef(null);
@@ -190,12 +193,48 @@ const EditTagModal = (props) => {
    if (formVals.models && formVals.models.length > 0) {
      data.models = formVals.models.join(',');
    }
+    if (
+      formVals.param_override !== undefined &&
+      formVals.param_override !== null
+    ) {
+      if (typeof formVals.param_override !== 'string') {
+        showInfo('参数覆盖必须是合法的 JSON 格式！');
+        setLoading(false);
+        return;
+      }
+      const trimmedParamOverride = formVals.param_override.trim();
+      if (trimmedParamOverride !== '' && !verifyJSON(trimmedParamOverride)) {
+        showInfo('参数覆盖必须是合法的 JSON 格式！');
+        setLoading(false);
+        return;
+      }
+      data.param_override = trimmedParamOverride;
+    }
+    if (
+      formVals.header_override !== undefined &&
+      formVals.header_override !== null
+    ) {
+      if (typeof formVals.header_override !== 'string') {
+        showInfo('请求头覆盖必须是合法的 JSON 格式！');
+        setLoading(false);
+        return;
+      }
+      const trimmedHeaderOverride = formVals.header_override.trim();
+      if (trimmedHeaderOverride !== '' && !verifyJSON(trimmedHeaderOverride)) {
+        showInfo('请求头覆盖必须是合法的 JSON 格式！');
+        setLoading(false);
+        return;
+      }
+      data.header_override = trimmedHeaderOverride;
+    }
    data.new_tag = formVals.new_tag;
    if (
      data.model_mapping === undefined &&
      data.groups === undefined &&
      data.models === undefined &&
-      data.new_tag === undefined
+      data.new_tag === undefined &&
+      data.param_override === undefined &&
+      data.header_override === undefined
    ) {
      showWarning('没有任何修改！');
      setLoading(false);
@@ -491,6 +530,157 @@ const EditTagModal = (props) => {
                </div>
              </Card>

+              <Card className='!rounded-2xl shadow-sm border-0 mb-6'>
+                {/* Header: Advanced Settings */}
+                <div className='flex items-center mb-2'>
+                  <Avatar size='small' color='orange' className='mr-2 shadow-md'>
+                    <IconSetting size={16} />
+                  </Avatar>
+                  <div>
+                    <Text className='text-lg font-medium'>{t('高级设置')}</Text>
+                    <div className='text-xs text-gray-600'>
+                      {t('渠道的高级配置选项')}
+                    </div>
+                  </div>
+                </div>
+
+                <div className='space-y-4'>
+                  <Form.TextArea
+                    field='param_override'
+                    label={t('参数覆盖')}
+                    placeholder={
+                      t(
+                        '此项可选，用于覆盖请求参数。不支持覆盖 stream 参数',
+                      ) +
+                      '\n' +
+                      t('旧格式（直接覆盖）：') +
+                      '\n{\n  "temperature": 0,\n  "max_tokens": 1000\n}' +
+                      '\n\n' +
+                      t('新格式（支持条件判断与json自定义）：') +
+                      '\n{\n  "operations": [\n    {\n      "path": "temperature",\n      "mode": "set",\n      "value": 0.7,\n      "conditions": [\n        {\n          "path": "model",\n          "mode": "prefix",\n          "value": "gpt"\n        }\n      ]\n    }\n  ]\n}'
+                    }
+                    autosize
+                    showClear
+                    onChange={(value) =>
+                      handleInputChange('param_override', value)
+                    }
+                    extraText={
+                      <div className='flex gap-2 flex-wrap'>
+                        <Text
+                          className='!text-semi-color-primary cursor-pointer'
+                          onClick={() =>
+                            handleInputChange(
+                              'param_override',
+                              JSON.stringify({ temperature: 0 }, null, 2),
+                            )
+                          }
+                        >
+                          {t('旧格式模板')}
+                        </Text>
+                        <Text
+                          className='!text-semi-color-primary cursor-pointer'
+                          onClick={() =>
+                            handleInputChange(
+                              'param_override',
+                              JSON.stringify(
+                                {
+                                  operations: [
+                                    {
+                                      path: 'temperature',
+                                      mode: 'set',
+                                      value: 0.7,
+                                      conditions: [
+                                        {
+                                          path: 'model',
+                                          mode: 'prefix',
+                                          value: 'gpt',
+                                        },
+                                      ],
+                                      logic: 'AND',
+                                    },
+                                  ],
+                                },
+                                null,
+                                2,
+                              ),
+                            )
+                          }
+                        >
+                          {t('新格式模板')}
+                        </Text>
+                        <Text
+                          className='!text-semi-color-primary cursor-pointer'
+                          onClick={() =>
+                            handleInputChange('param_override', null)
+                          }
+                        >
+                          {t('不更改')}
+                        </Text>
+                      </div>
+                    }
+                  />
+
+                  <Form.TextArea
+                    field='header_override'
+                    label={t('请求头覆盖')}
+                    placeholder={
+                      t('此项可选，用于覆盖请求头参数') +
+                      '\n' +
+                      t('格式示例：') +
+                      '\n{\n  "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36 Edg/139.0.0.0",\n  "Authorization": "Bearer {api_key}"\n}'
+                    }
+                    autosize
+                    showClear
+                    onChange={(value) =>
+                      handleInputChange('header_override', value)
+                    }
+                    extraText={
+                      <div className='flex flex-col gap-1'>
+                        <div className='flex gap-2 flex-wrap items-center'>
+                          <Text
+                            className='!text-semi-color-primary cursor-pointer'
+                            onClick={() =>
+                              handleInputChange(
+                                'header_override',
+                                JSON.stringify(
+                                  {
+                                    'User-Agent':
+                                      'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36 Edg/139.0.0.0',
+                                    Authorization: 'Bearer {api_key}',
+                                  },
+                                  null,
+                                  2,
+                                ),
+                              )
+                            }
+                          >
+                            {t('填入模板')}
+                          </Text>
+                          <Text
+                            className='!text-semi-color-primary cursor-pointer'
+                            onClick={() =>
+                              handleInputChange('header_override', null)
+                            }
+                          >
+                            {t('不更改')}
+                          </Text>
+                        </div>
+                        <div>
+                          <Text type='tertiary' size='small'>
+                            {t('支持变量：')}
+                          </Text>
+                          <div className='text-xs text-tertiary ml-2'>
+                            <div>
+                              {t('渠道密钥')}: {'{api_key}'}
+                            </div>
+                          </div>
+                        </div>
+                      </div>
+                    }
+                  />
+                </div>
+              </Card>
+
              <Card className='!rounded-2xl shadow-sm border-0'>
                {/* Header: Group Settings */}
                <div className='flex items-center mb-2'>
--- a/web/src/components/table/model-pricing/filter/PricingTags.jsx
+++ b/web/src/components/table/model-pricing/filter/PricingTags.jsx
@@ -44,7 +44,7 @@ const PricingTags = ({
    (allModels.length > 0 ? allModels : models).forEach((model) => {
      if (model.tags) {
         model.tags
-          .split(/[,;|\s]+/) // 逗号、分号、竖线或空白字符
+          .split(/[,;|]+/) // split on commas, semicolons, or pipes; whitespace is kept so multi-word tags such as "open weights" stay intact
           .map((tag) => tag.trim())
           .filter(Boolean)
           .forEach((tag) => tagSet.add(tag.toLowerCase()));
@@ -64,7 +64,7 @@ const PricingTags = ({
        if (!model.tags) return false;
        return model.tags
          .toLowerCase()
-          .split(/[,;|\s]+/)
+          .split(/[,;|]+/)
          .map((tg) => tg.trim())
          .includes(tagLower);
      }).length;
--- a/web/src/components/table/tokens/modals/EditTokenModal.jsx
+++ b/web/src/components/table/tokens/modals/EditTokenModal.jsx
@@ -66,9 +66,9 @@ const EditTokenModal = (props) => {

  const getInitValues = () => ({
    name: '',
-    remain_quota: 500000,
+    remain_quota: 0,
    expired_time: -1,
-    unlimited_quota: false,
+    unlimited_quota: true,
    model_limits_enabled: false,
    model_limits: [],
    allow_ips: '',
--- a/web/src/components/table/usage-logs/UsageLogsColumnDefs.jsx
+++ b/web/src/components/table/usage-logs/UsageLogsColumnDefs.jsx
@@ -551,6 +551,10 @@ export const getLogsColumns = ({
              other.cache_ratio || 1.0,
              other.cache_creation_tokens || 0,
              other.cache_creation_ratio || 1.0,
+              other.cache_creation_tokens_5m || 0,
+              other.cache_creation_ratio_5m || other.cache_creation_ratio || 1.0,
+              other.cache_creation_tokens_1h || 0,
+              other.cache_creation_ratio_1h || other.cache_creation_ratio || 1.0,
              false,
              1.0,
              other?.is_system_prompt_overwritten,
@@ -565,6 +569,10 @@ export const getLogsColumns = ({
              other.cache_ratio || 1.0,
              0,
              1.0,
+              0,
+              1.0,
+              0,
+              1.0,
              false,
              1.0,
              other?.is_system_prompt_overwritten,
--- a/web/src/helpers/base64.js
+++ b/web/src/helpers/base64.js
@@ -0,0 +1,56 @@
+/*
+Copyright (C) 2025 QuantumNous
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+For commercial licensing, please contact support@quantumnous.com
+*/
+// Converts text into a "binary string": one character (code 0-255) per UTF-8 byte, which is the form btoa() is given below.
+const toBinaryString = (text) => {
+  if (typeof TextEncoder !== 'undefined') {
+    const bytes = new TextEncoder().encode(text);
+    let binary = '';
+
+    bytes.forEach((byte) => {
+      binary += String.fromCharCode(byte);
+    });
+
+    return binary;
+  }
+  // Fallback without TextEncoder: percent-encode the text, then turn every %XX escape into the character with that code.
+  return encodeURIComponent(text).replace(/%([0-9A-F]{2})/g, (_, hex) =>
+    String.fromCharCode(parseInt(hex, 16)),
+  );
+};
+// Returns the Base64 encoding of value's UTF-8 bytes; null/undefined are encoded as '' and any other value goes through String().
+export const encodeToBase64 = (value) => {
+  const input = value == null ? '' : String(value);
+  // No window object: use Buffer if it exists, otherwise globalThis.btoa, otherwise throw an Error.
+  if (typeof window === 'undefined') {
+    if (typeof Buffer !== 'undefined') {
+      return Buffer.from(input, 'utf-8').toString('base64');
+    }
+    if (
+      typeof globalThis !== 'undefined' &&
+      typeof globalThis.btoa === 'function'
+    ) {
+      return globalThis.btoa(toBinaryString(input));
+    }
+    throw new Error(
+      'Base64 encoding is unavailable in the current environment',
+    );
+  }
+
+  return window.btoa(toBinaryString(input));
+};
--- a/web/src/helpers/index.js
+++ b/web/src/helpers/index.js
@@ -20,6 +20,7 @@ For commercial licensing, please contact support@quantumnous.com
 export * from './history';
 export * from './auth';
 export * from './utils';
+export * from './base64';
 export * from './api';
 export * from './render';
 export * from './log';
--- a/web/src/helpers/render.jsx
+++ b/web/src/helpers/render.jsx
@@ -1046,6 +1046,10 @@ function renderPriceSimpleCore({
  cacheRatio = 1.0,
  cacheCreationTokens = 0,
  cacheCreationRatio = 1.0,
+  cacheCreationTokens5m = 0,
+  cacheCreationRatio5m = 1.0,
+  cacheCreationTokens1h = 0,
+  cacheCreationRatio1h = 1.0,
  image = false,
  imageRatio = 1.0,
  isSystemPromptOverride = false,
@@ -1064,17 +1068,40 @@ function renderPriceSimpleCore({
    });
  }

+  const hasSplitCacheCreation =
+    cacheCreationTokens5m > 0 || cacheCreationTokens1h > 0;
+
+  const shouldShowLegacyCacheCreation =
+    !hasSplitCacheCreation && cacheCreationTokens !== 0;
+
+  const shouldShowCache = cacheTokens !== 0;
+  const shouldShowCacheCreation5m =
+    hasSplitCacheCreation && cacheCreationTokens5m > 0;
+  const shouldShowCacheCreation1h =
+    hasSplitCacheCreation && cacheCreationTokens1h > 0;
+
  const parts = [];
  // base: model ratio
  parts.push(i18next.t('模型: {{ratio}}'));

  // cache part (label differs when with image)
-  if (cacheTokens !== 0) {
+  if (shouldShowCache) {
    parts.push(i18next.t('缓存: {{cacheRatio}}'));
  }

-  // cache creation part (Claude specific if passed)
-  if (cacheCreationTokens !== 0) {
+  if (hasSplitCacheCreation) {
+    if (shouldShowCacheCreation5m && shouldShowCacheCreation1h) {
+      parts.push(
+        i18next.t(
+          '缓存创建: 5m {{cacheCreationRatio5m}} / 1h {{cacheCreationRatio1h}}',
+        ),
+      );
+    } else if (shouldShowCacheCreation5m) {
+      parts.push(i18next.t('缓存创建: 5m {{cacheCreationRatio5m}}'));
+    } else if (shouldShowCacheCreation1h) {
+      parts.push(i18next.t('缓存创建: 1h {{cacheCreationRatio1h}}'));
+    }
+  } else if (shouldShowLegacyCacheCreation) {
    parts.push(i18next.t('缓存创建: {{cacheCreationRatio}}'));
  }

@@ -1091,6 +1118,8 @@ function renderPriceSimpleCore({
    groupRatio: finalGroupRatio,
    cacheRatio: cacheRatio,
    cacheCreationRatio: cacheCreationRatio,
+    cacheCreationRatio5m: cacheCreationRatio5m,
+    cacheCreationRatio1h: cacheCreationRatio1h,
    imageRatio: imageRatio,
  });

@@ -1450,6 +1479,10 @@ export function renderModelPriceSimple(
  cacheRatio = 1.0,
  cacheCreationTokens = 0,
  cacheCreationRatio = 1.0,
+  cacheCreationTokens5m = 0,
+  cacheCreationRatio5m = 1.0,
+  cacheCreationTokens1h = 0,
+  cacheCreationRatio1h = 1.0,
  image = false,
  imageRatio = 1.0,
  isSystemPromptOverride = false,
@@ -1464,6 +1497,10 @@ export function renderModelPriceSimple(
    cacheRatio,
    cacheCreationTokens,
    cacheCreationRatio,
+    cacheCreationTokens5m,
+    cacheCreationRatio5m,
+    cacheCreationTokens1h,
+    cacheCreationRatio1h,
    image,
    imageRatio,
    isSystemPromptOverride,
@@ -1681,6 +1718,10 @@ export function renderClaudeModelPrice(
  cacheRatio = 1.0,
  cacheCreationTokens = 0,
  cacheCreationRatio = 1.0,
+  cacheCreationTokens5m = 0,
+  cacheCreationRatio5m = 1.0,
+  cacheCreationTokens1h = 0,
+  cacheCreationRatio1h = 1.0,
 ) {
  const { ratio: effectiveGroupRatio, label: ratioLabel } = getEffectiveRatio(
    groupRatio,
@@ -1710,20 +1751,121 @@ export function renderClaudeModelPrice(
    const completionRatioValue = completionRatio || 0;
    const inputRatioPrice = modelRatio * 2.0;
    const completionRatioPrice = modelRatio * 2.0 * completionRatioValue;
-    let cacheRatioPrice = (modelRatio * 2.0 * cacheRatio).toFixed(2);
-    let cacheCreationRatioPrice = modelRatio * 2.0 * cacheCreationRatio;
+    const cacheRatioPrice = modelRatio * 2.0 * cacheRatio;
+    const cacheCreationRatioPrice = modelRatio * 2.0 * cacheCreationRatio;
+    const cacheCreationRatioPrice5m = modelRatio * 2.0 * cacheCreationRatio5m;
+    const cacheCreationRatioPrice1h = modelRatio * 2.0 * cacheCreationRatio1h;
+
+    const hasSplitCacheCreation =
+      cacheCreationTokens5m > 0 || cacheCreationTokens1h > 0;
+
+    const shouldShowCache = cacheTokens > 0;
+    const shouldShowLegacyCacheCreation =
+      !hasSplitCacheCreation && cacheCreationTokens > 0;
+    const shouldShowCacheCreation5m =
+      hasSplitCacheCreation && cacheCreationTokens5m > 0;
+    const shouldShowCacheCreation1h =
+      hasSplitCacheCreation && cacheCreationTokens1h > 0;

    // Calculate effective input tokens (non-cached + cached with ratio applied + cache creation with ratio applied)
    const nonCachedTokens = inputTokens;
    const effectiveInputTokens =
      nonCachedTokens +
      cacheTokens * cacheRatio +
-      cacheCreationTokens * cacheCreationRatio;
+      cacheCreationTokens * cacheCreationRatio +
+      cacheCreationTokens5m * cacheCreationRatio5m +
+      cacheCreationTokens1h * cacheCreationRatio1h;

    let price =
      (effectiveInputTokens / 1000000) * inputRatioPrice * groupRatio +
      (completionTokens / 1000000) * completionRatioPrice * groupRatio;

+    const inputUnitPrice = inputRatioPrice * rate;
+    const completionUnitPrice = completionRatioPrice * rate;
+    const cacheUnitPrice = cacheRatioPrice * rate;
+    const cacheCreationUnitPrice = cacheCreationRatioPrice * rate;
+    const cacheCreationUnitPrice5m = cacheCreationRatioPrice5m * rate;
+    const cacheCreationUnitPrice1h = cacheCreationRatioPrice1h * rate;
+    const cacheCreationUnitPriceTotal =
+      cacheCreationUnitPrice5m + cacheCreationUnitPrice1h;
+
+    const breakdownSegments = [
+      i18next.t('提示 {{input}} tokens / 1M tokens * {{symbol}}{{price}}', {
+        input: inputTokens,
+        symbol,
+        price: inputUnitPrice.toFixed(6),
+      }),
+    ];
+
+    if (shouldShowCache) {
+      breakdownSegments.push(
+        i18next.t(
+          '缓存 {{tokens}} tokens / 1M tokens * {{symbol}}{{price}} (倍率: {{ratio}})',
+          {
+            tokens: cacheTokens,
+            symbol,
+            price: cacheUnitPrice.toFixed(6),
+            ratio: cacheRatio,
+          },
+        ),
+      );
+    }
+
+    if (shouldShowLegacyCacheCreation) {
+      breakdownSegments.push(
+        i18next.t(
+          '缓存创建 {{tokens}} tokens / 1M tokens * {{symbol}}{{price}} (倍率: {{ratio}})',
+          {
+            tokens: cacheCreationTokens,
+            symbol,
+            price: cacheCreationUnitPrice.toFixed(6),
+            ratio: cacheCreationRatio,
+          },
+        ),
+      );
+    }
+
+    if (shouldShowCacheCreation5m) {
+      breakdownSegments.push(
+        i18next.t(
+          '5m缓存创建 {{tokens}} tokens / 1M tokens * {{symbol}}{{price}} (倍率: {{ratio}})',
+          {
+            tokens: cacheCreationTokens5m,
+            symbol,
+            price: cacheCreationUnitPrice5m.toFixed(6),
+            ratio: cacheCreationRatio5m,
+          },
+        ),
+      );
+    }
+
+    if (shouldShowCacheCreation1h) {
+      breakdownSegments.push(
+        i18next.t(
+          '1h缓存创建 {{tokens}} tokens / 1M tokens * {{symbol}}{{price}} (倍率: {{ratio}})',
+          {
+            tokens: cacheCreationTokens1h,
+            symbol,
+            price: cacheCreationUnitPrice1h.toFixed(6),
+            ratio: cacheCreationRatio1h,
+          },
+        ),
+      );
+    }
+
+    breakdownSegments.push(
+      i18next.t(
+        '补全 {{completion}} tokens / 1M tokens * {{symbol}}{{price}}',
+        {
+          completion: completionTokens,
+          symbol,
+          price: completionUnitPrice.toFixed(6),
+        },
+      ),
+    );
+
+    const breakdownText = breakdownSegments.join(' + ');
+
    return (
      <>
        <article>
@@ -1744,7 +1886,7 @@ export function renderClaudeModelPrice(
              },
            )}
          </p>
-          {cacheTokens > 0 && (
+          {shouldShowCache && (
            <p>
              {i18next.t(
                '缓存价格：{{symbol}}{{price}} * {{ratio}} = {{symbol}}{{total}} / 1M tokens (缓存倍率: {{cacheRatio}})',
@@ -1752,13 +1894,13 @@ export function renderClaudeModelPrice(
                  symbol: symbol,
                  price: (inputRatioPrice * rate).toFixed(6),
                  ratio: cacheRatio,
-                  total: (cacheRatioPrice * rate).toFixed(2),
+                  total: cacheUnitPrice.toFixed(6),
                  cacheRatio: cacheRatio,
                },
              )}
            </p>
          )}
-          {cacheCreationTokens > 0 && (
+          {shouldShowLegacyCacheCreation && (
            <p>
              {i18next.t(
                '缓存创建价格：{{symbol}}{{price}} * {{ratio}} = {{symbol}}{{total}} / 1M tokens (缓存创建倍率: {{cacheCreationRatio}})',
@@ -1766,49 +1908,65 @@ export function renderClaudeModelPrice(
                  symbol: symbol,
                  price: (inputRatioPrice * rate).toFixed(6),
                  ratio: cacheCreationRatio,
-                  total: (cacheCreationRatioPrice * rate).toFixed(6),
+                  total: cacheCreationUnitPrice.toFixed(6),
                  cacheCreationRatio: cacheCreationRatio,
                },
              )}
            </p>
          )}
+          {shouldShowCacheCreation5m && (
+            <p>
+              {i18next.t(
+                '5m缓存创建价格：{{symbol}}{{price}} * {{ratio}} = {{symbol}}{{total}} / 1M tokens (5m缓存创建倍率: {{cacheCreationRatio5m}})',
+                {
+                  symbol: symbol,
+                  price: (inputRatioPrice * rate).toFixed(6),
+                  ratio: cacheCreationRatio5m,
+                  total: cacheCreationUnitPrice5m.toFixed(6),
+                  cacheCreationRatio5m: cacheCreationRatio5m,
+                },
+              )}
+            </p>
+          )}
+          {shouldShowCacheCreation1h && (
+            <p>
+              {i18next.t(
+                '1h缓存创建价格：{{symbol}}{{price}} * {{ratio}} = {{symbol}}{{total}} / 1M tokens (1h缓存创建倍率: {{cacheCreationRatio1h}})',
+                {
+                  symbol: symbol,
+                  price: (inputRatioPrice * rate).toFixed(6),
+                  ratio: cacheCreationRatio1h,
+                  total: cacheCreationUnitPrice1h.toFixed(6),
+                  cacheCreationRatio1h: cacheCreationRatio1h,
+                },
+              )}
+            </p>
+          )}
+          {shouldShowCacheCreation5m && shouldShowCacheCreation1h && (
+            <p>
+              {i18next.t(
+                '缓存创建价格合计：5m {{symbol}}{{five}} + 1h {{symbol}}{{one}} = {{symbol}}{{total}} / 1M tokens',
+                {
+                  symbol: symbol,
+                  five: cacheCreationUnitPrice5m.toFixed(6),
+                  one: cacheCreationUnitPrice1h.toFixed(6),
+                  total: cacheCreationUnitPriceTotal.toFixed(6),
+                },
+              )}
+            </p>
+          )}
          <p></p>
          <p>
-            {cacheTokens > 0 || cacheCreationTokens > 0
-              ? i18next.t(
-                  '提示 {{nonCacheInput}} tokens / 1M tokens * {{symbol}}{{price}} + 缓存 {{cacheInput}} tokens / 1M tokens * {{symbol}}{{cachePrice}} + 缓存创建 {{cacheCreationInput}} tokens / 1M tokens * {{symbol}}{{cacheCreationPrice}} + 补全 {{completion}} tokens / 1M tokens * {{symbol}}{{compPrice}} * {{ratioType}} {{ratio}} = {{symbol}}{{total}}',
-                  {
-                    nonCacheInput: nonCachedTokens,
-                    cacheInput: cacheTokens,
-                    cacheRatio: cacheRatio,
-                    cacheCreationInput: cacheCreationTokens,
-                    cacheCreationRatio: cacheCreationRatio,
-                    symbol: symbol,
-                    cachePrice: (cacheRatioPrice * rate).toFixed(2),
-                    cacheCreationPrice: (
-                      cacheCreationRatioPrice * rate
-                    ).toFixed(6),
-                    price: (inputRatioPrice * rate).toFixed(6),
-                    completion: completionTokens,
-                    compPrice: (completionRatioPrice * rate).toFixed(6),
-                    ratio: groupRatio,
-                    ratioType: ratioLabel,
-                    total: (price * rate).toFixed(6),
-                  },
-                )
-              : i18next.t(
-                  '提示 {{input}} tokens / 1M tokens * {{symbol}}{{price}} + 补全 {{completion}} tokens / 1M tokens * {{symbol}}{{compPrice}} * {{ratioType}} {{ratio}} = {{symbol}}{{total}}',
-                  {
-                    input: inputTokens,
-                    symbol: symbol,
-                    price: (inputRatioPrice * rate).toFixed(6),
-                    completion: completionTokens,
-                    compPrice: (completionRatioPrice * rate).toFixed(6),
-                    ratio: groupRatio,
-                    ratioType: ratioLabel,
-                    total: (price * rate).toFixed(6),
-                  },
-                )}
+            {i18next.t(
+              '{{breakdown}} * {{ratioType}} {{ratio}} = {{symbol}}{{total}}',
+              {
+                breakdown: breakdownText,
+                ratioType: ratioLabel,
+                ratio: groupRatio,
+                symbol: symbol,
+                total: (price * rate).toFixed(6),
+              },
+            )}
          </p>
          <p>{i18next.t('仅供参考，以实际扣费为准')}</p>
        </article>
@@ -1825,6 +1983,10 @@ export function renderClaudeLogContent(
  user_group_ratio,
  cacheRatio = 1.0,
  cacheCreationRatio = 1.0,
+  cacheCreationTokens5m = 0,
+  cacheCreationRatio5m = 1.0,
+  cacheCreationTokens1h = 0,
+  cacheCreationRatio1h = 1.0,
 ) {
  const { ratio: effectiveGroupRatio, label: ratioLabel } = getEffectiveRatio(
    groupRatio,
@@ -1843,17 +2005,58 @@ export function renderClaudeLogContent(
      ratio: groupRatio,
    });
  } else {
-    return i18next.t(
-      '模型倍率 {{modelRatio}}，输出倍率 {{completionRatio}}，缓存倍率 {{cacheRatio}}，缓存创建倍率 {{cacheCreationRatio}}，{{ratioType}} {{ratio}}',
-      {
-        modelRatio: modelRatio,
-        completionRatio: completionRatio,
-        cacheRatio: cacheRatio,
-        cacheCreationRatio: cacheCreationRatio,
+    const hasSplitCacheCreation =
+      cacheCreationTokens5m > 0 || cacheCreationTokens1h > 0;
+    const shouldShowCacheCreation5m =
+      hasSplitCacheCreation && cacheCreationTokens5m > 0;
+    const shouldShowCacheCreation1h =
+      hasSplitCacheCreation && cacheCreationTokens1h > 0;
+
+    let cacheCreationPart = null;
+    if (hasSplitCacheCreation) {
+      if (shouldShowCacheCreation5m && shouldShowCacheCreation1h) {
+        cacheCreationPart = i18next.t(
+          '缓存创建倍率 5m {{cacheCreationRatio5m}} / 1h {{cacheCreationRatio1h}}',
+          {
+            cacheCreationRatio5m,
+            cacheCreationRatio1h,
+          },
+        );
+      } else if (shouldShowCacheCreation5m) {
+        cacheCreationPart = i18next.t(
+          '缓存创建倍率 5m {{cacheCreationRatio5m}}',
+          {
+            cacheCreationRatio5m,
+          },
+        );
+      } else if (shouldShowCacheCreation1h) {
+        cacheCreationPart = i18next.t(
+          '缓存创建倍率 1h {{cacheCreationRatio1h}}',
+          {
+            cacheCreationRatio1h,
+          },
+        );
+      }
+    }
+
+    if (!cacheCreationPart) {
+      cacheCreationPart = i18next.t('缓存创建倍率 {{cacheCreationRatio}}', {
+        cacheCreationRatio,
+      });
+    }
+
+    const parts = [
+      i18next.t('模型倍率 {{modelRatio}}', { modelRatio }),
+      i18next.t('输出倍率 {{completionRatio}}', { completionRatio }),
+      i18next.t('缓存倍率 {{cacheRatio}}', { cacheRatio }),
+      cacheCreationPart,
+      i18next.t('{{ratioType}} {{ratio}}', {
        ratioType: ratioLabel,
        ratio: groupRatio,
-      },
-    );
+      }),
+    ];
+
+    return parts.join('，');
  }
 }

--- a/web/src/hooks/model-pricing/useModelPricingData.jsx
+++ b/web/src/hooks/model-pricing/useModelPricingData.jsx
@@ -128,7 +128,7 @@ export const useModelPricingData = () => {
        if (!model.tags) return false;
        const tagsArr = model.tags
          .toLowerCase()
-          .split(/[,;|\s]+/)
+          .split(/[,;|]+/)
          .map((tag) => tag.trim())
          .filter(Boolean);
        return tagsArr.includes(tagLower);
--- a/web/src/hooks/model-pricing/usePricingFilterCounts.js
+++ b/web/src/hooks/model-pricing/usePricingFilterCounts.js
@@ -23,7 +23,7 @@ import { useMemo } from 'react';
 const normalizeTags = (tags = '') =>
  tags
    .toLowerCase()
-    .split(/[,;|\s]+/)
+    .split(/[,;|]+/)
    .map((t) => t.trim())
    .filter(Boolean);

--- a/web/src/hooks/tokens/useTokensData.jsx
+++ b/web/src/hooks/tokens/useTokensData.jsx
@@ -20,7 +20,13 @@ For commercial licensing, please contact support@quantumnous.com
 import { useState, useEffect } from 'react';
 import { useTranslation } from 'react-i18next';
 import { Modal } from '@douyinfe/semi-ui';
-import { API, copy, showError, showSuccess } from '../../helpers';
+import {
+  API,
+  copy,
+  showError,
+  showSuccess,
+  encodeToBase64,
+} from '../../helpers';
 import { ITEMS_PER_PAGE } from '../../constants';
 import { useTableCompactMode } from '../common/useTableCompactMode';

@@ -136,7 +142,7 @@ export const useTokensData = (openFluentNotification) => {
        apiKey: 'sk-' + record.key,
      };
      let encodedConfig = encodeURIComponent(
-        btoa(JSON.stringify(cherryConfig)),
+        encodeToBase64(JSON.stringify(cherryConfig)),
      );
      url = url.replaceAll('{cherryConfig}', encodedConfig);
    } else {
--- a/web/src/hooks/usage-logs/useUsageLogsData.jsx
+++ b/web/src/hooks/usage-logs/useUsageLogsData.jsx
@@ -361,6 +361,10 @@ export const useLogsData = () => {
                other?.user_group_ratio,
                other.cache_ratio || 1.0,
                other.cache_creation_ratio || 1.0,
+                other.cache_creation_tokens_5m || 0,
+                other.cache_creation_ratio_5m || other.cache_creation_ratio || 1.0,
+                other.cache_creation_tokens_1h || 0,
+                other.cache_creation_ratio_1h || other.cache_creation_ratio || 1.0,
              )
            : renderLogContent(
                other?.model_ratio,
@@ -429,6 +433,10 @@ export const useLogsData = () => {
            other.cache_ratio || 1.0,
            other.cache_creation_tokens || 0,
            other.cache_creation_ratio || 1.0,
+            other.cache_creation_tokens_5m || 0,
+            other.cache_creation_ratio_5m || other.cache_creation_ratio || 1.0,
+            other.cache_creation_tokens_1h || 0,
+            other.cache_creation_ratio_1h || other.cache_creation_ratio || 1.0,
          );
        } else {
          content = renderModelPrice(
--- a/web/src/i18n/locales/en.json
+++ b/web/src/i18n/locales/en.json
@@ -561,6 +561,9 @@
    "启用绘图功能": "Enable drawing function",
    "启用请求体透传功能": "Enable request body pass-through functionality",
    "启用请求透传": "Enable request pass-through",
+    "禁用思考处理的模型列表": "Models skipping thinking handling",
+    "列出的模型将不会自动添加或移除-thinking/-nothinking 后缀": "Models in this list will not automatically add or remove the -thinking/-nothinking suffix.",
+    "请输入JSON数组，如 [\"model-a\",\"model-b\"]": "Enter a JSON array, e.g. [\"model-a\",\"model-b\"]",
    "启用额度消费日志记录": "Enable quota consumption logging",
    "启用验证": "Enable Authentication",
    "周": "week",
@@ -1516,6 +1519,10 @@
    "缓存倍率": "Cache ratio",
    "缓存创建 Tokens": "Cache Creation Tokens",
    "缓存创建: {{cacheCreationRatio}}": "Cache creation: {{cacheCreationRatio}}",
+    "缓存创建: 5m {{cacheCreationRatio5m}}": "Cache creation: 5m {{cacheCreationRatio5m}}",
+    "缓存创建: 1h {{cacheCreationRatio1h}}": "Cache creation: 1h {{cacheCreationRatio1h}}",
+    "缓存创建倍率 5m {{cacheCreationRatio5m}}": "Cache creation multiplier 5m {{cacheCreationRatio5m}}",
+    "缓存创建倍率 1h {{cacheCreationRatio1h}}": "Cache creation multiplier 1h {{cacheCreationRatio1h}}",
    "缓存创建价格：{{symbol}}{{price}} * {{ratio}} = {{symbol}}{{total}} / 1M tokens (缓存创建倍率: {{cacheCreationRatio}})": "Cache creation price: {{symbol}}{{price}} * {{ratio}} = {{symbol}}{{total}} / 1M tokens (Cache creation ratio: {{cacheCreationRatio}})",
    "编辑": "Edit",
    "编辑API": "Edit API",
@@ -2104,4 +2111,4 @@
    "统一的": "The Unified",
    "大模型接口网关": "LLM API Gateway"
  }
-}
+}
--- a/web/src/i18n/locales/fr.json
+++ b/web/src/i18n/locales/fr.json
@@ -564,6 +564,9 @@
    "启用绘图功能": "Activer la fonction de dessin",
    "启用请求体透传功能": "Activer la fonctionnalité de transmission du corps de la requête",
    "启用请求透传": "Activer la transmission de la requête",
+    "禁用思考处理的模型列表": "Liste noire des modèles pour le traitement thinking",
+    "列出的模型将不会自动添加或移除-thinking/-nothinking 后缀": "Les modèles listés ici n'ajouteront ni ne retireront automatiquement le suffixe -thinking/-nothinking.",
+    "请输入JSON数组，如 [\"model-a\",\"model-b\"]": "Saisissez un tableau JSON, par ex. [\"model-a\",\"model-b\"]",
    "启用额度消费日志记录": "Activer la journalisation de la consommation de quota",
    "启用验证": "Activer l'authentification",
    "周": "semaine",
@@ -1525,6 +1528,10 @@
    "缓存倍率": "Ratio de cache",
    "缓存创建 Tokens": "Jetons de création de cache",
    "缓存创建: {{cacheCreationRatio}}": "Création de cache : {{cacheCreationRatio}}",
+    "缓存创建: 5m {{cacheCreationRatio5m}}": "Création de cache : 5m {{cacheCreationRatio5m}}",
+    "缓存创建: 1h {{cacheCreationRatio1h}}": "Création de cache : 1h {{cacheCreationRatio1h}}",
+    "缓存创建倍率 5m {{cacheCreationRatio5m}}": "Multiplicateur de création de cache 5m {{cacheCreationRatio5m}}",
+    "缓存创建倍率 1h {{cacheCreationRatio1h}}": "Multiplicateur de création de cache 1h {{cacheCreationRatio1h}}",
    "缓存创建价格：{{symbol}}{{price}} * {{ratio}} = {{symbol}}{{total}} / 1M tokens (缓存创建倍率: {{cacheCreationRatio}})": "Prix de création du cache : {{symbol}}{{price}} * {{ratio}} = {{symbol}}{{total}} / 1M tokens (taux de création de cache : {{cacheCreationRatio}})",
    "编辑": "Modifier",
    "编辑API": "Modifier l'API",
--- a/web/src/i18n/locales/ja.json
+++ b/web/src/i18n/locales/ja.json
@@ -561,6 +561,9 @@
    "启用绘图功能": "画像生成機能を有効にする",
    "启用请求体透传功能": "リクエストボディのパススルー機能を有効にします。",
    "启用请求透传": "リクエストパススルーを有効にする",
+    "禁用思考处理的模型列表": "Thinking処理を無効化するモデル一覧",
+    "列出的模型将不会自动添加或移除-thinking/-nothinking 后缀": "ここに含まれるモデルでは-thinking/-nothinkingサフィックスを自動的に追加・削除しません。",
+    "请输入JSON数组，如 [\"model-a\",\"model-b\"]": "JSON配列を入力してください（例：[\"model-a\",\"model-b\"]）",
    "启用额度消费日志记录": "クォータ消費のログ記録を有効にする",
    "启用验证": "認証を有効にする",
    "周": "週",
@@ -1516,6 +1519,10 @@
    "缓存倍率": "キャッシュ倍率",
    "缓存创建 Tokens": "キャッシュ作成トークン",
    "缓存创建: {{cacheCreationRatio}}": "キャッシュ作成：{{cacheCreationRatio}}",
+    "缓存创建: 5m {{cacheCreationRatio5m}}": "キャッシュ作成：5m {{cacheCreationRatio5m}}",
+    "缓存创建: 1h {{cacheCreationRatio1h}}": "キャッシュ作成：1h {{cacheCreationRatio1h}}",
+    "缓存创建倍率 5m {{cacheCreationRatio5m}}": "キャッシュ作成倍率 5m {{cacheCreationRatio5m}}",
+    "缓存创建倍率 1h {{cacheCreationRatio1h}}": "キャッシュ作成倍率 1h {{cacheCreationRatio1h}}",
    "缓存创建价格：{{symbol}}{{price}} * {{ratio}} = {{symbol}}{{total}} / 1M tokens (缓存创建倍率: {{cacheCreationRatio}})": "キャッシュ作成料金：{{symbol}}{{price}} * {{ratio}} = {{symbol}}{{total}} / 1Mtokens（キャッシュ作成倍率：{{cacheCreationRatio}}）",
    "编辑": "編集",
    "编辑API": "API編集",
@@ -2075,4 +2082,4 @@
    "统一的": "統合型",
    "大模型接口网关": "LLM APIゲートウェイ"
  }
-}
+}
--- a/web/src/i18n/locales/ru.json
+++ b/web/src/i18n/locales/ru.json
@@ -567,6 +567,9 @@
    "启用绘图功能": "Включить функцию рисования",
    "启用请求体透传功能": "Включить функцию прозрачной передачи тела запроса",
    "启用请求透传": "Включить прозрачную передачу запросов",
+    "禁用思考处理的模型列表": "Список моделей без обработки thinking",
+    "列出的模型将不会自动添加或移除-thinking/-nothinking 后缀": "Для этих моделей суффиксы -thinking/-nothinking не будут добавляться или удаляться автоматически.",
+    "请输入JSON数组，如 [\"model-a\",\"model-b\"]": "Введите JSON-массив, например [\"model-a\",\"model-b\"]",
    "启用额度消费日志记录": "Включить журналирование потребления квоты",
    "启用验证": "Включить проверку",
    "周": "Неделя",
@@ -1534,6 +1537,10 @@
    "缓存倍率": "Коэффициент кэширования",
    "缓存创建 Tokens": "Создание кэша токенов",
    "缓存创建: {{cacheCreationRatio}}": "Создание кэша: {{cacheCreationRatio}}",
+    "缓存创建: 5m {{cacheCreationRatio5m}}": "Создание кэша: 5m {{cacheCreationRatio5m}}",
+    "缓存创建: 1h {{cacheCreationRatio1h}}": "Создание кэша: 1h {{cacheCreationRatio1h}}",
+    "缓存创建倍率 5m {{cacheCreationRatio5m}}": "Множитель создания кэша 5m {{cacheCreationRatio5m}}",
+    "缓存创建倍率 1h {{cacheCreationRatio1h}}": "Множитель создания кэша 1h {{cacheCreationRatio1h}}",
    "缓存创建价格：{{symbol}}{{price}} * {{ratio}} = {{symbol}}{{total}} / 1M tokens (缓存创建倍率: {{cacheCreationRatio}})": "Цена создания кэша: {{symbol}}{{price}} * {{ratio}} = {{symbol}}{{total}} / 1M токенов (коэффициент создания кэша: {{cacheCreationRatio}})",
    "编辑": "Редактировать",
    "编辑API": "Редактировать API",
--- a/web/src/i18n/locales/zh.json
+++ b/web/src/i18n/locales/zh.json
@@ -558,6 +558,9 @@
    "启用绘图功能": "启用绘图功能",
    "启用请求体透传功能": "启用请求体透传功能",
    "启用请求透传": "启用请求透传",
+    "禁用思考处理的模型列表": "禁用思考处理的模型列表",
+    "列出的模型将不会自动添加或移除-thinking/-nothinking 后缀": "列出的模型将不会自动添加或移除-thinking/-nothinking 后缀",
+    "请输入JSON数组，如 [\"model-a\",\"model-b\"]": "请输入JSON数组，如 [\"model-a\",\"model-b\"]",
    "启用额度消费日志记录": "启用额度消费日志记录",
    "启用验证": "启用验证",
    "周": "周",
@@ -1507,6 +1510,10 @@
    "缓存倍率": "缓存倍率",
    "缓存创建 Tokens": "缓存创建 Tokens",
    "缓存创建: {{cacheCreationRatio}}": "缓存创建: {{cacheCreationRatio}}",
+    "缓存创建: 5m {{cacheCreationRatio5m}}": "缓存创建: 5m {{cacheCreationRatio5m}}",
+    "缓存创建: 1h {{cacheCreationRatio1h}}": "缓存创建: 1h {{cacheCreationRatio1h}}",
+    "缓存创建倍率 5m {{cacheCreationRatio5m}}": "缓存创建倍率 5m {{cacheCreationRatio5m}}",
+    "缓存创建倍率 1h {{cacheCreationRatio1h}}": "缓存创建倍率 1h {{cacheCreationRatio1h}}",
    "缓存创建价格：{{symbol}}{{price}} * {{ratio}} = {{symbol}}{{total}} / 1M tokens (缓存创建倍率: {{cacheCreationRatio}})": "缓存创建价格：{{symbol}}{{price}} * {{ratio}} = {{symbol}}{{total}} / 1M tokens (缓存创建倍率: {{cacheCreationRatio}})",
    "编辑": "编辑",
    "编辑API": "编辑API",
@@ -2066,4 +2073,4 @@
    "Creem 介绍": "Creem 是一个简单的支付处理平台，支持固定金额产品销售，以及订阅销售。",
    "Creem Setting Tips": "Creem 只支持预设的固定金额产品，这产品以及价格需要提前在Creem网站内创建配置，所以不支持自定义动态金额充值。在Creem端配置产品的名字以及价格，获取Product Id 后填到下面的产品，在new-api为该产品设置充值额度，以及展示价格。"
  }
-}
+}
--- a/web/src/pages/Playground/index.jsx
+++ b/web/src/pages/Playground/index.jsx
@@ -47,6 +47,7 @@ import {
  createLoadingAssistantMessage,
  getTextContent,
  buildApiPayload,
+  encodeToBase64,
 } from '../../helpers';

 // Components
@@ -72,7 +73,7 @@ const generateAvatarDataUrl = (username) => {
      <text x="50%" y="50%" dominant-baseline="central" text-anchor="middle" font-size="16" fill="#ffffff" font-family="sans-serif">${firstLetter}</text>
    </svg>
  `;
-  return `data:image/svg+xml;base64,${btoa(svg)}`;
+  return `data:image/svg+xml;base64,${encodeToBase64(svg)}`;
 };

 const Playground = () => {
--- a/web/src/pages/Setting/Model/SettingGlobalModel.jsx
+++ b/web/src/pages/Setting/Model/SettingGlobalModel.jsx
@@ -29,23 +29,44 @@ import {
 } from '../../../helpers';
 import { useTranslation } from 'react-i18next';

+const thinkingExample = JSON.stringify(
+  ['moonshotai/kimi-k2-thinking', 'kimi-k2-thinking'],
+  null,
+  2,
+);
+
+const defaultGlobalSettingInputs = {
+  'global.pass_through_request_enabled': false,
+  'global.thinking_model_blacklist': '[]',
+  'general_setting.ping_interval_enabled': false,
+  'general_setting.ping_interval_seconds': 60,
+};
+
 export default function SettingGlobalModel(props) {
  const { t } = useTranslation();

  const [loading, setLoading] = useState(false);
-  const [inputs, setInputs] = useState({
-    'global.pass_through_request_enabled': false,
-    'general_setting.ping_interval_enabled': false,
-    'general_setting.ping_interval_seconds': 60,
-  });
+  const [inputs, setInputs] = useState(defaultGlobalSettingInputs);
  const refForm = useRef();
-  const [inputsRow, setInputsRow] = useState(inputs);
+  const [inputsRow, setInputsRow] = useState(defaultGlobalSettingInputs);
+
+  const normalizeValueBeforeSave = (key, value) => {
+    if (key === 'global.thinking_model_blacklist') {
+      const text = typeof value === 'string' ? value.trim() : '';
+      return text === '' ? '[]' : value;
+    }
+    return value;
+  };

  function onSubmit() {
    const updateArray = compareObjects(inputs, inputsRow);
    if (!updateArray.length) return showWarning(t('你似乎并没有修改什么'));
    const requestQueue = updateArray.map((item) => {
-      let value = String(inputs[item.key]);
+      const normalizedValue = normalizeValueBeforeSave(
+        item.key,
+        inputs[item.key],
+      );
+      let value = String(normalizedValue);

      return API.put('/api/option/', {
        key: item.key,
@@ -74,14 +95,30 @@ export default function SettingGlobalModel(props) {

  useEffect(() => {
    const currentInputs = {};
-    for (let key in props.options) {
-      if (Object.keys(inputs).includes(key)) {
-        currentInputs[key] = props.options[key];
+    for (const key of Object.keys(defaultGlobalSettingInputs)) {
+      if (props.options[key] !== undefined) {
+        let value = props.options[key];
+        if (key === 'global.thinking_model_blacklist') {
+          try {
+            value =
+              value && String(value).trim() !== ''
+                ? JSON.stringify(JSON.parse(value), null, 2)
+                : defaultGlobalSettingInputs[key];
+          } catch (error) {
+            value = defaultGlobalSettingInputs[key];
+          }
+        }
+        currentInputs[key] = value;
+      } else {
+        currentInputs[key] = defaultGlobalSettingInputs[key];
      }
    }
+
    setInputs(currentInputs);
    setInputsRow(structuredClone(currentInputs));
-    refForm.current.setValues(currentInputs);
+    if (refForm.current) {
+      refForm.current.setValues(currentInputs);
+    }
  }, [props.options]);

  return (
@@ -110,6 +147,38 @@ export default function SettingGlobalModel(props) {
                />
              </Col>
            </Row>
+            <Row>
+              <Col span={24}>
+                <Form.TextArea
+                  label={t('禁用思考处理的模型列表')}
+                  field={'global.thinking_model_blacklist'}
+                  placeholder={
+                    t('例如：') +
+                    '\n' +
+                    thinkingExample
+                  }
+                  rows={4}
+                  rules={[
+                    {
+                      validator: (rule, value) => {
+                        if (!value || value.trim() === '') return true;
+                        return verifyJSON(value);
+                      },
+                      message: t('不是合法的 JSON 字符串'),
+                    },
+                  ]}
+                  extraText={t(
+                    '列出的模型将不会自动添加或移除-thinking/-nothinking 后缀',
+                  )}
+                  onChange={(value) =>
+                    setInputs({
+                      ...inputs,
+                      'global.thinking_model_blacklist': value,
+                    })
+                  }
+                />
+              </Col>
+            </Row>

            <Form.Section text={t('连接保活设置')}>
              <Row style={{ marginTop: 10 }}>
Author	SHA1	Message	Date
Seefs	e082268533	feat: ShouldPreserveThinkingSuffix (#2189 )	2025-11-07 17:43:33 +08:00
Seefs	43ee7a98b4	Merge pull request #2188 from QuantumNous/fix-multikey-autodisable fix(channel): 当没有可用密钥时返回错误而不是第一个密钥	2025-11-07 17:41:39 +08:00
Seefs	8ffa961db1	Merge pull request #2156 from feitianbubu/pr/fix-tag-whitespace fix: tag splitting by whitespace	2025-11-07 17:40:02 +08:00
creamlike1024	e87b460070	fix(channel): 当没有可用密钥时返回错误而不是第一个密钥	2025-11-07 16:27:54 +08:00
feitianbubu	65355d8863	fix: update tag normalization regex	2025-11-06 23:24:37 +08:00
CaIon	3dc4d6c39e	feat: restrict automatic channel testing to master node only	2025-11-06 21:12:59 +08:00
Seefs	019412c27a	feat: EditTagModal header && param (#2159 )	2025-11-06 20:18:45 +08:00
Seefs	96a2b81aaa	add custom tool (#2157 )	2025-11-06 20:18:25 +08:00
Seefs	fb610e62a0	fix playground (#2153 )	2025-11-06 20:18:00 +08:00
CaIon	736f7b55b7	feat: add TASK_PRICE_PATCH environment variable for per-task billing configuration	2025-11-06 20:06:02 +08:00
Seefs	2fd33ea294	Merge pull request #2168 from feitianbubu/pr/fix-jimeng-1080p-image fix: trim suffix p for jimeng image model	2025-11-06 19:54:02 +08:00
Seefs	53123aaf94	Merge pull request #2178 from LeonDevLifeLog/main feat: add environment variable switch for critical rate limit	2025-11-06 19:48:28 +08:00
Seefs	f8f5d26600	Merge pull request #2182 from zhaolion/main feat: EditTokenModal 中针对用户创建的 token 默认无限额度	2025-11-06 19:41:27 +08:00
zhaolion	c86bc94d9d	feat: EditTokenModal 中针对用户创建的 token 默认无限额度	2025-11-06 19:36:23 +08:00
Leon	50e8639a40	feat: add environment variable switch for critical rate limit	2025-11-06 15:23:34 +08:00
CaIon	424325162e	feat: enhance Ali video request processing with resolution mapping and size validation	2025-11-05 16:02:39 +08:00
CaIon	a9a8676f7c	fix: logger	2025-11-05 14:49:55 +08:00
feitianbubu	14295f0035	fix: trim suffix p for jimeng image model	2025-11-04 20:21:33 +08:00
IcedTangerine	29e70acc55	Merge pull request #2167 from feitianbubu/pr/fix-jimeng-v30-pro 修复即梦v30-pro视频生成失败问题	2025-11-04 18:37:44 +08:00
feitianbubu	8599b348c0	feat: jimeng_v30_pro only jimeng_ti2v_v30_pro model	2025-11-04 18:29:53 +08:00
IcedTangerine	6a761c2dba	fix: openai 音频模型流模式未正确计费 (#2160 )	2025-11-04 01:43:04 +08:00
Seefs	df2ee649ab	feat: claude 1h cache (#2155 ) * feat: claude 1h cache * feat: claude 1h cache * fix price	2025-11-04 00:20:50 +08:00
feitianbubu	f6b32a664a	fix: tag splitting by whitespace	2025-11-03 18:48:49 +08:00
CaIon	00782aae88	refactor: comment out image file validation for qwen edit in Ali image processing	2025-11-01 14:31:32 +08:00
CaIon	70f8a59a65	fix: improve error handling and validation in Ali video request conversion	2025-10-31 22:39:35 +08:00
CaIon	a4cf9bb6fe	feat: enhance Ali video request handling and validation	2025-10-31 22:26:56 +08:00