Merge pull request #788 from MartialBE/main

feat: Add Claude 3.7 Sonnet thinking mode support
Merge branch 'main' into thinking
2026-04-16 19:57:26 +00:00 · 2025-02-25 15:21:39 +08:00 · 2025-02-25 15:21:22 +08:00 · 2025-02-25 14:37:03 +08:00 · 2025-02-25 14:10:43 +08:00 · 2025-02-25 02:55:23 +08:00
36 changed files with 805 additions and 189 deletions
--- a/README.en.md
+++ b/README.en.md
@@ -63,6 +63,8 @@
    - Add suffix `-high` to set high reasoning effort (e.g., `o3-mini-high`)
    - Add suffix `-medium` to set medium reasoning effort
    - Add suffix `-low` to set low reasoning effort
+17. 🔄 Thinking to content option `thinking_to_content` in `Channel->Edit->Channel Extra Settings`, default is `false`, when `true`, the `reasoning_content` of the thinking content will be converted to `<think>` tags and concatenated to the content returned.
+18. 🔄 Model rate limit, support setting total request limit and successful request limit in `System Settings->Rate Limit Settings`

 ## Model Support
 This version additionally supports:
--- a/README.md
+++ b/README.md
@@ -69,6 +69,8 @@
    - 添加后缀 `-high` 设置为 high reasoning effort (例如: `o3-mini-high`)
    - 添加后缀 `-medium` 设置为 medium reasoning effort (例如: `o3-mini-medium`)
    - 添加后缀 `-low` 设置为 low reasoning effort (例如: `o3-mini-low`)
+18. 🔄 思考转内容，支持在 `渠道-编辑-渠道额外设置` 中设置 `thinking_to_content` 选项，默认`false`，开启后会将思考内容`reasoning_content`转换为`<think>`标签拼接到内容中返回。
+19. 🔄 模型限流，支持在 `系统设置-速率限制设置` 中设置模型限流，支持设置总请求数限制和成功请求数限制

 ## 模型支持
 此版本额外支持以下模型：
--- a/Rerank.md
+++ b/Rerank.md
@@ -13,7 +13,7 @@ Request:

 ```json
 {
-  "model": "rerank-multilingual-v3.0",
+  "model": "jina-reranker-v2-base-multilingual",
  "query": "What is the capital of the United States?",
  "top_n": 3,
  "documents": [
--- a/common/constants.go
+++ b/common/constants.go
@@ -276,7 +276,7 @@ var ChannelBaseURLs = []string{
 	"https://api.cohere.ai",                     //34
 	"https://api.minimax.chat",                  //35
 	"",                                          //36
-	"",                                          //37
+	"https://api.dify.ai",                       //37
 	"https://api.jina.ai",                       //38
 	"https://api.cloudflare.com",                //39
 	"https://api.siliconflow.cn",                //40
--- a/common/model-ratio.go
+++ b/common/model-ratio.go
@@ -83,92 +83,94 @@ var defaultModelRatio = map[string]float64{
 	"text-curie-001":         1,
 	//"text-davinci-002":               10,
 	//"text-davinci-003":               10,
-	"text-davinci-edit-001":          10,
-	"code-davinci-edit-001":          10,
-	"whisper-1":                      15,  // $0.006 / minute -> $0.006 / 150 words -> $0.006 / 200 tokens -> $0.03 / 1k tokens
-	"tts-1":                          7.5, // 1k characters -> $0.015
-	"tts-1-1106":                     7.5, // 1k characters -> $0.015
-	"tts-1-hd":                       15,  // 1k characters -> $0.03
-	"tts-1-hd-1106":                  15,  // 1k characters -> $0.03
-	"davinci":                        10,
-	"curie":                          10,
-	"babbage":                        10,
-	"ada":                            10,
-	"text-embedding-3-small":         0.01,
-	"text-embedding-3-large":         0.065,
-	"text-embedding-ada-002":         0.05,
-	"text-search-ada-doc-001":        10,
-	"text-moderation-stable":         0.1,
-	"text-moderation-latest":         0.1,
-	"claude-instant-1":               0.4,   // $0.8 / 1M tokens
-	"claude-2.0":                     4,     // $8 / 1M tokens
-	"claude-2.1":                     4,     // $8 / 1M tokens
-	"claude-3-haiku-20240307":        0.125, // $0.25 / 1M tokens
-	"claude-3-5-haiku-20241022":      0.5,   // $1 / 1M tokens
-	"claude-3-sonnet-20240229":       1.5,   // $3 / 1M tokens
-	"claude-3-5-sonnet-20240620":     1.5,
-	"claude-3-5-sonnet-20241022":     1.5,
-	"claude-3-opus-20240229":         7.5, // $15 / 1M tokens
-	"ERNIE-4.0-8K":                   0.120 * RMB,
-	"ERNIE-3.5-8K":                   0.012 * RMB,
-	"ERNIE-3.5-8K-0205":              0.024 * RMB,
-	"ERNIE-3.5-8K-1222":              0.012 * RMB,
-	"ERNIE-Bot-8K":                   0.024 * RMB,
-	"ERNIE-3.5-4K-0205":              0.012 * RMB,
-	"ERNIE-Speed-8K":                 0.004 * RMB,
-	"ERNIE-Speed-128K":               0.004 * RMB,
-	"ERNIE-Lite-8K-0922":             0.008 * RMB,
-	"ERNIE-Lite-8K-0308":             0.003 * RMB,
-	"ERNIE-Tiny-8K":                  0.001 * RMB,
-	"BLOOMZ-7B":                      0.004 * RMB,
-	"Embedding-V1":                   0.002 * RMB,
-	"bge-large-zh":                   0.002 * RMB,
-	"bge-large-en":                   0.002 * RMB,
-	"tao-8k":                         0.002 * RMB,
-	"PaLM-2":                         1,
-	"gemini-pro":                     1, // $0.00025 / 1k characters -> $0.001 / 1k tokens
-	"gemini-pro-vision":              1, // $0.00025 / 1k characters -> $0.001 / 1k tokens
-	"gemini-1.0-pro-vision-001":      1,
-	"gemini-1.0-pro-001":             1,
-	"gemini-1.5-pro-latest":          1.75, // $3.5 / 1M tokens
-	"gemini-1.5-pro-exp-0827":        1.75, // $3.5 / 1M tokens
-	"gemini-1.5-flash-latest":        1,
-	"gemini-1.5-flash-exp-0827":      1,
-	"gemini-1.0-pro-latest":          1,
-	"gemini-1.0-pro-vision-latest":   1,
-	"gemini-ultra":                   1,
-	"chatglm_turbo":                  0.3572,     // ￥0.005 / 1k tokens
-	"chatglm_pro":                    0.7143,     // ￥0.01 / 1k tokens
-	"chatglm_std":                    0.3572,     // ￥0.005 / 1k tokens
-	"chatglm_lite":                   0.1429,     // ￥0.002 / 1k tokens
-	"glm-4":                          7.143,      // ￥0.1 / 1k tokens
-	"glm-4v":                         0.05 * RMB, // ￥0.05 / 1k tokens
-	"glm-4-alltools":                 0.1 * RMB,  // ￥0.1 / 1k tokens
-	"glm-3-turbo":                    0.3572,
-	"glm-4-plus":                     0.05 * RMB,
-	"glm-4-0520":                     0.1 * RMB,
-	"glm-4-air":                      0.001 * RMB,
-	"glm-4-airx":                     0.01 * RMB,
-	"glm-4-long":                     0.001 * RMB,
-	"glm-4-flash":                    0,
-	"glm-4v-plus":                    0.01 * RMB,
-	"qwen-turbo":                     0.8572, // ￥0.012 / 1k tokens
-	"qwen-plus":                      10,     // ￥0.14 / 1k tokens
-	"text-embedding-v1":              0.05,   // ￥0.0007 / 1k tokens
-	"SparkDesk-v1.1":                 1.2858, // ￥0.018 / 1k tokens
-	"SparkDesk-v2.1":                 1.2858, // ￥0.018 / 1k tokens
-	"SparkDesk-v3.1":                 1.2858, // ￥0.018 / 1k tokens
-	"SparkDesk-v3.5":                 1.2858, // ￥0.018 / 1k tokens
-	"SparkDesk-v4.0":                 1.2858,
-	"360GPT_S2_V9":                   0.8572, // ¥0.012 / 1k tokens
-	"360gpt-turbo":                   0.0858, // ¥0.0012 / 1k tokens
-	"360gpt-turbo-responsibility-8k": 0.8572, // ¥0.012 / 1k tokens
-	"360gpt-pro":                     0.8572, // ¥0.012 / 1k tokens
-	"360gpt2-pro":                    0.8572, // ¥0.012 / 1k tokens
-	"embedding-bert-512-v1":          0.0715, // ¥0.001 / 1k tokens
-	"embedding_s1_v1":                0.0715, // ¥0.001 / 1k tokens
-	"semantic_similarity_s1_v1":      0.0715, // ¥0.001 / 1k tokens
-	"hunyuan":                        7.143,  // ¥0.1 / 1k tokens  // https://cloud.tencent.com/document/product/1729/97731#e0e6be58-60c8-469f-bdeb-6c264ce3b4d0
+	"text-davinci-edit-001":               10,
+	"code-davinci-edit-001":               10,
+	"whisper-1":                           15,  // $0.006 / minute -> $0.006 / 150 words -> $0.006 / 200 tokens -> $0.03 / 1k tokens
+	"tts-1":                               7.5, // 1k characters -> $0.015
+	"tts-1-1106":                          7.5, // 1k characters -> $0.015
+	"tts-1-hd":                            15,  // 1k characters -> $0.03
+	"tts-1-hd-1106":                       15,  // 1k characters -> $0.03
+	"davinci":                             10,
+	"curie":                               10,
+	"babbage":                             10,
+	"ada":                                 10,
+	"text-embedding-3-small":              0.01,
+	"text-embedding-3-large":              0.065,
+	"text-embedding-ada-002":              0.05,
+	"text-search-ada-doc-001":             10,
+	"text-moderation-stable":              0.1,
+	"text-moderation-latest":              0.1,
+	"claude-instant-1":                    0.4,   // $0.8 / 1M tokens
+	"claude-2.0":                          4,     // $8 / 1M tokens
+	"claude-2.1":                          4,     // $8 / 1M tokens
+	"claude-3-haiku-20240307":             0.125, // $0.25 / 1M tokens
+	"claude-3-5-haiku-20241022":           0.5,   // $1 / 1M tokens
+	"claude-3-sonnet-20240229":            1.5,   // $3 / 1M tokens
+	"claude-3-5-sonnet-20240620":          1.5,
+	"claude-3-5-sonnet-20241022":          1.5,
+	"claude-3-7-sonnet-20250219":          1.5,
+	"claude-3-7-sonnet-20250219-thinking": 1.5,
+	"claude-3-opus-20240229":              7.5, // $15 / 1M tokens
+	"ERNIE-4.0-8K":                        0.120 * RMB,
+	"ERNIE-3.5-8K":                        0.012 * RMB,
+	"ERNIE-3.5-8K-0205":                   0.024 * RMB,
+	"ERNIE-3.5-8K-1222":                   0.012 * RMB,
+	"ERNIE-Bot-8K":                        0.024 * RMB,
+	"ERNIE-3.5-4K-0205":                   0.012 * RMB,
+	"ERNIE-Speed-8K":                      0.004 * RMB,
+	"ERNIE-Speed-128K":                    0.004 * RMB,
+	"ERNIE-Lite-8K-0922":                  0.008 * RMB,
+	"ERNIE-Lite-8K-0308":                  0.003 * RMB,
+	"ERNIE-Tiny-8K":                       0.001 * RMB,
+	"BLOOMZ-7B":                           0.004 * RMB,
+	"Embedding-V1":                        0.002 * RMB,
+	"bge-large-zh":                        0.002 * RMB,
+	"bge-large-en":                        0.002 * RMB,
+	"tao-8k":                              0.002 * RMB,
+	"PaLM-2":                              1,
+	"gemini-pro":                          1, // $0.00025 / 1k characters -> $0.001 / 1k tokens
+	"gemini-pro-vision":                   1, // $0.00025 / 1k characters -> $0.001 / 1k tokens
+	"gemini-1.0-pro-vision-001":           1,
+	"gemini-1.0-pro-001":                  1,
+	"gemini-1.5-pro-latest":               1.75, // $3.5 / 1M tokens
+	"gemini-1.5-pro-exp-0827":             1.75, // $3.5 / 1M tokens
+	"gemini-1.5-flash-latest":             1,
+	"gemini-1.5-flash-exp-0827":           1,
+	"gemini-1.0-pro-latest":               1,
+	"gemini-1.0-pro-vision-latest":        1,
+	"gemini-ultra":                        1,
+	"chatglm_turbo":                       0.3572,     // ￥0.005 / 1k tokens
+	"chatglm_pro":                         0.7143,     // ￥0.01 / 1k tokens
+	"chatglm_std":                         0.3572,     // ￥0.005 / 1k tokens
+	"chatglm_lite":                        0.1429,     // ￥0.002 / 1k tokens
+	"glm-4":                               7.143,      // ￥0.1 / 1k tokens
+	"glm-4v":                              0.05 * RMB, // ￥0.05 / 1k tokens
+	"glm-4-alltools":                      0.1 * RMB,  // ￥0.1 / 1k tokens
+	"glm-3-turbo":                         0.3572,
+	"glm-4-plus":                          0.05 * RMB,
+	"glm-4-0520":                          0.1 * RMB,
+	"glm-4-air":                           0.001 * RMB,
+	"glm-4-airx":                          0.01 * RMB,
+	"glm-4-long":                          0.001 * RMB,
+	"glm-4-flash":                         0,
+	"glm-4v-plus":                         0.01 * RMB,
+	"qwen-turbo":                          0.8572, // ￥0.012 / 1k tokens
+	"qwen-plus":                           10,     // ￥0.14 / 1k tokens
+	"text-embedding-v1":                   0.05,   // ￥0.0007 / 1k tokens
+	"SparkDesk-v1.1":                      1.2858, // ￥0.018 / 1k tokens
+	"SparkDesk-v2.1":                      1.2858, // ￥0.018 / 1k tokens
+	"SparkDesk-v3.1":                      1.2858, // ￥0.018 / 1k tokens
+	"SparkDesk-v3.5":                      1.2858, // ￥0.018 / 1k tokens
+	"SparkDesk-v4.0":                      1.2858,
+	"360GPT_S2_V9":                        0.8572, // ¥0.012 / 1k tokens
+	"360gpt-turbo":                        0.0858, // ¥0.0012 / 1k tokens
+	"360gpt-turbo-responsibility-8k":      0.8572, // ¥0.012 / 1k tokens
+	"360gpt-pro":                          0.8572, // ¥0.012 / 1k tokens
+	"360gpt2-pro":                         0.8572, // ¥0.012 / 1k tokens
+	"embedding-bert-512-v1":               0.0715, // ¥0.001 / 1k tokens
+	"embedding_s1_v1":                     0.0715, // ¥0.001 / 1k tokens
+	"semantic_similarity_s1_v1":           0.0715, // ¥0.001 / 1k tokens
+	"hunyuan":                             7.143,  // ¥0.1 / 1k tokens  // https://cloud.tencent.com/document/product/1729/97731#e0e6be58-60c8-469f-bdeb-6c264ce3b4d0
 	// https://platform.lingyiwanwu.com/docs#-计费单元
 	// 已经按照 7.2 来换算美元价格
 	"yi-34b-chat-0205":       0.18,
--- a/constant/channel_setting.go
+++ b/constant/channel_setting.go
@@ -1,6 +1,7 @@
 package constant

 var (
-	ForceFormat        = "force_format" // ForceFormat 强制格式化为OpenAI格式
-	ChanelSettingProxy = "proxy"        // Proxy 代理
+	ForceFormat                     = "force_format"        // ForceFormat 强制格式化为OpenAI格式
+	ChanelSettingProxy              = "proxy"               // Proxy 代理
+	ChannelSettingThinkingToContent = "thinking_to_content" // ThinkingToContent
 )
--- a/controller/relay.go
+++ b/controller/relay.go
@@ -85,6 +85,7 @@ func Relay(c *gin.Context) {

 	if openaiErr != nil {
 		if openaiErr.StatusCode == http.StatusTooManyRequests {
+			common.LogError(c, fmt.Sprintf("origin 429 error: %s", openaiErr.Error.Message))
 			openaiErr.Error.Message = "当前分组上游负载已饱和，请稍后再试"
 		}
 		openaiErr.Error.Message = common.MessageWithRequestId(openaiErr.Error.Message, requestId)
--- a/controller/user.go
+++ b/controller/user.go
@@ -913,11 +913,11 @@ func TopUp(c *gin.Context) {
 }

 type UpdateUserSettingRequest struct {
-	QuotaWarningType      string `json:"notify_type"`
-	QuotaWarningThreshold int    `json:"quota_warning_threshold"`
-	WebhookUrl            string `json:"webhook_url,omitempty"`
-	WebhookSecret         string `json:"webhook_secret,omitempty"`
-	NotificationEmail     string `json:"notification_email,omitempty"`
+	QuotaWarningType      string  `json:"notify_type"`
+	QuotaWarningThreshold float64 `json:"quota_warning_threshold"`
+	WebhookUrl            string  `json:"webhook_url,omitempty"`
+	WebhookSecret         string  `json:"webhook_secret,omitempty"`
+	NotificationEmail     string  `json:"notification_email,omitempty"`
 }

 func UpdateUserSetting(c *gin.Context) {
--- a/docs/channel/other_setting.md
+++ b/docs/channel/other_setting.md
@@ -10,6 +10,10 @@
    - 用于配置网络代理
    - 类型为字符串，填写代理地址（例如 socks5 协议的代理地址）

+3. thinking_to_content
+   - 用于标识是否将思考内容`reasoning_conetnt`转换为`<think>`标签拼接到内容中返回
+   - 类型为布尔值，设置为 true 时启用思考内容转换
+
 --------------------------------------------------------------

 ## JSON 格式示例
@@ -19,6 +23,7 @@
 ```json
 {
    "force_format": true,
+   "thinking_to_content": true,
    "proxy": "socks5://xxxxxxx"
 }
 ```
--- a/dto/openai_request.go
+++ b/dto/openai_request.go
@@ -1,6 +1,9 @@
 package dto

-import "encoding/json"
+import (
+	"encoding/json"
+	"strings"
+)

 type ResponseFormat struct {
 	Type       string            `json:"type,omitempty"`
@@ -47,6 +50,7 @@ type GeneralOpenAIRequest struct {
 	Dimensions          int             `json:"dimensions,omitempty"`
 	Modalities          any             `json:"modalities,omitempty"`
 	Audio               any             `json:"audio,omitempty"`
+	ExtraBody           any             `json:"extra_body,omitempty"`
 }

 type OpenAITools struct {
@@ -101,7 +105,7 @@ type Message struct {

 type MediaContent struct {
 	Type       string `json:"type"`
-	Text       string `json:"text"`
+	Text       string `json:"text,omitempty"`
 	ImageUrl   any    `json:"image_url,omitempty"`
 	InputAudio any    `json:"input_audio,omitempty"`
 }
@@ -153,11 +157,24 @@ func (m *Message) StringContent() string {
 	if m.parsedStringContent != nil {
 		return *m.parsedStringContent
 	}
+
 	var stringContent string
 	if err := json.Unmarshal(m.Content, &stringContent); err == nil {
+		m.parsedStringContent = &stringContent
 		return stringContent
 	}
-	return string(m.Content)
+
+	contentStr := new(strings.Builder)
+	arrayContent := m.ParseContent()
+	for _, content := range arrayContent {
+		if content.Type == ContentTypeText {
+			contentStr.WriteString(content.Text)
+		}
+	}
+	stringContent = contentStr.String()
+	m.parsedStringContent = &stringContent
+
+	return stringContent
 }

 func (m *Message) SetStringContent(content string) {
--- a/dto/openai_response.go
+++ b/dto/openai_response.go
@@ -86,6 +86,10 @@ func (c *ChatCompletionsStreamResponseChoiceDelta) GetReasoningContent() string
 	return *c.ReasoningContent
 }

+func (c *ChatCompletionsStreamResponseChoiceDelta) SetReasoningContent(s string) {
+	c.ReasoningContent = &s
+}
+
 type ToolCall struct {
 	// Index is not nil only in chat completion chunk object
 	Index    *int         `json:"index,omitempty"`
@@ -116,6 +120,20 @@ type ChatCompletionsStreamResponse struct {
 	Usage             *Usage                                `json:"usage"`
 }

+func (c *ChatCompletionsStreamResponse) Copy() *ChatCompletionsStreamResponse {
+	choices := make([]ChatCompletionsStreamResponseChoice, len(c.Choices))
+	copy(choices, c.Choices)
+	return &ChatCompletionsStreamResponse{
+		Id:                c.Id,
+		Object:            c.Object,
+		Created:           c.Created,
+		Model:             c.Model,
+		SystemFingerprint: c.SystemFingerprint,
+		Choices:           choices,
+		Usage:             c.Usage,
+	}
+}
+
 func (c *ChatCompletionsStreamResponse) GetSystemFingerprint() string {
 	if c.SystemFingerprint == nil {
 		return ""
--- a/middleware/model-rate-limit.go
+++ b/middleware/model-rate-limit.go
@@ -0,0 +1,172 @@
+package middleware
+
+import (
+	"context"
+	"fmt"
+	"net/http"
+	"one-api/common"
+	"one-api/setting"
+	"strconv"
+	"time"
+
+	"github.com/gin-gonic/gin"
+	"github.com/go-redis/redis/v8"
+)
+
+const (
+	ModelRequestRateLimitCountMark        = "MRRL"
+	ModelRequestRateLimitSuccessCountMark = "MRRLS"
+)
+
+// 检查Redis中的请求限制
+func checkRedisRateLimit(ctx context.Context, rdb *redis.Client, key string, maxCount int, duration int64) (bool, error) {
+	// 如果maxCount为0，表示不限制
+	if maxCount == 0 {
+		return true, nil
+	}
+
+	// 获取当前计数
+	length, err := rdb.LLen(ctx, key).Result()
+	if err != nil {
+		return false, err
+	}
+
+	// 如果未达到限制，允许请求
+	if length < int64(maxCount) {
+		return true, nil
+	}
+
+	// 检查时间窗口
+	oldTimeStr, _ := rdb.LIndex(ctx, key, -1).Result()
+	oldTime, err := time.Parse(timeFormat, oldTimeStr)
+	if err != nil {
+		return false, err
+	}
+
+	nowTimeStr := time.Now().Format(timeFormat)
+	nowTime, err := time.Parse(timeFormat, nowTimeStr)
+	if err != nil {
+		return false, err
+	}
+	// 如果在时间窗口内已达到限制，拒绝请求
+	subTime := nowTime.Sub(oldTime).Seconds()
+	if int64(subTime) < duration {
+		rdb.Expire(ctx, key, common.RateLimitKeyExpirationDuration)
+		return false, nil
+	}
+
+	return true, nil
+}
+
+// 记录Redis请求
+func recordRedisRequest(ctx context.Context, rdb *redis.Client, key string, maxCount int) {
+	// 如果maxCount为0，不记录请求
+	if maxCount == 0 {
+		return
+	}
+
+	now := time.Now().Format(timeFormat)
+	rdb.LPush(ctx, key, now)
+	rdb.LTrim(ctx, key, 0, int64(maxCount-1))
+	rdb.Expire(ctx, key, common.RateLimitKeyExpirationDuration)
+}
+
+// Redis限流处理器
+func redisRateLimitHandler(duration int64, totalMaxCount, successMaxCount int) gin.HandlerFunc {
+	return func(c *gin.Context) {
+		userId := strconv.Itoa(c.GetInt("id"))
+		ctx := context.Background()
+		rdb := common.RDB
+
+		// 1. 检查总请求数限制（当totalMaxCount为0时会自动跳过）
+		totalKey := fmt.Sprintf("rateLimit:%s:%s", ModelRequestRateLimitCountMark, userId)
+		allowed, err := checkRedisRateLimit(ctx, rdb, totalKey, totalMaxCount, duration)
+		if err != nil {
+			fmt.Println("检查总请求数限制失败:", err.Error())
+			abortWithOpenAiMessage(c, http.StatusInternalServerError, "rate_limit_check_failed")
+			return
+		}
+		if !allowed {
+			abortWithOpenAiMessage(c, http.StatusTooManyRequests, fmt.Sprintf("您已达到总请求数限制：%d分钟内最多请求%d次，包括失败次数，请检查您的请求是否正确", setting.ModelRequestRateLimitDurationMinutes, totalMaxCount))
+		}
+
+		// 2. 检查成功请求数限制
+		successKey := fmt.Sprintf("rateLimit:%s:%s", ModelRequestRateLimitSuccessCountMark, userId)
+		allowed, err = checkRedisRateLimit(ctx, rdb, successKey, successMaxCount, duration)
+		if err != nil {
+			fmt.Println("检查成功请求数限制失败:", err.Error())
+			abortWithOpenAiMessage(c, http.StatusInternalServerError, "rate_limit_check_failed")
+			return
+		}
+		if !allowed {
+			abortWithOpenAiMessage(c, http.StatusTooManyRequests, fmt.Sprintf("您已达到请求数限制：%d分钟内最多请求%d次", setting.ModelRequestRateLimitDurationMinutes, successMaxCount))
+			return
+		}
+
+		// 3. 记录总请求（当totalMaxCount为0时会自动跳过）
+		recordRedisRequest(ctx, rdb, totalKey, totalMaxCount)
+
+		// 4. 处理请求
+		c.Next()
+
+		// 5. 如果请求成功，记录成功请求
+		if c.Writer.Status() < 400 {
+			recordRedisRequest(ctx, rdb, successKey, successMaxCount)
+		}
+	}
+}
+
+// 内存限流处理器
+func memoryRateLimitHandler(duration int64, totalMaxCount, successMaxCount int) gin.HandlerFunc {
+	inMemoryRateLimiter.Init(common.RateLimitKeyExpirationDuration)
+
+	return func(c *gin.Context) {
+		userId := strconv.Itoa(c.GetInt("id"))
+		totalKey := ModelRequestRateLimitCountMark + userId
+		successKey := ModelRequestRateLimitSuccessCountMark + userId
+
+		// 1. 检查总请求数限制（当totalMaxCount为0时跳过）
+		if totalMaxCount > 0 && !inMemoryRateLimiter.Request(totalKey, totalMaxCount, duration) {
+			c.Status(http.StatusTooManyRequests)
+			c.Abort()
+			return
+		}
+
+		// 2. 检查成功请求数限制
+		// 使用一个临时key来检查限制，这样可以避免实际记录
+		checkKey := successKey + "_check"
+		if !inMemoryRateLimiter.Request(checkKey, successMaxCount, duration) {
+			c.Status(http.StatusTooManyRequests)
+			c.Abort()
+			return
+		}
+
+		// 3. 处理请求
+		c.Next()
+
+		// 4. 如果请求成功，记录到实际的成功请求计数中
+		if c.Writer.Status() < 400 {
+			inMemoryRateLimiter.Request(successKey, successMaxCount, duration)
+		}
+	}
+}
+
+// ModelRequestRateLimit 模型请求限流中间件
+func ModelRequestRateLimit() func(c *gin.Context) {
+	// 如果未启用限流，直接放行
+	if !setting.ModelRequestRateLimitEnabled {
+		return defNext
+	}
+
+	// 计算限流参数
+	duration := int64(setting.ModelRequestRateLimitDurationMinutes * 60)
+	totalMaxCount := setting.ModelRequestRateLimitCount
+	successMaxCount := setting.ModelRequestRateLimitSuccessCount
+
+	// 根据存储类型选择限流处理器
+	if common.RedisEnabled {
+		return redisRateLimitHandler(duration, totalMaxCount, successMaxCount)
+	} else {
+		return memoryRateLimitHandler(duration, totalMaxCount, successMaxCount)
+	}
+}
--- a/model/option.go
+++ b/model/option.go
@@ -85,6 +85,9 @@ func InitOptionMap() {
 	common.OptionMap["QuotaForInvitee"] = strconv.Itoa(common.QuotaForInvitee)
 	common.OptionMap["QuotaRemindThreshold"] = strconv.Itoa(common.QuotaRemindThreshold)
 	common.OptionMap["ShouldPreConsumedQuota"] = strconv.Itoa(common.PreConsumedQuota)
+	common.OptionMap["ModelRequestRateLimitCount"] = strconv.Itoa(setting.ModelRequestRateLimitCount)
+	common.OptionMap["ModelRequestRateLimitDurationMinutes"] = strconv.Itoa(setting.ModelRequestRateLimitDurationMinutes)
+	common.OptionMap["ModelRequestRateLimitSuccessCount"] = strconv.Itoa(setting.ModelRequestRateLimitSuccessCount)
 	common.OptionMap["ModelRatio"] = common.ModelRatio2JSONString()
 	common.OptionMap["ModelPrice"] = common.ModelPrice2JSONString()
 	common.OptionMap["GroupRatio"] = setting.GroupRatio2JSONString()
@@ -105,6 +108,7 @@ func InitOptionMap() {
 	common.OptionMap["MjActionCheckSuccessEnabled"] = strconv.FormatBool(setting.MjActionCheckSuccessEnabled)
 	common.OptionMap["CheckSensitiveEnabled"] = strconv.FormatBool(setting.CheckSensitiveEnabled)
 	common.OptionMap["DemoSiteEnabled"] = strconv.FormatBool(setting.DemoSiteEnabled)
+	common.OptionMap["ModelRequestRateLimitEnabled"] = strconv.FormatBool(setting.ModelRequestRateLimitEnabled)
 	common.OptionMap["CheckSensitiveOnPromptEnabled"] = strconv.FormatBool(setting.CheckSensitiveOnPromptEnabled)
 	//common.OptionMap["CheckSensitiveOnCompletionEnabled"] = strconv.FormatBool(constant.CheckSensitiveOnCompletionEnabled)
 	common.OptionMap["StopOnSensitiveEnabled"] = strconv.FormatBool(setting.StopOnSensitiveEnabled)
@@ -226,6 +230,9 @@ func updateOptionMap(key string, value string) (err error) {
 			setting.DemoSiteEnabled = boolValue
 		case "CheckSensitiveOnPromptEnabled":
 			setting.CheckSensitiveOnPromptEnabled = boolValue
+		case "ModelRequestRateLimitEnabled":
+			setting.ModelRequestRateLimitEnabled = boolValue
+
 		//case "CheckSensitiveOnCompletionEnabled":
 		//	constant.CheckSensitiveOnCompletionEnabled = boolValue
 		case "StopOnSensitiveEnabled":
@@ -308,6 +315,12 @@ func updateOptionMap(key string, value string) (err error) {
 		common.QuotaRemindThreshold, _ = strconv.Atoi(value)
 	case "ShouldPreConsumedQuota":
 		common.PreConsumedQuota, _ = strconv.Atoi(value)
+	case "ModelRequestRateLimitCount":
+		setting.ModelRequestRateLimitCount, _ = strconv.Atoi(value)
+	case "ModelRequestRateLimitDurationMinutes":
+		setting.ModelRequestRateLimitDurationMinutes, _ = strconv.Atoi(value)
+	case "ModelRequestRateLimitSuccessCount":
+		setting.ModelRequestRateLimitSuccessCount, _ = strconv.Atoi(value)
 	case "RetryTimes":
 		common.RetryTimes, _ = strconv.Atoi(value)
 	case "DataExportInterval":
--- a/relay/channel/aws/constants.go
+++ b/relay/channel/aws/constants.go
@@ -9,7 +9,8 @@ var awsModelIDMap = map[string]string{
 	"claude-3-haiku-20240307":    "anthropic.claude-3-haiku-20240307-v1:0",
 	"claude-3-5-sonnet-20240620": "anthropic.claude-3-5-sonnet-20240620-v1:0",
 	"claude-3-5-sonnet-20241022": "anthropic.claude-3-5-sonnet-20241022-v2:0",
-	"claude-3-5-haiku-20241022": "anthropic.claude-3-5-haiku-20241022-v1:0",
+	"claude-3-5-haiku-20241022":  "anthropic.claude-3-5-haiku-20241022-v1:0",
+	"claude-3-7-sonnet-20250219": "anthropic.claude-3-7-sonnet-20250219-v1:0",
 }

 var ChannelName = "aws"
--- a/relay/channel/aws/dto.go
+++ b/relay/channel/aws/dto.go
@@ -16,6 +16,7 @@ type AwsClaudeRequest struct {
 	StopSequences    []string               `json:"stop_sequences,omitempty"`
 	Tools            []claude.Tool          `json:"tools,omitempty"`
 	ToolChoice       any                    `json:"tool_choice,omitempty"`
+	Thinking         *claude.Thinking       `json:"thinking,omitempty"`
 }

 func copyRequest(req *claude.ClaudeRequest) *AwsClaudeRequest {
@@ -30,5 +31,6 @@ func copyRequest(req *claude.ClaudeRequest) *AwsClaudeRequest {
 		StopSequences:    req.StopSequences,
 		Tools:            req.Tools,
 		ToolChoice:       req.ToolChoice,
+		Thinking:         req.Thinking,
 	}
 }
--- a/relay/channel/claude/constants.go
+++ b/relay/channel/claude/constants.go
@@ -11,6 +11,8 @@ var ModelList = []string{
 	"claude-3-5-haiku-20241022",
 	"claude-3-5-sonnet-20240620",
 	"claude-3-5-sonnet-20241022",
+	"claude-3-7-sonnet-20250219",
+	"claude-3-7-sonnet-20250219-thinking",
 }

 var ChannelName = "claude"
--- a/relay/channel/claude/dto.go
+++ b/relay/channel/claude/dto.go
@@ -11,6 +11,9 @@ type ClaudeMediaMessage struct {
 	Usage       *ClaudeUsage         `json:"usage,omitempty"`
 	StopReason  *string              `json:"stop_reason,omitempty"`
 	PartialJson string               `json:"partial_json,omitempty"`
+	Thinking    string               `json:"thinking,omitempty"`
+	Signature   string               `json:"signature,omitempty"`
+	Delta       string               `json:"delta,omitempty"`
 	// tool_calls
 	Id        string `json:"id,omitempty"`
 	Name      string `json:"name,omitempty"`
@@ -54,9 +57,15 @@ type ClaudeRequest struct {
 	TopP              float64         `json:"top_p,omitempty"`
 	TopK              int             `json:"top_k,omitempty"`
 	//ClaudeMetadata    `json:"metadata,omitempty"`
-	Stream     bool   `json:"stream,omitempty"`
-	Tools      []Tool `json:"tools,omitempty"`
-	ToolChoice any    `json:"tool_choice,omitempty"`
+	Stream     bool      `json:"stream,omitempty"`
+	Tools      []Tool    `json:"tools,omitempty"`
+	ToolChoice any       `json:"tool_choice,omitempty"`
+	Thinking   *Thinking `json:"thinking,omitempty"`
+}
+
+type Thinking struct {
+	Type         string `json:"type"`
+	BudgetTokens int    `json:"budget_tokens"`
 }

 type ClaudeError struct {
--- a/relay/channel/claude/relay-claude.go
+++ b/relay/channel/claude/relay-claude.go
@@ -92,6 +92,25 @@ func RequestOpenAI2ClaudeMessage(textRequest dto.GeneralOpenAIRequest) (*ClaudeR
 		Stream:        textRequest.Stream,
 		Tools:         claudeTools,
 	}
+
+	if strings.HasSuffix(textRequest.Model, "-thinking") {
+		if claudeRequest.MaxTokens == 0 {
+			claudeRequest.MaxTokens = 8192
+		}
+
+		// 因为BudgetTokens 必须大于1024
+		if claudeRequest.MaxTokens < 1280 {
+			claudeRequest.MaxTokens = 1280
+		}
+
+		// BudgetTokens 为 max_tokens 的 80%
+		claudeRequest.Thinking = &Thinking{
+			Type:         "enabled",
+			BudgetTokens: int(float64(claudeRequest.MaxTokens) * 0.8),
+		}
+		claudeRequest.Model = strings.TrimSuffix(textRequest.Model, "-thinking")
+	}
+
 	if claudeRequest.MaxTokens == 0 {
 		claudeRequest.MaxTokens = 4096
 	}
@@ -308,12 +327,20 @@ func StreamResponseClaude2OpenAI(reqMode int, claudeResponse *ClaudeResponse) (*
 			if claudeResponse.Delta != nil {
 				choice.Index = claudeResponse.Index
 				choice.Delta.SetContentString(claudeResponse.Delta.Text)
-				if claudeResponse.Delta.Type == "input_json_delta" {
+				switch claudeResponse.Delta.Type {
+				case "input_json_delta":
 					tools = append(tools, dto.ToolCall{
 						Function: dto.FunctionCall{
 							Arguments: claudeResponse.Delta.PartialJson,
 						},
 					})
+				case "signature_delta":
+					// 加密的不处理
+					signatureContent := "\n"
+					choice.Delta.ReasoningContent = &signatureContent
+				case "thinking_delta":
+					thinkingContent := claudeResponse.Delta.Thinking
+					choice.Delta.ReasoningContent = &thinkingContent
 				}
 			}
 		} else if claudeResponse.Type == "message_delta" {
@@ -352,6 +379,8 @@ func ResponseClaude2OpenAI(reqMode int, claudeResponse *ClaudeResponse) *dto.Ope
 		responseText = claudeResponse.Content[0].Text
 	}
 	tools := make([]dto.ToolCall, 0)
+	thinkingContent := ""
+
 	if reqMode == RequestModeCompletion {
 		content, _ := json.Marshal(strings.TrimPrefix(claudeResponse.Completion, " "))
 		choice := dto.OpenAITextResponseChoice{
@@ -367,7 +396,8 @@ func ResponseClaude2OpenAI(reqMode int, claudeResponse *ClaudeResponse) *dto.Ope
 	} else {
 		fullTextResponse.Id = claudeResponse.Id
 		for _, message := range claudeResponse.Content {
-			if message.Type == "tool_use" {
+			switch message.Type {
+			case "tool_use":
 				args, _ := json.Marshal(message.Input)
 				tools = append(tools, dto.ToolCall{
 					ID:   message.Id,
@@ -377,6 +407,11 @@ func ResponseClaude2OpenAI(reqMode int, claudeResponse *ClaudeResponse) *dto.Ope
 						Arguments: string(args),
 					},
 				})
+			case "thinking":
+				// 加密的不管， 只输出明文的推理过程
+				thinkingContent = message.Thinking
+			case "text":
+				responseText = message.Text
 			}
 		}
 	}
@@ -391,6 +426,7 @@ func ResponseClaude2OpenAI(reqMode int, claudeResponse *ClaudeResponse) *dto.Ope
 	if len(tools) > 0 {
 		choice.Message.SetToolCalls(tools)
 	}
+	choice.Message.ReasoningContent = thinkingContent
 	fullTextResponse.Model = claudeResponse.Model
 	choices = append(choices, choice)
 	fullTextResponse.Choices = choices
--- a/relay/channel/dify/adaptor.go
+++ b/relay/channel/dify/adaptor.go
@@ -9,9 +9,18 @@ import (
 	"one-api/dto"
 	"one-api/relay/channel"
 	relaycommon "one-api/relay/common"
+	"strings"
+)
+
+const (
+	BotTypeChatFlow   = 1 // chatflow default
+	BotTypeAgent      = 2
+	BotTypeWorkFlow   = 3
+	BotTypeCompletion = 4
 )

 type Adaptor struct {
+	BotType int
 }

 func (a *Adaptor) ConvertAudioRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.AudioRequest) (io.Reader, error) {
@@ -25,10 +34,28 @@ func (a *Adaptor) ConvertImageRequest(c *gin.Context, info *relaycommon.RelayInf
 }

 func (a *Adaptor) Init(info *relaycommon.RelayInfo) {
+	if strings.HasPrefix(info.UpstreamModelName, "agent") {
+		a.BotType = BotTypeAgent
+	} else if strings.HasPrefix(info.UpstreamModelName, "workflow") {
+		a.BotType = BotTypeWorkFlow
+	} else if strings.HasPrefix(info.UpstreamModelName, "chat") {
+		a.BotType = BotTypeCompletion
+	} else {
+		a.BotType = BotTypeChatFlow
+	}
 }

 func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
-	return fmt.Sprintf("%s/v1/chat-messages", info.BaseUrl), nil
+	switch a.BotType {
+	case BotTypeWorkFlow:
+		return fmt.Sprintf("%s/v1/workflows/run", info.BaseUrl), nil
+	case BotTypeCompletion:
+		return fmt.Sprintf("%s/v1/completion-messages", info.BaseUrl), nil
+	case BotTypeAgent:
+		fallthrough
+	default:
+		return fmt.Sprintf("%s/v1/chat-messages", info.BaseUrl), nil
+	}
 }

 func (a *Adaptor) SetupRequestHeader(c *gin.Context, req *http.Header, info *relaycommon.RelayInfo) error {
@@ -53,7 +80,6 @@ func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.Rela
 	return nil, errors.New("not implemented")
 }

-
 func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error) {
 	return channel.DoApiRequest(a, c, info, requestBody)
 }
--- a/relay/channel/mistral/adaptor.go
+++ b/relay/channel/mistral/adaptor.go
@@ -41,9 +41,7 @@ func (a *Adaptor) ConvertRequest(c *gin.Context, info *relaycommon.RelayInfo, re
 	if request == nil {
 		return nil, errors.New("request is nil")
 	}
-	mistralReq := requestOpenAI2Mistral(*request)
-	//common.LogJson(c, "body", mistralReq)
-	return mistralReq, nil
+	return requestOpenAI2Mistral(request), nil
 }

 func (a *Adaptor) ConvertRerankRequest(c *gin.Context, relayMode int, request dto.RerankRequest) (any, error) {
@@ -55,7 +53,6 @@ func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.Rela
 	return nil, errors.New("not implemented")
 }

-
 func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error) {
 	return channel.DoApiRequest(a, c, info, requestBody)
 }
--- a/relay/channel/mistral/text.go
+++ b/relay/channel/mistral/text.go
@@ -1,25 +1,21 @@
 package mistral

 import (
-	"encoding/json"
 	"one-api/dto"
 )

-func requestOpenAI2Mistral(request dto.GeneralOpenAIRequest) *dto.GeneralOpenAIRequest {
+func requestOpenAI2Mistral(request *dto.GeneralOpenAIRequest) *dto.GeneralOpenAIRequest {
 	messages := make([]dto.Message, 0, len(request.Messages))
 	for _, message := range request.Messages {
-		if !message.IsStringContent() {
-			mediaMessages := message.ParseContent()
-			for j, mediaMessage := range mediaMessages {
-				if mediaMessage.Type == dto.ContentTypeImageURL {
-					imageUrl := mediaMessage.ImageUrl.(dto.MessageImageUrl)
-					mediaMessage.ImageUrl = imageUrl.Url
-					mediaMessages[j] = mediaMessage
-				}
+		mediaMessages := message.ParseContent()
+		for j, mediaMessage := range mediaMessages {
+			if mediaMessage.Type == dto.ContentTypeImageURL {
+				imageUrl := mediaMessage.ImageUrl.(dto.MessageImageUrl)
+				mediaMessage.ImageUrl = imageUrl.Url
+				mediaMessages[j] = mediaMessage
 			}
-			messageRaw, _ := json.Marshal(mediaMessages)
-			message.Content = messageRaw
 		}
+		message.SetMediaContent(mediaMessages)
 		messages = append(messages, dto.Message{
 			Role:       message.Role,
 			Content:    message.Content,
--- a/relay/channel/ollama/dto.go
+++ b/relay/channel/ollama/dto.go
@@ -11,6 +11,7 @@ type OllamaRequest struct {
 	Topp             float64            `json:"top_p,omitempty"`
 	TopK             int                `json:"top_k,omitempty"`
 	Stop             any                `json:"stop,omitempty"`
+	MaxTokens        uint               `json:"max_tokens,omitempty"`
 	Tools            []dto.ToolCall     `json:"tools,omitempty"`
 	ResponseFormat   any                `json:"response_format,omitempty"`
 	FrequencyPenalty float64            `json:"frequency_penalty,omitempty"`
--- a/relay/channel/ollama/relay-ollama.go
+++ b/relay/channel/ollama/relay-ollama.go
@@ -58,6 +58,7 @@ func requestOpenAI2Ollama(request dto.GeneralOpenAIRequest) (*OllamaRequest, err
 		TopK:             request.TopK,
 		Stop:             Stop,
 		Tools:            request.Tools,
+		MaxTokens:        request.MaxTokens,
 		ResponseFormat:   request.ResponseFormat,
 		FrequencyPenalty: request.FrequencyPenalty,
 		PresencePenalty:  request.PresencePenalty,
--- a/relay/channel/openai/relay-openai.go
+++ b/relay/channel/openai/relay-openai.go
@@ -5,10 +5,6 @@ import (
 	"bytes"
 	"encoding/json"
 	"fmt"
-	"github.com/bytedance/gopkg/util/gopool"
-	"github.com/gin-gonic/gin"
-	"github.com/gorilla/websocket"
-	"github.com/pkg/errors"
 	"io"
 	"math"
 	"mime/multipart"
@@ -23,21 +19,66 @@ import (
 	"strings"
 	"sync"
 	"time"
+
+	"github.com/bytedance/gopkg/util/gopool"
+	"github.com/gin-gonic/gin"
+	"github.com/gorilla/websocket"
+	"github.com/pkg/errors"
 )

-func sendStreamData(c *gin.Context, data string, forceFormat bool) error {
+func sendStreamData(c *gin.Context, info *relaycommon.RelayInfo, data string, forceFormat bool, thinkToContent bool) error {
 	if data == "" {
 		return nil
 	}

-	if forceFormat {
-		var lastStreamResponse dto.ChatCompletionsStreamResponse
-		if err := json.Unmarshal(common.StringToByteSlice(data), &lastStreamResponse); err != nil {
-			return err
-		}
+	if !forceFormat && !thinkToContent {
+		return service.StringData(c, data)
+	}
+
+	var lastStreamResponse dto.ChatCompletionsStreamResponse
+	if err := json.Unmarshal(common.StringToByteSlice(data), &lastStreamResponse); err != nil {
+		return err
+	}
+
+	if !thinkToContent {
 		return service.ObjectData(c, lastStreamResponse)
 	}
-	return service.StringData(c, data)
+
+	// Handle think to content conversion
+	if info.IsFirstResponse {
+		response := lastStreamResponse.Copy()
+		for i := range response.Choices {
+			response.Choices[i].Delta.SetContentString("<think>\n")
+			response.Choices[i].Delta.SetReasoningContent("")
+		}
+		service.ObjectData(c, response)
+	}
+
+	if lastStreamResponse.Choices == nil || len(lastStreamResponse.Choices) == 0 {
+		return service.ObjectData(c, lastStreamResponse)
+	}
+
+	// Process each choice
+	for i, choice := range lastStreamResponse.Choices {
+		// Handle transition from thinking to content
+		if len(choice.Delta.GetContentString()) > 0 && !info.SendLastReasoningResponse {
+			response := lastStreamResponse.Copy()
+			for j := range response.Choices {
+				response.Choices[j].Delta.SetContentString("\n</think>")
+				response.Choices[j].Delta.SetReasoningContent("")
+			}
+			info.SendLastReasoningResponse = true
+			service.ObjectData(c, response)
+		}
+
+		// Convert reasoning content to regular content
+		if len(choice.Delta.GetReasoningContent()) > 0 {
+			lastStreamResponse.Choices[i].Delta.SetContentString(choice.Delta.GetReasoningContent())
+			lastStreamResponse.Choices[i].Delta.SetReasoningContent("")
+		}
+	}
+
+	return service.ObjectData(c, lastStreamResponse)
 }

 func OaiStreamHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (*dto.OpenAIErrorWithStatusCode, *dto.Usage) {
@@ -56,11 +97,14 @@ func OaiStreamHandler(c *gin.Context, resp *http.Response, info *relaycommon.Rel
 	var usage = &dto.Usage{}
 	var streamItems []string // store stream items
 	var forceFormat bool
+	var thinkToContent bool

-	if info.ChannelType == common.ChannelTypeCustom {
-		if forceFmt, ok := info.ChannelSetting["force_format"].(bool); ok {
-			forceFormat = forceFmt
-		}
+	if forceFmt, ok := info.ChannelSetting[constant.ForceFormat].(bool); ok {
+		forceFormat = forceFmt
+	}
+
+	if think2Content, ok := info.ChannelSetting[constant.ChannelSettingThinkingToContent].(bool); ok {
+		thinkToContent = think2Content
 	}

 	toolCount := 0
@@ -84,9 +128,12 @@ func OaiStreamHandler(c *gin.Context, resp *http.Response, info *relaycommon.Rel
 	)
 	gopool.Go(func() {
 		for scanner.Scan() {
-			info.SetFirstResponseTime()
+			//info.SetFirstResponseTime()
 			ticker.Reset(time.Duration(constant.StreamingTimeout) * time.Second)
 			data := scanner.Text()
+			if common.DebugEnabled {
+				println(data)
+			}
 			if len(data) < 6 { // ignore blank line or wrong format
 				continue
 			}
@@ -98,10 +145,11 @@ func OaiStreamHandler(c *gin.Context, resp *http.Response, info *relaycommon.Rel
 			data = strings.TrimSpace(data)
 			if !strings.HasPrefix(data, "[DONE]") {
 				if lastStreamData != "" {
-					err := sendStreamData(c, lastStreamData, forceFormat)
+					err := sendStreamData(c, info, lastStreamData, forceFormat, thinkToContent)
 					if err != nil {
 						common.LogError(c, "streaming error: "+err.Error())
 					}
+					info.SetFirstResponseTime()
 				}
 				lastStreamData = data
 				streamItems = append(streamItems, data)
@@ -141,7 +189,7 @@ func OaiStreamHandler(c *gin.Context, resp *http.Response, info *relaycommon.Rel
 		}
 	}
 	if shouldSendLastResp {
-		sendStreamData(c, lastStreamData, forceFormat)
+		sendStreamData(c, info, lastStreamData, forceFormat, thinkToContent)
 	}

 	// 计算token
--- a/relay/common/relay_info.go
+++ b/relay/common/relay_info.go
@@ -13,23 +13,24 @@ import (
 )

 type RelayInfo struct {
-	ChannelType       int
-	ChannelId         int
-	TokenId           int
-	TokenKey          string
-	UserId            int
-	Group             string
-	TokenUnlimited    bool
-	StartTime         time.Time
-	FirstResponseTime time.Time
-	setFirstResponse  bool
-	ApiType           int
-	IsStream          bool
-	IsPlayground      bool
-	UsePrice          bool
-	RelayMode         int
-	UpstreamModelName string
-	OriginModelName   string
+	ChannelType               int
+	ChannelId                 int
+	TokenId                   int
+	TokenKey                  string
+	UserId                    int
+	Group                     string
+	TokenUnlimited            bool
+	StartTime                 time.Time
+	FirstResponseTime         time.Time
+	IsFirstResponse           bool
+	SendLastReasoningResponse bool
+	ApiType                   int
+	IsStream                  bool
+	IsPlayground              bool
+	UsePrice                  bool
+	RelayMode                 int
+	UpstreamModelName         string
+	OriginModelName           string
 	//RecodeModelName      string
 	RequestURLPath       string
 	ApiVersion           string
@@ -88,6 +89,7 @@ func GenRelayInfo(c *gin.Context) *RelayInfo {
 	apiType, _ := relayconstant.ChannelType2APIType(channelType)

 	info := &RelayInfo{
+		IsFirstResponse:   true,
 		RelayMode:         relayconstant.Path2RelayMode(c.Request.URL.Path),
 		BaseUrl:           c.GetString("base_url"),
 		RequestURLPath:    c.Request.URL.String(),
@@ -139,9 +141,9 @@ func (info *RelayInfo) SetIsStream(isStream bool) {
 }

 func (info *RelayInfo) SetFirstResponseTime() {
-	if !info.setFirstResponse {
+	if info.IsFirstResponse {
 		info.FirstResponseTime = time.Now()
-		info.setFirstResponse = true
+		info.IsFirstResponse = false
 	}
 }

--- a/relay/relay-image.go
+++ b/relay/relay-image.go
@@ -115,8 +115,8 @@ func ImageHelper(c *gin.Context) *dto.OpenAIErrorWithStatusCode {
 		}
 	}

-	imageRatio := priceData.ModelPrice * sizeRatio * qualityRatio * float64(imageRequest.N)
-	quota := int(imageRatio * priceData.GroupRatio * common.QuotaPerUnit)
+	priceData.ModelPrice *= sizeRatio * qualityRatio * float64(imageRequest.N)
+	quota := int(priceData.ModelPrice * priceData.GroupRatio * common.QuotaPerUnit)

 	if userQuota-quota < 0 {
 		return service.OpenAIErrorWrapperLocal(fmt.Errorf("image pre-consumed quota failed, user quota: %s, need quota: %s", common.FormatQuota(userQuota), common.FormatQuota(quota)), "insufficient_user_quota", http.StatusForbidden)
--- a/router/relay-router.go
+++ b/router/relay-router.go
@@ -24,6 +24,7 @@ func SetRelayRouter(router *gin.Engine) {
 	}
 	relayV1Router := router.Group("/v1")
 	relayV1Router.Use(middleware.TokenAuth())
+	relayV1Router.Use(middleware.ModelRequestRateLimit())
 	{
 		// WebSocket 路由
 		wsRouter := relayV1Router.Group("")
--- a/service/token_counter.go
+++ b/service/token_counter.go
@@ -78,6 +78,9 @@ func getTokenEncoder(model string) *tiktoken.Tiktoken {
 }

 func getTokenNum(tokenEncoder *tiktoken.Tiktoken, text string) int {
+	if text == "" {
+		return 0
+	}
 	return len(tokenEncoder.Encode(text, nil, nil))
 }

@@ -282,30 +285,25 @@ func CountTokenMessages(info *relaycommon.RelayInfo, messages []dto.Message, mod
 		tokenNum += tokensPerMessage
 		tokenNum += getTokenNum(tokenEncoder, message.Role)
 		if len(message.Content) > 0 {
-			if message.IsStringContent() {
-				stringContent := message.StringContent()
-				tokenNum += getTokenNum(tokenEncoder, stringContent)
-				if message.Name != nil {
-					tokenNum += tokensPerName
-					tokenNum += getTokenNum(tokenEncoder, *message.Name)
-				}
-			} else {
-				arrayContent := message.ParseContent()
-				for _, m := range arrayContent {
-					if m.Type == dto.ContentTypeImageURL {
-						imageUrl := m.ImageUrl.(dto.MessageImageUrl)
-						imageTokenNum, err := getImageToken(info, &imageUrl, model, stream)
-						if err != nil {
-							return 0, err
-						}
-						tokenNum += imageTokenNum
-						log.Printf("image token num: %d", imageTokenNum)
-					} else if m.Type == dto.ContentTypeInputAudio {
-						// TODO: 音频token数量计算
-						tokenNum += 100
-					} else {
-						tokenNum += getTokenNum(tokenEncoder, m.Text)
+			if message.Name != nil {
+				tokenNum += tokensPerName
+				tokenNum += getTokenNum(tokenEncoder, *message.Name)
+			}
+			arrayContent := message.ParseContent()
+			for _, m := range arrayContent {
+				if m.Type == dto.ContentTypeImageURL {
+					imageUrl := m.ImageUrl.(dto.MessageImageUrl)
+					imageTokenNum, err := getImageToken(info, &imageUrl, model, stream)
+					if err != nil {
+						return 0, err
 					}
+					tokenNum += imageTokenNum
+					log.Printf("image token num: %d", imageTokenNum)
+				} else if m.Type == dto.ContentTypeInputAudio {
+					// TODO: 音频token数量计算
+					tokenNum += 100
+				} else {
+					tokenNum += getTokenNum(tokenEncoder, m.Text)
 				}
 			}
 		}
--- a/setting/operation_setting.go
+++ b/setting/operation_setting.go
@@ -23,6 +23,7 @@ func AutomaticDisableKeywordsFromString(s string) {
 	ak := strings.Split(s, "\n")
 	for _, k := range ak {
 		k = strings.TrimSpace(k)
+		k = strings.ToLower(k)
 		if k != "" {
 			AutomaticDisableKeywords = append(AutomaticDisableKeywords, k)
 		}
--- a/setting/rate_limit.go
+++ b/setting/rate_limit.go
@@ -0,0 +1,6 @@
+package setting
+
+var ModelRequestRateLimitEnabled = false
+var ModelRequestRateLimitDurationMinutes = 1
+var ModelRequestRateLimitCount = 0
+var ModelRequestRateLimitSuccessCount = 1000
--- a/web/src/components/LogsTable.js
+++ b/web/src/components/LogsTable.js
@@ -192,7 +192,7 @@ const LogsTable = () => {
                onClick={(event) => {
                  copyText(event, record.model_name).then(r => {});
                }}
-                suffixIcon={<IconRefresh />}
+                suffixIcon={<IconRefresh style={{width: '0.8em', height: '0.8em', opacity: 0.6}} />}
              >
                {' '}{record.model_name}{' '}
              </Tag>
--- a/web/src/components/PersonalSetting.js
+++ b/web/src/components/PersonalSetting.js
@@ -330,7 +330,7 @@ const PersonalSetting = () => {
        try {
            const res = await API.put('/api/user/setting', {
                notify_type: notificationSettings.warningType,
-                quota_warning_threshold: notificationSettings.warningThreshold,
+                quota_warning_threshold: parseFloat(notificationSettings.warningThreshold),
                webhook_url: notificationSettings.webhookUrl,
                webhook_secret: notificationSettings.webhookSecret,
                notification_email: notificationSettings.notificationEmail
--- a/web/src/components/RateLimitSetting.js
+++ b/web/src/components/RateLimitSetting.js
@@ -0,0 +1,80 @@
+import React, { useEffect, useState } from 'react';
+import { Card, Spin, Tabs } from '@douyinfe/semi-ui';
+import SettingsGeneral from '../pages/Setting/Operation/SettingsGeneral.js';
+import SettingsDrawing from '../pages/Setting/Operation/SettingsDrawing.js';
+import SettingsSensitiveWords from '../pages/Setting/Operation/SettingsSensitiveWords.js';
+import SettingsLog from '../pages/Setting/Operation/SettingsLog.js';
+import SettingsDataDashboard from '../pages/Setting/Operation/SettingsDataDashboard.js';
+import SettingsMonitoring from '../pages/Setting/Operation/SettingsMonitoring.js';
+import SettingsCreditLimit from '../pages/Setting/Operation/SettingsCreditLimit.js';
+import SettingsMagnification from '../pages/Setting/Operation/SettingsMagnification.js';
+import ModelSettingsVisualEditor from '../pages/Setting/Operation/ModelSettingsVisualEditor.js';
+import GroupRatioSettings from '../pages/Setting/Operation/GroupRatioSettings.js';
+import ModelRatioSettings from '../pages/Setting/Operation/ModelRatioSettings.js';
+
+
+import { API, showError, showSuccess } from '../helpers';
+import SettingsChats from '../pages/Setting/Operation/SettingsChats.js';
+import { useTranslation } from 'react-i18next';
+import RequestRateLimit from '../pages/Setting/RateLimit/SettingsRequestRateLimit.js';
+
+const RateLimitSetting = () => {
+  const { t } = useTranslation();
+  let [inputs, setInputs] = useState({
+    ModelRequestRateLimitEnabled: false,
+    ModelRequestRateLimitCount: 0,
+    ModelRequestRateLimitSuccessCount: 1000,
+    ModelRequestRateLimitDurationMinutes: 1,
+  });
+
+  let [loading, setLoading] = useState(false);
+
+  const getOptions = async () => {
+    const res = await API.get('/api/option/');
+    const { success, message, data } = res.data;
+    if (success) {
+      let newInputs = {};
+      data.forEach((item) => {
+        if (
+          item.key.endsWith('Enabled')
+        ) {
+          newInputs[item.key] = item.value === 'true' ? true : false;
+        } else {
+          newInputs[item.key] = item.value;
+        }
+      });
+
+      setInputs(newInputs);
+    } else {
+      showError(message);
+    }
+  };
+  async function onRefresh() {
+    try {
+      setLoading(true);
+      await getOptions();
+      // showSuccess('刷新成功');
+    } catch (error) {
+      showError('刷新失败');
+    } finally {
+      setLoading(false);
+    }
+  }
+
+  useEffect(() => {
+    onRefresh();
+  }, []);
+
+  return (
+    <>
+      <Spin spinning={loading} size='large'>
+        {/* AI请求速率限制 */}
+        <Card style={{ marginTop: '10px' }}>
+          <RequestRateLimit options={inputs} refresh={onRefresh} />
+        </Card>
+      </Spin>
+    </>
+  );
+};
+
+export default RateLimitSetting;
--- a/web/src/i18n/locales/en.json
+++ b/web/src/i18n/locales/en.json
@@ -856,7 +856,7 @@
  "IP黑名单": "IP blacklist",
  "不允许的IP，一行一个": "IPs not allowed, one per line",
  "请选择该渠道所支持的模型": "Please select the model supported by this channel",
-  "次": "Second-rate",
+  "次": "times",
  "达到限速报错内容": "Error content when the speed limit is reached",
  "不填则使用默认报错": "If not filled in, the default error will be reported.",
  "Midjouney 设置 (可选)": "Midjouney settings (optional)",
@@ -1270,5 +1270,16 @@
  "设置用于接收额度预警的邮箱地址，不填则使用账号绑定的邮箱": "Set the email address for receiving quota warning notifications, if not set, the email address bound to the account will be used",
  "留空则使用账号绑定的邮箱": "If left blank, the email address bound to the account will be used",
  "代理站地址": "Base URL",
-  "对于官方渠道，new-api已经内置地址，除非是第三方代理站点或者Azure的特殊接入地址，否则不需要填写": "For official channels, the new-api has a built-in address. Unless it is a third-party proxy site or a special Azure access address, there is no need to fill it in"
-}
+  "对于官方渠道，new-api已经内置地址，除非是第三方代理站点或者Azure的特殊接入地址，否则不需要填写": "For official channels, the new-api has a built-in address. Unless it is a third-party proxy site or a special Azure access address, there is no need to fill it in",
+  "渠道额外设置": "Channel extra settings",
+  "模型请求速率限制": "Model request rate limit",
+  "启用用户模型请求速率限制（可能会影响高并发性能）": "Enable user model request rate limit (may affect high concurrency performance)",
+  "限制周期": "Limit period",
+  "用户每周期最多请求次数": "User max request times per period",
+  "用户每周期最多请求完成次数": "User max successful request times per period",
+  "包括失败请求的次数，0代表不限制": "Including failed request times, 0 means no limit",
+  "频率限制的周期（分钟）": "Rate limit period (minutes)",
+  "只包括请求成功的次数": "Only include successful request times",
+  "保存模型速率限制": "Save model rate limit settings",
+  "速率限制设置": "Rate limit settings"
+}
--- a/web/src/pages/Setting/RateLimit/SettingsRequestRateLimit.js
+++ b/web/src/pages/Setting/RateLimit/SettingsRequestRateLimit.js
@@ -0,0 +1,159 @@
+import React, { useEffect, useState, useRef } from 'react';
+import { Button, Col, Form, Row, Spin } from '@douyinfe/semi-ui';
+import {
+  compareObjects,
+  API,
+  showError,
+  showSuccess,
+  showWarning,
+} from '../../../helpers';
+import { useTranslation } from 'react-i18next';
+
+export default function RequestRateLimit(props) {
+  const { t } = useTranslation();
+
+  const [loading, setLoading] = useState(false);
+  const [inputs, setInputs] = useState({
+    ModelRequestRateLimitEnabled: false,
+    ModelRequestRateLimitCount: -1,
+    ModelRequestRateLimitSuccessCount: 1000,
+    ModelRequestRateLimitDurationMinutes: 1
+  });
+  const refForm = useRef();
+  const [inputsRow, setInputsRow] = useState(inputs);
+
+  function onSubmit() {
+    const updateArray = compareObjects(inputs, inputsRow);
+    if (!updateArray.length) return showWarning(t('你似乎并没有修改什么'));
+    const requestQueue = updateArray.map((item) => {
+      let value = '';
+      if (typeof inputs[item.key] === 'boolean') {
+        value = String(inputs[item.key]);
+      } else {
+        value = inputs[item.key];
+      }
+      return API.put('/api/option/', {
+        key: item.key,
+        value,
+      });
+    });
+    setLoading(true);
+    Promise.all(requestQueue)
+      .then((res) => {
+        if (requestQueue.length === 1) {
+          if (res.includes(undefined)) return;
+        } else if (requestQueue.length > 1) {
+          if (res.includes(undefined)) return showError(t('部分保存失败，请重试'));
+        }
+        showSuccess(t('保存成功'));
+        props.refresh();
+      })
+      .catch(() => {
+        showError(t('保存失败，请重试'));
+      })
+      .finally(() => {
+        setLoading(false);
+      });
+  }
+
+  useEffect(() => {
+    const currentInputs = {};
+    for (let key in props.options) {
+      if (Object.keys(inputs).includes(key)) {
+        currentInputs[key] = props.options[key];
+      }
+    }
+    setInputs(currentInputs);
+    setInputsRow(structuredClone(currentInputs));
+    refForm.current.setValues(currentInputs);
+  }, [props.options]);
+
+  return (
+    <>
+      <Spin spinning={loading}>
+        <Form
+          values={inputs}
+          getFormApi={(formAPI) => (refForm.current = formAPI)}
+          style={{ marginBottom: 15 }}
+        >
+          <Form.Section text={t('模型请求速率限制')}>
+            <Row gutter={16}>
+              <Col span={8}>
+                <Form.Switch
+                  field={'ModelRequestRateLimitEnabled'}
+                  label={t('启用用户模型请求速率限制（可能会影响高并发性能）')}
+                  size='default'
+                  checkedText='｜'
+                  uncheckedText='〇'
+                  onChange={(value) => {
+                    setInputs({
+                      ...inputs,
+                      ModelRequestRateLimitEnabled: value,
+                    });
+                  }}
+                />
+              </Col>
+            </Row>
+            <Row>
+              <Col span={8}>
+                <Form.InputNumber
+                  label={t('限制周期')}
+                  step={1}
+                  min={0}
+                  suffix={t('分钟')}
+                  extraText={t('频率限制的周期（分钟）')}
+                  field={'ModelRequestRateLimitDurationMinutes'}
+                  onChange={(value) =>
+                    setInputs({
+                      ...inputs,
+                      ModelRequestRateLimitDurationMinutes: String(value),
+                    })
+                  }
+                />
+              </Col>
+            </Row>
+            <Row>
+              <Col span={8}>
+                <Form.InputNumber
+                  label={t('用户每周期最多请求次数')}
+                  step={1}
+                  min={0}
+                  suffix={t('次')}
+                  extraText={t('包括失败请求的次数，0代表不限制')}
+                  field={'ModelRequestRateLimitCount'}
+                  onChange={(value) =>
+                    setInputs({
+                      ...inputs,
+                      ModelRequestRateLimitCount: String(value),
+                    })
+                  }
+                />
+              </Col>
+              <Col span={8}>
+                <Form.InputNumber
+                  label={t('用户每周期最多请求完成次数')}
+                  step={1}
+                  min={1}
+                  suffix={t('次')}
+                  extraText={t('只包括请求成功的次数')}
+                  field={'ModelRequestRateLimitSuccessCount'}
+                  onChange={(value) =>
+                    setInputs({
+                      ...inputs,
+                      ModelRequestRateLimitSuccessCount: String(value),
+                    })
+                  }
+                />
+              </Col>
+            </Row>
+            <Row>
+              <Button size='default' onClick={onSubmit}>
+                {t('保存模型速率限制')}
+              </Button>
+            </Row>
+          </Form.Section>
+        </Form>
+      </Spin>
+    </>
+  );
+}
--- a/web/src/pages/Setting/index.js
+++ b/web/src/pages/Setting/index.js
@@ -8,6 +8,7 @@ import { isRoot } from '../../helpers';
 import OtherSetting from '../../components/OtherSetting';
 import PersonalSetting from '../../components/PersonalSetting';
 import OperationSetting from '../../components/OperationSetting';
+import RateLimitSetting from '../../components/RateLimitSetting.js';

 const Setting = () => {
  const { t } = useTranslation();
@@ -28,6 +29,11 @@ const Setting = () => {
      content: <OperationSetting />,
      itemKey: 'operation',
    });
+    panes.push({
+      tab: t('速率限制设置'),
+      content: <RateLimitSetting />,
+      itemKey: 'ratelimit',
+    });
    panes.push({
      tab: t('系统设置'),
      content: <SystemSetting />,
Author	SHA1	Message	Date
Calcium-Ion	5b9e275690	Merge pull request #788 from MartialBE/main feat: Add Claude 3.7 Sonnet thinking mode support	2025-02-25 15:21:39 +08:00
1808837298@qq.com	607e3206b3	Merge branch 'main' into thinking # Conflicts: # relay/channel/claude/dto.go	2025-02-25 15:21:22 +08:00
1808837298@qq.com	83feb492fb	feat: Add support for Claude thinking parameter in request	2025-02-25 14:37:03 +08:00
MartialBE	4f212be45c	feat: Add Claude 3.7 Sonnet thinking mode support	2025-02-25 14:10:43 +08:00
1808837298@qq.com	92918e3751	feat: Add Claude 3.7 Sonnet model to AWS channel mapping	2025-02-25 02:55:23 +08:00
1808837298@qq.com	de15551570	feat: Add support for Claude 3.7 Sonnet model	2025-02-25 02:51:31 +08:00
1808837298@qq.com	a81a28b7a5	feat: Support max_tokens parameter for Ollama channel #782	2025-02-24 17:35:49 +08:00
Calcium-Ion	dc36fdedc2	Merge pull request #781 from zeyugao/main feat: Pass extra_body in OpenAI request to the backend	2025-02-24 16:29:48 +08:00
Calcium-Ion	3017882fa3	Merge pull request #783 from Calcium-Ion/rate-limit feat: Add model request rate limiting functionality	2025-02-24 16:29:23 +08:00
1808837298@qq.com	e9ba392af8	feat: Add model rate limit settings in system configuration	2025-02-24 16:27:20 +08:00
1808837298@qq.com	83a37e4653	feat: Add model request rate limiting functionality	2025-02-24 16:20:55 +08:00
1808837298@qq.com	b6f95dca41	feat: Add support for different Dify bot types and request URLs	2025-02-24 14:18:30 +08:00
1808837298@qq.com	7ff4cebdbe	feat: Enhance token counting and content parsing for messages	2025-02-24 14:18:15 +08:00
Elsa	af00f7b311	Pass extra_body to the backend	2025-02-24 10:52:55 +08:00
1808837298@qq.com	cc1d6e1c05	fix: Improve 429 error logging with detailed message	2025-02-23 21:26:31 +08:00
1808837298@qq.com	6c7a8c811c	fix typo	2025-02-23 17:27:33 +08:00
1808837298@qq.com	d5ab7d2d34	feat: Add thinking-to-content option in channel extra settings #780	2025-02-23 17:13:08 +08:00
1808837298@qq.com	115a181db3	feat: Add thinking-to-content conversion for stream responses	2025-02-23 17:05:57 +08:00
1808837298@qq.com	88a2fec190	fix: mistral	2025-02-22 16:29:48 +08:00
1808837298@qq.com	27ea231d66	fix: fix image ratio calculation	2025-02-22 15:50:18 +08:00
Calcium-Ion	4b6101b3ea	Merge pull request #778 from utopeadia/main 美化日志界面刷新图标	2025-02-22 15:21:28 +08:00
1808837298@qq.com	48926b8a5a	fix: Ensure correct quota warning threshold type conversion	2025-02-22 15:19:55 +08:00
1808837298@qq.com	c44a32efe0	chore: update rerank.md	2025-02-22 15:13:26 +08:00
HowieWood	c541d6c97e	进一步美化刷新图标	2025-02-22 14:18:25 +08:00
HowieWood	7dfcd135da	优化日志刷新图标显示	2025-02-22 14:12:49 +08:00
1808837298@qq.com	7a13fab271	fix: ShouldDisableChannel	2025-02-22 02:02:03 +08:00
1808837298@qq.com	bf75b30870	fix: mistral adaptor (close #774 )	2025-02-21 22:21:19 +08:00