Merge pull request #781 from zeyugao/main

feat: Pass extra_body in OpenAI request to the backend
Merge pull request #783 from Calcium-Ion/rate-limit
2026-04-17 21:07:27 +00:00 · 2025-02-24 16:29:48 +08:00 · 2025-02-24 16:29:23 +08:00 · 2025-02-24 16:27:20 +08:00 · 2025-02-24 16:20:55 +08:00 · 2025-02-24 14:18:30 +08:00
38 changed files with 1094 additions and 390 deletions
--- a/README.en.md
+++ b/README.en.md
@@ -63,6 +63,8 @@
    - Add suffix `-high` to set high reasoning effort (e.g., `o3-mini-high`)
    - Add suffix `-medium` to set medium reasoning effort
    - Add suffix `-low` to set low reasoning effort
+17. 🔄 Thinking to content option `thinking_to_content` in `Channel->Edit->Channel Extra Settings`, default is `false`, when `true`, the `reasoning_content` of the thinking content will be converted to `<think>` tags and concatenated to the content returned.
+18. 🔄 Model rate limit, support setting total request limit and successful request limit in `System Settings->Rate Limit Settings`

 ## Model Support
 This version additionally supports:
--- a/README.md
+++ b/README.md
@@ -69,6 +69,8 @@
    - 添加后缀 `-high` 设置为 high reasoning effort (例如: `o3-mini-high`)
    - 添加后缀 `-medium` 设置为 medium reasoning effort (例如: `o3-mini-medium`)
    - 添加后缀 `-low` 设置为 low reasoning effort (例如: `o3-mini-low`)
+18. 🔄 思考转内容，支持在 `渠道-编辑-渠道额外设置` 中设置 `thinking_to_content` 选项，默认`false`，开启后会将思考内容`reasoning_content`转换为`<think>`标签拼接到内容中返回。
+19. 🔄 模型限流，支持在 `系统设置-速率限制设置` 中设置模型限流，支持设置总请求数限制和成功请求数限制

 ## 模型支持
 此版本额外支持以下模型：
--- a/Rerank.md
+++ b/Rerank.md
@@ -13,7 +13,7 @@ Request:

 ```json
 {
-  "model": "rerank-multilingual-v3.0",
+  "model": "jina-reranker-v2-base-multilingual",
  "query": "What is the capital of the United States?",
  "top_n": 3,
  "documents": [
--- a/common/constants.go
+++ b/common/constants.go
@@ -276,7 +276,7 @@ var ChannelBaseURLs = []string{
 	"https://api.cohere.ai",                     //34
 	"https://api.minimax.chat",                  //35
 	"",                                          //36
-	"",                                          //37
+	"https://api.dify.ai",                       //37
 	"https://api.jina.ai",                       //38
 	"https://api.cloudflare.com",                //39
 	"https://api.siliconflow.cn",                //40
--- a/constant/channel_setting.go
+++ b/constant/channel_setting.go
@@ -1,6 +1,7 @@
 package constant

 var (
-	ForceFormat        = "force_format" // ForceFormat 强制格式化为OpenAI格式
-	ChanelSettingProxy = "proxy"        // Proxy 代理
+	ForceFormat                     = "force_format"        // ForceFormat 强制格式化为OpenAI格式
+	ChanelSettingProxy              = "proxy"               // Proxy 代理
+	ChannelSettingThinkingToContent = "thinking_to_content" // ThinkingToContent
 )
--- a/controller/relay.go
+++ b/controller/relay.go
@@ -24,7 +24,7 @@ func relayHandler(c *gin.Context, relayMode int) *dto.OpenAIErrorWithStatusCode
 	var err *dto.OpenAIErrorWithStatusCode
 	switch relayMode {
 	case relayconstant.RelayModeImagesGenerations:
-		err = relay.ImageHelper(c, relayMode)
+		err = relay.ImageHelper(c)
 	case relayconstant.RelayModeAudioSpeech:
 		fallthrough
 	case relayconstant.RelayModeAudioTranslation:
@@ -85,6 +85,7 @@ func Relay(c *gin.Context) {

 	if openaiErr != nil {
 		if openaiErr.StatusCode == http.StatusTooManyRequests {
+			common.LogError(c, fmt.Sprintf("origin 429 error: %s", openaiErr.Error.Message))
 			openaiErr.Error.Message = "当前分组上游负载已饱和，请稍后再试"
 		}
 		openaiErr.Error.Message = common.MessageWithRequestId(openaiErr.Error.Message, requestId)
--- a/controller/user.go
+++ b/controller/user.go
@@ -913,11 +913,11 @@ func TopUp(c *gin.Context) {
 }

 type UpdateUserSettingRequest struct {
-	QuotaWarningType      string `json:"notify_type"`
-	QuotaWarningThreshold int    `json:"quota_warning_threshold"`
-	WebhookUrl            string `json:"webhook_url,omitempty"`
-	WebhookSecret         string `json:"webhook_secret,omitempty"`
-	NotificationEmail     string `json:"notification_email,omitempty"`
+	QuotaWarningType      string  `json:"notify_type"`
+	QuotaWarningThreshold float64 `json:"quota_warning_threshold"`
+	WebhookUrl            string  `json:"webhook_url,omitempty"`
+	WebhookSecret         string  `json:"webhook_secret,omitempty"`
+	NotificationEmail     string  `json:"notification_email,omitempty"`
 }

 func UpdateUserSetting(c *gin.Context) {
--- a/docs/channel/other_setting.md
+++ b/docs/channel/other_setting.md
@@ -10,6 +10,10 @@
    - 用于配置网络代理
    - 类型为字符串，填写代理地址（例如 socks5 协议的代理地址）

+3. thinking_to_content
+   - 用于标识是否将思考内容`reasoning_conetnt`转换为`<think>`标签拼接到内容中返回
+   - 类型为布尔值，设置为 true 时启用思考内容转换
+
 --------------------------------------------------------------

 ## JSON 格式示例
@@ -19,6 +23,7 @@
 ```json
 {
    "force_format": true,
+   "thinking_to_content": true,
    "proxy": "socks5://xxxxxxx"
 }
 ```
--- a/dto/openai_request.go
+++ b/dto/openai_request.go
@@ -1,6 +1,9 @@
 package dto

-import "encoding/json"
+import (
+	"encoding/json"
+	"strings"
+)

 type ResponseFormat struct {
 	Type       string            `json:"type,omitempty"`
@@ -47,6 +50,7 @@ type GeneralOpenAIRequest struct {
 	Dimensions          int             `json:"dimensions,omitempty"`
 	Modalities          any             `json:"modalities,omitempty"`
 	Audio               any             `json:"audio,omitempty"`
+	ExtraBody           any             `json:"extra_body,omitempty"`
 }

 type OpenAITools struct {
@@ -88,20 +92,20 @@ func (r GeneralOpenAIRequest) ParseInput() []string {
 }

 type Message struct {
-	Role    string          `json:"role"`
-	Content json.RawMessage `json:"content"`
-	// parsedContent not json field
-	parsedContent    []MediaContent
-	Name             *string         `json:"name,omitempty"`
-	Prefix           *bool           `json:"prefix,omitempty"`
-	ReasoningContent string          `json:"reasoning_content,omitempty"`
-	ToolCalls        json.RawMessage `json:"tool_calls,omitempty"`
-	ToolCallId       string          `json:"tool_call_id,omitempty"`
+	Role                string          `json:"role"`
+	Content             json.RawMessage `json:"content"`
+	Name                *string         `json:"name,omitempty"`
+	Prefix              *bool           `json:"prefix,omitempty"`
+	ReasoningContent    string          `json:"reasoning_content,omitempty"`
+	ToolCalls           json.RawMessage `json:"tool_calls,omitempty"`
+	ToolCallId          string          `json:"tool_call_id,omitempty"`
+	parsedContent       []MediaContent
+	parsedStringContent *string
 }

 type MediaContent struct {
 	Type       string `json:"type"`
-	Text       string `json:"text"`
+	Text       string `json:"text,omitempty"`
 	ImageUrl   any    `json:"image_url,omitempty"`
 	InputAudio any    `json:"input_audio,omitempty"`
 }
@@ -150,26 +154,50 @@ func (m *Message) SetToolCalls(toolCalls any) {
 }

 func (m *Message) StringContent() string {
+	if m.parsedStringContent != nil {
+		return *m.parsedStringContent
+	}
+
 	var stringContent string
 	if err := json.Unmarshal(m.Content, &stringContent); err == nil {
+		m.parsedStringContent = &stringContent
 		return stringContent
 	}
-	return string(m.Content)
+
+	contentStr := new(strings.Builder)
+	arrayContent := m.ParseContent()
+	for _, content := range arrayContent {
+		if content.Type == ContentTypeText {
+			contentStr.WriteString(content.Text)
+		}
+	}
+	stringContent = contentStr.String()
+	m.parsedStringContent = &stringContent
+
+	return stringContent
 }

 func (m *Message) SetStringContent(content string) {
 	jsonContent, _ := json.Marshal(content)
 	m.Content = jsonContent
+	m.parsedStringContent = &content
+	m.parsedContent = nil
 }

 func (m *Message) SetMediaContent(content []MediaContent) {
 	jsonContent, _ := json.Marshal(content)
 	m.Content = jsonContent
+	m.parsedContent = nil
+	m.parsedStringContent = nil
 }

 func (m *Message) IsStringContent() bool {
+	if m.parsedStringContent != nil {
+		return true
+	}
 	var stringContent string
 	if err := json.Unmarshal(m.Content, &stringContent); err == nil {
+		m.parsedStringContent = &stringContent
 		return true
 	}
 	return false
@@ -179,72 +207,86 @@ func (m *Message) ParseContent() []MediaContent {
 	if m.parsedContent != nil {
 		return m.parsedContent
 	}
+
 	var contentList []MediaContent
-	defer func() {
-		if len(contentList) > 0 {
-			m.parsedContent = contentList
-		}
-	}()
+
+	// 先尝试解析为字符串
 	var stringContent string
 	if err := json.Unmarshal(m.Content, &stringContent); err == nil {
-		contentList = append(contentList, MediaContent{
+		contentList = []MediaContent{{
 			Type: ContentTypeText,
 			Text: stringContent,
-		})
+		}}
+		m.parsedContent = contentList
 		return contentList
 	}
-	var arrayContent []json.RawMessage
+
+	// 尝试解析为数组
+	var arrayContent []map[string]interface{}
 	if err := json.Unmarshal(m.Content, &arrayContent); err == nil {
 		for _, contentItem := range arrayContent {
-			var contentMap map[string]any
-			if err := json.Unmarshal(contentItem, &contentMap); err != nil {
+			contentType, ok := contentItem["type"].(string)
+			if !ok {
 				continue
 			}
-			switch contentMap["type"] {
+
+			switch contentType {
 			case ContentTypeText:
-				if subStr, ok := contentMap["text"].(string); ok {
+				if text, ok := contentItem["text"].(string); ok {
 					contentList = append(contentList, MediaContent{
 						Type: ContentTypeText,
-						Text: subStr,
+						Text: text,
 					})
 				}
+
 			case ContentTypeImageURL:
-				if subObj, ok := contentMap["image_url"].(map[string]any); ok {
-					detail, ok := subObj["detail"]
-					if ok {
-						subObj["detail"] = detail.(string)
-					} else {
-						subObj["detail"] = "high"
-					}
+				imageUrl := contentItem["image_url"]
+				switch v := imageUrl.(type) {
+				case string:
 					contentList = append(contentList, MediaContent{
 						Type: ContentTypeImageURL,
 						ImageUrl: MessageImageUrl{
-							Url:    subObj["url"].(string),
-							Detail: subObj["detail"].(string),
-						},
-					})
-				} else if url, ok := contentMap["image_url"].(string); ok {
-					contentList = append(contentList, MediaContent{
-						Type: ContentTypeImageURL,
-						ImageUrl: MessageImageUrl{
-							Url:    url,
+							Url:    v,
 							Detail: "high",
 						},
 					})
+				case map[string]interface{}:
+					url, ok1 := v["url"].(string)
+					detail, ok2 := v["detail"].(string)
+					if !ok2 {
+						detail = "high"
+					}
+					if ok1 {
+						contentList = append(contentList, MediaContent{
+							Type: ContentTypeImageURL,
+							ImageUrl: MessageImageUrl{
+								Url:    url,
+								Detail: detail,
+							},
+						})
+					}
 				}
+
 			case ContentTypeInputAudio:
-				if subObj, ok := contentMap["input_audio"].(map[string]any); ok {
-					contentList = append(contentList, MediaContent{
-						Type: ContentTypeInputAudio,
-						InputAudio: MessageInputAudio{
-							Data:   subObj["data"].(string),
-							Format: subObj["format"].(string),
-						},
-					})
+				if audioData, ok := contentItem["input_audio"].(map[string]interface{}); ok {
+					data, ok1 := audioData["data"].(string)
+					format, ok2 := audioData["format"].(string)
+					if ok1 && ok2 {
+						contentList = append(contentList, MediaContent{
+							Type: ContentTypeInputAudio,
+							InputAudio: MessageInputAudio{
+								Data:   data,
+								Format: format,
+							},
+						})
+					}
 				}
 			}
 		}
-		return contentList
 	}
-	return nil
+
+	if len(contentList) > 0 {
+		m.parsedContent = contentList
+	}
+	return contentList
 }
--- a/dto/openai_response.go
+++ b/dto/openai_response.go
@@ -62,9 +62,10 @@ type ChatCompletionsStreamResponseChoice struct {
 }

 type ChatCompletionsStreamResponseChoiceDelta struct {
-	Content   *string    `json:"content,omitempty"`
-	Role      string     `json:"role,omitempty"`
-	ToolCalls []ToolCall `json:"tool_calls,omitempty"`
+	Content          *string    `json:"content,omitempty"`
+	ReasoningContent *string    `json:"reasoning_content,omitempty"`
+	Role             string     `json:"role,omitempty"`
+	ToolCalls        []ToolCall `json:"tool_calls,omitempty"`
 }

 func (c *ChatCompletionsStreamResponseChoiceDelta) SetContentString(s string) {
@@ -78,6 +79,17 @@ func (c *ChatCompletionsStreamResponseChoiceDelta) GetContentString() string {
 	return *c.Content
 }

+func (c *ChatCompletionsStreamResponseChoiceDelta) GetReasoningContent() string {
+	if c.ReasoningContent == nil {
+		return ""
+	}
+	return *c.ReasoningContent
+}
+
+func (c *ChatCompletionsStreamResponseChoiceDelta) SetReasoningContent(s string) {
+	c.ReasoningContent = &s
+}
+
 type ToolCall struct {
 	// Index is not nil only in chat completion chunk object
 	Index    *int         `json:"index,omitempty"`
@@ -108,6 +120,20 @@ type ChatCompletionsStreamResponse struct {
 	Usage             *Usage                                `json:"usage"`
 }

+func (c *ChatCompletionsStreamResponse) Copy() *ChatCompletionsStreamResponse {
+	choices := make([]ChatCompletionsStreamResponseChoice, len(c.Choices))
+	copy(choices, c.Choices)
+	return &ChatCompletionsStreamResponse{
+		Id:                c.Id,
+		Object:            c.Object,
+		Created:           c.Created,
+		Model:             c.Model,
+		SystemFingerprint: c.SystemFingerprint,
+		Choices:           choices,
+		Usage:             c.Usage,
+	}
+}
+
 func (c *ChatCompletionsStreamResponse) GetSystemFingerprint() string {
 	if c.SystemFingerprint == nil {
 		return ""
--- a/middleware/model-rate-limit.go
+++ b/middleware/model-rate-limit.go
@@ -0,0 +1,172 @@
+package middleware
+
+import (
+	"context"
+	"fmt"
+	"net/http"
+	"one-api/common"
+	"one-api/setting"
+	"strconv"
+	"time"
+
+	"github.com/gin-gonic/gin"
+	"github.com/go-redis/redis/v8"
+)
+
+const (
+	ModelRequestRateLimitCountMark        = "MRRL"
+	ModelRequestRateLimitSuccessCountMark = "MRRLS"
+)
+
+// 检查Redis中的请求限制
+func checkRedisRateLimit(ctx context.Context, rdb *redis.Client, key string, maxCount int, duration int64) (bool, error) {
+	// 如果maxCount为0，表示不限制
+	if maxCount == 0 {
+		return true, nil
+	}
+
+	// 获取当前计数
+	length, err := rdb.LLen(ctx, key).Result()
+	if err != nil {
+		return false, err
+	}
+
+	// 如果未达到限制，允许请求
+	if length < int64(maxCount) {
+		return true, nil
+	}
+
+	// 检查时间窗口
+	oldTimeStr, _ := rdb.LIndex(ctx, key, -1).Result()
+	oldTime, err := time.Parse(timeFormat, oldTimeStr)
+	if err != nil {
+		return false, err
+	}
+
+	nowTimeStr := time.Now().Format(timeFormat)
+	nowTime, err := time.Parse(timeFormat, nowTimeStr)
+	if err != nil {
+		return false, err
+	}
+	// 如果在时间窗口内已达到限制，拒绝请求
+	subTime := nowTime.Sub(oldTime).Seconds()
+	if int64(subTime) < duration {
+		rdb.Expire(ctx, key, common.RateLimitKeyExpirationDuration)
+		return false, nil
+	}
+
+	return true, nil
+}
+
+// 记录Redis请求
+func recordRedisRequest(ctx context.Context, rdb *redis.Client, key string, maxCount int) {
+	// 如果maxCount为0，不记录请求
+	if maxCount == 0 {
+		return
+	}
+
+	now := time.Now().Format(timeFormat)
+	rdb.LPush(ctx, key, now)
+	rdb.LTrim(ctx, key, 0, int64(maxCount-1))
+	rdb.Expire(ctx, key, common.RateLimitKeyExpirationDuration)
+}
+
+// Redis限流处理器
+func redisRateLimitHandler(duration int64, totalMaxCount, successMaxCount int) gin.HandlerFunc {
+	return func(c *gin.Context) {
+		userId := strconv.Itoa(c.GetInt("id"))
+		ctx := context.Background()
+		rdb := common.RDB
+
+		// 1. 检查总请求数限制（当totalMaxCount为0时会自动跳过）
+		totalKey := fmt.Sprintf("rateLimit:%s:%s", ModelRequestRateLimitCountMark, userId)
+		allowed, err := checkRedisRateLimit(ctx, rdb, totalKey, totalMaxCount, duration)
+		if err != nil {
+			fmt.Println("检查总请求数限制失败:", err.Error())
+			abortWithOpenAiMessage(c, http.StatusInternalServerError, "rate_limit_check_failed")
+			return
+		}
+		if !allowed {
+			abortWithOpenAiMessage(c, http.StatusTooManyRequests, fmt.Sprintf("您已达到总请求数限制：%d分钟内最多请求%d次，包括失败次数，请检查您的请求是否正确", setting.ModelRequestRateLimitDurationMinutes, totalMaxCount))
+		}
+
+		// 2. 检查成功请求数限制
+		successKey := fmt.Sprintf("rateLimit:%s:%s", ModelRequestRateLimitSuccessCountMark, userId)
+		allowed, err = checkRedisRateLimit(ctx, rdb, successKey, successMaxCount, duration)
+		if err != nil {
+			fmt.Println("检查成功请求数限制失败:", err.Error())
+			abortWithOpenAiMessage(c, http.StatusInternalServerError, "rate_limit_check_failed")
+			return
+		}
+		if !allowed {
+			abortWithOpenAiMessage(c, http.StatusTooManyRequests, fmt.Sprintf("您已达到请求数限制：%d分钟内最多请求%d次", setting.ModelRequestRateLimitDurationMinutes, successMaxCount))
+			return
+		}
+
+		// 3. 记录总请求（当totalMaxCount为0时会自动跳过）
+		recordRedisRequest(ctx, rdb, totalKey, totalMaxCount)
+
+		// 4. 处理请求
+		c.Next()
+
+		// 5. 如果请求成功，记录成功请求
+		if c.Writer.Status() < 400 {
+			recordRedisRequest(ctx, rdb, successKey, successMaxCount)
+		}
+	}
+}
+
+// 内存限流处理器
+func memoryRateLimitHandler(duration int64, totalMaxCount, successMaxCount int) gin.HandlerFunc {
+	inMemoryRateLimiter.Init(common.RateLimitKeyExpirationDuration)
+
+	return func(c *gin.Context) {
+		userId := strconv.Itoa(c.GetInt("id"))
+		totalKey := ModelRequestRateLimitCountMark + userId
+		successKey := ModelRequestRateLimitSuccessCountMark + userId
+
+		// 1. 检查总请求数限制（当totalMaxCount为0时跳过）
+		if totalMaxCount > 0 && !inMemoryRateLimiter.Request(totalKey, totalMaxCount, duration) {
+			c.Status(http.StatusTooManyRequests)
+			c.Abort()
+			return
+		}
+
+		// 2. 检查成功请求数限制
+		// 使用一个临时key来检查限制，这样可以避免实际记录
+		checkKey := successKey + "_check"
+		if !inMemoryRateLimiter.Request(checkKey, successMaxCount, duration) {
+			c.Status(http.StatusTooManyRequests)
+			c.Abort()
+			return
+		}
+
+		// 3. 处理请求
+		c.Next()
+
+		// 4. 如果请求成功，记录到实际的成功请求计数中
+		if c.Writer.Status() < 400 {
+			inMemoryRateLimiter.Request(successKey, successMaxCount, duration)
+		}
+	}
+}
+
+// ModelRequestRateLimit 模型请求限流中间件
+func ModelRequestRateLimit() func(c *gin.Context) {
+	// 如果未启用限流，直接放行
+	if !setting.ModelRequestRateLimitEnabled {
+		return defNext
+	}
+
+	// 计算限流参数
+	duration := int64(setting.ModelRequestRateLimitDurationMinutes * 60)
+	totalMaxCount := setting.ModelRequestRateLimitCount
+	successMaxCount := setting.ModelRequestRateLimitSuccessCount
+
+	// 根据存储类型选择限流处理器
+	if common.RedisEnabled {
+		return redisRateLimitHandler(duration, totalMaxCount, successMaxCount)
+	} else {
+		return memoryRateLimitHandler(duration, totalMaxCount, successMaxCount)
+	}
+}
--- a/model/option.go
+++ b/model/option.go
@@ -84,7 +84,10 @@ func InitOptionMap() {
 	common.OptionMap["QuotaForInviter"] = strconv.Itoa(common.QuotaForInviter)
 	common.OptionMap["QuotaForInvitee"] = strconv.Itoa(common.QuotaForInvitee)
 	common.OptionMap["QuotaRemindThreshold"] = strconv.Itoa(common.QuotaRemindThreshold)
-	common.OptionMap["PreConsumedQuota"] = strconv.Itoa(common.PreConsumedQuota)
+	common.OptionMap["ShouldPreConsumedQuota"] = strconv.Itoa(common.PreConsumedQuota)
+	common.OptionMap["ModelRequestRateLimitCount"] = strconv.Itoa(setting.ModelRequestRateLimitCount)
+	common.OptionMap["ModelRequestRateLimitDurationMinutes"] = strconv.Itoa(setting.ModelRequestRateLimitDurationMinutes)
+	common.OptionMap["ModelRequestRateLimitSuccessCount"] = strconv.Itoa(setting.ModelRequestRateLimitSuccessCount)
 	common.OptionMap["ModelRatio"] = common.ModelRatio2JSONString()
 	common.OptionMap["ModelPrice"] = common.ModelPrice2JSONString()
 	common.OptionMap["GroupRatio"] = setting.GroupRatio2JSONString()
@@ -105,6 +108,7 @@ func InitOptionMap() {
 	common.OptionMap["MjActionCheckSuccessEnabled"] = strconv.FormatBool(setting.MjActionCheckSuccessEnabled)
 	common.OptionMap["CheckSensitiveEnabled"] = strconv.FormatBool(setting.CheckSensitiveEnabled)
 	common.OptionMap["DemoSiteEnabled"] = strconv.FormatBool(setting.DemoSiteEnabled)
+	common.OptionMap["ModelRequestRateLimitEnabled"] = strconv.FormatBool(setting.ModelRequestRateLimitEnabled)
 	common.OptionMap["CheckSensitiveOnPromptEnabled"] = strconv.FormatBool(setting.CheckSensitiveOnPromptEnabled)
 	//common.OptionMap["CheckSensitiveOnCompletionEnabled"] = strconv.FormatBool(constant.CheckSensitiveOnCompletionEnabled)
 	common.OptionMap["StopOnSensitiveEnabled"] = strconv.FormatBool(setting.StopOnSensitiveEnabled)
@@ -226,6 +230,9 @@ func updateOptionMap(key string, value string) (err error) {
 			setting.DemoSiteEnabled = boolValue
 		case "CheckSensitiveOnPromptEnabled":
 			setting.CheckSensitiveOnPromptEnabled = boolValue
+		case "ModelRequestRateLimitEnabled":
+			setting.ModelRequestRateLimitEnabled = boolValue
+
 		//case "CheckSensitiveOnCompletionEnabled":
 		//	constant.CheckSensitiveOnCompletionEnabled = boolValue
 		case "StopOnSensitiveEnabled":
@@ -306,8 +313,14 @@ func updateOptionMap(key string, value string) (err error) {
 		common.QuotaForInvitee, _ = strconv.Atoi(value)
 	case "QuotaRemindThreshold":
 		common.QuotaRemindThreshold, _ = strconv.Atoi(value)
-	case "PreConsumedQuota":
+	case "ShouldPreConsumedQuota":
 		common.PreConsumedQuota, _ = strconv.Atoi(value)
+	case "ModelRequestRateLimitCount":
+		setting.ModelRequestRateLimitCount, _ = strconv.Atoi(value)
+	case "ModelRequestRateLimitDurationMinutes":
+		setting.ModelRequestRateLimitDurationMinutes, _ = strconv.Atoi(value)
+	case "ModelRequestRateLimitSuccessCount":
+		setting.ModelRequestRateLimitSuccessCount, _ = strconv.Atoi(value)
 	case "RetryTimes":
 		common.RetryTimes, _ = strconv.Atoi(value)
 	case "DataExportInterval":
--- a/relay/channel/dify/adaptor.go
+++ b/relay/channel/dify/adaptor.go
@@ -9,9 +9,18 @@ import (
 	"one-api/dto"
 	"one-api/relay/channel"
 	relaycommon "one-api/relay/common"
+	"strings"
+)
+
+const (
+	BotTypeChatFlow   = 1 // chatflow default
+	BotTypeAgent      = 2
+	BotTypeWorkFlow   = 3
+	BotTypeCompletion = 4
 )

 type Adaptor struct {
+	BotType int
 }

 func (a *Adaptor) ConvertAudioRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.AudioRequest) (io.Reader, error) {
@@ -25,10 +34,28 @@ func (a *Adaptor) ConvertImageRequest(c *gin.Context, info *relaycommon.RelayInf
 }

 func (a *Adaptor) Init(info *relaycommon.RelayInfo) {
+	if strings.HasPrefix(info.UpstreamModelName, "agent") {
+		a.BotType = BotTypeAgent
+	} else if strings.HasPrefix(info.UpstreamModelName, "workflow") {
+		a.BotType = BotTypeWorkFlow
+	} else if strings.HasPrefix(info.UpstreamModelName, "chat") {
+		a.BotType = BotTypeCompletion
+	} else {
+		a.BotType = BotTypeChatFlow
+	}
 }

 func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
-	return fmt.Sprintf("%s/v1/chat-messages", info.BaseUrl), nil
+	switch a.BotType {
+	case BotTypeWorkFlow:
+		return fmt.Sprintf("%s/v1/workflows/run", info.BaseUrl), nil
+	case BotTypeCompletion:
+		return fmt.Sprintf("%s/v1/completion-messages", info.BaseUrl), nil
+	case BotTypeAgent:
+		fallthrough
+	default:
+		return fmt.Sprintf("%s/v1/chat-messages", info.BaseUrl), nil
+	}
 }

 func (a *Adaptor) SetupRequestHeader(c *gin.Context, req *http.Header, info *relaycommon.RelayInfo) error {
@@ -53,7 +80,6 @@ func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.Rela
 	return nil, errors.New("not implemented")
 }

-
 func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error) {
 	return channel.DoApiRequest(a, c, info, requestBody)
 }
--- a/relay/channel/mistral/adaptor.go
+++ b/relay/channel/mistral/adaptor.go
@@ -41,9 +41,7 @@ func (a *Adaptor) ConvertRequest(c *gin.Context, info *relaycommon.RelayInfo, re
 	if request == nil {
 		return nil, errors.New("request is nil")
 	}
-	mistralReq := requestOpenAI2Mistral(*request)
-	//common.LogJson(c, "body", mistralReq)
-	return mistralReq, nil
+	return requestOpenAI2Mistral(request), nil
 }

 func (a *Adaptor) ConvertRerankRequest(c *gin.Context, relayMode int, request dto.RerankRequest) (any, error) {
@@ -55,7 +53,6 @@ func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.Rela
 	return nil, errors.New("not implemented")
 }

-
 func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error) {
 	return channel.DoApiRequest(a, c, info, requestBody)
 }
--- a/relay/channel/mistral/text.go
+++ b/relay/channel/mistral/text.go
@@ -1,25 +1,21 @@
 package mistral

 import (
-	"encoding/json"
 	"one-api/dto"
 )

-func requestOpenAI2Mistral(request dto.GeneralOpenAIRequest) *dto.GeneralOpenAIRequest {
+func requestOpenAI2Mistral(request *dto.GeneralOpenAIRequest) *dto.GeneralOpenAIRequest {
 	messages := make([]dto.Message, 0, len(request.Messages))
 	for _, message := range request.Messages {
-		if !message.IsStringContent() {
-			mediaMessages := message.ParseContent()
-			for j, mediaMessage := range mediaMessages {
-				if mediaMessage.Type == dto.ContentTypeImageURL {
-					imageUrl := mediaMessage.ImageUrl.(dto.MessageImageUrl)
-					mediaMessage.ImageUrl = imageUrl.Url
-					mediaMessages[j] = mediaMessage
-				}
+		mediaMessages := message.ParseContent()
+		for j, mediaMessage := range mediaMessages {
+			if mediaMessage.Type == dto.ContentTypeImageURL {
+				imageUrl := mediaMessage.ImageUrl.(dto.MessageImageUrl)
+				mediaMessage.ImageUrl = imageUrl.Url
+				mediaMessages[j] = mediaMessage
 			}
-			messageRaw, _ := json.Marshal(mediaMessages)
-			message.Content = messageRaw
 		}
+		message.SetMediaContent(mediaMessages)
 		messages = append(messages, dto.Message{
 			Role:       message.Role,
 			Content:    message.Content,
--- a/relay/channel/openai/relay-openai.go
+++ b/relay/channel/openai/relay-openai.go
@@ -5,10 +5,6 @@ import (
 	"bytes"
 	"encoding/json"
 	"fmt"
-	"github.com/bytedance/gopkg/util/gopool"
-	"github.com/gin-gonic/gin"
-	"github.com/gorilla/websocket"
-	"github.com/pkg/errors"
 	"io"
 	"math"
 	"mime/multipart"
@@ -23,21 +19,66 @@ import (
 	"strings"
 	"sync"
 	"time"
+
+	"github.com/bytedance/gopkg/util/gopool"
+	"github.com/gin-gonic/gin"
+	"github.com/gorilla/websocket"
+	"github.com/pkg/errors"
 )

-func sendStreamData(c *gin.Context, data string, forceFormat bool) error {
+func sendStreamData(c *gin.Context, info *relaycommon.RelayInfo, data string, forceFormat bool, thinkToContent bool) error {
 	if data == "" {
 		return nil
 	}

-	if forceFormat {
-		var lastStreamResponse dto.ChatCompletionsStreamResponse
-		if err := json.Unmarshal(common.StringToByteSlice(data), &lastStreamResponse); err != nil {
-			return err
-		}
+	if !forceFormat && !thinkToContent {
+		return service.StringData(c, data)
+	}
+
+	var lastStreamResponse dto.ChatCompletionsStreamResponse
+	if err := json.Unmarshal(common.StringToByteSlice(data), &lastStreamResponse); err != nil {
+		return err
+	}
+
+	if !thinkToContent {
 		return service.ObjectData(c, lastStreamResponse)
 	}
-	return service.StringData(c, data)
+
+	// Handle think to content conversion
+	if info.IsFirstResponse {
+		response := lastStreamResponse.Copy()
+		for i := range response.Choices {
+			response.Choices[i].Delta.SetContentString("<think>\n")
+			response.Choices[i].Delta.SetReasoningContent("")
+		}
+		service.ObjectData(c, response)
+	}
+
+	if lastStreamResponse.Choices == nil || len(lastStreamResponse.Choices) == 0 {
+		return service.ObjectData(c, lastStreamResponse)
+	}
+
+	// Process each choice
+	for i, choice := range lastStreamResponse.Choices {
+		// Handle transition from thinking to content
+		if len(choice.Delta.GetContentString()) > 0 && !info.SendLastReasoningResponse {
+			response := lastStreamResponse.Copy()
+			for j := range response.Choices {
+				response.Choices[j].Delta.SetContentString("\n</think>")
+				response.Choices[j].Delta.SetReasoningContent("")
+			}
+			info.SendLastReasoningResponse = true
+			service.ObjectData(c, response)
+		}
+
+		// Convert reasoning content to regular content
+		if len(choice.Delta.GetReasoningContent()) > 0 {
+			lastStreamResponse.Choices[i].Delta.SetContentString(choice.Delta.GetReasoningContent())
+			lastStreamResponse.Choices[i].Delta.SetReasoningContent("")
+		}
+	}
+
+	return service.ObjectData(c, lastStreamResponse)
 }

 func OaiStreamHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (*dto.OpenAIErrorWithStatusCode, *dto.Usage) {
@@ -56,11 +97,14 @@ func OaiStreamHandler(c *gin.Context, resp *http.Response, info *relaycommon.Rel
 	var usage = &dto.Usage{}
 	var streamItems []string // store stream items
 	var forceFormat bool
+	var thinkToContent bool

-	if info.ChannelType == common.ChannelTypeCustom {
-		if forceFmt, ok := info.ChannelSetting["force_format"].(bool); ok {
-			forceFormat = forceFmt
-		}
+	if forceFmt, ok := info.ChannelSetting[constant.ForceFormat].(bool); ok {
+		forceFormat = forceFmt
+	}
+
+	if think2Content, ok := info.ChannelSetting[constant.ChannelSettingThinkingToContent].(bool); ok {
+		thinkToContent = think2Content
 	}

 	toolCount := 0
@@ -84,9 +128,12 @@ func OaiStreamHandler(c *gin.Context, resp *http.Response, info *relaycommon.Rel
 	)
 	gopool.Go(func() {
 		for scanner.Scan() {
-			info.SetFirstResponseTime()
+			//info.SetFirstResponseTime()
 			ticker.Reset(time.Duration(constant.StreamingTimeout) * time.Second)
 			data := scanner.Text()
+			if common.DebugEnabled {
+				println(data)
+			}
 			if len(data) < 6 { // ignore blank line or wrong format
 				continue
 			}
@@ -98,10 +145,11 @@ func OaiStreamHandler(c *gin.Context, resp *http.Response, info *relaycommon.Rel
 			data = strings.TrimSpace(data)
 			if !strings.HasPrefix(data, "[DONE]") {
 				if lastStreamData != "" {
-					err := sendStreamData(c, lastStreamData, forceFormat)
+					err := sendStreamData(c, info, lastStreamData, forceFormat, thinkToContent)
 					if err != nil {
 						common.LogError(c, "streaming error: "+err.Error())
 					}
+					info.SetFirstResponseTime()
 				}
 				lastStreamData = data
 				streamItems = append(streamItems, data)
@@ -141,7 +189,7 @@ func OaiStreamHandler(c *gin.Context, resp *http.Response, info *relaycommon.Rel
 		}
 	}
 	if shouldSendLastResp {
-		sendStreamData(c, lastStreamData, forceFormat)
+		sendStreamData(c, info, lastStreamData, forceFormat, thinkToContent)
 	}

 	// 计算token
@@ -162,6 +210,7 @@ func OaiStreamHandler(c *gin.Context, resp *http.Response, info *relaycommon.Rel
 					//}
 					for _, choice := range streamResponse.Choices {
 						responseTextBuilder.WriteString(choice.Delta.GetContentString())
+						responseTextBuilder.WriteString(choice.Delta.GetReasoningContent())
 						if choice.Delta.ToolCalls != nil {
 							if len(choice.Delta.ToolCalls) > toolCount {
 								toolCount = len(choice.Delta.ToolCalls)
@@ -182,6 +231,7 @@ func OaiStreamHandler(c *gin.Context, resp *http.Response, info *relaycommon.Rel
 				//}
 				for _, choice := range streamResponse.Choices {
 					responseTextBuilder.WriteString(choice.Delta.GetContentString())
+					responseTextBuilder.WriteString(choice.Delta.GetReasoningContent())
 					if choice.Delta.ToolCalls != nil {
 						if len(choice.Delta.ToolCalls) > toolCount {
 							toolCount = len(choice.Delta.ToolCalls)
@@ -273,7 +323,7 @@ func OpenaiHandler(c *gin.Context, resp *http.Response, promptTokens int, model
 	if simpleResponse.Usage.TotalTokens == 0 || (simpleResponse.Usage.PromptTokens == 0 && simpleResponse.Usage.CompletionTokens == 0) {
 		completionTokens := 0
 		for _, choice := range simpleResponse.Choices {
-			ctkm, _ := service.CountTextToken(string(choice.Message.Content), model)
+			ctkm, _ := service.CountTextToken(choice.Message.StringContent()+choice.Message.ReasoningContent, model)
 			completionTokens += ctkm
 		}
 		simpleResponse.Usage = dto.Usage{
--- a/relay/common/relay_info.go
+++ b/relay/common/relay_info.go
@@ -13,24 +13,25 @@ import (
 )

 type RelayInfo struct {
-	ChannelType          int
-	ChannelId            int
-	TokenId              int
-	TokenKey             string
-	UserId               int
-	Group                string
-	TokenUnlimited       bool
-	StartTime            time.Time
-	FirstResponseTime    time.Time
-	setFirstResponse     bool
-	ApiType              int
-	IsStream             bool
-	IsPlayground         bool
-	UsePrice             bool
-	RelayMode            int
-	UpstreamModelName    string
-	OriginModelName      string
-	RecodeModelName      string
+	ChannelType               int
+	ChannelId                 int
+	TokenId                   int
+	TokenKey                  string
+	UserId                    int
+	Group                     string
+	TokenUnlimited            bool
+	StartTime                 time.Time
+	FirstResponseTime         time.Time
+	IsFirstResponse           bool
+	SendLastReasoningResponse bool
+	ApiType                   int
+	IsStream                  bool
+	IsPlayground              bool
+	UsePrice                  bool
+	RelayMode                 int
+	UpstreamModelName         string
+	OriginModelName           string
+	//RecodeModelName      string
 	RequestURLPath       string
 	ApiVersion           string
 	PromptTokens         int
@@ -39,6 +40,7 @@ type RelayInfo struct {
 	BaseUrl              string
 	SupportStreamOptions bool
 	ShouldIncludeUsage   bool
+	IsModelMapped        bool
 	ClientWs             *websocket.Conn
 	TargetWs             *websocket.Conn
 	InputAudioFormat     string
@@ -50,6 +52,18 @@ type RelayInfo struct {
 	ChannelSetting       map[string]interface{}
 }

+// 定义支持流式选项的通道类型
+var streamSupportedChannels = map[int]bool{
+	common.ChannelTypeOpenAI:     true,
+	common.ChannelTypeAnthropic:  true,
+	common.ChannelTypeAws:        true,
+	common.ChannelTypeGemini:     true,
+	common.ChannelCloudflare:     true,
+	common.ChannelTypeAzure:      true,
+	common.ChannelTypeVolcEngine: true,
+	common.ChannelTypeOllama:     true,
+}
+
 func GenRelayInfoWs(c *gin.Context, ws *websocket.Conn) *RelayInfo {
 	info := GenRelayInfo(c)
 	info.ClientWs = ws
@@ -75,6 +89,7 @@ func GenRelayInfo(c *gin.Context) *RelayInfo {
 	apiType, _ := relayconstant.ChannelType2APIType(channelType)

 	info := &RelayInfo{
+		IsFirstResponse:   true,
 		RelayMode:         relayconstant.Path2RelayMode(c.Request.URL.Path),
 		BaseUrl:           c.GetString("base_url"),
 		RequestURLPath:    c.Request.URL.String(),
@@ -89,12 +104,13 @@ func GenRelayInfo(c *gin.Context) *RelayInfo {
 		FirstResponseTime: startTime.Add(-time.Second),
 		OriginModelName:   c.GetString("original_model"),
 		UpstreamModelName: c.GetString("original_model"),
-		RecodeModelName:   c.GetString("recode_model"),
-		ApiType:           apiType,
-		ApiVersion:        c.GetString("api_version"),
-		ApiKey:            strings.TrimPrefix(c.Request.Header.Get("Authorization"), "Bearer "),
-		Organization:      c.GetString("channel_organization"),
-		ChannelSetting:    channelSetting,
+		//RecodeModelName:   c.GetString("original_model"),
+		IsModelMapped:  false,
+		ApiType:        apiType,
+		ApiVersion:     c.GetString("api_version"),
+		ApiKey:         strings.TrimPrefix(c.Request.Header.Get("Authorization"), "Bearer "),
+		Organization:   c.GetString("channel_organization"),
+		ChannelSetting: channelSetting,
 	}
 	if strings.HasPrefix(c.Request.URL.Path, "/pg") {
 		info.IsPlayground = true
@@ -110,10 +126,7 @@ func GenRelayInfo(c *gin.Context) *RelayInfo {
 	if info.ChannelType == common.ChannelTypeVertexAi {
 		info.ApiVersion = c.GetString("region")
 	}
-	if info.ChannelType == common.ChannelTypeOpenAI || info.ChannelType == common.ChannelTypeAnthropic ||
-		info.ChannelType == common.ChannelTypeAws || info.ChannelType == common.ChannelTypeGemini ||
-		info.ChannelType == common.ChannelCloudflare || info.ChannelType == common.ChannelTypeAzure ||
-		info.ChannelType == common.ChannelTypeVolcEngine || info.ChannelType == common.ChannelTypeOllama {
+	if streamSupportedChannels[info.ChannelType] {
 		info.SupportStreamOptions = true
 	}
 	return info
@@ -128,9 +141,9 @@ func (info *RelayInfo) SetIsStream(isStream bool) {
 }

 func (info *RelayInfo) SetFirstResponseTime() {
-	if !info.setFirstResponse {
+	if info.IsFirstResponse {
 		info.FirstResponseTime = time.Now()
-		info.setFirstResponse = true
+		info.IsFirstResponse = false
 	}
 }

--- a/relay/helper/model_mapped.go
+++ b/relay/helper/model_mapped.go
@@ -0,0 +1,25 @@
+package helper
+
+import (
+	"encoding/json"
+	"fmt"
+	"github.com/gin-gonic/gin"
+	"one-api/relay/common"
+)
+
+func ModelMappedHelper(c *gin.Context, info *common.RelayInfo) error {
+	// map model name
+	modelMapping := c.GetString("model_mapping")
+	if modelMapping != "" && modelMapping != "{}" {
+		modelMap := make(map[string]string)
+		err := json.Unmarshal([]byte(modelMapping), &modelMap)
+		if err != nil {
+			return fmt.Errorf("unmarshal_model_mapping_failed")
+		}
+		if modelMap[info.OriginModelName] != "" {
+			info.UpstreamModelName = modelMap[info.OriginModelName]
+			info.IsModelMapped = true
+		}
+	}
+	return nil
+}
--- a/relay/helper/price.go
+++ b/relay/helper/price.go
@@ -0,0 +1,41 @@
+package helper
+
+import (
+	"github.com/gin-gonic/gin"
+	"one-api/common"
+	relaycommon "one-api/relay/common"
+	"one-api/setting"
+)
+
+type PriceData struct {
+	ModelPrice             float64
+	ModelRatio             float64
+	GroupRatio             float64
+	UsePrice               bool
+	ShouldPreConsumedQuota int
+}
+
+func ModelPriceHelper(c *gin.Context, info *relaycommon.RelayInfo, promptTokens int, maxTokens int) PriceData {
+	modelPrice, usePrice := common.GetModelPrice(info.OriginModelName, false)
+	groupRatio := setting.GetGroupRatio(info.Group)
+	var preConsumedQuota int
+	var modelRatio float64
+	if !usePrice {
+		preConsumedTokens := common.PreConsumedQuota
+		if maxTokens != 0 {
+			preConsumedTokens = promptTokens + maxTokens
+		}
+		modelRatio = common.GetModelRatio(info.OriginModelName)
+		ratio := modelRatio * groupRatio
+		preConsumedQuota = int(float64(preConsumedTokens) * ratio)
+	} else {
+		preConsumedQuota = int(modelPrice * common.QuotaPerUnit * groupRatio)
+	}
+	return PriceData{
+		ModelPrice:             modelPrice,
+		ModelRatio:             modelRatio,
+		GroupRatio:             groupRatio,
+		UsePrice:               usePrice,
+		ShouldPreConsumedQuota: preConsumedQuota,
+	}
+}
--- a/relay/relay-audio.go
+++ b/relay/relay-audio.go
@@ -1,7 +1,6 @@
 package relay

 import (
-	"encoding/json"
 	"errors"
 	"fmt"
 	"github.com/gin-gonic/gin"
@@ -11,8 +10,10 @@ import (
 	"one-api/model"
 	relaycommon "one-api/relay/common"
 	relayconstant "one-api/relay/constant"
+	"one-api/relay/helper"
 	"one-api/service"
 	"one-api/setting"
+	"strings"
 )

 func getAndValidAudioRequest(c *gin.Context, info *relaycommon.RelayInfo) (*dto.AudioRequest, error) {
@@ -27,8 +28,9 @@ func getAndValidAudioRequest(c *gin.Context, info *relaycommon.RelayInfo) (*dto.
 			return nil, errors.New("model is required")
 		}
 		if setting.ShouldCheckPromptSensitive() {
-			err := service.CheckSensitiveInput(audioRequest.Input)
+			words, err := service.CheckSensitiveInput(audioRequest.Input)
 			if err != nil {
+				common.LogWarn(c, fmt.Sprintf("user sensitive words detected: %s", strings.Join(words, ",")))
 				return nil, err
 			}
 		}
@@ -73,15 +75,13 @@ func AudioHelper(c *gin.Context) (openaiErr *dto.OpenAIErrorWithStatusCode) {
 		relayInfo.PromptTokens = promptTokens
 	}

-	modelRatio := common.GetModelRatio(audioRequest.Model)
-	groupRatio := setting.GetGroupRatio(relayInfo.Group)
-	ratio := modelRatio * groupRatio
-	preConsumedQuota := int(float64(preConsumedTokens) * ratio)
+	priceData := helper.ModelPriceHelper(c, relayInfo, preConsumedTokens, 0)
+
 	userQuota, err := model.GetUserQuota(relayInfo.UserId, false)
 	if err != nil {
 		return service.OpenAIErrorWrapperLocal(err, "get_user_quota_failed", http.StatusInternalServerError)
 	}
-	preConsumedQuota, userQuota, openaiErr = preConsumeQuota(c, preConsumedQuota, relayInfo)
+	preConsumedQuota, userQuota, openaiErr := preConsumeQuota(c, priceData.ShouldPreConsumedQuota, relayInfo)
 	if openaiErr != nil {
 		return openaiErr
 	}
@@ -91,19 +91,12 @@ func AudioHelper(c *gin.Context) (openaiErr *dto.OpenAIErrorWithStatusCode) {
 		}
 	}()

-	// map model name
-	modelMapping := c.GetString("model_mapping")
-	if modelMapping != "" {
-		modelMap := make(map[string]string)
-		err := json.Unmarshal([]byte(modelMapping), &modelMap)
-		if err != nil {
-			return service.OpenAIErrorWrapper(err, "unmarshal_model_mapping_failed", http.StatusInternalServerError)
-		}
-		if modelMap[audioRequest.Model] != "" {
-			audioRequest.Model = modelMap[audioRequest.Model]
-		}
+	err = helper.ModelMappedHelper(c, relayInfo)
+	if err != nil {
+		return service.OpenAIErrorWrapperLocal(err, "model_mapped_error", http.StatusInternalServerError)
 	}
-	relayInfo.UpstreamModelName = audioRequest.Model
+
+	audioRequest.Model = relayInfo.UpstreamModelName

 	adaptor := GetAdaptor(relayInfo.ApiType)
 	if adaptor == nil {
@@ -140,7 +133,7 @@ func AudioHelper(c *gin.Context) (openaiErr *dto.OpenAIErrorWithStatusCode) {
 		return openaiErr
 	}

-	postConsumeQuota(c, relayInfo, audioRequest.Model, usage.(*dto.Usage), ratio, preConsumedQuota, userQuota, modelRatio, groupRatio, 0, false, "")
+	postConsumeQuota(c, relayInfo, usage.(*dto.Usage), preConsumedQuota, userQuota, priceData, "")

 	return nil
 }
--- a/relay/relay-image.go
+++ b/relay/relay-image.go
@@ -12,6 +12,7 @@ import (
 	"one-api/dto"
 	"one-api/model"
 	relaycommon "one-api/relay/common"
+	"one-api/relay/helper"
 	"one-api/service"
 	"one-api/setting"
 	"strings"
@@ -60,15 +61,16 @@ func getAndValidImageRequest(c *gin.Context, info *relaycommon.RelayInfo) (*dto.
 	//	return service.OpenAIErrorWrapper(errors.New("n must be between 1 and 10"), "invalid_field_value", http.StatusBadRequest)
 	//}
 	if setting.ShouldCheckPromptSensitive() {
-		err := service.CheckSensitiveInput(imageRequest.Prompt)
+		words, err := service.CheckSensitiveInput(imageRequest.Prompt)
 		if err != nil {
+			common.LogWarn(c, fmt.Sprintf("user sensitive words detected: %s", strings.Join(words, ",")))
 			return nil, err
 		}
 	}
 	return imageRequest, nil
 }

-func ImageHelper(c *gin.Context, relayMode int) *dto.OpenAIErrorWithStatusCode {
+func ImageHelper(c *gin.Context) *dto.OpenAIErrorWithStatusCode {
 	relayInfo := relaycommon.GenRelayInfo(c)

 	imageRequest, err := getAndValidImageRequest(c, relayInfo)
@@ -77,29 +79,20 @@ func ImageHelper(c *gin.Context, relayMode int) *dto.OpenAIErrorWithStatusCode {
 		return service.OpenAIErrorWrapper(err, "invalid_image_request", http.StatusBadRequest)
 	}

-	// map model name
-	modelMapping := c.GetString("model_mapping")
-	if modelMapping != "" {
-		modelMap := make(map[string]string)
-		err := json.Unmarshal([]byte(modelMapping), &modelMap)
-		if err != nil {
-			return service.OpenAIErrorWrapper(err, "unmarshal_model_mapping_failed", http.StatusInternalServerError)
-		}
-		if modelMap[imageRequest.Model] != "" {
-			imageRequest.Model = modelMap[imageRequest.Model]
-		}
+	err = helper.ModelMappedHelper(c, relayInfo)
+	if err != nil {
+		return service.OpenAIErrorWrapperLocal(err, "model_mapped_error", http.StatusInternalServerError)
 	}
-	relayInfo.UpstreamModelName = imageRequest.Model

-	modelPrice, success := common.GetModelPrice(imageRequest.Model, true)
-	if !success {
-		modelRatio := common.GetModelRatio(imageRequest.Model)
+	imageRequest.Model = relayInfo.UpstreamModelName
+
+	priceData := helper.ModelPriceHelper(c, relayInfo, 0, 0)
+	if !priceData.UsePrice {
 		// modelRatio 16 = modelPrice $0.04
 		// per 1 modelRatio = $0.04 / 16
-		modelPrice = 0.0025 * modelRatio
+		priceData.ModelPrice = 0.0025 * priceData.ModelRatio
 	}

-	groupRatio := setting.GetGroupRatio(relayInfo.Group)
 	userQuota, err := model.GetUserQuota(relayInfo.UserId, false)

 	sizeRatio := 1.0
@@ -122,11 +115,11 @@ func ImageHelper(c *gin.Context, relayMode int) *dto.OpenAIErrorWithStatusCode {
 		}
 	}

-	imageRatio := modelPrice * sizeRatio * qualityRatio * float64(imageRequest.N)
-	quota := int(imageRatio * groupRatio * common.QuotaPerUnit)
+	priceData.ModelPrice *= sizeRatio * qualityRatio * float64(imageRequest.N)
+	quota := int(priceData.ModelPrice * priceData.GroupRatio * common.QuotaPerUnit)

 	if userQuota-quota < 0 {
-		return service.OpenAIErrorWrapperLocal(errors.New(fmt.Sprintf("image pre-consumed quota failed, user quota: %d, need quota: %d", userQuota, quota)), "insufficient_user_quota", http.StatusBadRequest)
+		return service.OpenAIErrorWrapperLocal(fmt.Errorf("image pre-consumed quota failed, user quota: %s, need quota: %s", common.FormatQuota(userQuota), common.FormatQuota(quota)), "insufficient_user_quota", http.StatusForbidden)
 	}

 	adaptor := GetAdaptor(relayInfo.ApiType)
@@ -184,7 +177,6 @@ func ImageHelper(c *gin.Context, relayMode int) *dto.OpenAIErrorWithStatusCode {
 	}

 	logContent := fmt.Sprintf("大小 %s, 品质 %s", imageRequest.Size, quality)
-	postConsumeQuota(c, relayInfo, imageRequest.Model, usage, 0, 0, userQuota, 0, groupRatio, imageRatio, true, logContent)
-
+	postConsumeQuota(c, relayInfo, usage, 0, userQuota, priceData, logContent)
 	return nil
 }
--- a/relay/relay-text.go
+++ b/relay/relay-text.go
@@ -15,6 +15,7 @@ import (
 	"one-api/model"
 	relaycommon "one-api/relay/common"
 	relayconstant "one-api/relay/constant"
+	"one-api/relay/helper"
 	"one-api/service"
 	"one-api/setting"
 	"strings"
@@ -76,40 +77,21 @@ func TextHelper(c *gin.Context) (openaiErr *dto.OpenAIErrorWithStatusCode) {
 		return service.OpenAIErrorWrapperLocal(err, "invalid_text_request", http.StatusBadRequest)
 	}

-	// map model name
-	//isModelMapped := false
-	modelMapping := c.GetString("model_mapping")
-	//isModelMapped := false
-	if modelMapping != "" && modelMapping != "{}" {
-		modelMap := make(map[string]string)
-		err := json.Unmarshal([]byte(modelMapping), &modelMap)
-		if err != nil {
-			return service.OpenAIErrorWrapperLocal(err, "unmarshal_model_mapping_failed", http.StatusInternalServerError)
-		}
-		if modelMap[textRequest.Model] != "" {
-			//isModelMapped = true
-			textRequest.Model = modelMap[textRequest.Model]
-			// set upstream model name
-			//isModelMapped = true
-		}
-	}
-	relayInfo.UpstreamModelName = textRequest.Model
-	relayInfo.RecodeModelName = textRequest.Model
-	modelPrice, getModelPriceSuccess := common.GetModelPrice(textRequest.Model, false)
-	groupRatio := setting.GetGroupRatio(relayInfo.Group)
-
-	var preConsumedQuota int
-	var ratio float64
-	var modelRatio float64
-	//err := service.SensitiveWordsCheck(textRequest)
-
 	if setting.ShouldCheckPromptSensitive() {
-		err = checkRequestSensitive(textRequest, relayInfo)
+		words, err := checkRequestSensitive(textRequest, relayInfo)
 		if err != nil {
+			common.LogWarn(c, fmt.Sprintf("user sensitive words detected: %s", strings.Join(words, ", ")))
 			return service.OpenAIErrorWrapperLocal(err, "sensitive_words_detected", http.StatusBadRequest)
 		}
 	}

+	err = helper.ModelMappedHelper(c, relayInfo)
+	if err != nil {
+		return service.OpenAIErrorWrapperLocal(err, "model_mapped_error", http.StatusInternalServerError)
+	}
+
+	textRequest.Model = relayInfo.UpstreamModelName
+
 	// 获取 promptTokens，如果上下文中已经存在，则直接使用
 	var promptTokens int
 	if value, exists := c.Get("prompt_tokens"); exists {
@@ -124,20 +106,10 @@ func TextHelper(c *gin.Context) (openaiErr *dto.OpenAIErrorWithStatusCode) {
 		c.Set("prompt_tokens", promptTokens)
 	}

-	if !getModelPriceSuccess {
-		preConsumedTokens := common.PreConsumedQuota
-		if textRequest.MaxTokens != 0 {
-			preConsumedTokens = promptTokens + int(textRequest.MaxTokens)
-		}
-		modelRatio = common.GetModelRatio(textRequest.Model)
-		ratio = modelRatio * groupRatio
-		preConsumedQuota = int(float64(preConsumedTokens) * ratio)
-	} else {
-		preConsumedQuota = int(modelPrice * common.QuotaPerUnit * groupRatio)
-	}
+	priceData := helper.ModelPriceHelper(c, relayInfo, promptTokens, int(textRequest.MaxTokens))

 	// pre-consume quota 预消耗配额
-	preConsumedQuota, userQuota, openaiErr := preConsumeQuota(c, preConsumedQuota, relayInfo)
+	preConsumedQuota, userQuota, openaiErr := preConsumeQuota(c, priceData.ShouldPreConsumedQuota, relayInfo)
 	if openaiErr != nil {
 		return openaiErr
 	}
@@ -220,10 +192,10 @@ func TextHelper(c *gin.Context) (openaiErr *dto.OpenAIErrorWithStatusCode) {
 		return openaiErr
 	}

-	if strings.HasPrefix(relayInfo.RecodeModelName, "gpt-4o-audio") {
-		service.PostAudioConsumeQuota(c, relayInfo, usage.(*dto.Usage), preConsumedQuota, userQuota, modelRatio, groupRatio, modelPrice, getModelPriceSuccess, "")
+	if strings.HasPrefix(relayInfo.OriginModelName, "gpt-4o-audio") {
+		service.PostAudioConsumeQuota(c, relayInfo, usage.(*dto.Usage), preConsumedQuota, userQuota, priceData, "")
 	} else {
-		postConsumeQuota(c, relayInfo, relayInfo.RecodeModelName, usage.(*dto.Usage), ratio, preConsumedQuota, userQuota, modelRatio, groupRatio, modelPrice, getModelPriceSuccess, "")
+		postConsumeQuota(c, relayInfo, usage.(*dto.Usage), preConsumedQuota, userQuota, priceData, "")
 	}
 	return nil
 }
@@ -248,19 +220,20 @@ func getPromptTokens(textRequest *dto.GeneralOpenAIRequest, info *relaycommon.Re
 	return promptTokens, err
 }

-func checkRequestSensitive(textRequest *dto.GeneralOpenAIRequest, info *relaycommon.RelayInfo) error {
+func checkRequestSensitive(textRequest *dto.GeneralOpenAIRequest, info *relaycommon.RelayInfo) ([]string, error) {
 	var err error
+	var words []string
 	switch info.RelayMode {
 	case relayconstant.RelayModeChatCompletions:
-		err = service.CheckSensitiveMessages(textRequest.Messages)
+		words, err = service.CheckSensitiveMessages(textRequest.Messages)
 	case relayconstant.RelayModeCompletions:
-		err = service.CheckSensitiveInput(textRequest.Prompt)
+		words, err = service.CheckSensitiveInput(textRequest.Prompt)
 	case relayconstant.RelayModeModerations:
-		err = service.CheckSensitiveInput(textRequest.Input)
+		words, err = service.CheckSensitiveInput(textRequest.Input)
 	case relayconstant.RelayModeEmbeddings:
-		err = service.CheckSensitiveInput(textRequest.Input)
+		words, err = service.CheckSensitiveInput(textRequest.Input)
 	}
-	return err
+	return words, err
 }

 // 预扣费并返回用户剩余配额
@@ -273,7 +246,7 @@ func preConsumeQuota(c *gin.Context, preConsumedQuota int, relayInfo *relaycommo
 		return 0, 0, service.OpenAIErrorWrapperLocal(errors.New("user quota is not enough"), "insufficient_user_quota", http.StatusForbidden)
 	}
 	if userQuota-preConsumedQuota < 0 {
-		return 0, 0, service.OpenAIErrorWrapperLocal(fmt.Errorf("chat pre-consumed quota failed, user quota: %s, need quota: %s", common.FormatQuota(userQuota), common.FormatQuota(preConsumedQuota)), "insufficient_user_quota", http.StatusBadRequest)
+		return 0, 0, service.OpenAIErrorWrapperLocal(fmt.Errorf("chat pre-consumed quota failed, user quota: %s, need quota: %s", common.FormatQuota(userQuota), common.FormatQuota(preConsumedQuota)), "insufficient_user_quota", http.StatusForbidden)
 	}
 	if userQuota > 100*preConsumedQuota {
 		// 用户额度充足，判断令牌额度是否充足
@@ -319,9 +292,8 @@ func returnPreConsumedQuota(c *gin.Context, relayInfo *relaycommon.RelayInfo, us
 	}
 }

-func postConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, modelName string,
-	usage *dto.Usage, ratio float64, preConsumedQuota int, userQuota int, modelRatio float64, groupRatio float64,
-	modelPrice float64, usePrice bool, extraContent string) {
+func postConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo,
+	usage *dto.Usage, preConsumedQuota int, userQuota int, priceData helper.PriceData, extraContent string) {
 	if usage == nil {
 		usage = &dto.Usage{
 			PromptTokens:     relayInfo.PromptTokens,
@@ -333,12 +305,18 @@ func postConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, modelN
 	useTimeSeconds := time.Now().Unix() - relayInfo.StartTime.Unix()
 	promptTokens := usage.PromptTokens
 	completionTokens := usage.CompletionTokens
+	modelName := relayInfo.OriginModelName

 	tokenName := ctx.GetString("token_name")
 	completionRatio := common.GetCompletionRatio(modelName)
+	ratio := priceData.ModelRatio * priceData.GroupRatio
+	modelRatio := priceData.ModelRatio
+	groupRatio := priceData.GroupRatio
+	modelPrice := priceData.ModelPrice
+	usePrice := priceData.UsePrice

 	quota := 0
-	if !usePrice {
+	if !priceData.UsePrice {
 		quota = promptTokens + int(math.Round(float64(completionTokens)*completionRatio))
 		quota = int(math.Round(float64(quota) * ratio))
 		if ratio != 0 && quota <= 0 {
--- a/relay/relay_embedding.go
+++ b/relay/relay_embedding.go
@@ -10,8 +10,8 @@ import (
 	"one-api/dto"
 	relaycommon "one-api/relay/common"
 	relayconstant "one-api/relay/constant"
+	"one-api/relay/helper"
 	"one-api/service"
-	"one-api/setting"
 )

 func getEmbeddingPromptToken(embeddingRequest dto.EmbeddingRequest) int {
@@ -47,43 +47,20 @@ func EmbeddingHelper(c *gin.Context) (openaiErr *dto.OpenAIErrorWithStatusCode)
 		return service.OpenAIErrorWrapperLocal(err, "invalid_embedding_request", http.StatusBadRequest)
 	}

-	// map model name
-	modelMapping := c.GetString("model_mapping")
-	//isModelMapped := false
-	if modelMapping != "" && modelMapping != "{}" {
-		modelMap := make(map[string]string)
-		err := json.Unmarshal([]byte(modelMapping), &modelMap)
-		if err != nil {
-			return service.OpenAIErrorWrapperLocal(err, "unmarshal_model_mapping_failed", http.StatusInternalServerError)
-		}
-		if modelMap[embeddingRequest.Model] != "" {
-			embeddingRequest.Model = modelMap[embeddingRequest.Model]
-			// set upstream model name
-			//isModelMapped = true
-		}
+	err = helper.ModelMappedHelper(c, relayInfo)
+	if err != nil {
+		return service.OpenAIErrorWrapperLocal(err, "model_mapped_error", http.StatusInternalServerError)
 	}

-	relayInfo.UpstreamModelName = embeddingRequest.Model
-	modelPrice, success := common.GetModelPrice(embeddingRequest.Model, false)
-	groupRatio := setting.GetGroupRatio(relayInfo.Group)
-
-	var preConsumedQuota int
-	var ratio float64
-	var modelRatio float64
+	embeddingRequest.Model = relayInfo.UpstreamModelName

 	promptToken := getEmbeddingPromptToken(*embeddingRequest)
-	if !success {
-		preConsumedTokens := promptToken
-		modelRatio = common.GetModelRatio(embeddingRequest.Model)
-		ratio = modelRatio * groupRatio
-		preConsumedQuota = int(float64(preConsumedTokens) * ratio)
-	} else {
-		preConsumedQuota = int(modelPrice * common.QuotaPerUnit * groupRatio)
-	}
 	relayInfo.PromptTokens = promptToken

+	priceData := helper.ModelPriceHelper(c, relayInfo, promptToken, 0)
+
 	// pre-consume quota 预消耗配额
-	preConsumedQuota, userQuota, openaiErr := preConsumeQuota(c, preConsumedQuota, relayInfo)
+	preConsumedQuota, userQuota, openaiErr := preConsumeQuota(c, priceData.ShouldPreConsumedQuota, relayInfo)
 	if openaiErr != nil {
 		return openaiErr
 	}
@@ -132,6 +109,6 @@ func EmbeddingHelper(c *gin.Context) (openaiErr *dto.OpenAIErrorWithStatusCode)
 		service.ResetStatusCode(openaiErr, statusCodeMappingStr)
 		return openaiErr
 	}
-	postConsumeQuota(c, relayInfo, embeddingRequest.Model, usage.(*dto.Usage), ratio, preConsumedQuota, userQuota, modelRatio, groupRatio, modelPrice, success, "")
+	postConsumeQuota(c, relayInfo, usage.(*dto.Usage), preConsumedQuota, userQuota, priceData, "")
 	return nil
 }
--- a/relay/relay_rerank.go
+++ b/relay/relay_rerank.go
@@ -9,8 +9,8 @@ import (
 	"one-api/common"
 	"one-api/dto"
 	relaycommon "one-api/relay/common"
+	"one-api/relay/helper"
 	"one-api/service"
-	"one-api/setting"
 )

 func getRerankPromptToken(rerankRequest dto.RerankRequest) int {
@@ -40,43 +40,20 @@ func RerankHelper(c *gin.Context, relayMode int) (openaiErr *dto.OpenAIErrorWith
 		return service.OpenAIErrorWrapperLocal(fmt.Errorf("documents is empty"), "invalid_documents", http.StatusBadRequest)
 	}

-	// map model name
-	modelMapping := c.GetString("model_mapping")
-	//isModelMapped := false
-	if modelMapping != "" && modelMapping != "{}" {
-		modelMap := make(map[string]string)
-		err := json.Unmarshal([]byte(modelMapping), &modelMap)
-		if err != nil {
-			return service.OpenAIErrorWrapperLocal(err, "unmarshal_model_mapping_failed", http.StatusInternalServerError)
-		}
-		if modelMap[rerankRequest.Model] != "" {
-			rerankRequest.Model = modelMap[rerankRequest.Model]
-			// set upstream model name
-			//isModelMapped = true
-		}
+	err = helper.ModelMappedHelper(c, relayInfo)
+	if err != nil {
+		return service.OpenAIErrorWrapperLocal(err, "model_mapped_error", http.StatusInternalServerError)
 	}

-	relayInfo.UpstreamModelName = rerankRequest.Model
-	modelPrice, success := common.GetModelPrice(rerankRequest.Model, false)
-	groupRatio := setting.GetGroupRatio(relayInfo.Group)
-
-	var preConsumedQuota int
-	var ratio float64
-	var modelRatio float64
+	rerankRequest.Model = relayInfo.UpstreamModelName

 	promptToken := getRerankPromptToken(*rerankRequest)
-	if !success {
-		preConsumedTokens := promptToken
-		modelRatio = common.GetModelRatio(rerankRequest.Model)
-		ratio = modelRatio * groupRatio
-		preConsumedQuota = int(float64(preConsumedTokens) * ratio)
-	} else {
-		preConsumedQuota = int(modelPrice * common.QuotaPerUnit * groupRatio)
-	}
 	relayInfo.PromptTokens = promptToken

+	priceData := helper.ModelPriceHelper(c, relayInfo, promptToken, 0)
+
 	// pre-consume quota 预消耗配额
-	preConsumedQuota, userQuota, openaiErr := preConsumeQuota(c, preConsumedQuota, relayInfo)
+	preConsumedQuota, userQuota, openaiErr := preConsumeQuota(c, priceData.ShouldPreConsumedQuota, relayInfo)
 	if openaiErr != nil {
 		return openaiErr
 	}
@@ -124,6 +101,6 @@ func RerankHelper(c *gin.Context, relayMode int) (openaiErr *dto.OpenAIErrorWith
 		service.ResetStatusCode(openaiErr, statusCodeMappingStr)
 		return openaiErr
 	}
-	postConsumeQuota(c, relayInfo, rerankRequest.Model, usage.(*dto.Usage), ratio, preConsumedQuota, userQuota, modelRatio, groupRatio, modelPrice, success, "")
+	postConsumeQuota(c, relayInfo, usage.(*dto.Usage), preConsumedQuota, userQuota, priceData, "")
 	return nil
 }
--- a/router/relay-router.go
+++ b/router/relay-router.go
@@ -24,6 +24,7 @@ func SetRelayRouter(router *gin.Engine) {
 	}
 	relayV1Router := router.Group("/v1")
 	relayV1Router.Use(middleware.TokenAuth())
+	relayV1Router.Use(middleware.ModelRequestRateLimit())
 	{
 		// WebSocket 路由
 		wsRouter := relayV1Router.Group("")
--- a/service/log_info_generate.go
+++ b/service/log_info_generate.go
@@ -16,6 +16,10 @@ func GenerateTextOtherInfo(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, m
 	if relayInfo.ReasoningEffort != "" {
 		other["reasoning_effort"] = relayInfo.ReasoningEffort
 	}
+	if relayInfo.IsModelMapped {
+		other["is_model_mapped"] = true
+		other["upstream_model_name"] = relayInfo.UpstreamModelName
+	}
 	adminInfo := make(map[string]interface{})
 	adminInfo["use_channel"] = ctx.GetStringSlice("use_channel")
 	other["admin_info"] = adminInfo
--- a/service/quota.go
+++ b/service/quota.go
@@ -10,6 +10,7 @@ import (
 	"one-api/dto"
 	"one-api/model"
 	relaycommon "one-api/relay/common"
+	"one-api/relay/helper"
 	"one-api/setting"
 	"strings"
 	"time"
@@ -68,7 +69,7 @@ func PreWssConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usag
 		return err
 	}

-	modelName := relayInfo.UpstreamModelName
+	modelName := relayInfo.OriginModelName
 	textInputTokens := usage.InputTokenDetails.TextTokens
 	textOutTokens := usage.OutputTokenDetails.TextTokens
 	audioInputTokens := usage.InputTokenDetails.AudioTokens
@@ -94,11 +95,11 @@ func PreWssConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usag
 	quota := calculateAudioQuota(quotaInfo)

 	if userQuota < quota {
-		return errors.New(fmt.Sprintf("用户额度不足，剩余额度为 %d", userQuota))
+		return fmt.Errorf("user quota is not enough, user quota: %s, need quota: %s", common.FormatQuota(userQuota), common.FormatQuota(quota))
 	}

 	if !token.UnlimitedQuota && token.RemainQuota < quota {
-		return errors.New(fmt.Sprintf("令牌额度不足，剩余额度为 %d", token.RemainQuota))
+		return fmt.Errorf("token quota is not enough, token remain quota: %s, need quota: %s", common.FormatQuota(token.RemainQuota), common.FormatQuota(quota))
 	}

 	err = PostConsumeQuota(relayInfo, quota, 0, false)
@@ -122,7 +123,7 @@ func PostWssConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, mod

 	tokenName := ctx.GetString("token_name")
 	completionRatio := common.GetCompletionRatio(modelName)
-	audioRatio := common.GetAudioRatio(relayInfo.UpstreamModelName)
+	audioRatio := common.GetAudioRatio(relayInfo.OriginModelName)
 	audioCompletionRatio := common.GetAudioCompletionRatio(modelName)

 	quotaInfo := QuotaInfo{
@@ -173,8 +174,7 @@ func PostWssConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, mod
 }

 func PostAudioConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo,
-	usage *dto.Usage, preConsumedQuota int, userQuota int, modelRatio float64, groupRatio float64,
-	modelPrice float64, usePrice bool, extraContent string) {
+	usage *dto.Usage, preConsumedQuota int, userQuota int, priceData helper.PriceData, extraContent string) {

 	useTimeSeconds := time.Now().Unix() - relayInfo.StartTime.Unix()
 	textInputTokens := usage.PromptTokensDetails.TextTokens
@@ -184,9 +184,14 @@ func PostAudioConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo,
 	audioOutTokens := usage.CompletionTokenDetails.AudioTokens

 	tokenName := ctx.GetString("token_name")
-	completionRatio := common.GetCompletionRatio(relayInfo.RecodeModelName)
-	audioRatio := common.GetAudioRatio(relayInfo.RecodeModelName)
-	audioCompletionRatio := common.GetAudioCompletionRatio(relayInfo.RecodeModelName)
+	completionRatio := common.GetCompletionRatio(relayInfo.OriginModelName)
+	audioRatio := common.GetAudioRatio(relayInfo.OriginModelName)
+	audioCompletionRatio := common.GetAudioCompletionRatio(relayInfo.OriginModelName)
+
+	modelRatio := priceData.ModelRatio
+	groupRatio := priceData.GroupRatio
+	modelPrice := priceData.ModelPrice
+	usePrice := priceData.UsePrice

 	quotaInfo := QuotaInfo{
 		InputDetails: TokenDetails{
@@ -197,7 +202,7 @@ func PostAudioConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo,
 			TextTokens:  textOutTokens,
 			AudioTokens: audioOutTokens,
 		},
-		ModelName:  relayInfo.RecodeModelName,
+		ModelName:  relayInfo.OriginModelName,
 		UsePrice:   usePrice,
 		ModelRatio: modelRatio,
 		GroupRatio: groupRatio,
@@ -220,7 +225,7 @@ func PostAudioConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo,
 		quota = 0
 		logContent += fmt.Sprintf("（可能是上游超时）")
 		common.LogError(ctx, fmt.Sprintf("total tokens is 0, cannot consume quota, userId %d, channelId %d, "+
-			"tokenId %d, model %s， pre-consumed quota %d", relayInfo.UserId, relayInfo.ChannelId, relayInfo.TokenId, relayInfo.RecodeModelName, preConsumedQuota))
+			"tokenId %d, model %s， pre-consumed quota %d", relayInfo.UserId, relayInfo.ChannelId, relayInfo.TokenId, relayInfo.OriginModelName, preConsumedQuota))
 	} else {
 		quotaDelta := quota - preConsumedQuota
 		if quotaDelta != 0 {
@@ -233,7 +238,7 @@ func PostAudioConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo,
 		model.UpdateChannelUsedQuota(relayInfo.ChannelId, quota)
 	}

-	logModel := relayInfo.RecodeModelName
+	logModel := relayInfo.OriginModelName
 	if extraContent != "" {
 		logContent += ", " + extraContent
 	}
@@ -257,7 +262,7 @@ func PreConsumeTokenQuota(relayInfo *relaycommon.RelayInfo, quota int) error {
 		return err
 	}
 	if !relayInfo.TokenUnlimited && token.RemainQuota < quota {
-		return errors.New("令牌额度不足")
+		return fmt.Errorf("token quota is not enough, token remain quota: %s, need quota: %s", common.FormatQuota(token.RemainQuota), common.FormatQuota(quota))
 	}
 	err = model.DecreaseTokenQuota(relayInfo.TokenId, relayInfo.TokenKey, quota)
 	if err != nil {
--- a/service/sensitive.go
+++ b/service/sensitive.go
@@ -8,48 +8,47 @@ import (
 	"strings"
 )

-func CheckSensitiveMessages(messages []dto.Message) error {
+func CheckSensitiveMessages(messages []dto.Message) ([]string, error) {
+	if len(messages) == 0 {
+		return nil, nil
+	}
+
 	for _, message := range messages {
-		if len(message.Content) > 0 {
-			if message.IsStringContent() {
-				stringContent := message.StringContent()
-				if ok, words := SensitiveWordContains(stringContent); ok {
-					return errors.New("sensitive words: " + strings.Join(words, ","))
-				}
+		arrayContent := message.ParseContent()
+		for _, m := range arrayContent {
+			if m.Type == "image_url" {
+				// TODO: check image url
+				continue
 			}
-		} else {
-			arrayContent := message.ParseContent()
-			for _, m := range arrayContent {
-				if m.Type == "image_url" {
-					// TODO: check image url
-				} else {
-					if ok, words := SensitiveWordContains(m.Text); ok {
-						return errors.New("sensitive words: " + strings.Join(words, ","))
-					}
-				}
+			// 检查 text 是否为空
+			if m.Text == "" {
+				continue
+			}
+			if ok, words := SensitiveWordContains(m.Text); ok {
+				return words, errors.New("sensitive words detected")
 			}
 		}
 	}
-	return nil
+	return nil, nil
 }

-func CheckSensitiveText(text string) error {
+func CheckSensitiveText(text string) ([]string, error) {
 	if ok, words := SensitiveWordContains(text); ok {
-		return errors.New("sensitive words: " + strings.Join(words, ","))
+		return words, errors.New("sensitive words detected")
 	}
-	return nil
+	return nil, nil
 }

-func CheckSensitiveInput(input any) error {
+func CheckSensitiveInput(input any) ([]string, error) {
 	switch v := input.(type) {
 	case string:
 		return CheckSensitiveText(v)
 	case []string:
-		text := ""
+		var builder strings.Builder
 		for _, s := range v {
-			text += s
+			builder.WriteString(s)
 		}
-		return CheckSensitiveText(text)
+		return CheckSensitiveText(builder.String())
 	}
 	return CheckSensitiveText(fmt.Sprintf("%v", input))
 }
@@ -59,8 +58,11 @@ func SensitiveWordContains(text string) (bool, []string) {
 	if len(setting.SensitiveWords) == 0 {
 		return false, nil
 	}
+	if len(text) == 0 {
+		return false, nil
+	}
 	checkText := strings.ToLower(text)
-	return AcSearch(checkText, setting.SensitiveWords, false)
+	return AcSearch(checkText, setting.SensitiveWords, true)
 }

 // SensitiveWordReplace 敏感词替换，返回是否包含敏感词和替换后的文本
@@ -72,14 +74,21 @@ func SensitiveWordReplace(text string, returnImmediately bool) (bool, []string,
 	m := InitAc(setting.SensitiveWords)
 	hits := m.MultiPatternSearch([]rune(checkText), returnImmediately)
 	if len(hits) > 0 {
-		words := make([]string, 0)
+		words := make([]string, 0, len(hits))
+		var builder strings.Builder
+		builder.Grow(len(text))
+		lastPos := 0
+
 		for _, hit := range hits {
 			pos := hit.Pos
 			word := string(hit.Word)
-			text = text[:pos] + "**###**" + text[pos+len(word):]
+			builder.WriteString(text[lastPos:pos])
+			builder.WriteString("**###**")
+			lastPos = pos + len(word)
 			words = append(words, word)
 		}
-		return true, words, text
+		builder.WriteString(text[lastPos:])
+		return true, words, builder.String()
 	}
 	return false, nil, text
 }
--- a/service/token_counter.go
+++ b/service/token_counter.go
@@ -78,6 +78,9 @@ func getTokenEncoder(model string) *tiktoken.Tiktoken {
 }

 func getTokenNum(tokenEncoder *tiktoken.Tiktoken, text string) int {
+	if text == "" {
+		return 0
+	}
 	return len(tokenEncoder.Encode(text, nil, nil))
 }

@@ -282,30 +285,25 @@ func CountTokenMessages(info *relaycommon.RelayInfo, messages []dto.Message, mod
 		tokenNum += tokensPerMessage
 		tokenNum += getTokenNum(tokenEncoder, message.Role)
 		if len(message.Content) > 0 {
-			if message.IsStringContent() {
-				stringContent := message.StringContent()
-				tokenNum += getTokenNum(tokenEncoder, stringContent)
-				if message.Name != nil {
-					tokenNum += tokensPerName
-					tokenNum += getTokenNum(tokenEncoder, *message.Name)
-				}
-			} else {
-				arrayContent := message.ParseContent()
-				for _, m := range arrayContent {
-					if m.Type == dto.ContentTypeImageURL {
-						imageUrl := m.ImageUrl.(dto.MessageImageUrl)
-						imageTokenNum, err := getImageToken(info, &imageUrl, model, stream)
-						if err != nil {
-							return 0, err
-						}
-						tokenNum += imageTokenNum
-						log.Printf("image token num: %d", imageTokenNum)
-					} else if m.Type == dto.ContentTypeInputAudio {
-						// TODO: 音频token数量计算
-						tokenNum += 100
-					} else {
-						tokenNum += getTokenNum(tokenEncoder, m.Text)
+			if message.Name != nil {
+				tokenNum += tokensPerName
+				tokenNum += getTokenNum(tokenEncoder, *message.Name)
+			}
+			arrayContent := message.ParseContent()
+			for _, m := range arrayContent {
+				if m.Type == dto.ContentTypeImageURL {
+					imageUrl := m.ImageUrl.(dto.MessageImageUrl)
+					imageTokenNum, err := getImageToken(info, &imageUrl, model, stream)
+					if err != nil {
+						return 0, err
 					}
+					tokenNum += imageTokenNum
+					log.Printf("image token num: %d", imageTokenNum)
+				} else if m.Type == dto.ContentTypeInputAudio {
+					// TODO: 音频token数量计算
+					tokenNum += 100
+				} else {
+					tokenNum += getTokenNum(tokenEncoder, m.Text)
 				}
 			}
 		}
--- a/setting/operation_setting.go
+++ b/setting/operation_setting.go
@@ -23,6 +23,7 @@ func AutomaticDisableKeywordsFromString(s string) {
 	ak := strings.Split(s, "\n")
 	for _, k := range ak {
 		k = strings.TrimSpace(k)
+		k = strings.ToLower(k)
 		if k != "" {
 			AutomaticDisableKeywords = append(AutomaticDisableKeywords, k)
 		}
--- a/setting/rate_limit.go
+++ b/setting/rate_limit.go
@@ -0,0 +1,6 @@
+package setting
+
+var ModelRequestRateLimitEnabled = false
+var ModelRequestRateLimitDurationMinutes = 1
+var ModelRequestRateLimitCount = 0
+var ModelRequestRateLimitSuccessCount = 1000
--- a/web/src/components/LogsTable.js
+++ b/web/src/components/LogsTable.js
@@ -15,7 +15,7 @@ import {
  Button, Descriptions,
  Form,
  Layout,
-  Modal,
+  Modal, Popover,
  Select,
  Space,
  Spin,
@@ -34,6 +34,7 @@ import {
 import Paragraph from '@douyinfe/semi-ui/lib/es/typography/paragraph';
 import { getLogOther } from '../helpers/other.js';
 import { StyleContext } from '../context/Style/index.js';
+import { IconInherit, IconRefresh } from '@douyinfe/semi-icons';

 const { Header } = Layout;

@@ -141,7 +142,78 @@ const LogsTable = () => {
        </Tag>
      );
    }
-  } 
+  }
+
+  function renderModelName(record) {
+
+    let other = getLogOther(record.other);
+    let modelMapped = other?.is_model_mapped && other?.upstream_model_name && other?.upstream_model_name !== '';
+    if (!modelMapped) {
+      return <Tag
+        color={stringToColor(record.model_name)}
+        size='large'
+        onClick={(event) => {
+          copyText(event, record.model_name).then(r => {});
+        }}
+      >
+        {' '}{record.model_name}{' '}
+      </Tag>;
+    } else {
+      return (
+        <>
+          <Space vertical align={'start'}>
+            <Popover content={
+              <div style={{padding: 10}}> 
+                <Space vertical align={'start'}>
+                  <Tag
+                    color={stringToColor(record.model_name)}
+                    size='large'
+                    onClick={(event) => {
+                      copyText(event, record.model_name).then(r => {});
+                    }}
+                  >
+                    {t('请求并计费模型')}{' '}{record.model_name}{' '}
+                  </Tag>
+                  <Tag
+                    color={stringToColor(other.upstream_model_name)}
+                    size='large'
+                    onClick={(event) => {
+                      copyText(event, other.upstream_model_name).then(r => {});
+                    }}
+                  >
+                    {t('实际模型')}{' '}{other.upstream_model_name}{' '}
+                  </Tag>
+                </Space>
+              </div>
+            }>
+              <Tag
+                color={stringToColor(record.model_name)}
+                size='large'
+                onClick={(event) => {
+                  copyText(event, record.model_name).then(r => {});
+                }}
+                suffixIcon={<IconRefresh style={{width: '0.8em', height: '0.8em', opacity: 0.6}} />}
+              >
+                {' '}{record.model_name}{' '}
+              </Tag>
+            </Popover>
+            {/*<Tooltip content={t('实际模型')}>*/}
+            {/*  <Tag*/}
+            {/*    color={stringToColor(other.upstream_model_name)}*/}
+            {/*    size='large'*/}
+            {/*    onClick={(event) => {*/}
+            {/*      copyText(event, other.upstream_model_name).then(r => {});*/}
+            {/*    }}*/}
+            {/*  >*/}
+            {/*    {' '}{other.upstream_model_name}{' '}*/}
+            {/*  </Tag>*/}
+            {/*</Tooltip>*/}
+          </Space>
+        </>
+      );
+    }
+
+  }

  const columns = [
    {
@@ -272,18 +344,7 @@ const LogsTable = () => {
      dataIndex: 'model_name',
      render: (text, record, index) => {
        return record.type === 0 || record.type === 2 ? (
-          <>
-            <Tag
-              color={stringToColor(text)}
-              size='large'
-              onClick={(event) => {
-                copyText(event, text);
-              }}
-            >
-              {' '}
-              {text}{' '}
-            </Tag>
-          </>
+          <>{renderModelName(record)}</>
        ) : (
          <></>
        );
@@ -580,6 +641,17 @@ const LogsTable = () => {
        value: logs[i].content,
      });
      if (logs[i].type === 2) {
+        let modelMapped = other?.is_model_mapped && other?.upstream_model_name && other?.upstream_model_name !== '';
+        if (modelMapped) {
+          expandDataLocal.push({
+            key: t('请求并计费模型'),
+            value: logs[i].model_name,
+          });
+          expandDataLocal.push({
+            key: t('实际模型'),
+            value: other.upstream_model_name,
+          });
+        }
        let content = '';
        if (other?.ws || other?.audio) {
          content = renderAudioModelPrice(
--- a/web/src/components/PersonalSetting.js
+++ b/web/src/components/PersonalSetting.js
@@ -330,7 +330,7 @@ const PersonalSetting = () => {
        try {
            const res = await API.put('/api/user/setting', {
                notify_type: notificationSettings.warningType,
-                quota_warning_threshold: notificationSettings.warningThreshold,
+                quota_warning_threshold: parseFloat(notificationSettings.warningThreshold),
                webhook_url: notificationSettings.webhookUrl,
                webhook_secret: notificationSettings.webhookSecret,
                notification_email: notificationSettings.notificationEmail
--- a/web/src/components/RateLimitSetting.js
+++ b/web/src/components/RateLimitSetting.js
@@ -0,0 +1,80 @@
+import React, { useEffect, useState } from 'react';
+import { Card, Spin, Tabs } from '@douyinfe/semi-ui';
+import SettingsGeneral from '../pages/Setting/Operation/SettingsGeneral.js';
+import SettingsDrawing from '../pages/Setting/Operation/SettingsDrawing.js';
+import SettingsSensitiveWords from '../pages/Setting/Operation/SettingsSensitiveWords.js';
+import SettingsLog from '../pages/Setting/Operation/SettingsLog.js';
+import SettingsDataDashboard from '../pages/Setting/Operation/SettingsDataDashboard.js';
+import SettingsMonitoring from '../pages/Setting/Operation/SettingsMonitoring.js';
+import SettingsCreditLimit from '../pages/Setting/Operation/SettingsCreditLimit.js';
+import SettingsMagnification from '../pages/Setting/Operation/SettingsMagnification.js';
+import ModelSettingsVisualEditor from '../pages/Setting/Operation/ModelSettingsVisualEditor.js';
+import GroupRatioSettings from '../pages/Setting/Operation/GroupRatioSettings.js';
+import ModelRatioSettings from '../pages/Setting/Operation/ModelRatioSettings.js';
+
+
+import { API, showError, showSuccess } from '../helpers';
+import SettingsChats from '../pages/Setting/Operation/SettingsChats.js';
+import { useTranslation } from 'react-i18next';
+import RequestRateLimit from '../pages/Setting/RateLimit/SettingsRequestRateLimit.js';
+
+const RateLimitSetting = () => {
+  const { t } = useTranslation();
+  let [inputs, setInputs] = useState({
+    ModelRequestRateLimitEnabled: false,
+    ModelRequestRateLimitCount: 0,
+    ModelRequestRateLimitSuccessCount: 1000,
+    ModelRequestRateLimitDurationMinutes: 1,
+  });
+
+  let [loading, setLoading] = useState(false);
+
+  const getOptions = async () => {
+    const res = await API.get('/api/option/');
+    const { success, message, data } = res.data;
+    if (success) {
+      let newInputs = {};
+      data.forEach((item) => {
+        if (
+          item.key.endsWith('Enabled')
+        ) {
+          newInputs[item.key] = item.value === 'true' ? true : false;
+        } else {
+          newInputs[item.key] = item.value;
+        }
+      });
+
+      setInputs(newInputs);
+    } else {
+      showError(message);
+    }
+  };
+  async function onRefresh() {
+    try {
+      setLoading(true);
+      await getOptions();
+      // showSuccess('刷新成功');
+    } catch (error) {
+      showError('刷新失败');
+    } finally {
+      setLoading(false);
+    }
+  }
+
+  useEffect(() => {
+    onRefresh();
+  }, []);
+
+  return (
+    <>
+      <Spin spinning={loading} size='large'>
+        {/* AI请求速率限制 */}
+        <Card style={{ marginTop: '10px' }}>
+          <RequestRateLimit options={inputs} refresh={onRefresh} />
+        </Card>
+      </Spin>
+    </>
+  );
+};
+
+export default RateLimitSetting;
--- a/web/src/i18n/locales/en.json
+++ b/web/src/i18n/locales/en.json
@@ -856,7 +856,7 @@
  "IP黑名单": "IP blacklist",
  "不允许的IP，一行一个": "IPs not allowed, one per line",
  "请选择该渠道所支持的模型": "Please select the model supported by this channel",
-  "次": "Second-rate",
+  "次": "times",
  "达到限速报错内容": "Error content when the speed limit is reached",
  "不填则使用默认报错": "If not filled in, the default error will be reported.",
  "Midjouney 设置 (可选)": "Midjouney settings (optional)",
@@ -1249,5 +1249,37 @@
  "已注销": "Logged out",
  "自动禁用关键词": "Automatic disable keywords",
  "一行一个，不区分大小写": "One line per keyword, not case-sensitive",
-  "当上游通道返回错误中包含这些关键词时（不区分大小写），自动禁用通道": "When the upstream channel returns an error containing these keywords (not case-sensitive), automatically disable the channel"
-}
+  "当上游通道返回错误中包含这些关键词时（不区分大小写），自动禁用通道": "When the upstream channel returns an error containing these keywords (not case-sensitive), automatically disable the channel",
+  "请求并计费模型": "Request and charge model",
+  "实际模型": "Actual model",
+  "渠道信息": "Channel information",
+  "通知设置": "Notification settings",
+  "Webhook地址": "Webhook URL",
+  "请输入Webhook地址，例如: https://example.com/webhook": "Please enter the Webhook URL, e.g.: https://example.com/webhook",
+  "邮件通知": "Email notification",
+  "Webhook通知": "Webhook notification",
+  "接口凭证（可选）": "Interface credentials (optional)",
+  "密钥将以 Bearer 方式添加到请求头中，用于验证webhook请求的合法性": "The secret will be added to the request header as a Bearer token to verify the legitimacy of the webhook request",
+  "Authorization: Bearer your-secret-key": "Authorization: Bearer your-secret-key",
+  "额度预警阈值": "Quota warning threshold",
+  "当剩余额度低于此数值时，系统将通过选择的方式发送通知": "When the remaining quota is lower than this value, the system will send a notification through the selected method",
+  "Webhook请求结构": "Webhook request structure",
+  "只支持https，系统将以 POST 方式发送通知，请确保地址可以接收 POST 请求": "Only https is supported, the system will send a notification through POST, please ensure the address can receive POST requests",
+  "保存设置": "Save settings",
+  "通知邮箱": "Notification email",
+  "设置用于接收额度预警的邮箱地址，不填则使用账号绑定的邮箱": "Set the email address for receiving quota warning notifications, if not set, the email address bound to the account will be used",
+  "留空则使用账号绑定的邮箱": "If left blank, the email address bound to the account will be used",
+  "代理站地址": "Base URL",
+  "对于官方渠道，new-api已经内置地址，除非是第三方代理站点或者Azure的特殊接入地址，否则不需要填写": "For official channels, the new-api has a built-in address. Unless it is a third-party proxy site or a special Azure access address, there is no need to fill it in",
+  "渠道额外设置": "Channel extra settings",
+  "模型请求速率限制": "Model request rate limit",
+  "启用用户模型请求速率限制（可能会影响高并发性能）": "Enable user model request rate limit (may affect high concurrency performance)",
+  "限制周期": "Limit period",
+  "用户每周期最多请求次数": "User max request times per period",
+  "用户每周期最多请求完成次数": "User max successful request times per period",
+  "包括失败请求的次数，0代表不限制": "Including failed request times, 0 means no limit",
+  "频率限制的周期（分钟）": "Rate limit period (minutes)",
+  "只包括请求成功的次数": "Only include successful request times",
+  "保存模型速率限制": "Save model rate limit settings",
+  "速率限制设置": "Rate limit settings"
+}
--- a/web/src/pages/Channel/EditChannel.js
+++ b/web/src/pages/Channel/EditChannel.js
@@ -540,21 +540,23 @@ const EditChannel = (props) => {
            value={inputs.name}
            autoComplete="new-password"
          />
-          {inputs.type !== 3 && inputs.type !== 8 && inputs.type !== 22 && inputs.type !== 36 && (
+          {inputs.type !== 3 && inputs.type !== 8 && inputs.type !== 22 && inputs.type !== 36 && inputs.type !== 45 && (
            <>
              <div style={{ marginTop: 10 }}>
-                <Typography.Text strong>{t('BaseURL')}：</Typography.Text>
+                <Typography.Text strong>{t('代理站地址')}：</Typography.Text>
              </div>
-              <Input
-                label={t('BaseURL')}
-                name="base_url"
-                placeholder={t('此项可选，用于通过代理站来进行 API 调用，末尾不要带/v1和/')}
-                onChange={(value) => {
-                  handleInputChange('base_url', value);
-                }}
-                value={inputs.base_url}
-                autoComplete="new-password"
-              />
+              <Tooltip content={t('对于官方渠道，new-api已经内置地址，除非是第三方代理站点或者Azure的特殊接入地址，否则不需要填写')}>
+                <Input
+                  label={t('代理站地址')}
+                  name="base_url"
+                  placeholder={t('此项可选，用于通过代理站来进行 API 调用，末尾不要带/v1和/')}
+                  onChange={(value) => {
+                    handleInputChange('base_url', value);
+                  }}
+                  value={inputs.base_url}
+                  autoComplete="new-password"
+                />
+              </Tooltip>
            </>
          )}
          <div style={{ marginTop: 10 }}>
--- a/web/src/pages/Setting/RateLimit/SettingsRequestRateLimit.js
+++ b/web/src/pages/Setting/RateLimit/SettingsRequestRateLimit.js
@@ -0,0 +1,159 @@
+import React, { useEffect, useState, useRef } from 'react';
+import { Button, Col, Form, Row, Spin } from '@douyinfe/semi-ui';
+import {
+  compareObjects,
+  API,
+  showError,
+  showSuccess,
+  showWarning,
+} from '../../../helpers';
+import { useTranslation } from 'react-i18next';
+
+export default function RequestRateLimit(props) {
+  const { t } = useTranslation();
+
+  const [loading, setLoading] = useState(false);
+  const [inputs, setInputs] = useState({
+    ModelRequestRateLimitEnabled: false,
+    ModelRequestRateLimitCount: -1,
+    ModelRequestRateLimitSuccessCount: 1000,
+    ModelRequestRateLimitDurationMinutes: 1
+  });
+  const refForm = useRef();
+  const [inputsRow, setInputsRow] = useState(inputs);
+
+  function onSubmit() {
+    const updateArray = compareObjects(inputs, inputsRow);
+    if (!updateArray.length) return showWarning(t('你似乎并没有修改什么'));
+    const requestQueue = updateArray.map((item) => {
+      let value = '';
+      if (typeof inputs[item.key] === 'boolean') {
+        value = String(inputs[item.key]);
+      } else {
+        value = inputs[item.key];
+      }
+      return API.put('/api/option/', {
+        key: item.key,
+        value,
+      });
+    });
+    setLoading(true);
+    Promise.all(requestQueue)
+      .then((res) => {
+        if (requestQueue.length === 1) {
+          if (res.includes(undefined)) return;
+        } else if (requestQueue.length > 1) {
+          if (res.includes(undefined)) return showError(t('部分保存失败，请重试'));
+        }
+        showSuccess(t('保存成功'));
+        props.refresh();
+      })
+      .catch(() => {
+        showError(t('保存失败，请重试'));
+      })
+      .finally(() => {
+        setLoading(false);
+      });
+  }
+
+  useEffect(() => {
+    const currentInputs = {};
+    for (let key in props.options) {
+      if (Object.keys(inputs).includes(key)) {
+        currentInputs[key] = props.options[key];
+      }
+    }
+    setInputs(currentInputs);
+    setInputsRow(structuredClone(currentInputs));
+    refForm.current.setValues(currentInputs);
+  }, [props.options]);
+
+  return (
+    <>
+      <Spin spinning={loading}>
+        <Form
+          values={inputs}
+          getFormApi={(formAPI) => (refForm.current = formAPI)}
+          style={{ marginBottom: 15 }}
+        >
+          <Form.Section text={t('模型请求速率限制')}>
+            <Row gutter={16}>
+              <Col span={8}>
+                <Form.Switch
+                  field={'ModelRequestRateLimitEnabled'}
+                  label={t('启用用户模型请求速率限制（可能会影响高并发性能）')}
+                  size='default'
+                  checkedText='｜'
+                  uncheckedText='〇'
+                  onChange={(value) => {
+                    setInputs({
+                      ...inputs,
+                      ModelRequestRateLimitEnabled: value,
+                    });
+                  }}
+                />
+              </Col>
+            </Row>
+            <Row>
+              <Col span={8}>
+                <Form.InputNumber
+                  label={t('限制周期')}
+                  step={1}
+                  min={0}
+                  suffix={t('分钟')}
+                  extraText={t('频率限制的周期（分钟）')}
+                  field={'ModelRequestRateLimitDurationMinutes'}
+                  onChange={(value) =>
+                    setInputs({
+                      ...inputs,
+                      ModelRequestRateLimitDurationMinutes: String(value),
+                    })
+                  }
+                />
+              </Col>
+            </Row>
+            <Row>
+              <Col span={8}>
+                <Form.InputNumber
+                  label={t('用户每周期最多请求次数')}
+                  step={1}
+                  min={0}
+                  suffix={t('次')}
+                  extraText={t('包括失败请求的次数，0代表不限制')}
+                  field={'ModelRequestRateLimitCount'}
+                  onChange={(value) =>
+                    setInputs({
+                      ...inputs,
+                      ModelRequestRateLimitCount: String(value),
+                    })
+                  }
+                />
+              </Col>
+              <Col span={8}>
+                <Form.InputNumber
+                  label={t('用户每周期最多请求完成次数')}
+                  step={1}
+                  min={1}
+                  suffix={t('次')}
+                  extraText={t('只包括请求成功的次数')}
+                  field={'ModelRequestRateLimitSuccessCount'}
+                  onChange={(value) =>
+                    setInputs({
+                      ...inputs,
+                      ModelRequestRateLimitSuccessCount: String(value),
+                    })
+                  }
+                />
+              </Col>
+            </Row>
+            <Row>
+              <Button size='default' onClick={onSubmit}>
+                {t('保存模型速率限制')}
+              </Button>
+            </Row>
+          </Form.Section>
+        </Form>
+      </Spin>
+    </>
+  );
+}
--- a/web/src/pages/Setting/index.js
+++ b/web/src/pages/Setting/index.js
@@ -8,6 +8,7 @@ import { isRoot } from '../../helpers';
 import OtherSetting from '../../components/OtherSetting';
 import PersonalSetting from '../../components/PersonalSetting';
 import OperationSetting from '../../components/OperationSetting';
+import RateLimitSetting from '../../components/RateLimitSetting.js';

 const Setting = () => {
  const { t } = useTranslation();
@@ -28,6 +29,11 @@ const Setting = () => {
      content: <OperationSetting />,
      itemKey: 'operation',
    });
+    panes.push({
+      tab: t('速率限制设置'),
+      content: <RateLimitSetting />,
+      itemKey: 'ratelimit',
+    });
    panes.push({
      tab: t('系统设置'),
      content: <SystemSetting />,
Author	SHA1	Message	Date
Calcium-Ion	dc36fdedc2	Merge pull request #781 from zeyugao/main feat: Pass extra_body in OpenAI request to the backend	2025-02-24 16:29:48 +08:00
Calcium-Ion	3017882fa3	Merge pull request #783 from Calcium-Ion/rate-limit feat: Add model request rate limiting functionality	2025-02-24 16:29:23 +08:00
1808837298@qq.com	e9ba392af8	feat: Add model rate limit settings in system configuration	2025-02-24 16:27:20 +08:00
1808837298@qq.com	83a37e4653	feat: Add model request rate limiting functionality	2025-02-24 16:20:55 +08:00
1808837298@qq.com	b6f95dca41	feat: Add support for different Dify bot types and request URLs	2025-02-24 14:18:30 +08:00
1808837298@qq.com	7ff4cebdbe	feat: Enhance token counting and content parsing for messages	2025-02-24 14:18:15 +08:00
Elsa	af00f7b311	Pass extra_body to the backend	2025-02-24 10:52:55 +08:00
1808837298@qq.com	cc1d6e1c05	fix: Improve 429 error logging with detailed message	2025-02-23 21:26:31 +08:00
1808837298@qq.com	6c7a8c811c	fix typo	2025-02-23 17:27:33 +08:00
1808837298@qq.com	d5ab7d2d34	feat: Add thinking-to-content option in channel extra settings #780	2025-02-23 17:13:08 +08:00
1808837298@qq.com	115a181db3	feat: Add thinking-to-content conversion for stream responses	2025-02-23 17:05:57 +08:00
1808837298@qq.com	88a2fec190	fix: mistral	2025-02-22 16:29:48 +08:00
1808837298@qq.com	27ea231d66	fix: fix image ratio calculation	2025-02-22 15:50:18 +08:00
Calcium-Ion	4b6101b3ea	Merge pull request #778 from utopeadia/main 美化日志界面刷新图标	2025-02-22 15:21:28 +08:00
1808837298@qq.com	48926b8a5a	fix: Ensure correct quota warning threshold type conversion	2025-02-22 15:19:55 +08:00
1808837298@qq.com	c44a32efe0	chore: update rerank.md	2025-02-22 15:13:26 +08:00
HowieWood	c541d6c97e	进一步美化刷新图标	2025-02-22 14:18:25 +08:00
HowieWood	7dfcd135da	优化日志刷新图标显示	2025-02-22 14:12:49 +08:00
1808837298@qq.com	7a13fab271	fix: ShouldDisableChannel	2025-02-22 02:02:03 +08:00
1808837298@qq.com	bf75b30870	fix: mistral adaptor (close #774 )	2025-02-21 22:21:19 +08:00
1808837298@qq.com	6e7587ab46	feat: Add reasoning content support in OpenAI response handling	2025-02-21 18:52:51 +08:00
1808837298@qq.com	cc5066c510	refactor: Improve message content parsing with robust type handling	2025-02-21 18:27:43 +08:00
1808837298@qq.com	b9b69b01e5	refactor: Improve message content handling and quota error responses	2025-02-21 18:18:21 +08:00
1808837298@qq.com	1f4f9123aa	refactor: Optimize sensitive word detection and text processing	2025-02-21 17:05:35 +08:00
1808837298@qq.com	9cc6385b0c	feat: Enhance sensitive word detection with detailed logging	2025-02-21 16:57:30 +08:00
1808837298@qq.com	2d42145b66	refactor: Improve quota error messages with formatted quota display	2025-02-21 16:42:48 +08:00
1808837298@qq.com	94736407a0	feat: Add base URL input with localized tooltip for channel configuration	2025-02-21 16:17:59 +08:00
1808837298@qq.com	de859c3cc9	feat: Add localization for notification and webhook settings	2025-02-21 15:36:24 +08:00
Calcium-Ion	8dd4ce986c	Merge pull request #775 from Calcium-Ion/model_mappping refactor: Simplify model mapping and pricing logic across relay modules	2025-02-20 16:42:23 +08:00
1808837298@qq.com	06da65a9d0	refactor: Simplify model mapping and pricing logic across relay modules	2025-02-20 16:41:46 +08:00