mirror of
https://github.com/QuantumNous/new-api.git
synced 2026-04-18 23:57:27 +00:00
🛡️ fix: prevent OOM on large/decompressed requests; skip heavy prompt meta when token count is disabled
Clamp request body size (including post-decompression) to avoid memory exhaustion caused by huge payloads/zip bombs, especially with large-context Claude requests. Add a configurable `MAX_REQUEST_BODY_MB` (default `32`) and document it. - Enforce max request body size after gzip/br decompression via `http.MaxBytesReader` - Add a secondary size guard in `common.GetRequestBody` and cache-safe handling - Return **413 Request Entity Too Large** on oversized bodies in relay entry - Avoid building large `TokenCountMeta.CombineText` when both token counting and sensitive check are disabled (use lightweight meta for pricing) - Update READMEs (CN/EN/FR/JA) with `MAX_REQUEST_BODY_MB` - Fix a handful of vet/formatting issues encountered during the change - `go test ./...` passes
This commit is contained in:
@@ -114,7 +114,7 @@ func DiscordOAuth(c *gin.Context) {
|
||||
DiscordBind(c)
|
||||
return
|
||||
}
|
||||
if !system_setting.GetDiscordSettings().Enabled {
|
||||
if !system_setting.GetDiscordSettings().Enabled {
|
||||
c.JSON(http.StatusOK, gin.H{
|
||||
"success": false,
|
||||
"message": "管理员未开启通过 Discord 登录以及注册",
|
||||
|
||||
@@ -2,6 +2,7 @@ package controller
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
@@ -104,7 +105,12 @@ func Relay(c *gin.Context, relayFormat types.RelayFormat) {
|
||||
|
||||
request, err := helper.GetAndValidateRequest(c, relayFormat)
|
||||
if err != nil {
|
||||
newAPIError = types.NewError(err, types.ErrorCodeInvalidRequest)
|
||||
// Map "request body too large" to 413 so clients can handle it correctly
|
||||
if common.IsRequestBodyTooLargeError(err) || errors.Is(err, common.ErrRequestBodyTooLarge) {
|
||||
newAPIError = types.NewErrorWithStatusCode(err, types.ErrorCodeReadRequestBodyFailed, http.StatusRequestEntityTooLarge, types.ErrOptionWithSkipRetry())
|
||||
} else {
|
||||
newAPIError = types.NewError(err, types.ErrorCodeInvalidRequest)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
@@ -114,9 +120,17 @@ func Relay(c *gin.Context, relayFormat types.RelayFormat) {
|
||||
return
|
||||
}
|
||||
|
||||
meta := request.GetTokenCountMeta()
|
||||
needSensitiveCheck := setting.ShouldCheckPromptSensitive()
|
||||
needCountToken := constant.CountToken
|
||||
// Avoid building huge CombineText (strings.Join) when token counting and sensitive check are both disabled.
|
||||
var meta *types.TokenCountMeta
|
||||
if needSensitiveCheck || needCountToken {
|
||||
meta = request.GetTokenCountMeta()
|
||||
} else {
|
||||
meta = fastTokenCountMetaForPricing(request)
|
||||
}
|
||||
|
||||
if setting.ShouldCheckPromptSensitive() {
|
||||
if needSensitiveCheck && meta != nil {
|
||||
contains, words := service.CheckSensitiveText(meta.CombineText)
|
||||
if contains {
|
||||
logger.LogWarn(c, fmt.Sprintf("user sensitive words detected: %s", strings.Join(words, ", ")))
|
||||
@@ -218,6 +232,33 @@ func addUsedChannel(c *gin.Context, channelId int) {
|
||||
c.Set("use_channel", useChannel)
|
||||
}
|
||||
|
||||
func fastTokenCountMetaForPricing(request dto.Request) *types.TokenCountMeta {
|
||||
if request == nil {
|
||||
return &types.TokenCountMeta{}
|
||||
}
|
||||
meta := &types.TokenCountMeta{
|
||||
TokenType: types.TokenTypeTokenizer,
|
||||
}
|
||||
switch r := request.(type) {
|
||||
case *dto.GeneralOpenAIRequest:
|
||||
if r.MaxCompletionTokens > r.MaxTokens {
|
||||
meta.MaxTokens = int(r.MaxCompletionTokens)
|
||||
} else {
|
||||
meta.MaxTokens = int(r.MaxTokens)
|
||||
}
|
||||
case *dto.OpenAIResponsesRequest:
|
||||
meta.MaxTokens = int(r.MaxOutputTokens)
|
||||
case *dto.ClaudeRequest:
|
||||
meta.MaxTokens = int(r.MaxTokens)
|
||||
case *dto.ImageRequest:
|
||||
// Pricing for image requests depends on ImagePriceRatio; safe to compute even when CountToken is disabled.
|
||||
return r.GetTokenCountMeta()
|
||||
default:
|
||||
// Best-effort: leave CombineText empty to avoid large allocations.
|
||||
}
|
||||
return meta
|
||||
}
|
||||
|
||||
func getChannel(c *gin.Context, info *relaycommon.RelayInfo, retryParam *service.RetryParam) (*model.Channel, *types.NewAPIError) {
|
||||
if info.ChannelMeta == nil {
|
||||
autoBan := c.GetBool("auto_ban")
|
||||
|
||||
@@ -88,7 +88,7 @@ func UpdateSunoTaskAll(ctx context.Context, taskChannelM map[int][]string, taskM
|
||||
for channelId, taskIds := range taskChannelM {
|
||||
err := updateSunoTaskAll(ctx, channelId, taskIds, taskM)
|
||||
if err != nil {
|
||||
logger.LogError(ctx, fmt.Sprintf("渠道 #%d 更新异步任务失败: %d", channelId, err.Error()))
|
||||
logger.LogError(ctx, fmt.Sprintf("渠道 #%d 更新异步任务失败: %s", channelId, err.Error()))
|
||||
}
|
||||
}
|
||||
return nil
|
||||
@@ -141,7 +141,7 @@ func updateSunoTaskAll(ctx context.Context, channelId int, taskIds []string, tas
|
||||
return err
|
||||
}
|
||||
if !responseItems.IsSuccess() {
|
||||
common.SysLog(fmt.Sprintf("渠道 #%d 未完成的任务有: %d, 成功获取到任务数: %d", channelId, len(taskIds), string(responseBody)))
|
||||
common.SysLog(fmt.Sprintf("渠道 #%d 未完成的任务有: %d, 成功获取到任务数: %s", channelId, len(taskIds), string(responseBody)))
|
||||
return err
|
||||
}
|
||||
|
||||
|
||||
@@ -7,12 +7,12 @@ import (
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"net/http"
|
||||
"github.com/QuantumNous/new-api/common"
|
||||
"github.com/QuantumNous/new-api/model"
|
||||
"github.com/QuantumNous/new-api/setting"
|
||||
"io"
|
||||
"log"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
|
||||
Reference in New Issue
Block a user