mirror of
https://github.com/QuantumNous/new-api.git
synced 2026-03-30 02:25:00 +00:00
🛡️ fix: prevent OOM on large/decompressed requests; skip heavy prompt meta when token count is disabled
Clamp request body size (including post-decompression) to avoid memory exhaustion caused by huge payloads/zip bombs, especially with large-context Claude requests. Add a configurable `MAX_REQUEST_BODY_MB` (default `32`) and document it.

- Enforce max request body size after gzip/br decompression via `http.MaxBytesReader`
- Add a secondary size guard in `common.GetRequestBody` and cache-safe handling
- Return **413 Request Entity Too Large** on oversized bodies in relay entry
- Avoid building large `TokenCountMeta.CombineText` when both token counting and sensitive check are disabled (use lightweight meta for pricing)
- Update READMEs (CN/EN/FR/JA) with `MAX_REQUEST_BODY_MB`
- Fix a handful of vet/formatting issues encountered during the change
- `go test ./...` passes
This commit is contained in:
@@ -2,6 +2,7 @@ package controller
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
@@ -104,7 +105,12 @@ func Relay(c *gin.Context, relayFormat types.RelayFormat) {
|
||||
|
||||
request, err := helper.GetAndValidateRequest(c, relayFormat)
|
||||
if err != nil {
|
||||
newAPIError = types.NewError(err, types.ErrorCodeInvalidRequest)
|
||||
// Map "request body too large" to 413 so clients can handle it correctly
|
||||
if common.IsRequestBodyTooLargeError(err) || errors.Is(err, common.ErrRequestBodyTooLarge) {
|
||||
newAPIError = types.NewErrorWithStatusCode(err, types.ErrorCodeReadRequestBodyFailed, http.StatusRequestEntityTooLarge, types.ErrOptionWithSkipRetry())
|
||||
} else {
|
||||
newAPIError = types.NewError(err, types.ErrorCodeInvalidRequest)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
@@ -114,9 +120,17 @@ func Relay(c *gin.Context, relayFormat types.RelayFormat) {
|
||||
return
|
||||
}
|
||||
|
||||
meta := request.GetTokenCountMeta()
|
||||
needSensitiveCheck := setting.ShouldCheckPromptSensitive()
|
||||
needCountToken := constant.CountToken
|
||||
// Avoid building huge CombineText (strings.Join) when token counting and sensitive check are both disabled.
|
||||
var meta *types.TokenCountMeta
|
||||
if needSensitiveCheck || needCountToken {
|
||||
meta = request.GetTokenCountMeta()
|
||||
} else {
|
||||
meta = fastTokenCountMetaForPricing(request)
|
||||
}
|
||||
|
||||
if setting.ShouldCheckPromptSensitive() {
|
||||
if needSensitiveCheck && meta != nil {
|
||||
contains, words := service.CheckSensitiveText(meta.CombineText)
|
||||
if contains {
|
||||
logger.LogWarn(c, fmt.Sprintf("user sensitive words detected: %s", strings.Join(words, ", ")))
|
||||
@@ -218,6 +232,33 @@ func addUsedChannel(c *gin.Context, channelId int) {
|
||||
c.Set("use_channel", useChannel)
|
||||
}
|
||||
|
||||
func fastTokenCountMetaForPricing(request dto.Request) *types.TokenCountMeta {
|
||||
if request == nil {
|
||||
return &types.TokenCountMeta{}
|
||||
}
|
||||
meta := &types.TokenCountMeta{
|
||||
TokenType: types.TokenTypeTokenizer,
|
||||
}
|
||||
switch r := request.(type) {
|
||||
case *dto.GeneralOpenAIRequest:
|
||||
if r.MaxCompletionTokens > r.MaxTokens {
|
||||
meta.MaxTokens = int(r.MaxCompletionTokens)
|
||||
} else {
|
||||
meta.MaxTokens = int(r.MaxTokens)
|
||||
}
|
||||
case *dto.OpenAIResponsesRequest:
|
||||
meta.MaxTokens = int(r.MaxOutputTokens)
|
||||
case *dto.ClaudeRequest:
|
||||
meta.MaxTokens = int(r.MaxTokens)
|
||||
case *dto.ImageRequest:
|
||||
// Pricing for image requests depends on ImagePriceRatio; safe to compute even when CountToken is disabled.
|
||||
return r.GetTokenCountMeta()
|
||||
default:
|
||||
// Best-effort: leave CombineText empty to avoid large allocations.
|
||||
}
|
||||
return meta
|
||||
}
|
||||
|
||||
func getChannel(c *gin.Context, info *relaycommon.RelayInfo, retryParam *service.RetryParam) (*model.Channel, *types.NewAPIError) {
|
||||
if info.ChannelMeta == nil {
|
||||
autoBan := c.GetBool("auto_ban")
|
||||
|
||||
Reference in New Issue
Block a user