🛡️ fix: prevent OOM on large/decompressed requests; skip heavy prompt meta when token count is disabled

Clamp request body size (including post-decompression) to avoid memory exhaustion caused by huge payloads/zip bombs, especially with large-context Claude requests. Add a configurable `MAX_REQUEST_BODY_MB` (default `32`) and document it.

- Enforce max request body size after gzip/br decompression via `http.MaxBytesReader`
- Add a secondary size guard in `common.GetRequestBody` and cache-safe handling
- Return **413 Request Entity Too Large** on oversized bodies in relay entry
- Avoid building large `TokenCountMeta.CombineText` when both token counting and sensitive check are disabled (use lightweight meta for pricing)
- Update READMEs (CN/EN/FR/JA) with `MAX_REQUEST_BODY_MB`
- Fix a handful of vet/formatting issues encountered during the change
- `go test ./...` passes
This commit is contained in:
t0ng7u
2025-12-16 17:00:19 +08:00
parent 11593bd3da
commit 8e3f9b1faa
21 changed files with 149 additions and 35 deletions

View File

@@ -2,6 +2,7 @@ package controller
import (
"bytes"
"errors"
"fmt"
"io"
"log"
@@ -104,7 +105,12 @@ func Relay(c *gin.Context, relayFormat types.RelayFormat) {
request, err := helper.GetAndValidateRequest(c, relayFormat)
if err != nil {
newAPIError = types.NewError(err, types.ErrorCodeInvalidRequest)
// Map "request body too large" to 413 so clients can handle it correctly
if common.IsRequestBodyTooLargeError(err) || errors.Is(err, common.ErrRequestBodyTooLarge) {
newAPIError = types.NewErrorWithStatusCode(err, types.ErrorCodeReadRequestBodyFailed, http.StatusRequestEntityTooLarge, types.ErrOptionWithSkipRetry())
} else {
newAPIError = types.NewError(err, types.ErrorCodeInvalidRequest)
}
return
}
@@ -114,9 +120,17 @@ func Relay(c *gin.Context, relayFormat types.RelayFormat) {
return
}
meta := request.GetTokenCountMeta()
needSensitiveCheck := setting.ShouldCheckPromptSensitive()
needCountToken := constant.CountToken
// Avoid building huge CombineText (strings.Join) when token counting and sensitive check are both disabled.
var meta *types.TokenCountMeta
if needSensitiveCheck || needCountToken {
meta = request.GetTokenCountMeta()
} else {
meta = fastTokenCountMetaForPricing(request)
}
if setting.ShouldCheckPromptSensitive() {
if needSensitiveCheck && meta != nil {
contains, words := service.CheckSensitiveText(meta.CombineText)
if contains {
logger.LogWarn(c, fmt.Sprintf("user sensitive words detected: %s", strings.Join(words, ", ")))
@@ -218,6 +232,33 @@ func addUsedChannel(c *gin.Context, channelId int) {
c.Set("use_channel", useChannel)
}
func fastTokenCountMetaForPricing(request dto.Request) *types.TokenCountMeta {
if request == nil {
return &types.TokenCountMeta{}
}
meta := &types.TokenCountMeta{
TokenType: types.TokenTypeTokenizer,
}
switch r := request.(type) {
case *dto.GeneralOpenAIRequest:
if r.MaxCompletionTokens > r.MaxTokens {
meta.MaxTokens = int(r.MaxCompletionTokens)
} else {
meta.MaxTokens = int(r.MaxTokens)
}
case *dto.OpenAIResponsesRequest:
meta.MaxTokens = int(r.MaxOutputTokens)
case *dto.ClaudeRequest:
meta.MaxTokens = int(r.MaxTokens)
case *dto.ImageRequest:
// Pricing for image requests depends on ImagePriceRatio; safe to compute even when CountToken is disabled.
return r.GetTokenCountMeta()
default:
// Best-effort: leave CombineText empty to avoid large allocations.
}
return meta
}
func getChannel(c *gin.Context, info *relaycommon.RelayInfo, retryParam *service.RetryParam) (*model.Channel, *types.NewAPIError) {
if info.ChannelMeta == nil {
autoBan := c.GetBool("auto_ban")