fix: add rate limit patterns for 'too many tokens' and 'tokens per day' (#39377)

Merged via squash.

Prepared head SHA: 132a457286
Co-authored-by: gambletan <266203672+gambletan@users.noreply.github.com>
Co-authored-by: altaywtf <9790196+altaywtf@users.noreply.github.com>
Reviewed-by: @altaywtf
This commit is contained in:
gambletan
2026-03-08 18:03:33 +08:00
committed by GitHub
parent aedf3ee68f
commit 8a20f51460
7 changed files with 98 additions and 3 deletions

View File

@@ -416,12 +416,19 @@ describe("isLikelyContextOverflowError", () => {
"exceeded your current quota",
"This request would exceed your account's rate limit",
"429 Too Many Requests: request exceeds rate limit",
"AWS Bedrock: Too many tokens per day. Please try again tomorrow.",
];
for (const sample of samples) {
expect(isLikelyContextOverflowError(sample)).toBe(false);
}
});
// Regression guard: a message that says "too many tokens per request" together
// with "Context window exceeded" describes an oversized request. It must be
// detected as a context overflow and must not receive a failover reason.
it("keeps too-many-tokens-per-request context overflow errors out of the rate-limit lane", () => {
const sample = "Context window exceeded: too many tokens per request.";
// The overflow detector must recognize the message...
expect(isLikelyContextOverflowError(sample)).toBe(true);
// ...and the failover classifier must return null for it (so not "rate_limit").
expect(classifyFailoverReason(sample)).toBeNull();
});
it("excludes reasoning-required invalid-request errors", () => {
const samples = [
"400 Reasoning is mandatory for this endpoint and cannot be disabled.",
@@ -654,6 +661,11 @@ describe("classifyFailoverReason", () => {
"rate_limit",
);
});
// A "Too many tokens per day" message (sample text attributed to AWS Bedrock)
// must be classified with the "rate_limit" failover reason.
it("classifies AWS Bedrock too-many-tokens-per-day errors as rate_limit", () => {
expect(
classifyFailoverReason("AWS Bedrock: Too many tokens per day. Please try again tomorrow."),
).toBe("rate_limit");
});
it("classifies provider high-demand / service-unavailable messages as overloaded", () => {
expect(
classifyFailoverReason(

View File

@@ -122,7 +122,7 @@ const CONTEXT_WINDOW_TOO_SMALL_RE = /context window.*(too small|minimum is)/i;
/**
 * Case-insensitive pattern that loosely matches context-overflow wording.
 *
 * It matches any of the following, with arbitrary text allowed between the parts:
 * - "context" followed by "overflow";
 * - "context window" followed by a size/limit word ("too large", "too long",
 *   "exceed", "over", "limit", "max"/"maximum", "requested", "sent", "tokens");
 * - "prompt" followed by "too large", "too long", "exceed", "over", "limit",
 *   or "max"/"maximum";
 * - "request" or "input", then "context", "window", "length", or "token",
 *   then one of the same size/limit words as for "prompt".
 */
const CONTEXT_OVERFLOW_HINT_RE =
/context.*overflow|context window.*(too (?:large|long)|exceed|over|limit|max(?:imum)?|requested|sent|tokens)|prompt.*(too (?:large|long)|exceed|over|limit|max(?:imum)?)|(?:request|input).*(?:context|window|length|token).*(too (?:large|long)|exceed|over|limit|max(?:imum)?)/i;
const RATE_LIMIT_HINT_RE =
/rate limit|too many requests|requests per (?:minute|hour|day)|quota|throttl|429\b/i;
/rate limit|too many requests|requests per (?:minute|hour|day)|quota|throttl|429\b|tokens per day/i;
export function isLikelyContextOverflowError(errorMessage?: string): boolean {
if (!errorMessage) {

View File

@@ -14,6 +14,7 @@ const ERROR_PATTERNS = {
"usage limit",
/\btpm\b/i,
"tokens per minute",
"tokens per day",
],
overloaded: [
/overloaded_error|"type"\s*:\s*"overloaded_error"/i,