From 652099cd5cefa66e174f6a83646edc4f1d620513 Mon Sep 17 00:00:00 2001 From: Alice Losasso <104875499+dddabtc@users.noreply.github.com> Date: Mon, 23 Feb 2026 11:32:53 -0400 Subject: [PATCH] fix: correctly identify Groq TPM limits as rate limits instead of context overflow (#16176) Co-authored-by: Howard --- src/agents/pi-embedded-helpers/errors.ts | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/agents/pi-embedded-helpers/errors.ts b/src/agents/pi-embedded-helpers/errors.ts index 86ded785629..6a40f1d7b1d 100644 --- a/src/agents/pi-embedded-helpers/errors.ts +++ b/src/agents/pi-embedded-helpers/errors.ts @@ -39,6 +39,12 @@ export function isContextOverflowError(errorMessage?: string): boolean { return false; } const lower = errorMessage.toLowerCase(); + + // Groq uses 413 for TPM (tokens per minute) limits, which is a rate limit, not context overflow. + if (lower.includes("tpm") || lower.includes("tokens per minute")) { + return false; + } + const hasRequestSizeExceeds = lower.includes("request size exceeds"); const hasContextWindow = lower.includes("context window") || @@ -72,6 +78,13 @@ export function isLikelyContextOverflowError(errorMessage?: string): boolean { if (!errorMessage) { return false; } + + // Groq uses 413 for TPM (tokens per minute) limits, which is a rate limit, not context overflow. + const lower = errorMessage.toLowerCase(); + if (lower.includes("tpm") || lower.includes("tokens per minute")) { + return false; + } + if (CONTEXT_WINDOW_TOO_SMALL_RE.test(errorMessage)) { return false; } @@ -571,6 +584,8 @@ const ERROR_PATTERNS = { "quota exceeded", "resource_exhausted", "usage limit", + "tpm", + "tokens per minute", ], overloaded: [ /overloaded_error|"type"\s*:\s*"overloaded_error"/i,