From 38dc21271eaf8418345840788b92c45b0fb612fd Mon Sep 17 00:00:00 2001 From: Altay Date: Sun, 8 Mar 2026 10:06:30 +0300 Subject: [PATCH] fix(agents): simplify 402 recovery behavior --- src/agents/failover-error.test.ts | 15 +++++++ src/agents/model-fallback.probe.test.ts | 43 +------------------ src/agents/model-fallback.ts | 17 +++----- ...dded-helpers.isbillingerrormessage.test.ts | 8 ++++ src/agents/pi-embedded-helpers/errors.ts | 4 ++ 5 files changed, 34 insertions(+), 53 deletions(-) diff --git a/src/agents/failover-error.test.ts b/src/agents/failover-error.test.ts index c3f73e123a4..656aee7c066 100644 --- a/src/agents/failover-error.test.ts +++ b/src/agents/failover-error.test.ts @@ -226,6 +226,21 @@ describe("failover-error", () => { ).toBe("rate_limit"); }); + it("keeps explicit 402 rate-limit wrappers aligned with status-split payloads", () => { + const message = "rate limit exceeded"; + expect( + resolveFailoverReasonFromError({ + message: `HTTP 402 Payment Required: ${message}`, + }), + ).toBe("rate_limit"); + expect( + resolveFailoverReasonFromError({ + status: 402, + message, + }), + ).toBe("rate_limit"); + }); + it("infers format errors from error messages", () => { expect( resolveFailoverReasonFromError({ diff --git a/src/agents/model-fallback.probe.test.ts b/src/agents/model-fallback.probe.test.ts index 480b8c31d38..01bcb2dc3a8 100644 --- a/src/agents/model-fallback.probe.test.ts +++ b/src/agents/model-fallback.probe.test.ts @@ -346,7 +346,7 @@ describe("runWithModelFallback – probe logic", () => { }); }); - it("probes billing-cooldowned primary when no fallback candidates exist", async () => { + it("skips billing-cooldowned primary when no fallback candidates exist", async () => { const cfg = makeCfg({ agents: { defaults: { @@ -363,54 +363,15 @@ describe("runWithModelFallback – probe logic", () => { mockedGetSoonestCooldownExpiry.mockReturnValue(expiresIn30Min); mockedResolveProfilesUnavailableReason.mockReturnValue("billing"); - const run = vi.fn().mockResolvedValue("billing-recovered"); - - const result = await runWithModelFallback({ - cfg, - provider: "openai", - model: "gpt-4.1-mini", - fallbacksOverride: [], - run, - }); - - expect(result.result).toBe("billing-recovered"); - expect(run).toHaveBeenCalledTimes(1); - expect(run).toHaveBeenCalledWith("openai", "gpt-4.1-mini", { - allowTransientCooldownProbe: true, - }); - }); - - it("throttles billing probe for single-candidate at 30s intervals", async () => { - const cfg = makeCfg({ - agents: { - defaults: { - model: { - primary: "openai/gpt-4.1-mini", - fallbacks: [], - }, - }, - }, - } as Partial); - - mockedGetSoonestCooldownExpiry.mockReturnValue(NOW + 30 * 60 * 1000); - mockedResolveProfilesUnavailableReason.mockReturnValue("billing"); - - // Simulate a recent probe 10s ago - _probeThrottleInternals.lastProbeAttempt.set("openai", NOW - 10_000); - - const run = vi.fn().mockResolvedValue("unreachable"); - await expect( runWithModelFallback({ cfg, provider: "openai", model: "gpt-4.1-mini", fallbacksOverride: [], - run, + run: vi.fn().mockResolvedValue("billing-recovered"), }), ).rejects.toThrow("All models failed"); - - expect(run).not.toHaveBeenCalled(); }); it("probes billing-cooldowned primary with fallbacks when near cooldown expiry", async () => { diff --git a/src/agents/model-fallback.ts b/src/agents/model-fallback.ts index 3b6eb691088..ad2b5759233 100644 --- a/src/agents/model-fallback.ts +++ b/src/agents/model-fallback.ts @@ -429,19 +429,12 @@ function resolveCooldownDecision(params: { } // Billing is semi-persistent: the user may fix their balance, or a transient - // 402 might have been misclassified. Without fallback candidates, skipping is - // guaranteed failure so we attempt (throttled). With fallbacks, probe the - // primary when the standard probe schedule allows. + // 402 might have been misclassified. Probe the primary only when fallbacks + // exist; otherwise repeated single-provider probes just churn the disabled + // auth state without opening any recovery path. if (inferredReason === "billing") { - if (params.isPrimary) { - if (!params.hasFallbackCandidates) { - const lastProbe = lastProbeAttempt.get(params.probeThrottleKey) ?? 0; - if (params.now - lastProbe >= MIN_PROBE_INTERVAL_MS) { - return { type: "attempt", reason: inferredReason, markProbe: true }; - } - } else if (shouldProbe) { - return { type: "attempt", reason: inferredReason, markProbe: true }; - } + if (params.isPrimary && params.hasFallbackCandidates && shouldProbe) { + return { type: "attempt", reason: inferredReason, markProbe: true }; } return { type: "skip", diff --git a/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts b/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts index 097657acd95..ddc0edee636 100644 --- a/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts +++ b/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts @@ -571,6 +571,14 @@ describe("classifyFailoverReasonFromHttpStatus – 402 temporary limits", () => expect(classifyFailoverReason(`402 Payment Required: ${billingMessage}`)).toBe("billing"); expect(classifyFailoverReasonFromHttpStatus(402, billingMessage)).toBe("billing"); }); + + it("keeps explicit 402 rate-limit messages in the rate_limit lane", () => { + const transientMessage = "rate limit exceeded"; + expect(classifyFailoverReason(`HTTP 402 Payment Required: ${transientMessage}`)).toBe( + "rate_limit", + ); + expect(classifyFailoverReasonFromHttpStatus(402, transientMessage)).toBe("rate_limit"); + }); }); describe("classifyFailoverReason", () => { diff --git a/src/agents/pi-embedded-helpers/errors.ts b/src/agents/pi-embedded-helpers/errors.ts index fac44b8888c..0cedd829cea 100644 --- a/src/agents/pi-embedded-helpers/errors.ts +++ b/src/agents/pi-embedded-helpers/errors.ts @@ -277,6 +277,10 @@ function classify402Message(message: string): PaymentRequiredFailoverReason { return "billing"; } + if (isRateLimitErrorMessage(normalized)) { + return "rate_limit"; + } + if (hasRetryable402TransientSignal(normalized)) { return "rate_limit"; }