fix(agents): comprehensive quota fallback fixes - session overrides + surgical cooldown logic (#23816)

Merged via /review-pr -> /prepare-pr -> /merge-pr.

Prepared head SHA: e6f2b4742b
Co-authored-by: ramezgaberiel <844893+ramezgaberiel@users.noreply.github.com>
Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com>
Reviewed-by: @gumadeiras
This commit is contained in:
Ramez
2026-02-25 19:35:40 -06:00
committed by GitHub
parent 0cc3e8137c
commit acbb93be48
4 changed files with 422 additions and 54 deletions

View File

@@ -163,7 +163,7 @@ describe("runWithModelFallback probe logic", () => {
expectPrimaryProbeSuccess(result, run, "recovered");
});
it("does NOT probe non-primary candidates during cooldown", async () => {
it("attempts non-primary fallbacks during rate-limit cooldown after primary probe failure", async () => {
const cfg = makeCfg({
agents: {
defaults: {
@@ -182,25 +182,23 @@ describe("runWithModelFallback probe logic", () => {
const almostExpired = NOW + 30 * 1000; // 30s remaining
mockedGetSoonestCooldownExpiry.mockReturnValue(almostExpired);
// Primary probe fails with 429
// Primary probe fails with 429; fallback should still be attempted for rate_limit cooldowns.
const run = vi
.fn()
.mockRejectedValueOnce(Object.assign(new Error("rate limited"), { status: 429 }))
.mockResolvedValue("should-not-reach");
.mockResolvedValue("fallback-ok");
try {
await runWithModelFallback({
cfg,
provider: "openai",
model: "gpt-4.1-mini",
run,
});
expect.unreachable("should have thrown since all candidates exhausted");
} catch {
// Primary was probed (i === 0 + within margin), non-primary were skipped
expect(run).toHaveBeenCalledTimes(1); // only primary was actually called
expect(run).toHaveBeenCalledWith("openai", "gpt-4.1-mini");
}
const result = await runWithModelFallback({
cfg,
provider: "openai",
model: "gpt-4.1-mini",
run,
});
expect(result.result).toBe("fallback-ok");
expect(run).toHaveBeenCalledTimes(2);
expect(run).toHaveBeenNthCalledWith(1, "openai", "gpt-4.1-mini");
expect(run).toHaveBeenNthCalledWith(2, "anthropic", "claude-haiku-3-5");
});
it("throttles probe when called within 30s interval", async () => {