fix(agents): handle overloaded failover separately (#38301)

* fix(agents): skip auth-profile failure on overload
* fix(agents): note overload auth-profile fallback fix
* fix(agents): classify overloaded failures separately
* fix(agents): back off before overload failover
* fix(agents): tighten overload probe and backoff state
* fix(agents): persist overloaded cooldown across runs
* fix(agents): tighten overloaded status handling
* test(agents): add overload regression coverage
* fix(agents): restore runner imports after rebase
* test(agents): add overload fallback integration coverage
* fix(agents): harden overloaded failover abort handling
* test(agents): tighten overload classifier coverage
* test(agents): cover all-overloaded fallback exhaustion
* fix(cron): retry overloaded fallback summaries
* fix(cron): treat HTTP 529 as overloaded retry
2026-05-10 15:24:58 +00:00 · 2026-03-07 01:42:11 +03:00
parent 110ca23bab
commit 6e962d8b9e
36 changed files with 1036 additions and 84 deletions
--- a/src/agents/model-fallback.probe.test.ts
+++ b/src/agents/model-fallback.probe.test.ts
@@ -53,7 +53,7 @@ function expectPrimaryProbeSuccess(
  expect(result.result).toBe(expectedResult);
  expect(run).toHaveBeenCalledTimes(1);
  expect(run).toHaveBeenCalledWith("openai", "gpt-4.1-mini", {
-    allowRateLimitCooldownProbe: true,
+    allowTransientCooldownProbe: true,
  });
 }

@@ -200,10 +200,48 @@ describe("runWithModelFallback – probe logic", () => {
    expect(result.result).toBe("fallback-ok");
    expect(run).toHaveBeenCalledTimes(2);
    expect(run).toHaveBeenNthCalledWith(1, "openai", "gpt-4.1-mini", {
-      allowRateLimitCooldownProbe: true,
+      allowTransientCooldownProbe: true,
    });
    expect(run).toHaveBeenNthCalledWith(2, "anthropic", "claude-haiku-3-5", {
-      allowRateLimitCooldownProbe: true,
+      allowTransientCooldownProbe: true,
+    });
+  });
+
+  it("attempts non-primary fallbacks during overloaded cooldown after primary probe failure", async () => {
+    const cfg = makeCfg({
+      agents: {
+        defaults: {
+          model: {
+            primary: "openai/gpt-4.1-mini",
+            fallbacks: ["anthropic/claude-haiku-3-5", "google/gemini-2-flash"],
+          },
+        },
+      },
+    } as Partial<OpenClawConfig>);
+
+    mockedIsProfileInCooldown.mockReturnValue(true);
+    mockedGetSoonestCooldownExpiry.mockReturnValue(NOW + 30 * 1000);
+    mockedResolveProfilesUnavailableReason.mockReturnValue("overloaded");
+
+    const run = vi
+      .fn()
+      .mockRejectedValueOnce(Object.assign(new Error("service overloaded"), { status: 503 }))
+      .mockResolvedValue("fallback-ok");
+
+    const result = await runWithModelFallback({
+      cfg,
+      provider: "openai",
+      model: "gpt-4.1-mini",
+      run,
+    });
+
+    expect(result.result).toBe("fallback-ok");
+    expect(run).toHaveBeenCalledTimes(2);
+    expect(run).toHaveBeenNthCalledWith(1, "openai", "gpt-4.1-mini", {
+      allowTransientCooldownProbe: true,
+    });
+    expect(run).toHaveBeenNthCalledWith(2, "anthropic", "claude-haiku-3-5", {
+      allowTransientCooldownProbe: true,
    });
  });

@@ -326,10 +364,10 @@ describe("runWithModelFallback – probe logic", () => {
    });

    expect(run).toHaveBeenNthCalledWith(1, "openai", "gpt-4.1-mini", {
-      allowRateLimitCooldownProbe: true,
+      allowTransientCooldownProbe: true,
    });
    expect(run).toHaveBeenNthCalledWith(2, "openai", "gpt-4.1-mini", {
-      allowRateLimitCooldownProbe: true,
+      allowTransientCooldownProbe: true,
    });
  });
 });