fix: auto-recover primary model after rate-limit cooldown expires (#17478) (#18045)

Merged via /review-pr -> /prepare-pr -> /merge-pr. Prepared head SHA: f7a7865727 Co-authored-by: PlayerGhost <28265945+PlayerGhost@users.noreply.github.com> Co-authored-by: sebslight <19554889+sebslight@users.noreply.github.com> Reviewed-by: @sebslight
2026-05-08 13:11:22 +00:00 · 2026-02-16 10:03:35 -03:00
parent 244ed9db39
commit 39bb1b3322
5 changed files with 435 additions and 8 deletions
--- a/src/agents/model-fallback.probe.test.ts
+++ b/src/agents/model-fallback.probe.test.ts
@@ -0,0 +1,358 @@
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+import type { OpenClawConfig } from "../config/config.js";
+import type { AuthProfileStore } from "./auth-profiles.js";
+
+// Stub the four auth-profiles exports with vi.fn(); declared ahead of the model-fallback import below
+vi.mock("./auth-profiles.js", () => ({
+  ensureAuthProfileStore: vi.fn(),
+  getSoonestCooldownExpiry: vi.fn(),
+  isProfileInCooldown: vi.fn(),
+  resolveAuthProfileOrder: vi.fn(),
+}));
+
+import {
+  ensureAuthProfileStore,
+  getSoonestCooldownExpiry,
+  isProfileInCooldown,
+  resolveAuthProfileOrder,
+} from "./auth-profiles.js";
+import { _probeThrottleInternals, runWithModelFallback } from "./model-fallback.js";
+
+const mockedEnsureAuthProfileStore = vi.mocked(ensureAuthProfileStore); // default return set in beforeEach
+const mockedGetSoonestCooldownExpiry = vi.mocked(getSoonestCooldownExpiry); // return value set by each test
+const mockedIsProfileInCooldown = vi.mocked(isProfileInCooldown); // default implementation set in beforeEach
+const mockedResolveAuthProfileOrder = vi.mocked(resolveAuthProfileOrder); // default implementation set in beforeEach
+
+/**
+ * Builds a test config whose primary model is openai/gpt-4.1-mini with one
+ * anthropic fallback. Keys in `overrides` replace the defaults at the top
+ * level only (shallow spread), so an `agents` override swaps the whole subtree.
+ */
+function makeCfg(overrides: Partial<OpenClawConfig> = {}): OpenClawConfig {
+  const defaultModel = {
+    primary: "openai/gpt-4.1-mini",
+    fallbacks: ["anthropic/claude-haiku-3-5"],
+  };
+  const baseline = { agents: { defaults: { model: defaultModel } } };
+  return { ...baseline, ...overrides } as OpenClawConfig;
+}
+
+describe("runWithModelFallback – probe logic", () => {
+  const NOW = 1_700_000_000_000;
+  let realDateNow: () => number;
+
+  beforeEach(() => {
+    // Freeze the clock at NOW; the original Date.now is put back in afterEach.
+    realDateNow = Date.now;
+    Date.now = vi.fn(() => NOW);
+
+    // Start every test with no recorded probe attempts.
+    _probeThrottleInternals.lastProbeAttempt.clear();
+
+    // Default store: version 1 with no profiles.
+    const emptyStore: AuthProfileStore = { version: 1, profiles: {} };
+    mockedEnsureAuthProfileStore.mockReturnValue(emptyStore);
+
+    // Default: openai, anthropic and google each resolve to a single profile;
+    // any other provider resolves to none.
+    mockedResolveAuthProfileOrder.mockImplementation(({ provider }: { provider: string }) => {
+      switch (provider) {
+        case "openai":
+          return ["openai-profile-1"];
+        case "anthropic":
+          return ["anthropic-profile-1"];
+        case "google":
+          return ["google-profile-1"];
+        default:
+          return [];
+      }
+    });
+
+    // Default: only openai profiles are in cooldown; fallback providers are available.
+    mockedIsProfileInCooldown.mockImplementation((_store, profileId: string) =>
+      profileId.startsWith("openai"),
+    );
+  });
+
+  afterEach(() => {
+    vi.restoreAllMocks();
+    Date.now = realDateNow; // undo the frozen clock installed in beforeEach
+  });
+
+  it("skips primary model when far from cooldown expiry (30 min remaining)", async () => {
+    // 30 minutes of cooldown left: far outside the 2-minute probe margin.
+    const THIRTY_MINUTES_MS = 30 * 60 * 1000;
+    mockedGetSoonestCooldownExpiry.mockReturnValue(NOW + THIRTY_MINUTES_MS);
+
+    const runSpy = vi.fn().mockResolvedValue("ok");
+
+    const outcome = await runWithModelFallback({
+      cfg: makeCfg(),
+      provider: "openai",
+      model: "gpt-4.1-mini",
+      run: runSpy,
+    });
+
+    // The only real call goes to the fallback ...
+    expect(outcome.result).toBe("ok");
+    expect(runSpy).toHaveBeenCalledTimes(1);
+    expect(runSpy).toHaveBeenCalledWith("anthropic", "claude-haiku-3-5");
+    // ... and the first recorded attempt carries the rate_limit reason.
+    expect(outcome.attempts[0]?.reason).toBe("rate_limit");
+  });
+
+  it("probes primary model when within 2-min margin of cooldown expiry", async () => {
+    // One minute of cooldown left, which falls inside the 2-minute probe
+    // margin, so the primary should be tried even though it is cooling down.
+    const ONE_MINUTE_MS = 60 * 1000;
+    mockedGetSoonestCooldownExpiry.mockReturnValue(NOW + ONE_MINUTE_MS);
+
+    const runSpy = vi.fn().mockResolvedValue("probed-ok");
+
+    const { result } = await runWithModelFallback({
+      cfg: makeCfg(),
+      provider: "openai",
+      model: "gpt-4.1-mini",
+      run: runSpy,
+    });
+
+    // Exactly one call, aimed at the primary, whose value is returned.
+    expect(result).toBe("probed-ok");
+    expect(runSpy).toHaveBeenCalledTimes(1);
+    expect(runSpy).toHaveBeenCalledWith("openai", "gpt-4.1-mini");
+  });
+
+  it("probes primary model when cooldown already expired", async () => {
+    // Soonest expiry lies five minutes in the past.
+    const FIVE_MINUTES_MS = 5 * 60 * 1000;
+    mockedGetSoonestCooldownExpiry.mockReturnValue(NOW - FIVE_MINUTES_MS);
+
+    const runSpy = vi.fn().mockResolvedValue("recovered");
+
+    const { result } = await runWithModelFallback({
+      cfg: makeCfg(),
+      provider: "openai",
+      model: "gpt-4.1-mini",
+      run: runSpy,
+    });
+
+    // Exactly one call, aimed at the primary, whose value is returned.
+    expect(result).toBe("recovered");
+    expect(runSpy).toHaveBeenCalledTimes(1);
+    expect(runSpy).toHaveBeenCalledWith("openai", "gpt-4.1-mini");
+  });
+
+  it("does NOT probe non-primary candidates during cooldown", async () => {
+    const cfg = makeCfg({
+      agents: {
+        defaults: {
+          model: {
+            primary: "openai/gpt-4.1-mini",
+            fallbacks: ["anthropic/claude-haiku-3-5", "google/gemini-2-flash"],
+          },
+        },
+      },
+    } as Partial<OpenClawConfig>);
+
+    // Override: ALL providers in cooldown for this test
+    mockedIsProfileInCooldown.mockReturnValue(true);
+
+    // All profiles in cooldown, cooldown just about to expire
+    const almostExpired = NOW + 30 * 1000; // 30s remaining
+    mockedGetSoonestCooldownExpiry.mockReturnValue(almostExpired);
+
+    // Primary probe fails with 429
+    const run = vi
+      .fn()
+      .mockRejectedValueOnce(Object.assign(new Error("rate limited"), { status: 429 }))
+      .mockResolvedValue("should-not-reach");
+
+    // Assert the rejection itself: with expect.unreachable() inside try/catch the
+    // bare catch also swallows that failure, hiding a missing rejection.
+    await expect(
+      runWithModelFallback({
+        cfg,
+        provider: "openai",
+        model: "gpt-4.1-mini",
+        run,
+      }),
+    ).rejects.toThrow();
+    // Primary was probed (i === 0 + within margin), non-primary were skipped
+    expect(run).toHaveBeenCalledTimes(1); // only primary was actually called
+    expect(run).toHaveBeenCalledWith("openai", "gpt-4.1-mini");
+  });
+
+  it("throttles probe when called within 30s interval", async () => {
+    // 30 seconds of cooldown left, so the expiry itself is inside the probe margin.
+    mockedGetSoonestCooldownExpiry.mockReturnValue(NOW + 30 * 1000);
+
+    // Record a probe for "openai" only 10 seconds before NOW.
+    const TEN_SECONDS_MS = 10_000;
+    _probeThrottleInternals.lastProbeAttempt.set("openai", NOW - TEN_SECONDS_MS);
+
+    const runSpy = vi.fn().mockResolvedValue("ok");
+
+    const outcome = await runWithModelFallback({
+      cfg: makeCfg(),
+      provider: "openai",
+      model: "gpt-4.1-mini",
+      run: runSpy,
+    });
+
+    // The recent probe suppresses a new one: the only call goes to the
+    // fallback and the first recorded attempt carries the rate_limit reason.
+    expect(outcome.result).toBe("ok");
+    expect(runSpy).toHaveBeenCalledTimes(1);
+    expect(runSpy).toHaveBeenCalledWith("anthropic", "claude-haiku-3-5");
+    expect(outcome.attempts[0]?.reason).toBe("rate_limit");
+  });
+
+  it("allows probe when 30s have passed since last probe", async () => {
+    // 30 seconds of cooldown left: inside the probe margin.
+    mockedGetSoonestCooldownExpiry.mockReturnValue(NOW + 30 * 1000);
+
+    // Previous probe was 31 seconds before NOW, so it should NOT be throttled.
+    const THIRTY_ONE_SECONDS_MS = 31_000;
+    _probeThrottleInternals.lastProbeAttempt.set("openai", NOW - THIRTY_ONE_SECONDS_MS);
+
+    const runSpy = vi.fn().mockResolvedValue("probed-ok");
+
+    const { result } = await runWithModelFallback({
+      cfg: makeCfg(),
+      provider: "openai",
+      model: "gpt-4.1-mini",
+      run: runSpy,
+    });
+
+    expect(result).toBe("probed-ok");
+    expect(runSpy).toHaveBeenCalledTimes(1);
+    expect(runSpy).toHaveBeenCalledWith("openai", "gpt-4.1-mini");
+  });
+
+  it("handles non-finite soonest safely (treats as probe-worthy)", async () => {
+    // The expiry lookup reports Infinity; the expectation is that the primary
+    // is still tried rather than skipped.
+    mockedGetSoonestCooldownExpiry.mockReturnValue(Infinity);
+
+    const runSpy = vi.fn().mockResolvedValue("ok-infinity");
+
+    const { result } = await runWithModelFallback({
+      cfg: makeCfg(),
+      provider: "openai",
+      model: "gpt-4.1-mini",
+      run: runSpy,
+    });
+
+    // The primary was called and its value is what comes back.
+    expect(result).toBe("ok-infinity");
+    expect(runSpy).toHaveBeenCalledWith("openai", "gpt-4.1-mini");
+  });
+
+  it("handles NaN soonest safely (treats as probe-worthy)", async () => {
+    // The expiry lookup reports NaN; the primary is expected to be tried anyway.
+    mockedGetSoonestCooldownExpiry.mockReturnValue(NaN);
+
+    const runSpy = vi.fn().mockResolvedValue("ok-nan");
+
+    const { result } = await runWithModelFallback({
+      cfg: makeCfg(),
+      provider: "openai",
+      model: "gpt-4.1-mini",
+      run: runSpy,
+    });
+
+    // The primary was called and its value is what comes back.
+    expect(result).toBe("ok-nan");
+    expect(runSpy).toHaveBeenCalledWith("openai", "gpt-4.1-mini");
+  });
+
+  it("handles null soonest safely (treats as probe-worthy)", async () => {
+    // The expiry lookup reports null; the primary is expected to be tried anyway.
+    mockedGetSoonestCooldownExpiry.mockReturnValue(null);
+
+    const runSpy = vi.fn().mockResolvedValue("ok-null");
+
+    const { result } = await runWithModelFallback({
+      cfg: makeCfg(),
+      provider: "openai",
+      model: "gpt-4.1-mini",
+      run: runSpy,
+    });
+
+    // The primary was called and its value is what comes back.
+    expect(result).toBe("ok-null");
+    expect(runSpy).toHaveBeenCalledWith("openai", "gpt-4.1-mini");
+  });
+
+  it("single candidate (no fallbacks) → no probe, normal skip behavior", async () => {
+    const cfg = makeCfg({
+      agents: {
+        defaults: {
+          model: {
+            primary: "openai/gpt-4.1-mini",
+            fallbacks: [], // no fallbacks
+          },
+        },
+      },
+    } as Partial<OpenClawConfig>);
+
+    // Cooldown expires within probe margin
+    const almostExpired = NOW + 30 * 1000;
+    mockedGetSoonestCooldownExpiry.mockReturnValue(almostExpired);
+
+    const run = vi.fn().mockResolvedValue("should-not-probe");
+
+    // With single candidate + hasFallbackCandidates === false,
+    // shouldProbe is false → skip with rate_limit
+    await expect(
+      runWithModelFallback({
+        cfg,
+        provider: "openai",
+        model: "gpt-4.1-mini",
+        fallbacksOverride: [],
+        run,
+      }),
+    ).rejects.toThrow();
+
+    // The only candidate is in cooldown and there is nothing to fall back to,
+    // so it is skipped without a probe and the call rejects once candidates
+    // are exhausted: `run` must never have been invoked.
+    expect(run).not.toHaveBeenCalled();
+  });
+
+  it("single candidate skips with rate_limit and exhausts candidates", async () => {
+    const cfg = makeCfg({
+      agents: {
+        defaults: {
+          model: {
+            primary: "openai/gpt-4.1-mini",
+            fallbacks: [],
+          },
+        },
+      },
+    } as Partial<OpenClawConfig>);
+
+    // Cooldown within probe margin — but probe only applies when hasFallbackCandidates
+    const almostExpired = NOW + 30 * 1000;
+    mockedGetSoonestCooldownExpiry.mockReturnValue(almostExpired);
+
+    const run = vi.fn().mockResolvedValue("unreachable");
+
+    // Assert the rejection directly: with expect.unreachable() inside try/catch
+    // the bare catch also swallows the "unreachable" failure itself.
+    await expect(
+      runWithModelFallback({
+        cfg,
+        provider: "openai",
+        model: "gpt-4.1-mini",
+        fallbacksOverride: [],
+        run,
+      }),
+    ).rejects.toThrow();
+
+    // With no fallbacks and all profiles in cooldown,
+    // shouldProbe = isPrimary && hasFallbackCandidates(false) && ... = false
+    // So it skips, then exhausts all candidates
+    expect(run).not.toHaveBeenCalled();
+  });
+});