fix: auto-recover primary model after rate-limit cooldown expires (#17478) (#18045)

Merged via /review-pr -> /prepare-pr -> /merge-pr. Prepared head SHA: f7a7865727 Co-authored-by: PlayerGhost <28265945+PlayerGhost@users.noreply.github.com> Co-authored-by: sebslight <19554889+sebslight@users.noreply.github.com> Reviewed-by: @sebslight
2026-05-09 22:04:30 +00:00 · 2026-02-16 10:03:35 -03:00
parent 244ed9db39
commit 39bb1b3322
5 changed files with 435 additions and 8 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -45,6 +45,7 @@ Docs: https://docs.openclaw.ai
 - Browser/Agents: when browser control service is unavailable, return explicit non-retry guidance (instead of "try again") so models do not loop on repeated browser tool calls until timeout. (#17673) Thanks @austenstone.
 - Subagents: use child-run-based deterministic announce idempotency keys across direct and queued delivery paths (with legacy queued-item fallback) to prevent duplicate announce retries without collapsing distinct same-millisecond announces. (#17150) Thanks @widingmarcus-cyber.
 - Subagents/Models: preserve `agents.defaults.model.fallbacks` when subagent sessions carry a model override, so subagent runs fail over to configured fallback models instead of retrying only the overridden primary model.
 - Agents/Models: probe the primary model when its auth-profile cooldown is near expiry (with per-provider throttling), so runs recover from temporary rate limits without staying on fallback models until restart. (#17478) Thanks @PlayerGhost.
 - Telegram: omit `message_thread_id` for DM sends/draft previews and keep forum-topic handling (`id=1` general omitted, non-general kept), preventing DM failures with `400 Bad Request: message thread not found`. (#10942) Thanks @garnetlyx.
 - Telegram: replace inbound `<media:audio>` placeholder with successful preflight voice transcript in message body context, preventing placeholder-only prompt bodies for mention-gated voice messages. (#16789) Thanks @Limitless2023.
 - Telegram: retry inbound media `getFile` calls (3 attempts with backoff) and gracefully fall back to placeholder-only processing when retries fail, preventing dropped voice/media messages on transient Telegram network errors. (#16154) Thanks @yinghaosang.
--- a/src/agents/auth-profiles.ts
+++ b/src/agents/auth-profiles.ts
@@ -33,6 +33,7 @@ export type {
 export {
  calculateAuthProfileCooldownMs,
  clearAuthProfileCooldown,
  getSoonestCooldownExpiry,
  isProfileInCooldown,
  markAuthProfileCooldown,
  markAuthProfileFailure,
--- a/src/agents/auth-profiles/usage.ts
+++ b/src/agents/auth-profiles/usage.ts
@@ -25,6 +25,32 @@ export function isProfileInCooldown(store: AuthProfileStore, profileId: string):
  return unusableUntil ? Date.now() < unusableUntil : false;
 }
 /**
 * Return the soonest `unusableUntil` timestamp (ms epoch) among the given
 * profiles, or `null` when no profile has a recorded cooldown. Note: the
 * returned timestamp may be in the past if the cooldown has already expired.
 */
 export function getSoonestCooldownExpiry(
  store: AuthProfileStore,
  profileIds: string[],
 ): number | null {
  let soonest: number | null = null;
  for (const id of profileIds) {
    const stats = store.usageStats?.[id];
    if (!stats) {
      continue;
    }
    const until = resolveProfileUnusableUntil(stats);
    if (typeof until !== "number" || !Number.isFinite(until) || until <= 0) {
      continue;
    }
    if (soonest === null || until < soonest) {
      soonest = until;
    }
  }
  return soonest;
 }
 /**
 * Mark a profile as successfully used. Resets error count and updates lastUsed.
 * Uses store lock to avoid overwriting concurrent usage updates.
--- a/src/agents/model-fallback.probe.test.ts
+++ b/src/agents/model-fallback.probe.test.ts
@@ -0,0 +1,358 @@
 import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
 import type { OpenClawConfig } from "../config/config.js";
 import type { AuthProfileStore } from "./auth-profiles.js";
 // Mock auth-profiles module — must be before importing model-fallback
 vi.mock("./auth-profiles.js", () => ({
  ensureAuthProfileStore: vi.fn(),
  getSoonestCooldownExpiry: vi.fn(),
  isProfileInCooldown: vi.fn(),
  resolveAuthProfileOrder: vi.fn(),
 }));
 import {
  ensureAuthProfileStore,
  getSoonestCooldownExpiry,
  isProfileInCooldown,
  resolveAuthProfileOrder,
 } from "./auth-profiles.js";
 import { _probeThrottleInternals, runWithModelFallback } from "./model-fallback.js";
 const mockedEnsureAuthProfileStore = vi.mocked(ensureAuthProfileStore);
 const mockedGetSoonestCooldownExpiry = vi.mocked(getSoonestCooldownExpiry);
 const mockedIsProfileInCooldown = vi.mocked(isProfileInCooldown);
 const mockedResolveAuthProfileOrder = vi.mocked(resolveAuthProfileOrder);
 function makeCfg(overrides: Partial<OpenClawConfig> = {}): OpenClawConfig {
  return {
    agents: {
      defaults: {
        model: {
          primary: "openai/gpt-4.1-mini",
          fallbacks: ["anthropic/claude-haiku-3-5"],
        },
      },
    },
    ...overrides,
  } as OpenClawConfig;
 }
 describe("runWithModelFallback – probe logic", () => {
  let realDateNow: () => number;
  const NOW = 1_700_000_000_000;
  beforeEach(() => {
    realDateNow = Date.now;
    Date.now = vi.fn(() => NOW);
    // Clear throttle state between tests
    _probeThrottleInternals.lastProbeAttempt.clear();
    // Default: ensureAuthProfileStore returns a fake store
    const fakeStore: AuthProfileStore = {
      version: 1,
      profiles: {},
    };
    mockedEnsureAuthProfileStore.mockReturnValue(fakeStore);
    // Default: resolveAuthProfileOrder returns profiles only for "openai" provider
    mockedResolveAuthProfileOrder.mockImplementation(({ provider }: { provider: string }) => {
      if (provider === "openai") {
        return ["openai-profile-1"];
      }
      if (provider === "anthropic") {
        return ["anthropic-profile-1"];
      }
      if (provider === "google") {
        return ["google-profile-1"];
      }
      return [];
    });
    // Default: only openai profiles are in cooldown; fallback providers are available
    mockedIsProfileInCooldown.mockImplementation((_store, profileId: string) => {
      return profileId.startsWith("openai");
    });
  });
  afterEach(() => {
    Date.now = realDateNow;
    vi.restoreAllMocks();
  });
  it("skips primary model when far from cooldown expiry (30 min remaining)", async () => {
    const cfg = makeCfg();
    // Cooldown expires in 30 min — well beyond the 2-min margin
    const expiresIn30Min = NOW + 30 * 60 * 1000;
    mockedGetSoonestCooldownExpiry.mockReturnValue(expiresIn30Min);
    const run = vi.fn().mockResolvedValue("ok");
    const result = await runWithModelFallback({
      cfg,
      provider: "openai",
      model: "gpt-4.1-mini",
      run,
    });
    // Should skip primary and use fallback
    expect(result.result).toBe("ok");
    expect(run).toHaveBeenCalledTimes(1);
    expect(run).toHaveBeenCalledWith("anthropic", "claude-haiku-3-5");
    expect(result.attempts[0]?.reason).toBe("rate_limit");
  });
  it("probes primary model when within 2-min margin of cooldown expiry", async () => {
    const cfg = makeCfg();
    // Cooldown expires in 1 minute — within 2-min probe margin
    const expiresIn1Min = NOW + 60 * 1000;
    mockedGetSoonestCooldownExpiry.mockReturnValue(expiresIn1Min);
    const run = vi.fn().mockResolvedValue("probed-ok");
    const result = await runWithModelFallback({
      cfg,
      provider: "openai",
      model: "gpt-4.1-mini",
      run,
    });
    // Should probe primary and succeed
    expect(result.result).toBe("probed-ok");
    expect(run).toHaveBeenCalledTimes(1);
    expect(run).toHaveBeenCalledWith("openai", "gpt-4.1-mini");
  });
  it("probes primary model when cooldown already expired", async () => {
    const cfg = makeCfg();
    // Cooldown expired 5 min ago
    const expiredAlready = NOW - 5 * 60 * 1000;
    mockedGetSoonestCooldownExpiry.mockReturnValue(expiredAlready);
    const run = vi.fn().mockResolvedValue("recovered");
    const result = await runWithModelFallback({
      cfg,
      provider: "openai",
      model: "gpt-4.1-mini",
      run,
    });
    expect(result.result).toBe("recovered");
    expect(run).toHaveBeenCalledTimes(1);
    expect(run).toHaveBeenCalledWith("openai", "gpt-4.1-mini");
  });
  it("does NOT probe non-primary candidates during cooldown", async () => {
    const cfg = makeCfg({
      agents: {
        defaults: {
          model: {
            primary: "openai/gpt-4.1-mini",
            fallbacks: ["anthropic/claude-haiku-3-5", "google/gemini-2-flash"],
          },
        },
      },
    } as Partial<OpenClawConfig>);
    // Override: ALL providers in cooldown for this test
    mockedIsProfileInCooldown.mockReturnValue(true);
    // All profiles in cooldown, cooldown just about to expire
    const almostExpired = NOW + 30 * 1000; // 30s remaining
    mockedGetSoonestCooldownExpiry.mockReturnValue(almostExpired);
    // Primary probe fails with 429
    const run = vi
      .fn()
      .mockRejectedValueOnce(Object.assign(new Error("rate limited"), { status: 429 }))
      .mockResolvedValue("should-not-reach");
    try {
      await runWithModelFallback({
        cfg,
        provider: "openai",
        model: "gpt-4.1-mini",
        run,
      });
      expect.unreachable("should have thrown since all candidates exhausted");
    } catch {
      // Primary was probed (i === 0 + within margin), non-primary were skipped
      expect(run).toHaveBeenCalledTimes(1); // only primary was actually called
      expect(run).toHaveBeenCalledWith("openai", "gpt-4.1-mini");
    }
  });
  it("throttles probe when called within 30s interval", async () => {
    const cfg = makeCfg();
    // Cooldown just about to expire (within probe margin)
    const almostExpired = NOW + 30 * 1000;
    mockedGetSoonestCooldownExpiry.mockReturnValue(almostExpired);
    // Simulate a recent probe 10s ago
    _probeThrottleInternals.lastProbeAttempt.set("openai", NOW - 10_000);
    const run = vi.fn().mockResolvedValue("ok");
    const result = await runWithModelFallback({
      cfg,
      provider: "openai",
      model: "gpt-4.1-mini",
      run,
    });
    // Should be throttled → skip primary, use fallback
    expect(result.result).toBe("ok");
    expect(run).toHaveBeenCalledTimes(1);
    expect(run).toHaveBeenCalledWith("anthropic", "claude-haiku-3-5");
    expect(result.attempts[0]?.reason).toBe("rate_limit");
  });
  it("allows probe when 30s have passed since last probe", async () => {
    const cfg = makeCfg();
    const almostExpired = NOW + 30 * 1000;
    mockedGetSoonestCooldownExpiry.mockReturnValue(almostExpired);
    // Last probe was 31s ago — should NOT be throttled
    _probeThrottleInternals.lastProbeAttempt.set("openai", NOW - 31_000);
    const run = vi.fn().mockResolvedValue("probed-ok");
    const result = await runWithModelFallback({
      cfg,
      provider: "openai",
      model: "gpt-4.1-mini",
      run,
    });
    expect(result.result).toBe("probed-ok");
    expect(run).toHaveBeenCalledTimes(1);
    expect(run).toHaveBeenCalledWith("openai", "gpt-4.1-mini");
  });
  it("handles non-finite soonest safely (treats as probe-worthy)", async () => {
    const cfg = makeCfg();
    // Return Infinity — should be treated as "probe" per the guard
    mockedGetSoonestCooldownExpiry.mockReturnValue(Infinity);
    const run = vi.fn().mockResolvedValue("ok-infinity");
    const result = await runWithModelFallback({
      cfg,
      provider: "openai",
      model: "gpt-4.1-mini",
      run,
    });
    expect(result.result).toBe("ok-infinity");
    expect(run).toHaveBeenCalledWith("openai", "gpt-4.1-mini");
  });
  it("handles NaN soonest safely (treats as probe-worthy)", async () => {
    const cfg = makeCfg();
    mockedGetSoonestCooldownExpiry.mockReturnValue(NaN);
    const run = vi.fn().mockResolvedValue("ok-nan");
    const result = await runWithModelFallback({
      cfg,
      provider: "openai",
      model: "gpt-4.1-mini",
      run,
    });
    expect(result.result).toBe("ok-nan");
    expect(run).toHaveBeenCalledWith("openai", "gpt-4.1-mini");
  });
  it("handles null soonest safely (treats as probe-worthy)", async () => {
    const cfg = makeCfg();
    mockedGetSoonestCooldownExpiry.mockReturnValue(null);
    const run = vi.fn().mockResolvedValue("ok-null");
    const result = await runWithModelFallback({
      cfg,
      provider: "openai",
      model: "gpt-4.1-mini",
      run,
    });
    expect(result.result).toBe("ok-null");
    expect(run).toHaveBeenCalledWith("openai", "gpt-4.1-mini");
  });
  it("single candidate (no fallbacks) → no probe, normal skip behavior", async () => {
    const cfg = makeCfg({
      agents: {
        defaults: {
          model: {
            primary: "openai/gpt-4.1-mini",
            fallbacks: [], // no fallbacks
          },
        },
      },
    } as Partial<OpenClawConfig>);
    // Cooldown expires within probe margin
    const almostExpired = NOW + 30 * 1000;
    mockedGetSoonestCooldownExpiry.mockReturnValue(almostExpired);
    const run = vi.fn().mockResolvedValue("should-not-probe");
    // With single candidate + hasFallbackCandidates === false,
    // shouldProbe is false → skip with rate_limit
    await expect(
      runWithModelFallback({
        cfg,
        provider: "openai",
        model: "gpt-4.1-mini",
        fallbacksOverride: [],
        run,
      }),
    ).rejects.toThrow();
    // run should still be called once (single candidate, no fallbacks = try it directly?
    // Actually with all profiles in cooldown and no fallback candidates,
    // it skips the primary and throws "all candidates exhausted"
    // Let's verify the attempt shows rate_limit
  });
  it("single candidate skips with rate_limit and exhausts candidates", async () => {
    const cfg = makeCfg({
      agents: {
        defaults: {
          model: {
            primary: "openai/gpt-4.1-mini",
            fallbacks: [],
          },
        },
      },
    } as Partial<OpenClawConfig>);
    // Cooldown within probe margin — but probe only applies when hasFallbackCandidates
    const almostExpired = NOW + 30 * 1000;
    mockedGetSoonestCooldownExpiry.mockReturnValue(almostExpired);
    const run = vi.fn().mockResolvedValue("unreachable");
    try {
      await runWithModelFallback({
        cfg,
        provider: "openai",
        model: "gpt-4.1-mini",
        fallbacksOverride: [],
        run,
      });
      // Should not reach here
      expect.unreachable("should have thrown");
    } catch {
      // With no fallbacks and all profiles in cooldown,
      // shouldProbe = isPrimary && hasFallbackCandidates(false) && ... = false
      // So it skips, then exhausts all candidates
      expect(run).not.toHaveBeenCalled();
    }
  });
 });
--- a/src/agents/model-fallback.ts
+++ b/src/agents/model-fallback.ts
@@ -2,6 +2,7 @@ import type { OpenClawConfig } from "../config/config.js";
 import type { FailoverReason } from "./pi-embedded-helpers.js";
 import {
  ensureAuthProfileStore,
  getSoonestCooldownExpiry,
  isProfileInCooldown,
  resolveAuthProfileOrder,
 } from "./auth-profiles.js";
@@ -217,6 +218,15 @@ function resolveFallbackCandidates(params: {
  return candidates;
 }
 const lastProbeAttempt = new Map<string, number>();
 const MIN_PROBE_INTERVAL_MS = 30_000; // 30 seconds between probes per provider
 /** @internal – exposed for unit tests only */
 export const _probeThrottleInternals = {
  lastProbeAttempt,
  MIN_PROBE_INTERVAL_MS,
 } as const;
 export async function runWithModelFallback<T>(params: {
  cfg: OpenClawConfig | undefined;
  provider: string;
@@ -239,6 +249,8 @@ export async function runWithModelFallback<T>(params: {
  const attempts: FallbackAttempt[] = [];
  let lastError: unknown;
  const hasFallbackCandidates = candidates.length > 1;
  for (let i = 0; i < candidates.length; i += 1) {
    const candidate = candidates[i];
    if (authStore) {
@@ -250,14 +262,43 @@ export async function runWithModelFallback<T>(params: {
      const isAnyProfileAvailable = profileIds.some((id) => !isProfileInCooldown(authStore, id));
      if (profileIds.length > 0 && !isAnyProfileAvailable) {
-        // All profiles for this provider are in cooldown; skip without attempting
+        // All profiles for this provider are in cooldown.
-        attempts.push({
+        // For the primary model (i === 0), probe it if the soonest cooldown
-          provider: candidate.provider,
+        // expiry is close (within 2 minutes) or already past. This avoids
-          model: candidate.model,
+        // staying on a fallback model long after the rate-limit window clears
-          error: `Provider ${candidate.provider} is in cooldown (all profiles unavailable)`,
+        // when exponential backoff cooldowns exceed the actual provider limit.
-          reason: "rate_limit",
+        const isPrimary = i === 0;
-        });
+        const shouldProbe =
-        continue;
+          isPrimary &&
          hasFallbackCandidates &&
          (() => {
            const lastProbe = lastProbeAttempt.get(candidate.provider) ?? 0;
            if (Date.now() - lastProbe < MIN_PROBE_INTERVAL_MS) {
              return false; // throttled
            }
            const soonest = getSoonestCooldownExpiry(authStore, profileIds);
            if (soonest === null || !Number.isFinite(soonest)) {
              return true;
            }
            const now = Date.now();
            // Probe when cooldown already expired or within 2 min of expiry
            const PROBE_MARGIN_MS = 2 * 60 * 1000;
            return now >= soonest - PROBE_MARGIN_MS;
          })();
        if (!shouldProbe) {
          // Skip without attempting
          attempts.push({
            provider: candidate.provider,
            model: candidate.model,
            error: `Provider ${candidate.provider} is in cooldown (all profiles unavailable)`,
            reason: "rate_limit",
          });
          continue;
        }
        // Primary model probe: attempt it despite cooldown to detect recovery.
        // If it fails, the error is caught below and we fall through to the
        // next candidate as usual.
        lastProbeAttempt.set(candidate.provider, Date.now());
      }
    }
    try {