From 39bb1b33222e44139128c3a9e04124df97e7d934 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=8Dtalo=20Souza?= Date: Mon, 16 Feb 2026 10:03:35 -0300 Subject: [PATCH] fix: auto-recover primary model after rate-limit cooldown expires (#17478) (#18045) Merged via /review-pr -> /prepare-pr -> /merge-pr. Prepared head SHA: f7a7865727a9aee0aaa3d929cce65dc46c3db234 Co-authored-by: PlayerGhost <28265945+PlayerGhost@users.noreply.github.com> Co-authored-by: sebslight <19554889+sebslight@users.noreply.github.com> Reviewed-by: @sebslight --- CHANGELOG.md | 1 + src/agents/auth-profiles.ts | 1 + src/agents/auth-profiles/usage.ts | 26 ++ src/agents/model-fallback.probe.test.ts | 358 ++++++++++++++++++++++++ src/agents/model-fallback.ts | 57 +++- 5 files changed, 435 insertions(+), 8 deletions(-) create mode 100644 src/agents/model-fallback.probe.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index a0db9f5c1d9..37391c7c3a4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -45,6 +45,7 @@ Docs: https://docs.openclaw.ai - Browser/Agents: when browser control service is unavailable, return explicit non-retry guidance (instead of "try again") so models do not loop on repeated browser tool calls until timeout. (#17673) Thanks @austenstone. - Subagents: use child-run-based deterministic announce idempotency keys across direct and queued delivery paths (with legacy queued-item fallback) to prevent duplicate announce retries without collapsing distinct same-millisecond announces. (#17150) Thanks @widingmarcus-cyber. - Subagents/Models: preserve `agents.defaults.model.fallbacks` when subagent sessions carry a model override, so subagent runs fail over to configured fallback models instead of retrying only the overridden primary model. +- Agents/Models: probe the primary model when its auth-profile cooldown is near expiry (with per-provider throttling), so runs recover from temporary rate limits without staying on fallback models until restart. (#17478) Thanks @PlayerGhost. 
- Telegram: omit `message_thread_id` for DM sends/draft previews and keep forum-topic handling (`id=1` general omitted, non-general kept), preventing DM failures with `400 Bad Request: message thread not found`. (#10942) Thanks @garnetlyx. - Telegram: replace inbound `` placeholder with successful preflight voice transcript in message body context, preventing placeholder-only prompt bodies for mention-gated voice messages. (#16789) Thanks @Limitless2023. - Telegram: retry inbound media `getFile` calls (3 attempts with backoff) and gracefully fall back to placeholder-only processing when retries fail, preventing dropped voice/media messages on transient Telegram network errors. (#16154) Thanks @yinghaosang. diff --git a/src/agents/auth-profiles.ts b/src/agents/auth-profiles.ts index 91593f3a6b1..14b383d041f 100644 --- a/src/agents/auth-profiles.ts +++ b/src/agents/auth-profiles.ts @@ -33,6 +33,7 @@ export type { export { calculateAuthProfileCooldownMs, clearAuthProfileCooldown, + getSoonestCooldownExpiry, isProfileInCooldown, markAuthProfileCooldown, markAuthProfileFailure, diff --git a/src/agents/auth-profiles/usage.ts b/src/agents/auth-profiles/usage.ts index 8028a7f08a9..366eddf6678 100644 --- a/src/agents/auth-profiles/usage.ts +++ b/src/agents/auth-profiles/usage.ts @@ -25,6 +25,32 @@ export function isProfileInCooldown(store: AuthProfileStore, profileId: string): return unusableUntil ? Date.now() < unusableUntil : false; } +/** + * Return the soonest `unusableUntil` timestamp (ms epoch) among the given + * profiles, or `null` when no profile has a recorded cooldown. Note: the + * returned timestamp may be in the past if the cooldown has already expired. 
+ */ +export function getSoonestCooldownExpiry( + store: AuthProfileStore, + profileIds: string[], +): number | null { + let soonest: number | null = null; + for (const id of profileIds) { + const stats = store.usageStats?.[id]; + if (!stats) { + continue; + } + const until = resolveProfileUnusableUntil(stats); + if (typeof until !== "number" || !Number.isFinite(until) || until <= 0) { + continue; + } + if (soonest === null || until < soonest) { + soonest = until; + } + } + return soonest; +} + /** * Mark a profile as successfully used. Resets error count and updates lastUsed. * Uses store lock to avoid overwriting concurrent usage updates. diff --git a/src/agents/model-fallback.probe.test.ts b/src/agents/model-fallback.probe.test.ts new file mode 100644 index 00000000000..b51c6bad537 --- /dev/null +++ b/src/agents/model-fallback.probe.test.ts @@ -0,0 +1,358 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import type { OpenClawConfig } from "../config/config.js"; +import type { AuthProfileStore } from "./auth-profiles.js"; + +// Mock auth-profiles module — must be before importing model-fallback +vi.mock("./auth-profiles.js", () => ({ + ensureAuthProfileStore: vi.fn(), + getSoonestCooldownExpiry: vi.fn(), + isProfileInCooldown: vi.fn(), + resolveAuthProfileOrder: vi.fn(), +})); + +import { + ensureAuthProfileStore, + getSoonestCooldownExpiry, + isProfileInCooldown, + resolveAuthProfileOrder, +} from "./auth-profiles.js"; +import { _probeThrottleInternals, runWithModelFallback } from "./model-fallback.js"; + +const mockedEnsureAuthProfileStore = vi.mocked(ensureAuthProfileStore); +const mockedGetSoonestCooldownExpiry = vi.mocked(getSoonestCooldownExpiry); +const mockedIsProfileInCooldown = vi.mocked(isProfileInCooldown); +const mockedResolveAuthProfileOrder = vi.mocked(resolveAuthProfileOrder); + +function makeCfg(overrides: Partial = {}): OpenClawConfig { + return { + agents: { + defaults: { + model: { + primary: 
"openai/gpt-4.1-mini", + fallbacks: ["anthropic/claude-haiku-3-5"], + }, + }, + }, + ...overrides, + } as OpenClawConfig; +} + +describe("runWithModelFallback – probe logic", () => { + let realDateNow: () => number; + const NOW = 1_700_000_000_000; + + beforeEach(() => { + realDateNow = Date.now; + Date.now = vi.fn(() => NOW); + + // Clear throttle state between tests + _probeThrottleInternals.lastProbeAttempt.clear(); + + // Default: ensureAuthProfileStore returns a fake store + const fakeStore: AuthProfileStore = { + version: 1, + profiles: {}, + }; + mockedEnsureAuthProfileStore.mockReturnValue(fakeStore); + + // Default: resolveAuthProfileOrder returns profiles only for "openai" provider + mockedResolveAuthProfileOrder.mockImplementation(({ provider }: { provider: string }) => { + if (provider === "openai") { + return ["openai-profile-1"]; + } + if (provider === "anthropic") { + return ["anthropic-profile-1"]; + } + if (provider === "google") { + return ["google-profile-1"]; + } + return []; + }); + // Default: only openai profiles are in cooldown; fallback providers are available + mockedIsProfileInCooldown.mockImplementation((_store, profileId: string) => { + return profileId.startsWith("openai"); + }); + }); + + afterEach(() => { + Date.now = realDateNow; + vi.restoreAllMocks(); + }); + + it("skips primary model when far from cooldown expiry (30 min remaining)", async () => { + const cfg = makeCfg(); + // Cooldown expires in 30 min — well beyond the 2-min margin + const expiresIn30Min = NOW + 30 * 60 * 1000; + mockedGetSoonestCooldownExpiry.mockReturnValue(expiresIn30Min); + + const run = vi.fn().mockResolvedValue("ok"); + + const result = await runWithModelFallback({ + cfg, + provider: "openai", + model: "gpt-4.1-mini", + run, + }); + + // Should skip primary and use fallback + expect(result.result).toBe("ok"); + expect(run).toHaveBeenCalledTimes(1); + expect(run).toHaveBeenCalledWith("anthropic", "claude-haiku-3-5"); + 
expect(result.attempts[0]?.reason).toBe("rate_limit"); + }); + + it("probes primary model when within 2-min margin of cooldown expiry", async () => { + const cfg = makeCfg(); + // Cooldown expires in 1 minute — within 2-min probe margin + const expiresIn1Min = NOW + 60 * 1000; + mockedGetSoonestCooldownExpiry.mockReturnValue(expiresIn1Min); + + const run = vi.fn().mockResolvedValue("probed-ok"); + + const result = await runWithModelFallback({ + cfg, + provider: "openai", + model: "gpt-4.1-mini", + run, + }); + + // Should probe primary and succeed + expect(result.result).toBe("probed-ok"); + expect(run).toHaveBeenCalledTimes(1); + expect(run).toHaveBeenCalledWith("openai", "gpt-4.1-mini"); + }); + + it("probes primary model when cooldown already expired", async () => { + const cfg = makeCfg(); + // Cooldown expired 5 min ago + const expiredAlready = NOW - 5 * 60 * 1000; + mockedGetSoonestCooldownExpiry.mockReturnValue(expiredAlready); + + const run = vi.fn().mockResolvedValue("recovered"); + + const result = await runWithModelFallback({ + cfg, + provider: "openai", + model: "gpt-4.1-mini", + run, + }); + + expect(result.result).toBe("recovered"); + expect(run).toHaveBeenCalledTimes(1); + expect(run).toHaveBeenCalledWith("openai", "gpt-4.1-mini"); + }); + + it("does NOT probe non-primary candidates during cooldown", async () => { + const cfg = makeCfg({ + agents: { + defaults: { + model: { + primary: "openai/gpt-4.1-mini", + fallbacks: ["anthropic/claude-haiku-3-5", "google/gemini-2-flash"], + }, + }, + }, + } as Partial); + + // Override: ALL providers in cooldown for this test + mockedIsProfileInCooldown.mockReturnValue(true); + + // All profiles in cooldown, cooldown just about to expire + const almostExpired = NOW + 30 * 1000; // 30s remaining + mockedGetSoonestCooldownExpiry.mockReturnValue(almostExpired); + + // Primary probe fails with 429 + const run = vi + .fn() + .mockRejectedValueOnce(Object.assign(new Error("rate limited"), { status: 429 })) + 
.mockResolvedValue("should-not-reach"); + + try { + await runWithModelFallback({ + cfg, + provider: "openai", + model: "gpt-4.1-mini", + run, + }); + expect.unreachable("should have thrown since all candidates exhausted"); + } catch { + // Primary was probed (i === 0 + within margin), non-primary were skipped + expect(run).toHaveBeenCalledTimes(1); // only primary was actually called + expect(run).toHaveBeenCalledWith("openai", "gpt-4.1-mini"); + } + }); + + it("throttles probe when called within 30s interval", async () => { + const cfg = makeCfg(); + // Cooldown just about to expire (within probe margin) + const almostExpired = NOW + 30 * 1000; + mockedGetSoonestCooldownExpiry.mockReturnValue(almostExpired); + + // Simulate a recent probe 10s ago + _probeThrottleInternals.lastProbeAttempt.set("openai", NOW - 10_000); + + const run = vi.fn().mockResolvedValue("ok"); + + const result = await runWithModelFallback({ + cfg, + provider: "openai", + model: "gpt-4.1-mini", + run, + }); + + // Should be throttled → skip primary, use fallback + expect(result.result).toBe("ok"); + expect(run).toHaveBeenCalledTimes(1); + expect(run).toHaveBeenCalledWith("anthropic", "claude-haiku-3-5"); + expect(result.attempts[0]?.reason).toBe("rate_limit"); + }); + + it("allows probe when 30s have passed since last probe", async () => { + const cfg = makeCfg(); + const almostExpired = NOW + 30 * 1000; + mockedGetSoonestCooldownExpiry.mockReturnValue(almostExpired); + + // Last probe was 31s ago — should NOT be throttled + _probeThrottleInternals.lastProbeAttempt.set("openai", NOW - 31_000); + + const run = vi.fn().mockResolvedValue("probed-ok"); + + const result = await runWithModelFallback({ + cfg, + provider: "openai", + model: "gpt-4.1-mini", + run, + }); + + expect(result.result).toBe("probed-ok"); + expect(run).toHaveBeenCalledTimes(1); + expect(run).toHaveBeenCalledWith("openai", "gpt-4.1-mini"); + }); + + it("handles non-finite soonest safely (treats as probe-worthy)", async () => { 
+ const cfg = makeCfg(); + + // Return Infinity — should be treated as "probe" per the guard + mockedGetSoonestCooldownExpiry.mockReturnValue(Infinity); + + const run = vi.fn().mockResolvedValue("ok-infinity"); + + const result = await runWithModelFallback({ + cfg, + provider: "openai", + model: "gpt-4.1-mini", + run, + }); + + expect(result.result).toBe("ok-infinity"); + expect(run).toHaveBeenCalledWith("openai", "gpt-4.1-mini"); + }); + + it("handles NaN soonest safely (treats as probe-worthy)", async () => { + const cfg = makeCfg(); + + mockedGetSoonestCooldownExpiry.mockReturnValue(NaN); + + const run = vi.fn().mockResolvedValue("ok-nan"); + + const result = await runWithModelFallback({ + cfg, + provider: "openai", + model: "gpt-4.1-mini", + run, + }); + + expect(result.result).toBe("ok-nan"); + expect(run).toHaveBeenCalledWith("openai", "gpt-4.1-mini"); + }); + + it("handles null soonest safely (treats as probe-worthy)", async () => { + const cfg = makeCfg(); + + mockedGetSoonestCooldownExpiry.mockReturnValue(null); + + const run = vi.fn().mockResolvedValue("ok-null"); + + const result = await runWithModelFallback({ + cfg, + provider: "openai", + model: "gpt-4.1-mini", + run, + }); + + expect(result.result).toBe("ok-null"); + expect(run).toHaveBeenCalledWith("openai", "gpt-4.1-mini"); + }); + + it("single candidate (no fallbacks) → no probe, normal skip behavior", async () => { + const cfg = makeCfg({ + agents: { + defaults: { + model: { + primary: "openai/gpt-4.1-mini", + fallbacks: [], // no fallbacks + }, + }, + }, + } as Partial); + + // Cooldown expires within probe margin + const almostExpired = NOW + 30 * 1000; + mockedGetSoonestCooldownExpiry.mockReturnValue(almostExpired); + + const run = vi.fn().mockResolvedValue("should-not-probe"); + + // With single candidate + hasFallbackCandidates === false, + // shouldProbe is false → skip with rate_limit + await expect( + runWithModelFallback({ + cfg, + provider: "openai", + model: "gpt-4.1-mini", + 
fallbacksOverride: [],
+        run,
+      }),
+    ).rejects.toThrow();
+
+    // `run` is never called here: with no fallback candidates,
+    // shouldProbe stays false even inside the probe margin, so the
+    // primary is skipped with reason "rate_limit" and the call
+    // rejects once all candidates are exhausted (see the next test).
+  });
+
+  it("single candidate skips with rate_limit and exhausts candidates", async () => {
+    const cfg = makeCfg({
+      agents: {
+        defaults: {
+          model: {
+            primary: "openai/gpt-4.1-mini",
+            fallbacks: [],
+          },
+        },
+      },
+    } as Partial);
+
+    // Cooldown within probe margin — but probe only applies when hasFallbackCandidates
+    const almostExpired = NOW + 30 * 1000;
+    mockedGetSoonestCooldownExpiry.mockReturnValue(almostExpired);
+
+    const run = vi.fn().mockResolvedValue("unreachable");
+
+    try {
+      await runWithModelFallback({
+        cfg,
+        provider: "openai",
+        model: "gpt-4.1-mini",
+        fallbacksOverride: [],
+        run,
+      });
+      // Should not reach here
+      expect.unreachable("should have thrown");
+    } catch {
+      // With no fallbacks and all profiles in cooldown,
+      // shouldProbe = isPrimary && hasFallbackCandidates(false) && ... 
= false + // So it skips, then exhausts all candidates + expect(run).not.toHaveBeenCalled(); + } + }); +}); diff --git a/src/agents/model-fallback.ts b/src/agents/model-fallback.ts index 61c2ce1014c..09c84a0606d 100644 --- a/src/agents/model-fallback.ts +++ b/src/agents/model-fallback.ts @@ -2,6 +2,7 @@ import type { OpenClawConfig } from "../config/config.js"; import type { FailoverReason } from "./pi-embedded-helpers.js"; import { ensureAuthProfileStore, + getSoonestCooldownExpiry, isProfileInCooldown, resolveAuthProfileOrder, } from "./auth-profiles.js"; @@ -217,6 +218,15 @@ function resolveFallbackCandidates(params: { return candidates; } +const lastProbeAttempt = new Map(); +const MIN_PROBE_INTERVAL_MS = 30_000; // 30 seconds between probes per provider + +/** @internal – exposed for unit tests only */ +export const _probeThrottleInternals = { + lastProbeAttempt, + MIN_PROBE_INTERVAL_MS, +} as const; + export async function runWithModelFallback(params: { cfg: OpenClawConfig | undefined; provider: string; @@ -239,6 +249,8 @@ export async function runWithModelFallback(params: { const attempts: FallbackAttempt[] = []; let lastError: unknown; + const hasFallbackCandidates = candidates.length > 1; + for (let i = 0; i < candidates.length; i += 1) { const candidate = candidates[i]; if (authStore) { @@ -250,14 +262,43 @@ export async function runWithModelFallback(params: { const isAnyProfileAvailable = profileIds.some((id) => !isProfileInCooldown(authStore, id)); if (profileIds.length > 0 && !isAnyProfileAvailable) { - // All profiles for this provider are in cooldown; skip without attempting - attempts.push({ - provider: candidate.provider, - model: candidate.model, - error: `Provider ${candidate.provider} is in cooldown (all profiles unavailable)`, - reason: "rate_limit", - }); - continue; + // All profiles for this provider are in cooldown. 
+ // For the primary model (i === 0), probe it if the soonest cooldown + // expiry is close (within 2 minutes) or already past. This avoids + // staying on a fallback model long after the rate-limit window clears + // when exponential backoff cooldowns exceed the actual provider limit. + const isPrimary = i === 0; + const shouldProbe = + isPrimary && + hasFallbackCandidates && + (() => { + const lastProbe = lastProbeAttempt.get(candidate.provider) ?? 0; + if (Date.now() - lastProbe < MIN_PROBE_INTERVAL_MS) { + return false; // throttled + } + const soonest = getSoonestCooldownExpiry(authStore, profileIds); + if (soonest === null || !Number.isFinite(soonest)) { + return true; + } + const now = Date.now(); + // Probe when cooldown already expired or within 2 min of expiry + const PROBE_MARGIN_MS = 2 * 60 * 1000; + return now >= soonest - PROBE_MARGIN_MS; + })(); + if (!shouldProbe) { + // Skip without attempting + attempts.push({ + provider: candidate.provider, + model: candidate.model, + error: `Provider ${candidate.provider} is in cooldown (all profiles unavailable)`, + reason: "rate_limit", + }); + continue; + } + // Primary model probe: attempt it despite cooldown to detect recovery. + // If it fails, the error is caught below and we fall through to the + // next candidate as usual. + lastProbeAttempt.set(candidate.provider, Date.now()); } } try {