diff --git a/src/agents/model-fallback.test.ts b/src/agents/model-fallback.test.ts
index a71ef414da5..97ade46eb3b 100644
--- a/src/agents/model-fallback.test.ts
+++ b/src/agents/model-fallback.test.ts
@@ -917,7 +917,137 @@ describe("runWithModelFallback", () => {
     });
   });
 
-  // Provider cooldown behavior preserved - focusing on Bug A (session overrides) only
+  // Tests for Bug B fix: Rate limit vs auth/billing cooldown distinction
+  describe("fallback behavior with provider cooldowns", () => {
+    /**
+     * Persists an auth store into a fresh temp directory. The store holds a single
+     * `<provider>:default` API-key profile whose usage stats set `cooldownUntil`
+     * (rate limit) or `disabledUntil` (auth/billing) five minutes ahead of now.
+     */
+    async function makeAuthStoreWithCooldown(
+      provider: string,
+      reason: "rate_limit" | "auth" | "billing",
+    ): Promise<{ store: AuthProfileStore; dir: string }> {
+      const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-test-"));
+      const now = Date.now();
+      const store: AuthProfileStore = {
+        version: AUTH_STORE_VERSION,
+        profiles: {
+          [`${provider}:default`]: { type: "api_key", provider, key: "test-key" },
+        },
+        usageStats: {
+          [`${provider}:default`]:
+            reason === "rate_limit"
+              ? {
+                  // Rate limits use cooldownUntil
+                  cooldownUntil: now + 300000,
+                  disabledReason: reason as unknown,
+                }
+              : {
+                  // Auth/billing issues use disabledUntil
+                  disabledUntil: now + 300000,
+                  disabledReason: reason as unknown,
+                },
+        },
+      };
+      saveAuthProfileStore(store, tmpDir);
+      return { store, dir: tmpDir };
+    }
+
+    // Shared config: anthropic primary, then a same-provider fallback, then a
+    // cross-provider fallback.
+    function makeFallbackCfg() {
+      return makeCfg({
+        agents: {
+          defaults: {
+            model: {
+              primary: "anthropic/claude-opus-4-6",
+              fallbacks: ["anthropic/claude-sonnet-4-5", "groq/llama-3.3-70b-versatile"],
+            },
+          },
+        },
+      });
+    }
+
+    it("attempts same-provider fallbacks during rate limit cooldown", async () => {
+      const { dir } = await makeAuthStoreWithCooldown("anthropic", "rate_limit");
+      const run = vi.fn().mockResolvedValueOnce("sonnet success"); // Fallback succeeds
+
+      const result = await runWithModelFallback({
+        cfg: makeFallbackCfg(),
+        provider: "anthropic",
+        model: "claude-opus-4-6",
+        run,
+        agentDir: dir,
+      });
+
+      expect(result.result).toBe("sonnet success");
+      expect(run).toHaveBeenCalledTimes(1); // Primary skipped, fallback attempted
+      expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-5");
+    });
+
+    // Auth and billing problems are persistent, so no model may be attempted. The
+    // rejection is asserted with `rejects`: wrapping a `fail()` call in try/catch
+    // would swallow the failure and let the test pass even if nothing was thrown.
+    it.each(["auth", "billing"] as const)(
+      "does NOT attempt fallbacks during %s cooldown",
+      async (reason) => {
+        const { dir } = await makeAuthStoreWithCooldown("anthropic", reason);
+        const run = vi.fn().mockResolvedValueOnce("should not be called");
+
+        await expect(
+          runWithModelFallback({
+            cfg: makeFallbackCfg(),
+            provider: "anthropic",
+            model: "claude-opus-4-6",
+            run,
+            agentDir: dir,
+          }),
+        ).rejects.toThrow();
+
+        // No candidate (primary or any fallback) may reach `run`.
+        expect(run).not.toHaveBeenCalled();
+      },
+    );
+
+    it("tries cross-provider fallbacks when same provider has rate limit", async () => {
+      // Anthropic in rate limit cooldown, Groq available
+      const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-test-"));
+      const store: AuthProfileStore = {
+        version: AUTH_STORE_VERSION,
+        profiles: {
+          "anthropic:default": { type: "api_key", provider: "anthropic", key: "test-key" },
+          "groq:default": { type: "api_key", provider: "groq", key: "test-key" },
+        },
+        usageStats: {
+          "anthropic:default": {
+            cooldownUntil: Date.now() + 300000, // Rate limit uses cooldownUntil
+            disabledReason: "rate_limit" as unknown,
+          },
+          // Groq not in cooldown
+        },
+      };
+      saveAuthProfileStore(store, tmpDir);
+
+      const run = vi
+        .fn()
+        .mockRejectedValueOnce(new Error("Still rate limited")) // Sonnet still fails
+        .mockResolvedValueOnce("groq success"); // Groq works
+
+      const result = await runWithModelFallback({
+        cfg: makeFallbackCfg(),
+        provider: "anthropic",
+        model: "claude-opus-4-6",
+        run,
+        agentDir: tmpDir,
+      });
+
+      expect(result.result).toBe("groq success");
+      expect(run).toHaveBeenCalledTimes(2);
+      expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-5"); // Rate limit allows attempt
+      expect(run).toHaveBeenNthCalledWith(2, "groq", "llama-3.3-70b-versatile"); // Cross-provider works
+    });
+  });
 });
 
 describe("runWithImageModelFallback", () => {
diff --git a/src/agents/model-fallback.ts b/src/agents/model-fallback.ts
index 3bcb2e6a9d1..e1ea16efaa6 100644
--- a/src/agents/model-fallback.ts
+++ b/src/agents/model-fallback.ts
@@ -353,11 +353,25 @@ export async function runWithModelFallback(params: {
       });
       const isAnyProfileAvailable = profileIds.some((id) => !isProfileInCooldown(authStore, id));
 
+      if (profileIds.length === 0) {
+        // Check if there are any auth/billing issues in the auth store
+        // Only block "no profile" providers when there are persistent auth issues
+        const hasAuthIssues = Object.values(authStore.usageStats || {}).some(
+          (stats) => stats?.disabledReason === "auth" || stats?.disabledReason === "billing",
+        );
+        if (hasAuthIssues) {
+          attempts.push({
+            provider: candidate.provider,
+            model: candidate.model,
+            error: `No auth profiles configured for provider ${candidate.provider}`,
+            reason: "auth",
+          });
+          continue;
+        }
+      }
+
       if (profileIds.length > 0 && !isAnyProfileAvailable) {
         // All profiles for this provider are in cooldown.
-        // For the primary model (i === 0), probe it if the soonest cooldown
-        // expiry is close or already past. This avoids staying on a fallback
-        // model long after the real rate-limit window clears.
         const isPrimary = i === 0;
         const requestedModel =
           params.provider === candidate.provider && params.model === candidate.model;
@@ -371,10 +385,25 @@ export async function runWithModelFallback(params: {
           authStore,
           profileIds,
         });
-        // Always try fallback models even during cooldown, since rate limits are often model-specific.
-        // Only skip if it's the same model that originally failed or if we should not probe primary.
-        const shouldAttemptDespiteCooldown = !isPrimary || !requestedModel || shouldProbe;
+        // Skip the provider only when every profile is disabled for a persistent
+        // (auth/billing) reason. Inspecting just the first profile would make the
+        // outcome depend on profile order when profiles fail for different reasons.
+        const profileReasons = profileIds.map((id) => authStore.usageStats?.[id]?.disabledReason);
+        const allPersistent = profileReasons.every((r) => r === "auth" || r === "billing");
+        const disabledReason = allPersistent ? profileReasons[0] : undefined;
+        if (disabledReason === "auth" || disabledReason === "billing") {
+          attempts.push({
+            provider: candidate.provider,
+            model: candidate.model,
+            error: `Provider ${candidate.provider} has ${disabledReason} issue (skipping all models)`,
+            reason: disabledReason,
+          });
+          continue;
+        }
+
+        // Always try fallback models even during cooldown, since rate limits are often model-specific.
+        const shouldAttemptDespiteCooldown = !isPrimary || !requestedModel || shouldProbe;
 
         if (!shouldAttemptDespiteCooldown) {
           const inferredReason =
             resolveProfilesUnavailableReason({
@@ -382,7 +411,6 @@ export async function runWithModelFallback(params: {
               profileIds,
               now,
             }) ?? "rate_limit";
-          // Skip without attempting
           attempts.push({
             provider: candidate.provider,
             model: candidate.model,
@@ -391,12 +419,13 @@ export async function runWithModelFallback(params: {
           });
           continue;
         }
-        // Primary model probe: attempt it despite cooldown to detect recovery.
-        // If it fails, the error is caught below and we fall through to the
-        // next candidate as usual.
-        lastProbeAttempt.set(probeThrottleKey, now);
+        // Record the probe time only when this attempt is an actual primary probe.
+        if (isPrimary && shouldProbe) {
+          lastProbeAttempt.set(probeThrottleKey, now);
+        }
       }
     }
+
     try {
       const result = await params.run(candidate.provider, candidate.model);
       return {