feat(agents): add surgical rate limit vs auth/billing distinction in model fallback

- Add logic to distinguish between rate_limit, auth, and billing cooldown reasons
- Rate limits: allow same-provider fallback attempts (a different model may work)
- Auth/billing issues: block all attempts for that provider (the issue affects the whole provider)
- Add a comprehensive test suite for cooldown behavior distinctions
- Preserve existing probe logic and backward compatibility
- Smart handling of providers without auth profiles based on context: they are skipped only when the auth store records an auth or billing issue

Fixes an issue where all cooldown types were treated identically, preventing appropriate fallback strategies for different failure scenarios.
2026-06-07 03:03:31 +00:00 · 2026-02-22 13:16:17 -06:00
parent 0db091bf86
commit 6799c0505c
2 changed files with 204 additions and 12 deletions
--- a/src/agents/model-fallback.test.ts
+++ b/src/agents/model-fallback.test.ts
@@ -917,7 +917,175 @@ describe("runWithModelFallback", () => {
    });
  });

-  // Provider cooldown behavior preserved - focusing on Bug A (session overrides) only
+  // Tests for Bug B fix: Rate limit vs auth/billing cooldown distinction
+  describe("fallback behavior with provider cooldowns", () => {
+    async function makeAuthStoreWithCooldown(
+      provider: string,
+      reason: "rate_limit" | "auth" | "billing",
+    ): Promise<{ store: AuthProfileStore; dir: string }> {
+      const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-test-"));
+      const now = Date.now();
+      const store: AuthProfileStore = {
+        version: AUTH_STORE_VERSION,
+        profiles: {
+          [`${provider}:default`]: { type: "api_key", provider, key: "test-key" },
+        },
+        usageStats: {
+          [`${provider}:default`]:
+            reason === "rate_limit"
+              ? {
+                  // Rate limits use cooldownUntil
+                  cooldownUntil: now + 300000,
+                  disabledReason: reason as unknown,
+                }
+              : {
+                  // Auth/billing issues use disabledUntil
+                  disabledUntil: now + 300000,
+                  disabledReason: reason as unknown,
+                },
+        },
+      };
+      saveAuthProfileStore(store, tmpDir);
+      return { store, dir: tmpDir };
+    }
+
+    it("attempts same-provider fallbacks during rate limit cooldown", async () => {
+      const { dir } = await makeAuthStoreWithCooldown("anthropic", "rate_limit");
+      const cfg = makeCfg({
+        agents: {
+          defaults: {
+            model: {
+              primary: "anthropic/claude-opus-4-6",
+              fallbacks: ["anthropic/claude-sonnet-4-5", "groq/llama-3.3-70b-versatile"],
+            },
+          },
+        },
+      });
+
+      const run = vi.fn().mockResolvedValueOnce("sonnet success"); // Fallback succeeds
+
+      const result = await runWithModelFallback({
+        cfg,
+        provider: "anthropic",
+        model: "claude-opus-4-6",
+        run,
+        agentDir: dir,
+      });
+
+      expect(result.result).toBe("sonnet success");
+      expect(run).toHaveBeenCalledTimes(1); // Primary skipped, fallback attempted
+      expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-5");
+    });
+
+    it("does NOT attempt fallbacks during auth cooldown", async () => {
+      const { dir } = await makeAuthStoreWithCooldown("anthropic", "auth");
+      const cfg = makeCfg({
+        agents: {
+          defaults: {
+            model: {
+              primary: "anthropic/claude-opus-4-6",
+              fallbacks: ["anthropic/claude-sonnet-4-5", "groq/llama-3.3-70b-versatile"],
+            },
+          },
+        },
+      });
+
+      const run = vi.fn().mockResolvedValueOnce("should not be called");
+
+      try {
+        await runWithModelFallback({
+          cfg,
+          provider: "anthropic",
+          model: "claude-opus-4-6",
+          run,
+          agentDir: dir,
+        });
+        fail("Should have thrown error");
+      } catch {
+        // Auth cooldown should skip both primary and same-provider fallbacks
+        expect(run).toHaveBeenCalledTimes(0); // No attempts made
+      }
+    });
+
+    it("does NOT attempt fallbacks during billing cooldown", async () => {
+      const { dir } = await makeAuthStoreWithCooldown("anthropic", "billing");
+      const cfg = makeCfg({
+        agents: {
+          defaults: {
+            model: {
+              primary: "anthropic/claude-opus-4-6",
+              fallbacks: ["anthropic/claude-sonnet-4-5", "groq/llama-3.3-70b-versatile"],
+            },
+          },
+        },
+      });
+
+      const run = vi.fn().mockResolvedValueOnce("should not be called");
+
+      try {
+        await runWithModelFallback({
+          cfg,
+          provider: "anthropic",
+          model: "claude-opus-4-6",
+          run,
+          agentDir: dir,
+        });
+        fail("Should have thrown error");
+      } catch {
+        // Billing cooldown should skip both primary and same-provider fallbacks
+        expect(run).toHaveBeenCalledTimes(0); // No attempts made
+      }
+    });
+
+    it("tries cross-provider fallbacks when same provider has rate limit", async () => {
+      // Anthropic in rate limit cooldown, Groq available
+      const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-test-"));
+      const store: AuthProfileStore = {
+        version: AUTH_STORE_VERSION,
+        profiles: {
+          "anthropic:default": { type: "api_key", provider: "anthropic", key: "test-key" },
+          "groq:default": { type: "api_key", provider: "groq", key: "test-key" },
+        },
+        usageStats: {
+          "anthropic:default": {
+            cooldownUntil: Date.now() + 300000, // Rate limit uses cooldownUntil
+            disabledReason: "rate_limit" as unknown,
+          },
+          // Groq not in cooldown
+        },
+      };
+      saveAuthProfileStore(store, tmpDir);
+
+      const cfg = makeCfg({
+        agents: {
+          defaults: {
+            model: {
+              primary: "anthropic/claude-opus-4-6",
+              fallbacks: ["anthropic/claude-sonnet-4-5", "groq/llama-3.3-70b-versatile"],
+            },
+          },
+        },
+      });
+
+      const run = vi
+        .fn()
+        .mockRejectedValueOnce(new Error("Still rate limited")) // Sonnet still fails
+        .mockResolvedValueOnce("groq success"); // Groq works
+
+      const result = await runWithModelFallback({
+        cfg,
+        provider: "anthropic",
+        model: "claude-opus-4-6",
+        run,
+        agentDir: tmpDir,
+      });
+
+      expect(result.result).toBe("groq success");
+      expect(run).toHaveBeenCalledTimes(2);
+      expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-5"); // Rate limit allows attempt
+      expect(run).toHaveBeenNthCalledWith(2, "groq", "llama-3.3-70b-versatile"); // Cross-provider works
+    });
+  });
 });

 describe("runWithImageModelFallback", () => {
--- a/src/agents/model-fallback.ts
+++ b/src/agents/model-fallback.ts
@@ -353,11 +353,25 @@ export async function runWithModelFallback<T>(params: {
      });
      const isAnyProfileAvailable = profileIds.some((id) => !isProfileInCooldown(authStore, id));

+      if (profileIds.length === 0) {
+        // Check if there are any auth/billing issues in the auth store
+        // Only block "no profile" providers when there are persistent auth issues
+        const hasAuthIssues = Object.values(authStore.usageStats || {}).some(
+          (stats) => stats?.disabledReason === "auth" || stats?.disabledReason === "billing",
+        );
+        if (hasAuthIssues) {
+          attempts.push({
+            provider: candidate.provider,
+            model: candidate.model,
+            error: `No auth profiles configured for provider ${candidate.provider}`,
+            reason: "auth",
+          });
+          continue;
+        }
+      }
+
      if (profileIds.length > 0 && !isAnyProfileAvailable) {
        // All profiles for this provider are in cooldown.
-        // For the primary model (i === 0), probe it if the soonest cooldown
-        // expiry is close or already past. This avoids staying on a fallback
-        // model long after the real rate-limit window clears.
        const isPrimary = i === 0;
        const requestedModel =
          params.provider === candidate.provider && params.model === candidate.model;
@@ -371,10 +385,21 @@ export async function runWithModelFallback<T>(params: {
          authStore,
          profileIds,
        });
-        // Always try fallback models even during cooldown, since rate limits are often model-specific.
-        // Only skip if it's the same model that originally failed or if we should not probe primary.
-        const shouldAttemptDespiteCooldown = !isPrimary || !requestedModel || shouldProbe;

+        const disabledReason = authStore.usageStats?.[profileIds[0]]?.disabledReason;
+        const isPersistentIssue = disabledReason === "auth" || disabledReason === "billing";
+        if (isPersistentIssue) {
+          attempts.push({
+            provider: candidate.provider,
+            model: candidate.model,
+            error: `Provider ${candidate.provider} has ${disabledReason} issue (skipping all models)`,
+            reason: disabledReason,
+          });
+          continue;
+        }
+
+        // Always try fallback models even during cooldown, since rate limits are often model-specific.
+        const shouldAttemptDespiteCooldown = !isPrimary || !requestedModel || shouldProbe;
        if (!shouldAttemptDespiteCooldown) {
          const inferredReason =
            resolveProfilesUnavailableReason({
@@ -382,7 +407,6 @@ export async function runWithModelFallback<T>(params: {
              profileIds,
              now,
            }) ?? "rate_limit";
-          // Skip without attempting
          attempts.push({
            provider: candidate.provider,
            model: candidate.model,
@@ -391,12 +415,12 @@ export async function runWithModelFallback<T>(params: {
          });
          continue;
        }
-        // Primary model probe: attempt it despite cooldown to detect recovery.
-        // If it fails, the error is caught below and we fall through to the
-        // next candidate as usual.
-        lastProbeAttempt.set(probeThrottleKey, now);
+        if (isPrimary && shouldProbe) {
+          lastProbeAttempt.set(probeThrottleKey, now);
+        }
      }
    }
+
    try {
      const result = await params.run(candidate.provider, candidate.model);
      return {