mirror of
https://github.com/openclaw/openclaw.git
synced 2026-06-07 03:03:31 +00:00
feat(agents): add surgical rate limit vs auth/billing distinction in model fallback
- Add logic to distinguish between rate_limit, auth, and billing cooldown reasons.
- Rate limits: allow same-provider fallback attempts, since a different model may still work.
- Auth/billing issues: block all attempts for that provider, since the problem affects the whole provider.
- Add a comprehensive test suite for the cooldown behavior distinctions.
- Preserve the existing primary-model probe logic and backward compatibility.
- Providers without auth profiles are skipped only when the auth store already records an auth or billing issue.

Fixes an issue where all cooldown types were treated identically, which prevented appropriate fallback strategies for different failure scenarios.
This commit is contained in:
committed by
Gustavo Madeira Santana
parent
0db091bf86
commit
6799c0505c
@@ -917,7 +917,175 @@ describe("runWithModelFallback", () => {
|
||||
});
|
||||
});
|
||||
|
||||
// Provider cooldown behavior preserved - focusing on Bug A (session overrides) only
|
||||
// Tests for Bug B fix: Rate limit vs auth/billing cooldown distinction
|
||||
describe("fallback behavior with provider cooldowns", () => {
|
||||
/**
 * Saves a throwaway auth profile store into a fresh temp directory, with the
 * provider's `default` API-key profile marked as currently unavailable.
 *
 * A `rate_limit` reason is recorded through `cooldownUntil`; `auth` and
 * `billing` are recorded through `disabledUntil`. Either way the window ends
 * 300000 ms after the store is built and `disabledReason` carries the reason.
 *
 * @param provider - Provider id used for both the profile key and the profile.
 * @param reason - Why the profile is unavailable.
 * @returns The saved store and the directory it was saved under.
 */
async function makeAuthStoreWithCooldown(
  provider: string,
  reason: "rate_limit" | "auth" | "billing",
): Promise<{ store: AuthProfileStore; dir: string }> {
  const dir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-test-"));
  const blockedUntil = Date.now() + 300000;
  const profileId = `${provider}:default`;
  const reasonFields = { disabledReason: reason as unknown };

  // Rate limits use cooldownUntil; auth/billing issues use disabledUntil.
  const stats =
    reason === "rate_limit"
      ? { cooldownUntil: blockedUntil, ...reasonFields }
      : { disabledUntil: blockedUntil, ...reasonFields };

  const store: AuthProfileStore = {
    version: AUTH_STORE_VERSION,
    profiles: {
      [profileId]: { type: "api_key", provider, key: "test-key" },
    },
    usageStats: {
      [profileId]: stats,
    },
  };

  saveAuthProfileStore(store, dir);
  return { store, dir };
}
|
||||
|
||||
// A rate-limit cooldown on the provider's profile must not prevent trying a
// different model from that same provider.
it("attempts same-provider fallbacks during rate limit cooldown", async () => {
  const { dir: agentDir } = await makeAuthStoreWithCooldown("anthropic", "rate_limit");
  const model = {
    primary: "anthropic/claude-opus-4-6",
    fallbacks: ["anthropic/claude-sonnet-4-5", "groq/llama-3.3-70b-versatile"],
  };
  const cfg = makeCfg({ agents: { defaults: { model } } });

  // Only one resolved value is queued: the first attempted candidate succeeds.
  const run = vi.fn().mockResolvedValueOnce("sonnet success");

  const outcome = await runWithModelFallback({
    cfg,
    provider: "anthropic",
    model: "claude-opus-4-6",
    run,
    agentDir,
  });

  expect(outcome.result).toBe("sonnet success");
  // The primary is skipped, so the single call goes to the same-provider fallback.
  expect(run).toHaveBeenCalledTimes(1);
  expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-5");
});
|
||||
|
||||
// An auth failure affects the whole provider, so neither the primary nor any
// candidate may be attempted and the call must reject.
it("does NOT attempt fallbacks during auth cooldown", async () => {
  const { dir } = await makeAuthStoreWithCooldown("anthropic", "auth");
  const cfg = makeCfg({
    agents: {
      defaults: {
        model: {
          primary: "anthropic/claude-opus-4-6",
          fallbacks: ["anthropic/claude-sonnet-4-5", "groq/llama-3.3-70b-versatile"],
        },
      },
    },
  });

  const run = vi.fn().mockResolvedValueOnce("should not be called");

  // Assert the rejection directly. The previous try/fail()/catch form swallowed
  // the error raised by fail() in its own catch block, so a call that resolved
  // could never be reported as "should have thrown".
  await expect(
    runWithModelFallback({
      cfg,
      provider: "anthropic",
      model: "claude-opus-4-6",
      run,
      agentDir: dir,
    }),
  ).rejects.toThrow();

  // Checked unconditionally rather than only inside a catch block.
  expect(run).not.toHaveBeenCalled();
});
|
||||
|
||||
// A billing failure affects the whole provider, so neither the primary nor any
// candidate may be attempted and the call must reject.
it("does NOT attempt fallbacks during billing cooldown", async () => {
  const { dir } = await makeAuthStoreWithCooldown("anthropic", "billing");
  const cfg = makeCfg({
    agents: {
      defaults: {
        model: {
          primary: "anthropic/claude-opus-4-6",
          fallbacks: ["anthropic/claude-sonnet-4-5", "groq/llama-3.3-70b-versatile"],
        },
      },
    },
  });

  const run = vi.fn().mockResolvedValueOnce("should not be called");

  // Assert the rejection directly. The previous try/fail()/catch form swallowed
  // the error raised by fail() in its own catch block, so a call that resolved
  // could never be reported as "should have thrown".
  await expect(
    runWithModelFallback({
      cfg,
      provider: "anthropic",
      model: "claude-opus-4-6",
      run,
      agentDir: dir,
    }),
  ).rejects.toThrow();

  // Checked unconditionally rather than only inside a catch block.
  expect(run).not.toHaveBeenCalled();
});
|
||||
|
||||
// With the same-provider fallback failing, the chain must continue on to a
// different provider whose profile has no cooldown recorded.
it("tries cross-provider fallbacks when same provider has rate limit", async () => {
  const agentDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-test-"));

  // Anthropic is in a rate-limit cooldown (cooldownUntil); Groq has a profile
  // but no usage stats, so it is not in cooldown.
  const authStore: AuthProfileStore = {
    version: AUTH_STORE_VERSION,
    profiles: {
      "anthropic:default": { type: "api_key", provider: "anthropic", key: "test-key" },
      "groq:default": { type: "api_key", provider: "groq", key: "test-key" },
    },
    usageStats: {
      "anthropic:default": {
        cooldownUntil: Date.now() + 300000,
        disabledReason: "rate_limit" as unknown,
      },
    },
  };
  saveAuthProfileStore(authStore, agentDir);

  const model = {
    primary: "anthropic/claude-opus-4-6",
    fallbacks: ["anthropic/claude-sonnet-4-5", "groq/llama-3.3-70b-versatile"],
  };
  const cfg = makeCfg({ agents: { defaults: { model } } });

  // First attempted candidate (sonnet) rejects, second (groq) resolves.
  const run = vi.fn();
  run.mockRejectedValueOnce(new Error("Still rate limited"));
  run.mockResolvedValueOnce("groq success");

  const outcome = await runWithModelFallback({
    cfg,
    provider: "anthropic",
    model: "claude-opus-4-6",
    run,
    agentDir,
  });

  expect(outcome.result).toBe("groq success");
  expect(run).toHaveBeenCalledTimes(2);
  // The rate limit still allows the same-provider attempt...
  expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-5");
  // ...and the cross-provider candidate is tried after it fails.
  expect(run).toHaveBeenNthCalledWith(2, "groq", "llama-3.3-70b-versatile");
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("runWithImageModelFallback", () => {
|
||||
|
||||
@@ -353,11 +353,25 @@ export async function runWithModelFallback<T>(params: {
|
||||
});
|
||||
const isAnyProfileAvailable = profileIds.some((id) => !isProfileInCooldown(authStore, id));
|
||||
|
||||
if (profileIds.length === 0) {
|
||||
// Check if there are any auth/billing issues in the auth store
|
||||
// Only block "no profile" providers when there are persistent auth issues
|
||||
const hasAuthIssues = Object.values(authStore.usageStats || {}).some(
|
||||
(stats) => stats?.disabledReason === "auth" || stats?.disabledReason === "billing",
|
||||
);
|
||||
if (hasAuthIssues) {
|
||||
attempts.push({
|
||||
provider: candidate.provider,
|
||||
model: candidate.model,
|
||||
error: `No auth profiles configured for provider ${candidate.provider}`,
|
||||
reason: "auth",
|
||||
});
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (profileIds.length > 0 && !isAnyProfileAvailable) {
|
||||
// All profiles for this provider are in cooldown.
|
||||
// For the primary model (i === 0), probe it if the soonest cooldown
|
||||
// expiry is close or already past. This avoids staying on a fallback
|
||||
// model long after the real rate-limit window clears.
|
||||
const isPrimary = i === 0;
|
||||
const requestedModel =
|
||||
params.provider === candidate.provider && params.model === candidate.model;
|
||||
@@ -371,10 +385,21 @@ export async function runWithModelFallback<T>(params: {
|
||||
authStore,
|
||||
profileIds,
|
||||
});
|
||||
// Always try fallback models even during cooldown, since rate limits are often model-specific.
|
||||
// Only skip if it's the same model that originally failed or if we should not probe primary.
|
||||
const shouldAttemptDespiteCooldown = !isPrimary || !requestedModel || shouldProbe;
|
||||
|
||||
const disabledReason = authStore.usageStats?.[profileIds[0]]?.disabledReason;
|
||||
const isPersistentIssue = disabledReason === "auth" || disabledReason === "billing";
|
||||
if (isPersistentIssue) {
|
||||
attempts.push({
|
||||
provider: candidate.provider,
|
||||
model: candidate.model,
|
||||
error: `Provider ${candidate.provider} has ${disabledReason} issue (skipping all models)`,
|
||||
reason: disabledReason,
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
||||
// Always try fallback models even during cooldown, since rate limits are often model-specific.
|
||||
const shouldAttemptDespiteCooldown = !isPrimary || !requestedModel || shouldProbe;
|
||||
if (!shouldAttemptDespiteCooldown) {
|
||||
const inferredReason =
|
||||
resolveProfilesUnavailableReason({
|
||||
@@ -382,7 +407,6 @@ export async function runWithModelFallback<T>(params: {
|
||||
profileIds,
|
||||
now,
|
||||
}) ?? "rate_limit";
|
||||
// Skip without attempting
|
||||
attempts.push({
|
||||
provider: candidate.provider,
|
||||
model: candidate.model,
|
||||
@@ -391,12 +415,12 @@ export async function runWithModelFallback<T>(params: {
|
||||
});
|
||||
continue;
|
||||
}
|
||||
// Primary model probe: attempt it despite cooldown to detect recovery.
|
||||
// If it fails, the error is caught below and we fall through to the
|
||||
// next candidate as usual.
|
||||
lastProbeAttempt.set(probeThrottleKey, now);
|
||||
if (isPrimary && shouldProbe) {
|
||||
lastProbeAttempt.set(probeThrottleKey, now);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
const result = await params.run(candidate.provider, candidate.model);
|
||||
return {
|
||||
|
||||
Reference in New Issue
Block a user