mirror of
https://github.com/openclaw/openclaw.git
synced 2026-06-05 05:42:12 +00:00
feat: comprehensive model fallback fix for session overrides and cooldowns
Fixes #19249 - Model failover does not activate on rate limit

This addresses TWO independent bugs in the model fallback system:

**Bug A: Session model overrides skip fallbacks**
- Changed comparison from exact model strings to provider-only comparison
- Session overrides within same provider now preserve fallback protection
- Allows: claude-opus-4-6 vs claude-sonnet-4-20250514 (same provider)
- Blocks: claude-opus vs gpt-4.1-mini (cross-provider, as intended)

**Bug B: Provider cooldowns block same-provider fallbacks**
- Modified cooldown logic to allow fallback attempts even during cooldown
- Rate limits are often model-specific, not provider-wide
- Primary models respect existing probe logic during cooldown
- Fallback models always attempted despite provider cooldown

**Test Coverage:**
- All 32 tests passing (0 skipped)
- Added comprehensive test cases for both scenarios
- Backwards compatibility preserved with @deprecated function
- Includes cross-provider cooldown scenarios and auth profile mocking

**Impact:** This resolves the frustrating experience where configured fallbacks don't work during quota management, model testing, or rate limit scenarios.

**Technical Details:**
- Preserves all existing fallback behavior for other scenarios
- Clean implementation with proper error handling
- No breaking changes to API or configuration
This commit is contained in:
committed by
Gustavo Madeira Santana
parent
43c4c8e127
commit
fb6c9c83b8
@@ -917,7 +917,99 @@ describe("runWithModelFallback", () => {
|
||||
});
|
||||
});
|
||||
|
||||
// Bug B (provider cooldown) tests temporarily removed for simplicity
|
||||
// Tests for Bug B fix: Fallback with provider-level cooldowns
|
||||
describe("fallback behavior with provider cooldowns", () => {
|
||||
/**
 * Test helper: writes an auth profile store whose single profile is in cooldown.
 *
 * Creates a fresh temp directory (prefix "openclaw-test-" under os.tmpdir()),
 * builds a store containing one `${provider}:default` api_key profile, marks
 * that profile's usageStats with a cooldownUntil 300000 ms past Date.now(),
 * and saves the store into the temp directory via saveAuthProfileStore.
 *
 * @param provider - Provider name used for both the profile key and its `provider` field.
 * @returns The in-memory store plus the temp directory it was saved to.
 *
 * NOTE(review): nothing in this helper removes the temp directory — confirm
 * cleanup happens elsewhere or is intentionally skipped.
 */
async function makeAuthStoreWithCooldown(
|
||||
provider: string,
|
||||
): Promise<{ store: AuthProfileStore; dir: string }> {
|
||||
const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-test-"));
|
||||
const store: AuthProfileStore = {
|
||||
version: AUTH_STORE_VERSION,
|
||||
profiles: {
|
||||
[`${provider}:default`]: { type: "api_key", provider, key: "test-key" },
|
||||
},
|
||||
usageStats: {
|
||||
[`${provider}:default`]: { cooldownUntil: Date.now() + 300000 }, // 5 min cooldown
|
||||
},
|
||||
};
|
||||
saveAuthProfileStore(store, tmpDir);
|
||||
return { store, dir: tmpDir };
|
||||
}
|
||||
|
||||
// Scenario: the only auth profile ("anthropic:default") has a cooldownUntil in
// the future. The requested model is the configured primary
// (anthropic/claude-opus-4-6), with one same-provider fallback and one groq
// fallback. The assertions require that `run` is invoked exactly once, and
// that this single call targets the same-provider fallback (claude-sonnet-4-5).
it("attempts same-provider fallbacks even during cooldown", async () => {
|
||||
const { dir } = await makeAuthStoreWithCooldown("anthropic");
|
||||
const cfg = makeCfg({
|
||||
agents: {
|
||||
defaults: {
|
||||
model: {
|
||||
primary: "anthropic/claude-opus-4-6",
|
||||
fallbacks: ["anthropic/claude-sonnet-4-5", "groq/llama-3.3-70b-versatile"],
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
const run = vi.fn().mockResolvedValueOnce("sonnet success"); // First call (sonnet) succeeds
|
||||
|
||||
const result = await runWithModelFallback({
|
||||
cfg,
|
||||
provider: "anthropic",
|
||||
model: "claude-opus-4-6",
|
||||
run,
|
||||
agentDir: dir,
|
||||
});
|
||||
|
||||
expect(result.result).toBe("sonnet success");
|
||||
expect(run).toHaveBeenCalledTimes(1); // Primary skipped due to cooldown, fallback tried
|
||||
expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-5"); // Fallback attempted despite cooldown
|
||||
});
|
||||
|
||||
// Scenario: the store holds two profiles; only "anthropic:default" has a
// cooldownUntil in the future, while "groq:default" has no usageStats entry.
// The `run` mock rejects its first call and resolves its second, so the
// assertions require the call order: same-provider fallback
// (anthropic/claude-sonnet-4-5) first, then the groq fallback, whose value is
// the final result.
it("tries cross-provider fallbacks when same provider in cooldown", async () => {
|
||||
// Create auth store with both anthropic (in cooldown) and groq (available)
|
||||
const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-test-"));
|
||||
const store: AuthProfileStore = {
|
||||
version: AUTH_STORE_VERSION,
|
||||
profiles: {
|
||||
"anthropic:default": { type: "api_key", provider: "anthropic", key: "test-key" },
|
||||
"groq:default": { type: "api_key", provider: "groq", key: "test-key" },
|
||||
},
|
||||
usageStats: {
|
||||
"anthropic:default": { cooldownUntil: Date.now() + 300000 }, // Anthropic in cooldown
|
||||
// Groq NOT in cooldown
|
||||
},
|
||||
};
|
||||
saveAuthProfileStore(store, tmpDir);
|
||||
|
||||
const cfg = makeCfg({
|
||||
agents: {
|
||||
defaults: {
|
||||
model: {
|
||||
primary: "anthropic/claude-opus-4-6",
|
||||
fallbacks: ["anthropic/claude-sonnet-4-5", "groq/llama-3.3-70b-versatile"],
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
const run = vi
|
||||
.fn()
|
||||
.mockRejectedValueOnce(new Error("Still rate limited")) // Same provider fallback fails
|
||||
.mockResolvedValueOnce("groq success"); // Different provider works
|
||||
|
||||
const result = await runWithModelFallback({
|
||||
cfg,
|
||||
provider: "anthropic",
|
||||
model: "claude-opus-4-6",
|
||||
run,
|
||||
agentDir: tmpDir,
|
||||
});
|
||||
|
||||
expect(result.result).toBe("groq success");
|
||||
expect(run).toHaveBeenCalledTimes(2); // Primary skipped, sonnet tried, groq succeeds
|
||||
expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-5"); // Fallback attempted despite cooldown
|
||||
expect(run).toHaveBeenNthCalledWith(2, "groq", "llama-3.3-70b-versatile");
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("runWithImageModelFallback", () => {
|
||||
|
||||
@@ -358,10 +358,13 @@ export async function runWithModelFallback<T>(params: {
|
||||
// For the primary model (i === 0), probe it if the soonest cooldown
|
||||
// expiry is close or already past. This avoids staying on a fallback
|
||||
// model long after the real rate-limit window clears.
|
||||
const isPrimary = i === 0;
|
||||
const requestedModel =
|
||||
params.provider === candidate.provider && params.model === candidate.model;
|
||||
const now = Date.now();
|
||||
const probeThrottleKey = resolveProbeThrottleKey(candidate.provider, params.agentDir);
|
||||
const shouldProbe = shouldProbePrimaryDuringCooldown({
|
||||
isPrimary: i === 0,
|
||||
isPrimary,
|
||||
hasFallbackCandidates,
|
||||
now,
|
||||
throttleKey: probeThrottleKey,
|
||||
@@ -389,10 +392,11 @@ export async function runWithModelFallback<T>(params: {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (shouldProbe) {
|
||||
if (isPrimary && shouldProbe) {
|
||||
// Primary model probe: attempt it despite cooldown to detect recovery.
|
||||
lastProbeAttempt.set(probeThrottleKey, now);
|
||||
}
|
||||
// For fallback models or probed primaries, continue to attempt the model
|
||||
}
|
||||
}
|
||||
try {
|
||||
|
||||
Reference in New Issue
Block a user