mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-19 09:08:38 +00:00
fix(agents): comprehensive quota fallback fixes - session overrides + surgical cooldown logic (#23816)
Merged via /review-pr -> /prepare-pr -> /merge-pr.
Prepared head SHA: e6f2b4742b
Co-authored-by: ramezgaberiel <844893+ramezgaberiel@users.noreply.github.com>
Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com>
Reviewed-by: @gumadeiras
This commit is contained in:
@@ -37,6 +37,7 @@ Docs: https://docs.openclaw.ai
|
|||||||
- Security/Browser uploads: revalidate upload paths at use-time in Playwright file-chooser and direct-input flows so missing/rebound paths are rejected before `setFiles`, with regression coverage for strict missing-path handling.
|
- Security/Browser uploads: revalidate upload paths at use-time in Playwright file-chooser and direct-input flows so missing/rebound paths are rejected before `setFiles`, with regression coverage for strict missing-path handling.
|
||||||
- Security/LINE: cap unsigned webhook body reads before auth/signature handling to bound unauthenticated body processing. (#26095) Thanks @bmendonca3.
|
- Security/LINE: cap unsigned webhook body reads before auth/signature handling to bound unauthenticated body processing. (#26095) Thanks @bmendonca3.
|
||||||
- Agents/Model fallback: keep explicit text + image fallback chains reachable even when `agents.defaults.models` allowlists are present, prefer explicit run `agentId` over session-key parsing for followup fallback override resolution (with session-key fallback), treat agent-level fallback overrides as configured in embedded runner preflight, and classify `model_cooldown` / `cooling down` errors as `rate_limit` so failover continues. (#11972, #24137, #17231)
|
- Agents/Model fallback: keep explicit text + image fallback chains reachable even when `agents.defaults.models` allowlists are present, prefer explicit run `agentId` over session-key parsing for followup fallback override resolution (with session-key fallback), treat agent-level fallback overrides as configured in embedded runner preflight, and classify `model_cooldown` / `cooling down` errors as `rate_limit` so failover continues. (#11972, #24137, #17231)
|
||||||
|
- Agents/Model fallback: keep same-provider fallback chains active when session model differs from configured primary, infer cooldown reason from provider profile state (instead of `disabledReason` only), keep no-profile fallback providers eligible (env/models.json paths), and only relax same-provider cooldown fallback attempts for `rate_limit`. (#23816) Thanks @ramezgaberiel.
|
||||||
- Followups/Routing: when explicit origin routing fails, allow same-channel fallback dispatch (while still blocking cross-channel fallback) so followup replies do not get dropped on transient origin-adapter failures. (#26109) Thanks @Sid-Qin.
|
- Followups/Routing: when explicit origin routing fails, allow same-channel fallback dispatch (while still blocking cross-channel fallback) so followup replies do not get dropped on transient origin-adapter failures. (#26109) Thanks @Sid-Qin.
|
||||||
- Agents/Model fallback: continue fallback traversal on unrecognized errors when candidates remain, while still throwing the original unknown error on the last candidate. (#26106) Thanks @Sid-Qin.
|
- Agents/Model fallback: continue fallback traversal on unrecognized errors when candidates remain, while still throwing the original unknown error on the last candidate. (#26106) Thanks @Sid-Qin.
|
||||||
- Telegram/Markdown spoilers: keep valid `||spoiler||` pairs while leaving unmatched trailing `||` delimiters as literal text, avoiding false all-or-nothing spoiler suppression. (#26105) Thanks @Sid-Qin.
|
- Telegram/Markdown spoilers: keep valid `||spoiler||` pairs while leaving unmatched trailing `||` delimiters as literal text, avoiding false all-or-nothing spoiler suppression. (#26105) Thanks @Sid-Qin.
|
||||||
|
|||||||
@@ -163,7 +163,7 @@ describe("runWithModelFallback – probe logic", () => {
|
|||||||
expectPrimaryProbeSuccess(result, run, "recovered");
|
expectPrimaryProbeSuccess(result, run, "recovered");
|
||||||
});
|
});
|
||||||
|
|
||||||
it("does NOT probe non-primary candidates during cooldown", async () => {
|
it("attempts non-primary fallbacks during rate-limit cooldown after primary probe failure", async () => {
|
||||||
const cfg = makeCfg({
|
const cfg = makeCfg({
|
||||||
agents: {
|
agents: {
|
||||||
defaults: {
|
defaults: {
|
||||||
@@ -182,25 +182,23 @@ describe("runWithModelFallback – probe logic", () => {
|
|||||||
const almostExpired = NOW + 30 * 1000; // 30s remaining
|
const almostExpired = NOW + 30 * 1000; // 30s remaining
|
||||||
mockedGetSoonestCooldownExpiry.mockReturnValue(almostExpired);
|
mockedGetSoonestCooldownExpiry.mockReturnValue(almostExpired);
|
||||||
|
|
||||||
// Primary probe fails with 429
|
// Primary probe fails with 429; fallback should still be attempted for rate_limit cooldowns.
|
||||||
const run = vi
|
const run = vi
|
||||||
.fn()
|
.fn()
|
||||||
.mockRejectedValueOnce(Object.assign(new Error("rate limited"), { status: 429 }))
|
.mockRejectedValueOnce(Object.assign(new Error("rate limited"), { status: 429 }))
|
||||||
.mockResolvedValue("should-not-reach");
|
.mockResolvedValue("fallback-ok");
|
||||||
|
|
||||||
try {
|
const result = await runWithModelFallback({
|
||||||
await runWithModelFallback({
|
cfg,
|
||||||
cfg,
|
provider: "openai",
|
||||||
provider: "openai",
|
model: "gpt-4.1-mini",
|
||||||
model: "gpt-4.1-mini",
|
run,
|
||||||
run,
|
});
|
||||||
});
|
|
||||||
expect.unreachable("should have thrown since all candidates exhausted");
|
expect(result.result).toBe("fallback-ok");
|
||||||
} catch {
|
expect(run).toHaveBeenCalledTimes(2);
|
||||||
// Primary was probed (i === 0 + within margin), non-primary were skipped
|
expect(run).toHaveBeenNthCalledWith(1, "openai", "gpt-4.1-mini");
|
||||||
expect(run).toHaveBeenCalledTimes(1); // only primary was actually called
|
expect(run).toHaveBeenNthCalledWith(2, "anthropic", "claude-haiku-3-5");
|
||||||
expect(run).toHaveBeenCalledWith("openai", "gpt-4.1-mini");
|
|
||||||
}
|
|
||||||
});
|
});
|
||||||
|
|
||||||
it("throttles probe when called within 30s interval", async () => {
|
it("throttles probe when called within 30s interval", async () => {
|
||||||
|
|||||||
@@ -143,10 +143,22 @@ async function expectSkippedUnavailableProvider(params: {
|
|||||||
}) {
|
}) {
|
||||||
const provider = `${params.providerPrefix}-${crypto.randomUUID()}`;
|
const provider = `${params.providerPrefix}-${crypto.randomUUID()}`;
|
||||||
const cfg = makeProviderFallbackCfg(provider);
|
const cfg = makeProviderFallbackCfg(provider);
|
||||||
const store = makeSingleProviderStore({
|
const primaryStore = makeSingleProviderStore({
|
||||||
provider,
|
provider,
|
||||||
usageStat: params.usageStat,
|
usageStat: params.usageStat,
|
||||||
});
|
});
|
||||||
|
// Include fallback provider profile so the fallback is attempted (not skipped as no-profile).
|
||||||
|
const store: AuthProfileStore = {
|
||||||
|
...primaryStore,
|
||||||
|
profiles: {
|
||||||
|
...primaryStore.profiles,
|
||||||
|
"fallback:default": {
|
||||||
|
type: "api_key",
|
||||||
|
provider: "fallback",
|
||||||
|
key: "test-key",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
};
|
||||||
const run = createFallbackOnlyRun();
|
const run = createFallbackOnlyRun();
|
||||||
|
|
||||||
const result = await runWithStoredAuth({
|
const result = await runWithStoredAuth({
|
||||||
@@ -436,11 +448,11 @@ describe("runWithModelFallback", () => {
|
|||||||
run,
|
run,
|
||||||
});
|
});
|
||||||
|
|
||||||
// Override model failed with model_not_found → falls back to configured primary.
|
// Override model failed with model_not_found → tries fallbacks first (same provider).
|
||||||
expect(result.result).toBe("ok");
|
expect(result.result).toBe("ok");
|
||||||
expect(run).toHaveBeenCalledTimes(2);
|
expect(run).toHaveBeenCalledTimes(2);
|
||||||
expect(run.mock.calls[1]?.[0]).toBe("openai");
|
expect(run.mock.calls[1]?.[0]).toBe("anthropic");
|
||||||
expect(run.mock.calls[1]?.[1]).toBe("gpt-4.1-mini");
|
expect(run.mock.calls[1]?.[1]).toBe("claude-haiku-3-5");
|
||||||
});
|
});
|
||||||
|
|
||||||
it("skips providers when all profiles are in cooldown", async () => {
|
it("skips providers when all profiles are in cooldown", async () => {
|
||||||
@@ -794,6 +806,296 @@ describe("runWithModelFallback", () => {
|
|||||||
expect(result.provider).toBe("openai");
|
expect(result.provider).toBe("openai");
|
||||||
expect(result.model).toBe("gpt-4.1-mini");
|
expect(result.model).toBe("gpt-4.1-mini");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Tests for Bug A fix: Model fallback with session overrides
|
||||||
|
describe("fallback behavior with session model overrides", () => {
|
||||||
|
it("allows fallbacks when session model differs from config within same provider", async () => {
|
||||||
|
const cfg = makeCfg({
|
||||||
|
agents: {
|
||||||
|
defaults: {
|
||||||
|
model: {
|
||||||
|
primary: "anthropic/claude-opus-4-6",
|
||||||
|
fallbacks: ["anthropic/claude-sonnet-4-5", "google/gemini-2.5-flash"],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
const run = vi
|
||||||
|
.fn()
|
||||||
|
.mockRejectedValueOnce(new Error("Rate limit exceeded")) // Session model fails
|
||||||
|
.mockResolvedValueOnce("fallback success"); // First fallback succeeds
|
||||||
|
|
||||||
|
const result = await runWithModelFallback({
|
||||||
|
cfg,
|
||||||
|
provider: "anthropic",
|
||||||
|
model: "claude-sonnet-4-20250514", // Different from config primary
|
||||||
|
run,
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(result.result).toBe("fallback success");
|
||||||
|
expect(run).toHaveBeenCalledTimes(2);
|
||||||
|
expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-20250514");
|
||||||
|
expect(run).toHaveBeenNthCalledWith(2, "anthropic", "claude-sonnet-4-5"); // Fallback tried
|
||||||
|
});
|
||||||
|
|
||||||
|
it("allows fallbacks with model version differences within same provider", async () => {
|
||||||
|
const cfg = makeCfg({
|
||||||
|
agents: {
|
||||||
|
defaults: {
|
||||||
|
model: {
|
||||||
|
primary: "anthropic/claude-opus-4-6",
|
||||||
|
fallbacks: ["groq/llama-3.3-70b-versatile"],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
const run = vi
|
||||||
|
.fn()
|
||||||
|
.mockRejectedValueOnce(new Error("Weekly quota exceeded"))
|
||||||
|
.mockResolvedValueOnce("groq success");
|
||||||
|
|
||||||
|
const result = await runWithModelFallback({
|
||||||
|
cfg,
|
||||||
|
provider: "anthropic",
|
||||||
|
model: "claude-opus-4-5", // Version difference from config
|
||||||
|
run,
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(result.result).toBe("groq success");
|
||||||
|
expect(run).toHaveBeenCalledTimes(2);
|
||||||
|
expect(run).toHaveBeenNthCalledWith(2, "groq", "llama-3.3-70b-versatile");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("still skips fallbacks when using different provider than config", async () => {
|
||||||
|
const cfg = makeCfg({
|
||||||
|
agents: {
|
||||||
|
defaults: {
|
||||||
|
model: {
|
||||||
|
primary: "anthropic/claude-opus-4-6",
|
||||||
|
fallbacks: [], // Empty fallbacks to match working pattern
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
const run = vi
|
||||||
|
.fn()
|
||||||
|
.mockRejectedValueOnce(new Error('No credentials found for profile "openai:default".'))
|
||||||
|
.mockResolvedValueOnce("config primary worked");
|
||||||
|
|
||||||
|
const result = await runWithModelFallback({
|
||||||
|
cfg,
|
||||||
|
provider: "openai", // Different provider
|
||||||
|
model: "gpt-4.1-mini",
|
||||||
|
run,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Cross-provider requests should skip configured fallbacks but still try configured primary
|
||||||
|
expect(result.result).toBe("config primary worked");
|
||||||
|
expect(run).toHaveBeenCalledTimes(2);
|
||||||
|
expect(run).toHaveBeenNthCalledWith(1, "openai", "gpt-4.1-mini"); // Original request
|
||||||
|
expect(run).toHaveBeenNthCalledWith(2, "anthropic", "claude-opus-4-6"); // Config primary as final fallback
|
||||||
|
});
|
||||||
|
|
||||||
|
it("uses fallbacks when session model exactly matches config primary", async () => {
|
||||||
|
const cfg = makeCfg({
|
||||||
|
agents: {
|
||||||
|
defaults: {
|
||||||
|
model: {
|
||||||
|
primary: "anthropic/claude-opus-4-6",
|
||||||
|
fallbacks: ["groq/llama-3.3-70b-versatile"],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
const run = vi
|
||||||
|
.fn()
|
||||||
|
.mockRejectedValueOnce(new Error("Quota exceeded"))
|
||||||
|
.mockResolvedValueOnce("fallback worked");
|
||||||
|
|
||||||
|
const result = await runWithModelFallback({
|
||||||
|
cfg,
|
||||||
|
provider: "anthropic",
|
||||||
|
model: "claude-opus-4-6", // Exact match
|
||||||
|
run,
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(result.result).toBe("fallback worked");
|
||||||
|
expect(run).toHaveBeenCalledTimes(2);
|
||||||
|
expect(run).toHaveBeenNthCalledWith(2, "groq", "llama-3.3-70b-versatile");
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
// Tests for Bug B fix: Rate limit vs auth/billing cooldown distinction
|
||||||
|
describe("fallback behavior with provider cooldowns", () => {
|
||||||
|
/**
 * Test helper: writes an auth profile store with one `<provider>:default`
 * API-key profile whose usage stats mark it unavailable for the next 300000 ms.
 *
 * - `rate_limit` is recorded via `cooldownUntil` + `failureCounts`
 *   (no `disabledReason`).
 * - `auth` / `billing` are recorded via `disabledUntil` + `disabledReason`.
 *
 * The store is saved into a freshly created temp directory, which is returned
 * so it can be passed as `agentDir`. This helper does not remove the directory.
 */
async function makeAuthStoreWithCooldown(
  provider: string,
  reason: "rate_limit" | "auth" | "billing",
): Promise<{ store: AuthProfileStore; dir: string }> {
  const dir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-test-"));
  const unavailableUntil = Date.now() + 300000;
  const profileId = `${provider}:default`;

  const usageStat =
    reason === "rate_limit"
      ? // Rate-limit cooldowns: cooldownUntil + failureCounts, never disabledReason.
        { cooldownUntil: unavailableUntil, failureCounts: { rate_limit: 1 } }
      : // Auth/billing problems: disabledUntil + disabledReason.
        { disabledUntil: unavailableUntil, disabledReason: reason };

  const store: AuthProfileStore = {
    version: AUTH_STORE_VERSION,
    profiles: {
      [profileId]: { type: "api_key", provider, key: "test-key" },
    },
    usageStats: {
      [profileId]: usageStat,
    },
  };

  saveAuthProfileStore(store, dir);
  return { store, dir };
}
|
||||||
|
|
||||||
|
it("attempts same-provider fallbacks during rate limit cooldown", async () => {
|
||||||
|
const { dir } = await makeAuthStoreWithCooldown("anthropic", "rate_limit");
|
||||||
|
const cfg = makeCfg({
|
||||||
|
agents: {
|
||||||
|
defaults: {
|
||||||
|
model: {
|
||||||
|
primary: "anthropic/claude-opus-4-6",
|
||||||
|
fallbacks: ["anthropic/claude-sonnet-4-5", "groq/llama-3.3-70b-versatile"],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
const run = vi.fn().mockResolvedValueOnce("sonnet success"); // Fallback succeeds
|
||||||
|
|
||||||
|
const result = await runWithModelFallback({
|
||||||
|
cfg,
|
||||||
|
provider: "anthropic",
|
||||||
|
model: "claude-opus-4-6",
|
||||||
|
run,
|
||||||
|
agentDir: dir,
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(result.result).toBe("sonnet success");
|
||||||
|
expect(run).toHaveBeenCalledTimes(1); // Primary skipped, fallback attempted
|
||||||
|
expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-5");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("skips same-provider models on auth cooldown but still tries no-profile fallback providers", async () => {
|
||||||
|
const { dir } = await makeAuthStoreWithCooldown("anthropic", "auth");
|
||||||
|
const cfg = makeCfg({
|
||||||
|
agents: {
|
||||||
|
defaults: {
|
||||||
|
model: {
|
||||||
|
primary: "anthropic/claude-opus-4-6",
|
||||||
|
fallbacks: ["anthropic/claude-sonnet-4-5", "groq/llama-3.3-70b-versatile"],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
const run = vi.fn().mockResolvedValueOnce("groq success");
|
||||||
|
|
||||||
|
const result = await runWithModelFallback({
|
||||||
|
cfg,
|
||||||
|
provider: "anthropic",
|
||||||
|
model: "claude-opus-4-6",
|
||||||
|
run,
|
||||||
|
agentDir: dir,
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(result.result).toBe("groq success");
|
||||||
|
expect(run).toHaveBeenCalledTimes(1);
|
||||||
|
expect(run).toHaveBeenNthCalledWith(1, "groq", "llama-3.3-70b-versatile");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("skips same-provider models on billing cooldown but still tries no-profile fallback providers", async () => {
|
||||||
|
const { dir } = await makeAuthStoreWithCooldown("anthropic", "billing");
|
||||||
|
const cfg = makeCfg({
|
||||||
|
agents: {
|
||||||
|
defaults: {
|
||||||
|
model: {
|
||||||
|
primary: "anthropic/claude-opus-4-6",
|
||||||
|
fallbacks: ["anthropic/claude-sonnet-4-5", "groq/llama-3.3-70b-versatile"],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
const run = vi.fn().mockResolvedValueOnce("groq success");
|
||||||
|
|
||||||
|
const result = await runWithModelFallback({
|
||||||
|
cfg,
|
||||||
|
provider: "anthropic",
|
||||||
|
model: "claude-opus-4-6",
|
||||||
|
run,
|
||||||
|
agentDir: dir,
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(result.result).toBe("groq success");
|
||||||
|
expect(run).toHaveBeenCalledTimes(1);
|
||||||
|
expect(run).toHaveBeenNthCalledWith(1, "groq", "llama-3.3-70b-versatile");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("tries cross-provider fallbacks when same provider has rate limit", async () => {
|
||||||
|
// Anthropic in rate limit cooldown, Groq available
|
||||||
|
const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-test-"));
|
||||||
|
const store: AuthProfileStore = {
|
||||||
|
version: AUTH_STORE_VERSION,
|
||||||
|
profiles: {
|
||||||
|
"anthropic:default": { type: "api_key", provider: "anthropic", key: "test-key" },
|
||||||
|
"groq:default": { type: "api_key", provider: "groq", key: "test-key" },
|
||||||
|
},
|
||||||
|
usageStats: {
|
||||||
|
"anthropic:default": {
|
||||||
|
// Rate-limit reason is inferred from failureCounts for cooldown windows.
|
||||||
|
cooldownUntil: Date.now() + 300000,
|
||||||
|
failureCounts: { rate_limit: 2 },
|
||||||
|
},
|
||||||
|
// Groq not in cooldown
|
||||||
|
},
|
||||||
|
};
|
||||||
|
saveAuthProfileStore(store, tmpDir);
|
||||||
|
|
||||||
|
const cfg = makeCfg({
|
||||||
|
agents: {
|
||||||
|
defaults: {
|
||||||
|
model: {
|
||||||
|
primary: "anthropic/claude-opus-4-6",
|
||||||
|
fallbacks: ["anthropic/claude-sonnet-4-5", "groq/llama-3.3-70b-versatile"],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
const run = vi
|
||||||
|
.fn()
|
||||||
|
.mockRejectedValueOnce(new Error("Still rate limited")) // Sonnet still fails
|
||||||
|
.mockResolvedValueOnce("groq success"); // Groq works
|
||||||
|
|
||||||
|
const result = await runWithModelFallback({
|
||||||
|
cfg,
|
||||||
|
provider: "anthropic",
|
||||||
|
model: "claude-opus-4-6",
|
||||||
|
run,
|
||||||
|
agentDir: tmpDir,
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(result.result).toBe("groq success");
|
||||||
|
expect(run).toHaveBeenCalledTimes(2);
|
||||||
|
expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-5"); // Rate limit allows attempt
|
||||||
|
expect(run).toHaveBeenNthCalledWith(2, "groq", "llama-3.3-70b-versatile"); // Cross-provider works
|
||||||
|
});
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("runWithImageModelFallback", () => {
|
describe("runWithImageModelFallback", () => {
|
||||||
|
|||||||
@@ -224,21 +224,21 @@ function resolveFallbackCandidates(params: {
|
|||||||
const configuredFallbacks = resolveAgentModelFallbackValues(
|
const configuredFallbacks = resolveAgentModelFallbackValues(
|
||||||
params.cfg?.agents?.defaults?.model,
|
params.cfg?.agents?.defaults?.model,
|
||||||
);
|
);
|
||||||
if (sameModelCandidate(normalizedPrimary, configuredPrimary)) {
|
// When user runs a different provider than config, only use configured fallbacks
|
||||||
return configuredFallbacks;
|
// if the current model is already in that chain (e.g. session on first fallback).
|
||||||
}
|
if (normalizedPrimary.provider !== configuredPrimary.provider) {
|
||||||
// Preserve resilience after failover: when current model is one of the
|
const isConfiguredFallback = configuredFallbacks.some((raw) => {
|
||||||
// configured fallback refs, keep traversing the configured fallback chain.
|
const resolved = resolveModelRefFromString({
|
||||||
const isConfiguredFallback = configuredFallbacks.some((raw) => {
|
raw: String(raw ?? ""),
|
||||||
const resolved = resolveModelRefFromString({
|
defaultProvider,
|
||||||
raw: String(raw ?? ""),
|
aliasIndex,
|
||||||
defaultProvider,
|
});
|
||||||
aliasIndex,
|
return resolved ? sameModelCandidate(resolved.ref, normalizedPrimary) : false;
|
||||||
});
|
});
|
||||||
return resolved ? sameModelCandidate(resolved.ref, normalizedPrimary) : false;
|
return isConfiguredFallback ? configuredFallbacks : [];
|
||||||
});
|
}
|
||||||
// Keep legacy override behavior for ad-hoc models outside configured chain.
|
// Same provider: always use full fallback chain (model version differences within provider).
|
||||||
return isConfiguredFallback ? configuredFallbacks : [];
|
return configuredFallbacks;
|
||||||
})();
|
})();
|
||||||
|
|
||||||
for (const raw of modelFallbacks) {
|
for (const raw of modelFallbacks) {
|
||||||
@@ -306,6 +306,76 @@ export const _probeThrottleInternals = {
|
|||||||
resolveProbeThrottleKey,
|
resolveProbeThrottleKey,
|
||||||
} as const;
|
} as const;
|
||||||
|
|
||||||
|
/**
 * Result of evaluating a fallback candidate whose provider currently has no
 * available auth profile.
 *
 * - `skip`: do not run the candidate; `error` and `reason` are recorded as the attempt.
 * - `attempt`: run the candidate anyway; `markProbe` tells the caller to record
 *   a probe timestamp for throttling.
 */
type CooldownDecision =
  | { type: "skip"; reason: FailoverReason; error: string }
  | { type: "attempt"; reason: FailoverReason; markProbe: boolean };

/**
 * Decides whether a candidate should still be attempted although every auth
 * profile of its provider is unavailable.
 *
 * Rules, in order:
 * 1. If the inferred reason is `auth`, `auth_permanent` or `billing`, skip.
 * 2. Primary candidate: attempt unless it is the requested model and the
 *    probe check declines.
 * 3. Non-primary candidate: attempt only when the inferred reason is
 *    `rate_limit`.
 *
 * When no reason can be resolved from the profile state, `rate_limit` is assumed.
 *
 * @param params.candidate Candidate being evaluated (its provider is used in messages).
 * @param params.isPrimary Whether this is the first candidate in the chain.
 * @param params.requestedModel Whether the candidate equals the provider/model the caller asked for.
 * @param params.hasFallbackCandidates Forwarded to the probe check.
 * @param params.now Timestamp used for both the probe check and reason inference.
 * @param params.probeThrottleKey Throttle key forwarded to the probe check.
 * @param params.authStore Auth profile store to inspect.
 * @param params.profileIds Profile ids belonging to the candidate's provider.
 * @returns The skip/attempt decision for this candidate.
 */
function resolveCooldownDecision(params: {
  candidate: ModelCandidate;
  isPrimary: boolean;
  requestedModel: boolean;
  hasFallbackCandidates: boolean;
  now: number;
  probeThrottleKey: string;
  authStore: ReturnType<typeof ensureAuthProfileStore>;
  profileIds: string[];
}): CooldownDecision {
  const { candidate, isPrimary, requestedModel, now, authStore, profileIds } = params;

  const probeAllowed = shouldProbePrimaryDuringCooldown({
    isPrimary,
    hasFallbackCandidates: params.hasFallbackCandidates,
    now,
    throttleKey: params.probeThrottleKey,
    authStore,
    profileIds,
  });

  const reason =
    resolveProfilesUnavailableReason({ store: authStore, profileIds, now }) ?? "rate_limit";

  // Persistent problems affect every model of the provider; never attempt.
  switch (reason) {
    case "auth":
    case "auth_permanent":
    case "billing":
      return {
        type: "skip",
        reason,
        error: `Provider ${candidate.provider} has ${reason} issue (skipping all models)`,
      };
    default:
      break;
  }

  // Primary: blocked only when it is the requested model and the probe check
  // declines. Same-provider fallbacks: cooldown is relaxed solely for
  // rate_limit, which is commonly model-scoped and may recover on a sibling model.
  const blockedByCooldown = isPrimary
    ? requestedModel && !probeAllowed
    : reason !== "rate_limit";

  if (blockedByCooldown) {
    return {
      type: "skip",
      reason,
      error: `Provider ${candidate.provider} is in cooldown (all profiles unavailable)`,
    };
  }

  return {
    type: "attempt",
    reason,
    markProbe: isPrimary && probeAllowed,
  };
}
|
||||||
|
|
||||||
export async function runWithModelFallback<T>(params: {
|
export async function runWithModelFallback<T>(params: {
|
||||||
cfg: OpenClawConfig | undefined;
|
cfg: OpenClawConfig | undefined;
|
||||||
provider: string;
|
provider: string;
|
||||||
@@ -342,41 +412,38 @@ export async function runWithModelFallback<T>(params: {
|
|||||||
|
|
||||||
if (profileIds.length > 0 && !isAnyProfileAvailable) {
|
if (profileIds.length > 0 && !isAnyProfileAvailable) {
|
||||||
// All profiles for this provider are in cooldown.
|
// All profiles for this provider are in cooldown.
|
||||||
// For the primary model (i === 0), probe it if the soonest cooldown
|
const isPrimary = i === 0;
|
||||||
// expiry is close or already past. This avoids staying on a fallback
|
const requestedModel =
|
||||||
// model long after the real rate-limit window clears.
|
params.provider === candidate.provider && params.model === candidate.model;
|
||||||
const now = Date.now();
|
const now = Date.now();
|
||||||
const probeThrottleKey = resolveProbeThrottleKey(candidate.provider, params.agentDir);
|
const probeThrottleKey = resolveProbeThrottleKey(candidate.provider, params.agentDir);
|
||||||
const shouldProbe = shouldProbePrimaryDuringCooldown({
|
const decision = resolveCooldownDecision({
|
||||||
isPrimary: i === 0,
|
candidate,
|
||||||
|
isPrimary,
|
||||||
|
requestedModel,
|
||||||
hasFallbackCandidates,
|
hasFallbackCandidates,
|
||||||
now,
|
now,
|
||||||
throttleKey: probeThrottleKey,
|
probeThrottleKey,
|
||||||
authStore,
|
authStore,
|
||||||
profileIds,
|
profileIds,
|
||||||
});
|
});
|
||||||
if (!shouldProbe) {
|
|
||||||
const inferredReason =
|
if (decision.type === "skip") {
|
||||||
resolveProfilesUnavailableReason({
|
|
||||||
store: authStore,
|
|
||||||
profileIds,
|
|
||||||
now,
|
|
||||||
}) ?? "rate_limit";
|
|
||||||
// Skip without attempting
|
|
||||||
attempts.push({
|
attempts.push({
|
||||||
provider: candidate.provider,
|
provider: candidate.provider,
|
||||||
model: candidate.model,
|
model: candidate.model,
|
||||||
error: `Provider ${candidate.provider} is in cooldown (all profiles unavailable)`,
|
error: decision.error,
|
||||||
reason: inferredReason,
|
reason: decision.reason,
|
||||||
});
|
});
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
// Primary model probe: attempt it despite cooldown to detect recovery.
|
|
||||||
// If it fails, the error is caught below and we fall through to the
|
if (decision.markProbe) {
|
||||||
// next candidate as usual.
|
lastProbeAttempt.set(probeThrottleKey, now);
|
||||||
lastProbeAttempt.set(probeThrottleKey, now);
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const result = await params.run(candidate.provider, candidate.model);
|
const result = await params.run(candidate.provider, candidate.model);
|
||||||
return {
|
return {
|
||||||
|
|||||||
Reference in New Issue
Block a user