mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 22:01:35 +00:00
fix(agents): harden model fallback failover paths
This commit is contained in:
@@ -8,6 +8,8 @@ Docs: https://docs.openclaw.ai
|
|||||||
|
|
||||||
### Fixes
|
### Fixes
|
||||||
|
|
||||||
|
- Agents/Model fallback: keep explicit text + image fallback chains reachable even when `agents.defaults.models` allowlists are present, prefer explicit run `agentId` over session-key parsing for followup fallback override resolution (with session-key fallback), treat agent-level fallback overrides as configured in embedded runner preflight, and classify `model_cooldown` / `cooling down` errors as `rate_limit` so failover continues. (#11972, #24137, #17231)
|
||||||
|
|
||||||
## 2026.2.24
|
## 2026.2.24
|
||||||
|
|
||||||
### Changes
|
### Changes
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ import type { AuthProfileStore } from "./auth-profiles.js";
|
|||||||
import { saveAuthProfileStore } from "./auth-profiles.js";
|
import { saveAuthProfileStore } from "./auth-profiles.js";
|
||||||
import { AUTH_STORE_VERSION } from "./auth-profiles/constants.js";
|
import { AUTH_STORE_VERSION } from "./auth-profiles/constants.js";
|
||||||
import { isAnthropicBillingError } from "./live-auth-keys.js";
|
import { isAnthropicBillingError } from "./live-auth-keys.js";
|
||||||
import { runWithModelFallback } from "./model-fallback.js";
|
import { runWithImageModelFallback, runWithModelFallback } from "./model-fallback.js";
|
||||||
import { makeModelFallbackCfg } from "./test-helpers/model-fallback-config-fixture.js";
|
import { makeModelFallbackCfg } from "./test-helpers/model-fallback-config-fixture.js";
|
||||||
|
|
||||||
const makeCfg = makeModelFallbackCfg;
|
const makeCfg = makeModelFallbackCfg;
|
||||||
@@ -581,6 +581,39 @@ describe("runWithModelFallback", () => {
|
|||||||
expect(calls).toEqual([{ provider: "anthropic", model: "claude-opus-4-5" }]);
|
expect(calls).toEqual([{ provider: "anthropic", model: "claude-opus-4-5" }]);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("keeps explicit fallbacks reachable when models allowlist is present", async () => {
|
||||||
|
const cfg = makeCfg({
|
||||||
|
agents: {
|
||||||
|
defaults: {
|
||||||
|
model: {
|
||||||
|
primary: "anthropic/claude-sonnet-4",
|
||||||
|
fallbacks: ["openai/gpt-4o", "ollama/llama-3"],
|
||||||
|
},
|
||||||
|
models: {
|
||||||
|
"anthropic/claude-sonnet-4": {},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
});
|
||||||
|
const run = vi
|
||||||
|
.fn()
|
||||||
|
.mockRejectedValueOnce(Object.assign(new Error("rate limited"), { status: 429 }))
|
||||||
|
.mockResolvedValueOnce("ok");
|
||||||
|
|
||||||
|
const result = await runWithModelFallback({
|
||||||
|
cfg,
|
||||||
|
provider: "anthropic",
|
||||||
|
model: "claude-sonnet-4",
|
||||||
|
run,
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(result.result).toBe("ok");
|
||||||
|
expect(run.mock.calls).toEqual([
|
||||||
|
["anthropic", "claude-sonnet-4"],
|
||||||
|
["openai", "gpt-4o"],
|
||||||
|
]);
|
||||||
|
});
|
||||||
|
|
||||||
it("defaults provider/model when missing (regression #946)", async () => {
|
it("defaults provider/model when missing (regression #946)", async () => {
|
||||||
const cfg = makeCfg({
|
const cfg = makeCfg({
|
||||||
agents: {
|
agents: {
|
||||||
@@ -721,6 +754,39 @@ describe("runWithModelFallback", () => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe("runWithImageModelFallback", () => {
|
||||||
|
it("keeps explicit image fallbacks reachable when models allowlist is present", async () => {
|
||||||
|
const cfg = makeCfg({
|
||||||
|
agents: {
|
||||||
|
defaults: {
|
||||||
|
imageModel: {
|
||||||
|
primary: "openai/gpt-image-1",
|
||||||
|
fallbacks: ["google/gemini-2.5-flash-image-preview"],
|
||||||
|
},
|
||||||
|
models: {
|
||||||
|
"openai/gpt-image-1": {},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
});
|
||||||
|
const run = vi
|
||||||
|
.fn()
|
||||||
|
.mockRejectedValueOnce(new Error("rate limited"))
|
||||||
|
.mockResolvedValueOnce("ok");
|
||||||
|
|
||||||
|
const result = await runWithImageModelFallback({
|
||||||
|
cfg,
|
||||||
|
run,
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(result.result).toBe("ok");
|
||||||
|
expect(run.mock.calls).toEqual([
|
||||||
|
["openai", "gpt-image-1"],
|
||||||
|
["google", "gemini-2.5-flash-image-preview"],
|
||||||
|
]);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
describe("isAnthropicBillingError", () => {
|
describe("isAnthropicBillingError", () => {
|
||||||
it("does not false-positive on plain 'a 402' prose", () => {
|
it("does not false-positive on plain 'a 402' prose", () => {
|
||||||
const samples = [
|
const samples = [
|
||||||
|
|||||||
@@ -164,7 +164,9 @@ function resolveImageFallbackCandidates(params: {
|
|||||||
const imageFallbacks = resolveAgentModelFallbackValues(params.cfg?.agents?.defaults?.imageModel);
|
const imageFallbacks = resolveAgentModelFallbackValues(params.cfg?.agents?.defaults?.imageModel);
|
||||||
|
|
||||||
for (const raw of imageFallbacks) {
|
for (const raw of imageFallbacks) {
|
||||||
addRaw(raw, true);
|
// Explicitly configured image fallbacks should remain reachable even when a
|
||||||
|
// model allowlist is present.
|
||||||
|
addRaw(raw, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
return candidates;
|
return candidates;
|
||||||
@@ -235,7 +237,9 @@ function resolveFallbackCandidates(params: {
|
|||||||
if (!resolved) {
|
if (!resolved) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
addCandidate(resolved.ref, true);
|
// Fallbacks are explicit user intent; do not silently filter them by the
|
||||||
|
// model allowlist.
|
||||||
|
addCandidate(resolved.ref, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (params.fallbacksOverride === undefined && primary?.provider && primary.model) {
|
if (params.fallbacksOverride === undefined && primary?.provider && primary.model) {
|
||||||
|
|||||||
@@ -433,6 +433,12 @@ describe("classifyFailoverReason", () => {
|
|||||||
expect(classifyFailoverReason("Missing scopes: model.request")).toBe("auth");
|
expect(classifyFailoverReason("Missing scopes: model.request")).toBe("auth");
|
||||||
expect(classifyFailoverReason("429 too many requests")).toBe("rate_limit");
|
expect(classifyFailoverReason("429 too many requests")).toBe("rate_limit");
|
||||||
expect(classifyFailoverReason("resource has been exhausted")).toBe("rate_limit");
|
expect(classifyFailoverReason("resource has been exhausted")).toBe("rate_limit");
|
||||||
|
expect(
|
||||||
|
classifyFailoverReason("model_cooldown: All credentials for model gpt-5 are cooling down"),
|
||||||
|
).toBe("rate_limit");
|
||||||
|
expect(classifyFailoverReason("all credentials for model x are cooling down")).toBe(
|
||||||
|
"rate_limit",
|
||||||
|
);
|
||||||
expect(
|
expect(
|
||||||
classifyFailoverReason(
|
classifyFailoverReason(
|
||||||
'{"type":"error","error":{"type":"overloaded_error","message":"Overloaded"}}',
|
'{"type":"error","error":{"type":"overloaded_error","message":"Overloaded"}}',
|
||||||
|
|||||||
@@ -615,6 +615,8 @@ type ErrorPattern = RegExp | string;
|
|||||||
const ERROR_PATTERNS = {
|
const ERROR_PATTERNS = {
|
||||||
rateLimit: [
|
rateLimit: [
|
||||||
/rate[_ ]limit|too many requests|429/,
|
/rate[_ ]limit|too many requests|429/,
|
||||||
|
"model_cooldown",
|
||||||
|
"cooling down",
|
||||||
"exceeded your current quota",
|
"exceeded your current quota",
|
||||||
"resource has been exhausted",
|
"resource has been exhausted",
|
||||||
"quota exceeded",
|
"quota exceeded",
|
||||||
|
|||||||
@@ -109,6 +109,45 @@ const makeConfig = (opts?: { fallbacks?: string[]; apiKey?: string }): OpenClawC
|
|||||||
},
|
},
|
||||||
}) satisfies OpenClawConfig;
|
}) satisfies OpenClawConfig;
|
||||||
|
|
||||||
|
const makeAgentOverrideOnlyFallbackConfig = (agentId: string): OpenClawConfig =>
|
||||||
|
({
|
||||||
|
agents: {
|
||||||
|
defaults: {
|
||||||
|
model: {
|
||||||
|
fallbacks: [],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
list: [
|
||||||
|
{
|
||||||
|
id: agentId,
|
||||||
|
model: {
|
||||||
|
fallbacks: ["openai/mock-2"],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
models: {
|
||||||
|
providers: {
|
||||||
|
openai: {
|
||||||
|
api: "openai-responses",
|
||||||
|
apiKey: "sk-test",
|
||||||
|
baseUrl: "https://example.com",
|
||||||
|
models: [
|
||||||
|
{
|
||||||
|
id: "mock-1",
|
||||||
|
name: "Mock 1",
|
||||||
|
reasoning: false,
|
||||||
|
input: ["text"],
|
||||||
|
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||||
|
contextWindow: 16_000,
|
||||||
|
maxTokens: 2048,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}) satisfies OpenClawConfig;
|
||||||
|
|
||||||
const writeAuthStore = async (
|
const writeAuthStore = async (
|
||||||
agentDir: string,
|
agentDir: string,
|
||||||
opts?: {
|
opts?: {
|
||||||
@@ -516,6 +555,42 @@ describe("runEmbeddedPiAgent auth profile rotation", () => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("treats agent-level fallbacks as configured when defaults have none", async () => {
|
||||||
|
await withTimedAgentWorkspace(async ({ agentDir, workspaceDir, now }) => {
|
||||||
|
await writeAuthStore(agentDir, {
|
||||||
|
usageStats: {
|
||||||
|
"openai:p1": { lastUsed: 1, cooldownUntil: now + 60 * 60 * 1000 },
|
||||||
|
"openai:p2": { lastUsed: 2, cooldownUntil: now + 60 * 60 * 1000 },
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
await expect(
|
||||||
|
runEmbeddedPiAgent({
|
||||||
|
sessionId: "session:test",
|
||||||
|
sessionKey: "agent:support:cooldown-failover",
|
||||||
|
sessionFile: path.join(workspaceDir, "session.jsonl"),
|
||||||
|
workspaceDir,
|
||||||
|
agentDir,
|
||||||
|
config: makeAgentOverrideOnlyFallbackConfig("support"),
|
||||||
|
prompt: "hello",
|
||||||
|
provider: "openai",
|
||||||
|
model: "mock-1",
|
||||||
|
authProfileIdSource: "auto",
|
||||||
|
timeoutMs: 5_000,
|
||||||
|
runId: "run:agent-override-fallback",
|
||||||
|
agentId: "support",
|
||||||
|
}),
|
||||||
|
).rejects.toMatchObject({
|
||||||
|
name: "FailoverError",
|
||||||
|
reason: "rate_limit",
|
||||||
|
provider: "openai",
|
||||||
|
model: "mock-1",
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(runEmbeddedAttemptMock).not.toHaveBeenCalled();
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
it("fails over with disabled reason when all profiles are unavailable", async () => {
|
it("fails over with disabled reason when all profiles are unavailable", async () => {
|
||||||
await withTimedAgentWorkspace(async ({ agentDir, workspaceDir, now }) => {
|
await withTimedAgentWorkspace(async ({ agentDir, workspaceDir, now }) => {
|
||||||
await writeAuthStore(agentDir, {
|
await writeAuthStore(agentDir, {
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ import type { PluginHookBeforeAgentStartResult } from "../../plugins/types.js";
|
|||||||
import { enqueueCommandInLane } from "../../process/command-queue.js";
|
import { enqueueCommandInLane } from "../../process/command-queue.js";
|
||||||
import { isMarkdownCapableMessageChannel } from "../../utils/message-channel.js";
|
import { isMarkdownCapableMessageChannel } from "../../utils/message-channel.js";
|
||||||
import { resolveOpenClawAgentDir } from "../agent-paths.js";
|
import { resolveOpenClawAgentDir } from "../agent-paths.js";
|
||||||
|
import { resolveAgentModelFallbacksOverride } from "../agent-scope.js";
|
||||||
import {
|
import {
|
||||||
isProfileInCooldown,
|
isProfileInCooldown,
|
||||||
markAuthProfileFailure,
|
markAuthProfileFailure,
|
||||||
@@ -231,8 +232,15 @@ export async function runEmbeddedPiAgent(
|
|||||||
let provider = (params.provider ?? DEFAULT_PROVIDER).trim() || DEFAULT_PROVIDER;
|
let provider = (params.provider ?? DEFAULT_PROVIDER).trim() || DEFAULT_PROVIDER;
|
||||||
let modelId = (params.model ?? DEFAULT_MODEL).trim() || DEFAULT_MODEL;
|
let modelId = (params.model ?? DEFAULT_MODEL).trim() || DEFAULT_MODEL;
|
||||||
const agentDir = params.agentDir ?? resolveOpenClawAgentDir();
|
const agentDir = params.agentDir ?? resolveOpenClawAgentDir();
|
||||||
|
const agentFallbacksOverride =
|
||||||
|
params.config && params.agentId
|
||||||
|
? resolveAgentModelFallbacksOverride(params.config, params.agentId)
|
||||||
|
: undefined;
|
||||||
const fallbackConfigured =
|
const fallbackConfigured =
|
||||||
resolveAgentModelFallbackValues(params.config?.agents?.defaults?.model).length > 0;
|
(
|
||||||
|
agentFallbacksOverride ??
|
||||||
|
resolveAgentModelFallbackValues(params.config?.agents?.defaults?.model)
|
||||||
|
).length > 0;
|
||||||
await ensureOpenClawModelsJson(params.config, agentDir);
|
await ensureOpenClawModelsJson(params.config, agentDir);
|
||||||
|
|
||||||
// Run before_model_resolve hooks early so plugins can override the
|
// Run before_model_resolve hooks early so plugins can override the
|
||||||
|
|||||||
@@ -61,10 +61,10 @@ describe("agent-runner-utils", () => {
|
|||||||
|
|
||||||
const resolved = resolveModelFallbackOptions(run);
|
const resolved = resolveModelFallbackOptions(run);
|
||||||
|
|
||||||
expect(hoisted.resolveAgentIdFromSessionKeyMock).toHaveBeenCalledWith(run.sessionKey);
|
expect(hoisted.resolveAgentIdFromSessionKeyMock).not.toHaveBeenCalled();
|
||||||
expect(hoisted.resolveAgentModelFallbacksOverrideMock).toHaveBeenCalledWith(
|
expect(hoisted.resolveAgentModelFallbacksOverrideMock).toHaveBeenCalledWith(
|
||||||
run.config,
|
run.config,
|
||||||
"agent-id",
|
run.agentId,
|
||||||
);
|
);
|
||||||
expect(resolved).toEqual({
|
expect(resolved).toEqual({
|
||||||
cfg: run.config,
|
cfg: run.config,
|
||||||
@@ -75,6 +75,21 @@ describe("agent-runner-utils", () => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("falls back to sessionKey agent id when run.agentId is missing", () => {
|
||||||
|
hoisted.resolveAgentIdFromSessionKeyMock.mockReturnValue("agent-from-session-key");
|
||||||
|
hoisted.resolveAgentModelFallbacksOverrideMock.mockReturnValue(["fallback-model"]);
|
||||||
|
const run = makeRun({ agentId: undefined });
|
||||||
|
|
||||||
|
const resolved = resolveModelFallbackOptions(run);
|
||||||
|
|
||||||
|
expect(hoisted.resolveAgentIdFromSessionKeyMock).toHaveBeenCalledWith(run.sessionKey);
|
||||||
|
expect(hoisted.resolveAgentModelFallbacksOverrideMock).toHaveBeenCalledWith(
|
||||||
|
run.config,
|
||||||
|
"agent-from-session-key",
|
||||||
|
);
|
||||||
|
expect(resolved.fallbacksOverride).toEqual(["fallback-model"]);
|
||||||
|
});
|
||||||
|
|
||||||
it("builds embedded run base params with auth profile and run metadata", () => {
|
it("builds embedded run base params with auth profile and run metadata", () => {
|
||||||
const run = makeRun({ enforceFinalTag: true });
|
const run = makeRun({ enforceFinalTag: true });
|
||||||
const authProfile = resolveProviderScopedAuthProfile({
|
const authProfile = resolveProviderScopedAuthProfile({
|
||||||
|
|||||||
@@ -147,15 +147,13 @@ export const resolveEnforceFinalTag = (run: FollowupRun["run"], provider: string
|
|||||||
Boolean(run.enforceFinalTag || isReasoningTagProvider(provider));
|
Boolean(run.enforceFinalTag || isReasoningTagProvider(provider));
|
||||||
|
|
||||||
export function resolveModelFallbackOptions(run: FollowupRun["run"]) {
|
export function resolveModelFallbackOptions(run: FollowupRun["run"]) {
|
||||||
|
const fallbackAgentId = run.agentId ?? resolveAgentIdFromSessionKey(run.sessionKey);
|
||||||
return {
|
return {
|
||||||
cfg: run.config,
|
cfg: run.config,
|
||||||
provider: run.provider,
|
provider: run.provider,
|
||||||
model: run.model,
|
model: run.model,
|
||||||
agentDir: run.agentDir,
|
agentDir: run.agentDir,
|
||||||
fallbacksOverride: resolveAgentModelFallbacksOverride(
|
fallbacksOverride: resolveAgentModelFallbacksOverride(run.config, fallbackAgentId),
|
||||||
run.config,
|
|
||||||
resolveAgentIdFromSessionKey(run.sessionKey),
|
|
||||||
),
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -135,7 +135,7 @@ export function createFollowupRunner(params: {
|
|||||||
agentDir: queued.run.agentDir,
|
agentDir: queued.run.agentDir,
|
||||||
fallbacksOverride: resolveAgentModelFallbacksOverride(
|
fallbacksOverride: resolveAgentModelFallbacksOverride(
|
||||||
queued.run.config,
|
queued.run.config,
|
||||||
resolveAgentIdFromSessionKey(queued.run.sessionKey),
|
queued.run.agentId ?? resolveAgentIdFromSessionKey(queued.run.sessionKey),
|
||||||
),
|
),
|
||||||
run: (provider, model) => {
|
run: (provider, model) => {
|
||||||
const authProfile = resolveRunAuthProfile(queued.run, provider);
|
const authProfile = resolveRunAuthProfile(queued.run, provider);
|
||||||
|
|||||||
Reference in New Issue
Block a user