mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-11 07:04:32 +00:00
fix(agents): handle overloaded failover separately (#38301)
* fix(agents): skip auth-profile failure on overload
* fix(agents): note overload auth-profile fallback fix
* fix(agents): classify overloaded failures separately
* fix(agents): back off before overload failover
* fix(agents): tighten overload probe and backoff state
* fix(agents): persist overloaded cooldown across runs
* fix(agents): tighten overloaded status handling
* test(agents): add overload regression coverage
* fix(agents): restore runner imports after rebase
* test(agents): add overload fallback integration coverage
* fix(agents): harden overloaded failover abort handling
* test(agents): tighten overload classifier coverage
* test(agents): cover all-overloaded fallback exhaustion
* fix(cron): retry overloaded fallback summaries
* fix(cron): treat HTTP 529 as overloaded retry
This commit is contained in:
@@ -39,6 +39,7 @@ export type AuthProfileFailureReason =
|
||||
| "auth"
|
||||
| "auth_permanent"
|
||||
| "format"
|
||||
| "overloaded"
|
||||
| "rate_limit"
|
||||
| "billing"
|
||||
| "timeout"
|
||||
|
||||
@@ -177,6 +177,24 @@ describe("resolveProfilesUnavailableReason", () => {
|
||||
).toBe("auth");
|
||||
});
|
||||
|
||||
it("returns overloaded for active overloaded cooldown windows", () => {
|
||||
const now = Date.now();
|
||||
const store = makeStore({
|
||||
"anthropic:default": {
|
||||
cooldownUntil: now + 60_000,
|
||||
failureCounts: { overloaded: 2, rate_limit: 1 },
|
||||
},
|
||||
});
|
||||
|
||||
expect(
|
||||
resolveProfilesUnavailableReason({
|
||||
store,
|
||||
profileIds: ["anthropic:default"],
|
||||
now,
|
||||
}),
|
||||
).toBe("overloaded");
|
||||
});
|
||||
|
||||
it("falls back to rate_limit when active cooldown has no reason history", () => {
|
||||
const now = Date.now();
|
||||
const store = makeStore({
|
||||
|
||||
@@ -9,6 +9,7 @@ const FAILURE_REASON_PRIORITY: AuthProfileFailureReason[] = [
|
||||
"billing",
|
||||
"format",
|
||||
"model_not_found",
|
||||
"overloaded",
|
||||
"timeout",
|
||||
"rate_limit",
|
||||
"unknown",
|
||||
@@ -35,7 +36,7 @@ export function resolveProfileUnusableUntil(
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a profile is currently in cooldown (due to rate limiting or errors).
|
||||
* Check if a profile is currently in cooldown (due to rate limits, overload, or other transient failures).
|
||||
*/
|
||||
export function isProfileInCooldown(
|
||||
store: AuthProfileStore,
|
||||
@@ -508,7 +509,7 @@ export async function markAuthProfileFailure(params: {
|
||||
}
|
||||
|
||||
/**
|
||||
* Mark a profile as failed/rate-limited. Applies exponential backoff cooldown.
|
||||
* Mark a profile as transiently failed. Applies exponential backoff cooldown.
|
||||
* Cooldown times: 1min, 5min, 25min, max 1 hour.
|
||||
* Uses store lock to avoid overwriting concurrent usage updates.
|
||||
*/
|
||||
|
||||
Reference in New Issue
Block a user