mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-10 13:44:58 +00:00
fix(agents): handle overloaded failover separately (#38301)
* fix(agents): skip auth-profile failure on overload * fix(agents): note overload auth-profile fallback fix * fix(agents): classify overloaded failures separately * fix(agents): back off before overload failover * fix(agents): tighten overload probe and backoff state * fix(agents): persist overloaded cooldown across runs * fix(agents): tighten overloaded status handling * test(agents): add overload regression coverage * fix(agents): restore runner imports after rebase * test(agents): add overload fallback integration coverage * fix(agents): harden overloaded failover abort handling * test(agents): tighten overload classifier coverage * test(agents): cover all-overloaded fallback exhaustion * fix(cron): retry overloaded fallback summaries * fix(cron): treat HTTP 529 as overloaded retry
This commit is contained in:
@@ -5,6 +5,7 @@ import {
|
||||
ensureContextEnginesInitialized,
|
||||
resolveContextEngine,
|
||||
} from "../../context-engine/index.js";
|
||||
import { computeBackoff, sleepWithAbort, type BackoffPolicy } from "../../infra/backoff.js";
|
||||
import { generateSecureToken } from "../../infra/secure-random.js";
|
||||
import { getGlobalHookRunner } from "../../plugins/hook-runner-global.js";
|
||||
import type { PluginHookBeforeAgentStartResult } from "../../plugins/types.js";
|
||||
@@ -14,6 +15,7 @@ import { resolveOpenClawAgentDir } from "../agent-paths.js";
|
||||
import { hasConfiguredModelFallbacks } from "../agent-scope.js";
|
||||
import {
|
||||
isProfileInCooldown,
|
||||
type AuthProfileFailureReason,
|
||||
markAuthProfileFailure,
|
||||
markAuthProfileGood,
|
||||
markAuthProfileUsed,
|
||||
@@ -79,6 +81,14 @@ type CopilotTokenState = {
|
||||
const COPILOT_REFRESH_MARGIN_MS = 5 * 60 * 1000;
|
||||
const COPILOT_REFRESH_RETRY_MS = 60 * 1000;
|
||||
const COPILOT_REFRESH_MIN_DELAY_MS = 5 * 1000;
|
||||
// Keep overload pacing noticeable enough to avoid tight retry bursts, but short
|
||||
// enough that fallback still feels responsive within a single turn.
|
||||
const OVERLOAD_FAILOVER_BACKOFF_POLICY: BackoffPolicy = {
|
||||
initialMs: 250,
|
||||
maxMs: 1_500,
|
||||
factor: 2,
|
||||
jitter: 0.2,
|
||||
};
|
||||
|
||||
// Avoid Anthropic's refusal test token poisoning session transcripts.
|
||||
const ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL = "ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL";
|
||||
@@ -649,21 +659,21 @@ export async function runEmbeddedPiAgent(
|
||||
profileIds: autoProfileCandidates,
|
||||
}) ?? "rate_limit")
|
||||
: null;
|
||||
const allowRateLimitCooldownProbe =
|
||||
params.allowRateLimitCooldownProbe === true &&
|
||||
const allowTransientCooldownProbe =
|
||||
params.allowTransientCooldownProbe === true &&
|
||||
allAutoProfilesInCooldown &&
|
||||
unavailableReason === "rate_limit";
|
||||
let didRateLimitCooldownProbe = false;
|
||||
(unavailableReason === "rate_limit" || unavailableReason === "overloaded");
|
||||
let didTransientCooldownProbe = false;
|
||||
|
||||
while (profileIndex < profileCandidates.length) {
|
||||
const candidate = profileCandidates[profileIndex];
|
||||
const inCooldown =
|
||||
candidate && candidate !== lockedProfileId && isProfileInCooldown(authStore, candidate);
|
||||
if (inCooldown) {
|
||||
if (allowRateLimitCooldownProbe && !didRateLimitCooldownProbe) {
|
||||
didRateLimitCooldownProbe = true;
|
||||
if (allowTransientCooldownProbe && !didTransientCooldownProbe) {
|
||||
didTransientCooldownProbe = true;
|
||||
log.warn(
|
||||
`probing cooldowned auth profile for ${provider}/${modelId} due to rate_limit unavailability`,
|
||||
`probing cooldowned auth profile for ${provider}/${modelId} due to ${unavailableReason ?? "transient"} unavailability`,
|
||||
);
|
||||
} else {
|
||||
profileIndex += 1;
|
||||
@@ -722,9 +732,10 @@ export async function runEmbeddedPiAgent(
|
||||
let lastRunPromptUsage: ReturnType<typeof normalizeUsage> | undefined;
|
||||
let autoCompactionCount = 0;
|
||||
let runLoopIterations = 0;
|
||||
let overloadFailoverAttempts = 0;
|
||||
const maybeMarkAuthProfileFailure = async (failure: {
|
||||
profileId?: string;
|
||||
reason?: Parameters<typeof markAuthProfileFailure>[0]["reason"] | null;
|
||||
reason?: AuthProfileFailureReason | null;
|
||||
config?: RunEmbeddedPiAgentParams["config"];
|
||||
agentDir?: RunEmbeddedPiAgentParams["agentDir"];
|
||||
}) => {
|
||||
@@ -740,6 +751,36 @@ export async function runEmbeddedPiAgent(
|
||||
agentDir,
|
||||
});
|
||||
};
|
||||
const resolveAuthProfileFailureReason = (
|
||||
failoverReason: FailoverReason | null,
|
||||
): AuthProfileFailureReason | null => {
|
||||
// Timeouts are transport/model-path failures, not auth health signals,
|
||||
// so they should not persist auth-profile failure state.
|
||||
if (!failoverReason || failoverReason === "timeout") {
|
||||
return null;
|
||||
}
|
||||
return failoverReason;
|
||||
};
|
||||
const maybeBackoffBeforeOverloadFailover = async (reason: FailoverReason | null) => {
|
||||
if (reason !== "overloaded") {
|
||||
return;
|
||||
}
|
||||
overloadFailoverAttempts += 1;
|
||||
const delayMs = computeBackoff(OVERLOAD_FAILOVER_BACKOFF_POLICY, overloadFailoverAttempts);
|
||||
log.warn(
|
||||
`overload backoff before failover for ${provider}/${modelId}: attempt=${overloadFailoverAttempts} delayMs=${delayMs}`,
|
||||
);
|
||||
try {
|
||||
await sleepWithAbort(delayMs, params.abortSignal);
|
||||
} catch (err) {
|
||||
if (params.abortSignal?.aborted) {
|
||||
const abortErr = new Error("Operation aborted", { cause: err });
|
||||
abortErr.name = "AbortError";
|
||||
throw abortErr;
|
||||
}
|
||||
throw err;
|
||||
}
|
||||
};
|
||||
// Resolve the context engine once and reuse across retries to avoid
|
||||
// repeated initialization/connection overhead per attempt.
|
||||
ensureContextEnginesInitialized();
|
||||
@@ -1165,15 +1206,19 @@ export async function runEmbeddedPiAgent(
|
||||
};
|
||||
}
|
||||
const promptFailoverReason = classifyFailoverReason(errorText);
|
||||
const promptProfileFailureReason =
|
||||
resolveAuthProfileFailureReason(promptFailoverReason);
|
||||
await maybeMarkAuthProfileFailure({
|
||||
profileId: lastProfileId,
|
||||
reason: promptFailoverReason,
|
||||
reason: promptProfileFailureReason,
|
||||
});
|
||||
const promptFailoverFailure = isFailoverErrorMessage(errorText);
|
||||
if (
|
||||
isFailoverErrorMessage(errorText) &&
|
||||
promptFailoverFailure &&
|
||||
promptFailoverReason !== "timeout" &&
|
||||
(await advanceAuthProfile())
|
||||
) {
|
||||
await maybeBackoffBeforeOverloadFailover(promptFailoverReason);
|
||||
continue;
|
||||
}
|
||||
const fallbackThinking = pickFallbackThinkingLevel({
|
||||
@@ -1187,9 +1232,11 @@ export async function runEmbeddedPiAgent(
|
||||
thinkLevel = fallbackThinking;
|
||||
continue;
|
||||
}
|
||||
// FIX: Throw FailoverError for prompt errors when fallbacks configured
|
||||
// This enables model fallback for quota/rate limit errors during prompt submission
|
||||
if (fallbackConfigured && isFailoverErrorMessage(errorText)) {
|
||||
// Throw FailoverError for prompt-side failover reasons when fallbacks
|
||||
// are configured so outer model fallback can continue on overload,
|
||||
// rate-limit, auth, or billing failures.
|
||||
if (fallbackConfigured && promptFailoverFailure) {
|
||||
await maybeBackoffBeforeOverloadFailover(promptFailoverReason);
|
||||
throw new FailoverError(errorText, {
|
||||
reason: promptFailoverReason ?? "unknown",
|
||||
provider,
|
||||
@@ -1218,6 +1265,8 @@ export async function runEmbeddedPiAgent(
|
||||
const billingFailure = isBillingAssistantError(lastAssistant);
|
||||
const failoverFailure = isFailoverAssistantError(lastAssistant);
|
||||
const assistantFailoverReason = classifyFailoverReason(lastAssistant?.errorMessage ?? "");
|
||||
const assistantProfileFailureReason =
|
||||
resolveAuthProfileFailureReason(assistantFailoverReason);
|
||||
const cloudCodeAssistFormatError = attempt.cloudCodeAssistFormatError;
|
||||
const imageDimensionError = parseImageDimensionError(lastAssistant?.errorMessage ?? "");
|
||||
|
||||
@@ -1257,10 +1306,7 @@ export async function runEmbeddedPiAgent(
|
||||
|
||||
if (shouldRotate) {
|
||||
if (lastProfileId) {
|
||||
const reason =
|
||||
timedOut || assistantFailoverReason === "timeout"
|
||||
? "timeout"
|
||||
: (assistantFailoverReason ?? "unknown");
|
||||
const reason = timedOut ? "timeout" : assistantProfileFailureReason;
|
||||
// Skip cooldown for timeouts: a timeout is model/network-specific,
|
||||
// not an auth issue. Marking the profile would poison fallback models
|
||||
// on the same provider (e.g. gpt-5.3 timeout blocks gpt-5.2).
|
||||
@@ -1280,10 +1326,12 @@ export async function runEmbeddedPiAgent(
|
||||
|
||||
const rotated = await advanceAuthProfile();
|
||||
if (rotated) {
|
||||
await maybeBackoffBeforeOverloadFailover(assistantFailoverReason);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (fallbackConfigured) {
|
||||
await maybeBackoffBeforeOverloadFailover(assistantFailoverReason);
|
||||
// Prefer formatted error message (user-friendly) over raw errorMessage
|
||||
const message =
|
||||
(lastAssistant
|
||||
|
||||
@@ -115,10 +115,10 @@ export type RunEmbeddedPiAgentParams = {
|
||||
enforceFinalTag?: boolean;
|
||||
/**
|
||||
* Allow a single run attempt even when all auth profiles are in cooldown,
|
||||
* but only for inferred `rate_limit` cooldowns.
|
||||
* but only for inferred transient cooldowns like `rate_limit` or `overloaded`.
|
||||
*
|
||||
* This is used by model fallback when trying sibling models on providers
|
||||
* where rate limits are often model-scoped.
|
||||
* where transient service pressure is often model-scoped.
|
||||
*/
|
||||
allowRateLimitCooldownProbe?: boolean;
|
||||
allowTransientCooldownProbe?: boolean;
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user