fix(agents): handle overloaded failover separately (#38301)

* fix(agents): skip auth-profile failure on overload

* fix(agents): note overload auth-profile fallback fix

* fix(agents): classify overloaded failures separately

* fix(agents): back off before overload failover

* fix(agents): tighten overload probe and backoff state

* fix(agents): persist overloaded cooldown across runs

* fix(agents): tighten overloaded status handling

* test(agents): add overload regression coverage

* fix(agents): restore runner imports after rebase

* test(agents): add overload fallback integration coverage

* fix(agents): harden overloaded failover abort handling

* test(agents): tighten overload classifier coverage

* test(agents): cover all-overloaded fallback exhaustion

* fix(cron): retry overloaded fallback summaries

* fix(cron): treat HTTP 529 as overloaded retry
This commit is contained in:
Altay
2026-03-07 01:42:11 +03:00
committed by GitHub
parent 110ca23bab
commit 6e962d8b9e
36 changed files with 1036 additions and 84 deletions

View File

@@ -5,6 +5,7 @@ import {
ensureContextEnginesInitialized,
resolveContextEngine,
} from "../../context-engine/index.js";
import { computeBackoff, sleepWithAbort, type BackoffPolicy } from "../../infra/backoff.js";
import { generateSecureToken } from "../../infra/secure-random.js";
import { getGlobalHookRunner } from "../../plugins/hook-runner-global.js";
import type { PluginHookBeforeAgentStartResult } from "../../plugins/types.js";
@@ -14,6 +15,7 @@ import { resolveOpenClawAgentDir } from "../agent-paths.js";
import { hasConfiguredModelFallbacks } from "../agent-scope.js";
import {
isProfileInCooldown,
type AuthProfileFailureReason,
markAuthProfileFailure,
markAuthProfileGood,
markAuthProfileUsed,
@@ -79,6 +81,14 @@ type CopilotTokenState = {
const COPILOT_REFRESH_MARGIN_MS = 5 * 60 * 1000;
const COPILOT_REFRESH_RETRY_MS = 60 * 1000;
const COPILOT_REFRESH_MIN_DELAY_MS = 5 * 1000;
/**
 * Pacing applied before failing over away from an overloaded provider/model.
 *
 * The delays are deliberately small: long enough to prevent tight retry
 * bursts against a struggling backend, short enough that fallback still
 * feels responsive within a single turn. The exact meaning of each field is
 * defined by `computeBackoff` in `infra/backoff`.
 */
const OVERLOAD_FAILOVER_BACKOFF_POLICY: BackoffPolicy = {
  initialMs: 250,
  maxMs: 1500,
  factor: 2,
  jitter: 0.2,
};
// Avoid Anthropic's refusal test token poisoning session transcripts.
const ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL = "ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL";
@@ -649,21 +659,21 @@ export async function runEmbeddedPiAgent(
profileIds: autoProfileCandidates,
}) ?? "rate_limit")
: null;
const allowRateLimitCooldownProbe =
params.allowRateLimitCooldownProbe === true &&
const allowTransientCooldownProbe =
params.allowTransientCooldownProbe === true &&
allAutoProfilesInCooldown &&
unavailableReason === "rate_limit";
let didRateLimitCooldownProbe = false;
(unavailableReason === "rate_limit" || unavailableReason === "overloaded");
let didTransientCooldownProbe = false;
while (profileIndex < profileCandidates.length) {
const candidate = profileCandidates[profileIndex];
const inCooldown =
candidate && candidate !== lockedProfileId && isProfileInCooldown(authStore, candidate);
if (inCooldown) {
if (allowRateLimitCooldownProbe && !didRateLimitCooldownProbe) {
didRateLimitCooldownProbe = true;
if (allowTransientCooldownProbe && !didTransientCooldownProbe) {
didTransientCooldownProbe = true;
log.warn(
`probing cooldowned auth profile for ${provider}/${modelId} due to rate_limit unavailability`,
`probing cooldowned auth profile for ${provider}/${modelId} due to ${unavailableReason ?? "transient"} unavailability`,
);
} else {
profileIndex += 1;
@@ -722,9 +732,10 @@ export async function runEmbeddedPiAgent(
let lastRunPromptUsage: ReturnType<typeof normalizeUsage> | undefined;
let autoCompactionCount = 0;
let runLoopIterations = 0;
let overloadFailoverAttempts = 0;
const maybeMarkAuthProfileFailure = async (failure: {
profileId?: string;
reason?: Parameters<typeof markAuthProfileFailure>[0]["reason"] | null;
reason?: AuthProfileFailureReason | null;
config?: RunEmbeddedPiAgentParams["config"];
agentDir?: RunEmbeddedPiAgentParams["agentDir"];
}) => {
@@ -740,6 +751,36 @@ export async function runEmbeddedPiAgent(
agentDir,
});
};
/**
 * Translate a classified failover reason into the reason (if any) that
 * should be recorded against the auth profile.
 *
 * Returns `null` when there is no failover reason, and also for `"timeout"`:
 * timeouts are transport/model-path failures rather than auth health
 * signals, so they must not persist auth-profile failure state. Every other
 * reason is passed through unchanged.
 */
const resolveAuthProfileFailureReason = (
  failoverReason: FailoverReason | null,
): AuthProfileFailureReason | null =>
  failoverReason && failoverReason !== "timeout" ? failoverReason : null;
/**
 * Pause briefly before failing over when the failure reason is `"overloaded"`.
 *
 * Any other reason (including `null`) is a no-op. For overloads, the shared
 * `overloadFailoverAttempts` counter is incremented and used as the attempt
 * number passed to `computeBackoff` with `OVERLOAD_FAILOVER_BACKOFF_POLICY`;
 * the resulting delay is logged and then awaited via `sleepWithAbort`.
 *
 * @throws An `Error` named `"AbortError"` (with the original error as
 *   `cause`) when the sleep rejects and `params.abortSignal` is aborted;
 *   any other sleep error is rethrown as-is.
 */
const maybeBackoffBeforeOverloadFailover = async (reason: FailoverReason | null) => {
  if (reason === "overloaded") {
    overloadFailoverAttempts += 1;
    const attempt = overloadFailoverAttempts;
    const delayMs = computeBackoff(OVERLOAD_FAILOVER_BACKOFF_POLICY, attempt);
    log.warn(
      `overload backoff before failover for ${provider}/${modelId}: attempt=${attempt} delayMs=${delayMs}`,
    );
    try {
      await sleepWithAbort(delayMs, params.abortSignal);
    } catch (err) {
      // Only errors seen while the caller's signal is aborted are normalized;
      // everything else propagates unchanged.
      if (!params.abortSignal?.aborted) {
        throw err;
      }
      const abortErr = new Error("Operation aborted", { cause: err });
      abortErr.name = "AbortError";
      throw abortErr;
    }
  }
};
// Resolve the context engine once and reuse across retries to avoid
// repeated initialization/connection overhead per attempt.
ensureContextEnginesInitialized();
@@ -1165,15 +1206,19 @@ export async function runEmbeddedPiAgent(
};
}
const promptFailoverReason = classifyFailoverReason(errorText);
const promptProfileFailureReason =
resolveAuthProfileFailureReason(promptFailoverReason);
await maybeMarkAuthProfileFailure({
profileId: lastProfileId,
reason: promptFailoverReason,
reason: promptProfileFailureReason,
});
const promptFailoverFailure = isFailoverErrorMessage(errorText);
if (
isFailoverErrorMessage(errorText) &&
promptFailoverFailure &&
promptFailoverReason !== "timeout" &&
(await advanceAuthProfile())
) {
await maybeBackoffBeforeOverloadFailover(promptFailoverReason);
continue;
}
const fallbackThinking = pickFallbackThinkingLevel({
@@ -1187,9 +1232,11 @@ export async function runEmbeddedPiAgent(
thinkLevel = fallbackThinking;
continue;
}
// FIX: Throw FailoverError for prompt errors when fallbacks configured
// This enables model fallback for quota/rate limit errors during prompt submission
if (fallbackConfigured && isFailoverErrorMessage(errorText)) {
// Throw FailoverError for prompt-side failover reasons when fallbacks
// are configured so outer model fallback can continue on overload,
// rate-limit, auth, or billing failures.
if (fallbackConfigured && promptFailoverFailure) {
await maybeBackoffBeforeOverloadFailover(promptFailoverReason);
throw new FailoverError(errorText, {
reason: promptFailoverReason ?? "unknown",
provider,
@@ -1218,6 +1265,8 @@ export async function runEmbeddedPiAgent(
const billingFailure = isBillingAssistantError(lastAssistant);
const failoverFailure = isFailoverAssistantError(lastAssistant);
const assistantFailoverReason = classifyFailoverReason(lastAssistant?.errorMessage ?? "");
const assistantProfileFailureReason =
resolveAuthProfileFailureReason(assistantFailoverReason);
const cloudCodeAssistFormatError = attempt.cloudCodeAssistFormatError;
const imageDimensionError = parseImageDimensionError(lastAssistant?.errorMessage ?? "");
@@ -1257,10 +1306,7 @@ export async function runEmbeddedPiAgent(
if (shouldRotate) {
if (lastProfileId) {
const reason =
timedOut || assistantFailoverReason === "timeout"
? "timeout"
: (assistantFailoverReason ?? "unknown");
const reason = timedOut ? "timeout" : assistantProfileFailureReason;
// Skip cooldown for timeouts: a timeout is model/network-specific,
// not an auth issue. Marking the profile would poison fallback models
// on the same provider (e.g. gpt-5.3 timeout blocks gpt-5.2).
@@ -1280,10 +1326,12 @@ export async function runEmbeddedPiAgent(
const rotated = await advanceAuthProfile();
if (rotated) {
await maybeBackoffBeforeOverloadFailover(assistantFailoverReason);
continue;
}
if (fallbackConfigured) {
await maybeBackoffBeforeOverloadFailover(assistantFailoverReason);
// Prefer formatted error message (user-friendly) over raw errorMessage
const message =
(lastAssistant

View File

@@ -115,10 +115,10 @@ export type RunEmbeddedPiAgentParams = {
enforceFinalTag?: boolean;
/**
* Allow a single run attempt even when all auth profiles are in cooldown,
* but only for inferred `rate_limit` cooldowns.
* but only for inferred transient cooldowns like `rate_limit` or `overloaded`.
*
* This is used by model fallback when trying sibling models on providers
* where rate limits are often model-scoped.
* where transient service pressure is often model-scoped.
*/
allowRateLimitCooldownProbe?: boolean;
allowTransientCooldownProbe?: boolean;
};