mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-08 04:01:23 +00:00
(fix): handle Cloudflare 521 and transient 5xx errors gracefully (#13500)
Merged via /review-pr -> /prepare-pr -> /merge-pr.
Prepared head SHA: a8347e95c5
Co-authored-by: rodrigouroz <384037+rodrigouroz@users.noreply.github.com>
Co-authored-by: Takhoffman <781889+Takhoffman@users.noreply.github.com>
Reviewed-by: @Takhoffman
This commit is contained in:
@@ -14,6 +14,7 @@ import {
|
||||
isCompactionFailureError,
|
||||
isContextOverflowError,
|
||||
isLikelyContextOverflowError,
|
||||
isTransientHttpError,
|
||||
sanitizeUserFacingText,
|
||||
} from "../../agents/pi-embedded-helpers.js";
|
||||
import { runEmbeddedPiAgent } from "../../agents/pi-embedded.js";
|
||||
@@ -79,6 +80,7 @@ export async function runAgentTurnWithFallback(params: {
|
||||
storePath?: string;
|
||||
resolvedVerboseLevel: VerboseLevel;
|
||||
}): Promise<AgentRunLoopResult> {
|
||||
const TRANSIENT_HTTP_RETRY_DELAY_MS = 2_500;
|
||||
let didLogHeartbeatStrip = false;
|
||||
let autoCompactionCompleted = false;
|
||||
// Track payloads sent directly (not via pipeline) during tool flush to avoid duplicates.
|
||||
@@ -97,6 +99,7 @@ export async function runAgentTurnWithFallback(params: {
|
||||
let fallbackProvider = params.followupRun.run.provider;
|
||||
let fallbackModel = params.followupRun.run.model;
|
||||
let didResetAfterCompactionFailure = false;
|
||||
let didRetryTransientHttpError = false;
|
||||
|
||||
while (true) {
|
||||
try {
|
||||
@@ -506,6 +509,7 @@ export async function runAgentTurnWithFallback(params: {
|
||||
const isCompactionFailure = isCompactionFailureError(message);
|
||||
const isSessionCorruption = /function call turn comes immediately after/i.test(message);
|
||||
const isRoleOrderingError = /incorrect role information|roles must alternate/i.test(message);
|
||||
const isTransientHttp = isTransientHttpError(message);
|
||||
|
||||
if (
|
||||
isCompactionFailure &&
|
||||
@@ -577,8 +581,26 @@ export async function runAgentTurnWithFallback(params: {
|
||||
};
|
||||
}
|
||||
|
||||
if (isTransientHttp && !didRetryTransientHttpError) {
|
||||
didRetryTransientHttpError = true;
|
||||
// Retry the full runWithModelFallback() cycle — transient errors
|
||||
// (502/521/etc.) typically affect the whole provider, so falling
|
||||
// back to an alternate model first would not help. Instead we wait
|
||||
// and retry the complete primary→fallback chain.
|
||||
defaultRuntime.error(
|
||||
`Transient HTTP provider error before reply (${message}). Retrying once in ${TRANSIENT_HTTP_RETRY_DELAY_MS}ms.`,
|
||||
);
|
||||
await new Promise<void>((resolve) => {
|
||||
setTimeout(resolve, TRANSIENT_HTTP_RETRY_DELAY_MS);
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
||||
defaultRuntime.error(`Embedded agent failed before reply: ${message}`);
|
||||
const trimmedMessage = message.replace(/\.\s*$/, "");
|
||||
const safeMessage = isTransientHttp
|
||||
? sanitizeUserFacingText(message, { errorContext: true })
|
||||
: message;
|
||||
const trimmedMessage = safeMessage.replace(/\.\s*$/, "");
|
||||
const fallbackText = isContextOverflow
|
||||
? "⚠️ Context overflow — prompt too large for this model. Try a shorter message or a larger-context model."
|
||||
: isRoleOrderingError
|
||||
|
||||
Reference in New Issue
Block a user