mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-11 23:13:43 +00:00
fix(infra): treat nested network request errors as non-fatal
This commit is contained in:
committed by
Peter Steinberger
parent
445c7a65e6
commit
daaad03593
@@ -35,6 +35,25 @@ const TRANSIENT_NETWORK_CODES = new Set([
|
||||
"UND_ERR_BODY_TIMEOUT",
|
||||
]);
|
||||
|
||||
/**
 * Error `name` values that classify an error as a transient network failure:
 * request aborts plus the connect / headers / body / generic timeout errors.
 *
 * Lookups are exact and case-sensitive (`Set.prototype.has`).
 */
const TRANSIENT_NETWORK_ERROR_NAMES: ReadonlySet<string> = new Set([
  "AbortError",
  "ConnectTimeoutError",
  "HeadersTimeoutError",
  "BodyTimeoutError",
  "TimeoutError",
]);
|
||||
|
||||
/**
 * Matches a transient network error code that appears as a whole word anywhere
 * inside an error message, case-insensitively.
 *
 * The pattern deliberately has no `g`/`y` flag, so `.test()` keeps no
 * `lastIndex` state between calls and the shared instance is safe to reuse.
 */
const TRANSIENT_NETWORK_MESSAGE_CODE_RE =
  /\b(ECONNRESET|ECONNREFUSED|ENOTFOUND|ETIMEDOUT|ESOCKETTIMEDOUT|ECONNABORTED|EPIPE|EHOSTUNREACH|ENETUNREACH|EAI_AGAIN|UND_ERR_CONNECT_TIMEOUT|UND_ERR_DNS_RESOLVE_FAILED|UND_ERR_CONNECT|UND_ERR_SOCKET|UND_ERR_HEADERS_TIMEOUT|UND_ERR_BODY_TIMEOUT)\b/i;
|
||||
|
||||
/**
 * Substrings whose presence in an error message marks it as a transient
 * network failure.
 *
 * Entries must stay lowercase: they are compared with `includes` against a
 * message that has already been lowercased.
 */
const TRANSIENT_NETWORK_MESSAGE_SNIPPETS: readonly string[] = [
  "getaddrinfo",
  "socket hang up",
  "network error",
  "network is unreachable",
  "temporary failure in name resolution",
];
|
||||
|
||||
function getErrorCause(err: unknown): unknown {
|
||||
if (!err || typeof err !== "object") {
|
||||
return undefined;
|
||||
@@ -42,6 +61,32 @@ function getErrorCause(err: unknown): unknown {
|
||||
return (err as { cause?: unknown }).cause;
|
||||
}
|
||||
|
||||
/**
 * Reads the `name` property of an arbitrary thrown value.
 *
 * @param err - Any value; it does not have to be an `Error` instance.
 * @returns The value's `name` when `err` is a non-null object whose `name` is
 *   a string, otherwise the empty string.
 */
function getErrorName(err: unknown): string {
  if (typeof err !== "object" || err === null) {
    return "";
  }
  const { name } = err as { name?: unknown };
  return typeof name === "string" ? name : "";
}
|
||||
|
||||
/**
 * Returns a normalized error identifier for `err`, preferring its code and
 * falling back to its `errno` property.
 *
 * @param err - Any thrown value.
 * @returns The trimmed, upper-cased code reported by `extractErrorCode`; if
 *   that is missing or blank, the trimmed, upper-cased string `errno`, or a
 *   finite numeric `errno` rendered as a decimal string; otherwise `undefined`.
 */
function extractErrorCodeOrErrno(err: unknown): string | undefined {
  // Trim before the truthiness test so a whitespace-only code does not
  // short-circuit the errno fallback with an empty string.
  const code = extractErrorCode(err)?.trim();
  if (code) {
    return code.toUpperCase();
  }

  if (typeof err !== "object" || err === null) {
    return undefined;
  }

  const { errno } = err as { errno?: unknown };
  if (typeof errno === "string") {
    const normalized = errno.trim();
    if (normalized) {
      return normalized.toUpperCase();
    }
  }
  if (typeof errno === "number" && Number.isFinite(errno)) {
    return String(errno);
  }
  return undefined;
}
|
||||
|
||||
function extractErrorCodeWithCause(err: unknown): string | undefined {
|
||||
const direct = extractErrorCode(err);
|
||||
if (direct) {
|
||||
@@ -50,6 +95,44 @@ function extractErrorCodeWithCause(err: unknown): string | undefined {
|
||||
return extractErrorCode(getErrorCause(err));
|
||||
}
|
||||
|
||||
/**
 * Flattens a thrown value and everything nested inside it into a list, in
 * breadth-first order starting with `err` itself.
 *
 * For every object visited, the values of its `cause`, `reason`, `original`,
 * `error` and `data` properties are followed (in that order), then each
 * element of `errors` when that property is an array. `null`/`undefined`
 * values are dropped, and each distinct value is visited at most once, so
 * cyclic structures terminate.
 *
 * @param err - Any thrown value.
 * @returns Every distinct non-nullish value reached; primitives are included
 *   in the result but not descended into.
 */
function collectErrorCandidates(err: unknown): unknown[] {
  type NestedErrorCarrier = {
    cause?: unknown;
    reason?: unknown;
    original?: unknown;
    error?: unknown;
    data?: unknown;
    errors?: unknown;
  };

  const queue: unknown[] = [err];
  const seen = new Set<unknown>();
  const candidates: unknown[] = [];

  const enqueue = (nested: unknown): void => {
    if (nested != null && !seen.has(nested)) {
      queue.push(nested);
    }
  };

  // Advance a cursor instead of calling queue.shift(): shift() re-indexes the
  // whole array on every dequeue, which makes the walk quadratic.
  for (let cursor = 0; cursor < queue.length; cursor += 1) {
    const current = queue[cursor];
    // The same value can be queued more than once before it is visited, so
    // the visited check has to happen here as well as in enqueue().
    if (current == null || seen.has(current)) {
      continue;
    }
    seen.add(current);
    candidates.push(current);

    if (typeof current !== "object") {
      continue;
    }

    const carrier = current as NestedErrorCarrier;
    const singles: unknown[] = [
      carrier.cause,
      carrier.reason,
      carrier.original,
      carrier.error,
      carrier.data,
    ];
    for (const nested of singles) {
      enqueue(nested);
    }

    const errors = carrier.errors;
    if (Array.isArray(errors)) {
      for (const nested of errors) {
        enqueue(nested);
      }
    }
  }

  return candidates;
}
|
||||
|
||||
/**
|
||||
* Checks if an error is an AbortError.
|
||||
* These are typically intentional cancellations (e.g., during shutdown) and shouldn't crash.
|
||||
@@ -88,28 +171,38 @@ export function isTransientNetworkError(err: unknown): boolean {
|
||||
if (!err) {
|
||||
return false;
|
||||
}
|
||||
for (const candidate of collectErrorCandidates(err)) {
|
||||
const code = extractErrorCodeOrErrno(candidate);
|
||||
if (code && TRANSIENT_NETWORK_CODES.has(code)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
const code = extractErrorCodeWithCause(err);
|
||||
if (code && TRANSIENT_NETWORK_CODES.has(code)) {
|
||||
return true;
|
||||
}
|
||||
const name = getErrorName(candidate);
|
||||
if (name && TRANSIENT_NETWORK_ERROR_NAMES.has(name)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// "fetch failed" TypeError from undici (Node's native fetch).
|
||||
// Treat as transient regardless of nested cause code because causes vary
|
||||
// across runtimes and can be unclassified even for real network faults.
|
||||
if (err instanceof TypeError && err.message === "fetch failed") {
|
||||
return true;
|
||||
}
|
||||
if (candidate instanceof TypeError && candidate.message === "fetch failed") {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Check the cause chain recursively
|
||||
const cause = getErrorCause(err);
|
||||
if (cause && cause !== err) {
|
||||
return isTransientNetworkError(cause);
|
||||
}
|
||||
|
||||
// AggregateError may wrap multiple causes
|
||||
if (err instanceof AggregateError && err.errors?.length) {
|
||||
return err.errors.some((e) => isTransientNetworkError(e));
|
||||
if (!candidate || typeof candidate !== "object") {
|
||||
continue;
|
||||
}
|
||||
const rawMessage = (candidate as { message?: unknown }).message;
|
||||
const message = typeof rawMessage === "string" ? rawMessage.toLowerCase().trim() : "";
|
||||
if (!message) {
|
||||
continue;
|
||||
}
|
||||
if (TRANSIENT_NETWORK_MESSAGE_CODE_RE.test(message)) {
|
||||
return true;
|
||||
}
|
||||
if (message === "fetch failed") {
|
||||
return true;
|
||||
}
|
||||
if (TRANSIENT_NETWORK_MESSAGE_SNIPPETS.some((snippet) => message.includes(snippet))) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
|
||||
Reference in New Issue
Block a user