mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-09 04:07:39 +00:00
fix: prevent act:evaluate hangs from getting browser tool stuck/killed (#13498)
* fix(browser): prevent permanent timeout after stuck evaluate Thread AbortSignal from client-fetch through dispatcher to Playwright operations. When a timeout fires, force-disconnect the Playwright CDP connection to unblock the serialized command queue, allowing the next call to reconnect transparently. Key changes: - client-fetch.ts: proper AbortController with signal propagation - pw-session.ts: new forceDisconnectPlaywrightForTarget() - pw-tools-core.interactions.ts: accept signal, align inner timeout to outer-500ms, inject in-browser Promise.race for async evaluates - routes/dispatcher.ts + types.ts: propagate signal through dispatch - server.ts + bridge-server.ts: Express middleware creates AbortSignal from request lifecycle - client-actions-core.ts: add timeoutMs to evaluate type Fixes #10994 * fix(browser): v2 - force-disconnect via Connection.close() instead of browser.close() When page.evaluate() is stuck on a hung CDP transport, browser.close() also hangs because it tries to send a close command through the same stuck pipe. v2 fix: forceDisconnectPlaywrightForTarget now directly calls Playwright's internal Connection.close() which locally rejects all pending callbacks and emits 'disconnected' without touching the network. This instantly unblocks all stuck Playwright operations. closePlaywrightBrowserConnection (clean shutdown) now also has a 3s timeout fallback that drops to forceDropConnection if browser.close() hangs. Fixes permanent browser timeout after stuck evaluate. * fix(browser): v3 - fire-and-forget browser.close() instead of Connection.close() v2's forceDropConnection called browser._connection.close() which corrupts the entire Playwright instance because Connection is shared across all objects (BrowserType, Browser, Page, etc.). This prevented reconnection with cascading 'connectOverCDP: Force-disconnected' errors. v3 fix: forceDisconnectPlaywrightForTarget now: 1. Nulls cached connection immediately 2. Fire-and-forgets browser.close() (doesn't await — it may hang) 3. Next connectBrowser() creates a fresh connectOverCDP WebSocket Each connectOverCDP creates an independent WebSocket to the CDP endpoint, so the new connection is unaffected by the old one's pending close. The old browser.close() eventually resolves when the in-browser evaluate timeout fires, or the old connection gets GC'd. * fix(browser): v4 - clear connecting state and remove stale disconnect listeners The reconnect was failing because: 1. forceDisconnectPlaywrightForTarget nulled cached but not connecting, so subsequent calls could await a stale promise 2. The old browser's 'disconnected' event handler raced with new connections, nulling the fresh cached reference Fix: null both cached and connecting, and removeAllListeners on the old browser before fire-and-forget close. * fix(browser): v5 - use raw CDP Runtime.terminateExecution to kill stuck evaluate When forceDisconnectPlaywrightForTarget fires, open a raw WebSocket to the stuck page's CDP endpoint and send Runtime.terminateExecution. This kills running JS without navigating away or crashing the page. Also clear connecting state and remove stale disconnect listeners. * fix(browser): abort cancels stuck evaluate * Browser: always cleanup evaluate abort listener * Chore: remove Playwright debug scripts * Docs: add CDP evaluate refactor plan * Browser: refactor Playwright force-disconnect * Browser: abort stops evaluate promptly * Node host: extract withTimeout helper * Browser: remove disconnected listener safely * Changelog: note act:evaluate hang fix --------- Co-authored-by: Bob <bob@dutifulbob.com>
This commit is contained in:
@@ -8,7 +8,8 @@ import type {
|
||||
} from "playwright-core";
|
||||
import { chromium } from "playwright-core";
|
||||
import { formatErrorMessage } from "../infra/errors.js";
|
||||
import { getHeadersWithAuth } from "./cdp.helpers.js";
|
||||
import { appendCdpPath, fetchJson, getHeadersWithAuth, withCdpSocket } from "./cdp.helpers.js";
|
||||
import { normalizeCdpWsUrl } from "./cdp.js";
|
||||
import { getChromeWebSocketUrl } from "./chrome.js";
|
||||
|
||||
export type BrowserConsoleMessage = {
|
||||
@@ -52,6 +53,7 @@ type TargetInfoResponse = {
|
||||
type ConnectedBrowser = {
|
||||
browser: Browser;
|
||||
cdpUrl: string;
|
||||
onDisconnected?: () => void;
|
||||
};
|
||||
|
||||
type PageState = {
|
||||
@@ -333,14 +335,15 @@ async function connectBrowser(cdpUrl: string): Promise<ConnectedBrowser> {
|
||||
const endpoint = wsUrl ?? normalized;
|
||||
const headers = getHeadersWithAuth(endpoint);
|
||||
const browser = await chromium.connectOverCDP(endpoint, { timeout, headers });
|
||||
const connected: ConnectedBrowser = { browser, cdpUrl: normalized };
|
||||
cached = connected;
|
||||
observeBrowser(browser);
|
||||
browser.on("disconnected", () => {
|
||||
const onDisconnected = () => {
|
||||
if (cached?.browser === browser) {
|
||||
cached = null;
|
||||
}
|
||||
});
|
||||
};
|
||||
const connected: ConnectedBrowser = { browser, cdpUrl: normalized, onDisconnected };
|
||||
cached = connected;
|
||||
browser.on("disconnected", onDisconnected);
|
||||
observeBrowser(browser);
|
||||
return connected;
|
||||
} catch (err) {
|
||||
lastErr = err;
|
||||
@@ -503,12 +506,168 @@ export function refLocator(page: Page, ref: string) {
|
||||
export async function closePlaywrightBrowserConnection(): Promise<void> {
|
||||
const cur = cached;
|
||||
cached = null;
|
||||
connecting = null;
|
||||
if (!cur) {
|
||||
return;
|
||||
}
|
||||
if (cur.onDisconnected && typeof cur.browser.off === "function") {
|
||||
cur.browser.off("disconnected", cur.onDisconnected);
|
||||
}
|
||||
await cur.browser.close().catch(() => {});
|
||||
}
|
||||
|
||||
function normalizeCdpHttpBaseForJsonEndpoints(cdpUrl: string): string {
|
||||
try {
|
||||
const url = new URL(cdpUrl);
|
||||
if (url.protocol === "ws:") {
|
||||
url.protocol = "http:";
|
||||
} else if (url.protocol === "wss:") {
|
||||
url.protocol = "https:";
|
||||
}
|
||||
url.pathname = url.pathname.replace(/\/devtools\/browser\/.*$/, "");
|
||||
url.pathname = url.pathname.replace(/\/cdp$/, "");
|
||||
return url.toString().replace(/\/$/, "");
|
||||
} catch {
|
||||
// Best-effort fallback for non-URL-ish inputs.
|
||||
return cdpUrl
|
||||
.replace(/^ws:/, "http:")
|
||||
.replace(/^wss:/, "https:")
|
||||
.replace(/\/devtools\/browser\/.*$/, "")
|
||||
.replace(/\/cdp$/, "")
|
||||
.replace(/\/$/, "");
|
||||
}
|
||||
}
|
||||
|
||||
function cdpSocketNeedsAttach(wsUrl: string): boolean {
|
||||
try {
|
||||
const pathname = new URL(wsUrl).pathname;
|
||||
return (
|
||||
pathname === "/cdp" || pathname.endsWith("/cdp") || pathname.includes("/devtools/browser/")
|
||||
);
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
async function tryTerminateExecutionViaCdp(opts: {
|
||||
cdpUrl: string;
|
||||
targetId: string;
|
||||
}): Promise<void> {
|
||||
const cdpHttpBase = normalizeCdpHttpBaseForJsonEndpoints(opts.cdpUrl);
|
||||
const listUrl = appendCdpPath(cdpHttpBase, "/json/list");
|
||||
|
||||
const pages = await fetchJson<
|
||||
Array<{
|
||||
id?: string;
|
||||
webSocketDebuggerUrl?: string;
|
||||
}>
|
||||
>(listUrl, 2000).catch(() => null);
|
||||
if (!pages || pages.length === 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
const target = pages.find((p) => String(p.id ?? "").trim() === opts.targetId);
|
||||
const wsUrlRaw = String(target?.webSocketDebuggerUrl ?? "").trim();
|
||||
if (!wsUrlRaw) {
|
||||
return;
|
||||
}
|
||||
const wsUrl = normalizeCdpWsUrl(wsUrlRaw, cdpHttpBase);
|
||||
const needsAttach = cdpSocketNeedsAttach(wsUrl);
|
||||
|
||||
const runWithTimeout = async <T>(work: Promise<T>, ms: number): Promise<T> => {
|
||||
let timer: ReturnType<typeof setTimeout> | undefined;
|
||||
const timeoutPromise = new Promise<never>((_, reject) => {
|
||||
timer = setTimeout(() => reject(new Error("CDP command timed out")), ms);
|
||||
});
|
||||
try {
|
||||
return await Promise.race([work, timeoutPromise]);
|
||||
} finally {
|
||||
if (timer) {
|
||||
clearTimeout(timer);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
await withCdpSocket(
|
||||
wsUrl,
|
||||
async (send) => {
|
||||
let sessionId: string | undefined;
|
||||
try {
|
||||
if (needsAttach) {
|
||||
const attached = (await runWithTimeout(
|
||||
send("Target.attachToTarget", { targetId: opts.targetId, flatten: true }),
|
||||
1500,
|
||||
)) as { sessionId?: unknown };
|
||||
if (typeof attached?.sessionId === "string" && attached.sessionId.trim()) {
|
||||
sessionId = attached.sessionId;
|
||||
}
|
||||
}
|
||||
await runWithTimeout(send("Runtime.terminateExecution", undefined, sessionId), 1500);
|
||||
if (sessionId) {
|
||||
// Best-effort cleanup; not required for termination to take effect.
|
||||
void send("Target.detachFromTarget", { sessionId }).catch(() => {});
|
||||
}
|
||||
} catch {
|
||||
// Best-effort; ignore
|
||||
}
|
||||
},
|
||||
{ handshakeTimeoutMs: 2000 },
|
||||
).catch(() => {});
|
||||
}
|
||||
|
||||
/**
|
||||
* Best-effort cancellation for stuck page operations.
|
||||
*
|
||||
* Playwright serializes CDP commands per page; a long-running or stuck operation (notably evaluate)
|
||||
* can block all subsequent commands. We cannot safely "cancel" an individual command, and we do
|
||||
* not want to close the actual Chromium tab. Instead, we disconnect Playwright's CDP connection
|
||||
* so in-flight commands fail fast and the next request reconnects transparently.
|
||||
*
|
||||
* IMPORTANT: We CANNOT call Connection.close() because Playwright shares a single Connection
|
||||
* across all objects (BrowserType, Browser, etc.). Closing it corrupts the entire Playwright
|
||||
* instance, preventing reconnection.
|
||||
*
|
||||
* Instead we:
|
||||
* 1. Null out `cached` so the next call triggers a fresh connectOverCDP
|
||||
* 2. Fire-and-forget browser.close() — it may hang but won't block us
|
||||
* 3. The next connectBrowser() creates a completely new CDP WebSocket connection
|
||||
*
|
||||
* The old browser.close() eventually resolves when the in-browser evaluate timeout fires,
|
||||
* or the old connection gets GC'd. Either way, it doesn't affect the fresh connection.
|
||||
*/
|
||||
export async function forceDisconnectPlaywrightForTarget(opts: {
|
||||
cdpUrl: string;
|
||||
targetId?: string;
|
||||
reason?: string;
|
||||
}): Promise<void> {
|
||||
const normalized = normalizeCdpUrl(opts.cdpUrl);
|
||||
if (cached?.cdpUrl !== normalized) {
|
||||
return;
|
||||
}
|
||||
const cur = cached;
|
||||
cached = null;
|
||||
// Also clear `connecting` so the next call does a fresh connectOverCDP
|
||||
// rather than awaiting a stale promise.
|
||||
connecting = null;
|
||||
if (cur) {
|
||||
// Remove the "disconnected" listener to prevent the old browser's teardown
|
||||
// from racing with a fresh connection and nulling the new `cached`.
|
||||
if (cur.onDisconnected && typeof cur.browser.off === "function") {
|
||||
cur.browser.off("disconnected", cur.onDisconnected);
|
||||
}
|
||||
|
||||
// Best-effort: kill any stuck JS to unblock the target's execution context before we
|
||||
// disconnect Playwright's CDP connection.
|
||||
const targetId = opts.targetId?.trim() || "";
|
||||
if (targetId) {
|
||||
await tryTerminateExecutionViaCdp({ cdpUrl: normalized, targetId }).catch(() => {});
|
||||
}
|
||||
|
||||
// Fire-and-forget: don't await because browser.close() may hang on the stuck CDP pipe.
|
||||
cur.browser.close().catch(() => {});
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* List all pages/tabs from the persistent Playwright connection.
|
||||
* Used for remote profiles where HTTP-based /json/list is ephemeral.
|
||||
|
||||
Reference in New Issue
Block a user