mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-09 07:17:40 +00:00
fix: codex and similar processes keep dying on pty, solved by refactoring process spawning (#14257)
* exec: clean up PTY resources on timeout and exit
* cli: harden resume cleanup and watchdog stalled runs
* cli: productionize PTY and resume reliability paths
* docs: add PTY process supervision architecture plan
* docs: rewrite PTY supervision plan as pre-rewrite baseline
* docs: switch PTY supervision plan to one-go execution
* docs: add one-line root cause to PTY supervision plan
* docs: add OS contracts and test matrix to PTY supervision plan
* docs: define process-supervisor package placement and scope
* docs: tie supervisor plan to existing CI lanes
* docs: place PTY supervisor plan under src/process
* refactor(process): route exec and cli runs through supervisor
* docs(process): refresh PTY supervision plan
* wip
* fix(process): harden supervisor timeout and PTY termination
* fix(process): harden supervisor adapters env and wait handling
* ci: avoid failing formal conformance on comment permissions
* test(ui): fix cron request mock argument typing
* fix(ui): remove leftover conflict marker
* fix: supervise PTY processes (#14257) (openclaw#14257) (thanks @onutc)
This commit is contained in:
88
src/agents/cli-runner/reliability.ts
Normal file
88
src/agents/cli-runner/reliability.ts
Normal file
@@ -0,0 +1,88 @@
|
||||
import path from "node:path";
|
||||
import type { CliBackendConfig } from "../../config/types.js";
|
||||
import {
|
||||
CLI_FRESH_WATCHDOG_DEFAULTS,
|
||||
CLI_RESUME_WATCHDOG_DEFAULTS,
|
||||
CLI_WATCHDOG_MIN_TIMEOUT_MS,
|
||||
} from "../cli-watchdog-defaults.js";
|
||||
|
||||
/**
 * Normalizes one optional millisecond setting.
 *
 * @param value - Raw configured value; may be missing or not a usable number.
 * @returns `undefined` when `value` is not a finite number; otherwise the value
 *   floored to an integer and raised to at least `CLI_WATCHDOG_MIN_TIMEOUT_MS`.
 */
function normalizeWatchdogMs(value: unknown): number | undefined {
  if (typeof value !== "number" || !Number.isFinite(value)) {
    return undefined;
  }
  return Math.max(CLI_WATCHDOG_MIN_TIMEOUT_MS, Math.floor(value));
}

/**
 * Builds the effective watchdog profile for a run by layering the backend's
 * configured overrides on top of the fresh/resume defaults.
 *
 * @param backend - Backend config; overrides are read from
 *   `backend.reliability?.watchdog?.resume` or `...?.fresh`.
 * @param useResume - Selects the resume defaults/overrides when true, the
 *   fresh ones otherwise.
 * @returns The profile:
 *   - `noOutputTimeoutMs`: the normalized explicit override, or `undefined`
 *     when none (or an unusable one) is configured;
 *   - `noOutputTimeoutRatio`: the configured ratio clamped to [0.05, 0.95], or
 *     the default ratio (returned as-is) when the override is not a finite number;
 *   - `minMs` / `maxMs`: normalized overrides or defaults, ordered so that
 *     `minMs <= maxMs` even if they were configured the other way round.
 */
function pickWatchdogProfile(
  backend: CliBackendConfig,
  useResume: boolean,
): {
  noOutputTimeoutMs?: number;
  noOutputTimeoutRatio: number;
  minMs: number;
  maxMs: number;
} {
  const defaults = useResume ? CLI_RESUME_WATCHDOG_DEFAULTS : CLI_FRESH_WATCHDOG_DEFAULTS;
  const configured = useResume
    ? backend.reliability?.watchdog?.resume
    : backend.reliability?.watchdog?.fresh;

  const configuredRatio = configured?.noOutputTimeoutRatio;
  const ratio =
    typeof configuredRatio === "number" && Number.isFinite(configuredRatio)
      ? Math.max(0.05, Math.min(0.95, configuredRatio))
      : defaults.noOutputTimeoutRatio;

  const minMs = normalizeWatchdogMs(configured?.minMs) ?? defaults.minMs;
  const maxMs = normalizeWatchdogMs(configured?.maxMs) ?? defaults.maxMs;

  return {
    noOutputTimeoutMs: normalizeWatchdogMs(configured?.noOutputTimeoutMs),
    noOutputTimeoutRatio: ratio,
    // Swap rather than reject an inverted min/max pair.
    minMs: Math.min(minMs, maxMs),
    maxMs: Math.max(minMs, maxMs),
  };
}
|
||||
|
||||
/**
 * Resolves the no-output watchdog timeout (in milliseconds) for a CLI run.
 *
 * An explicit `noOutputTimeoutMs` from the watchdog profile wins; otherwise the
 * value is `timeoutMs * noOutputTimeoutRatio`, floored and bounded to the
 * profile's `[minMs, maxMs]`. In both cases the result is capped at
 * `max(CLI_WATCHDOG_MIN_TIMEOUT_MS, timeoutMs - 1000)`.
 *
 * @param params.backend - Backend config the watchdog profile is read from.
 * @param params.timeoutMs - Overall run timeout in milliseconds.
 * @param params.useResume - Selects the resume profile when true, fresh otherwise.
 * @returns The watchdog timeout in milliseconds. When `timeoutMs` is NaN the
 *   explicit `noOutputTimeoutMs` is returned if set, else the profile's `maxMs`.
 */
export function resolveCliNoOutputTimeoutMs(params: {
  backend: CliBackendConfig;
  timeoutMs: number;
  useResume: boolean;
}): number {
  const profile = pickWatchdogProfile(params.backend, params.useResume);
  // A NaN overall timeout would poison every Math.min/Math.max below and come
  // back out as a NaN watchdog. Treat it as "no usable overall timeout" and let
  // the profile decide, which matches what an infinite timeout resolves to.
  if (Number.isNaN(params.timeoutMs)) {
    return profile.noOutputTimeoutMs ?? profile.maxMs;
  }
  // Keep watchdog below global timeout in normal cases.
  const cap = Math.max(CLI_WATCHDOG_MIN_TIMEOUT_MS, params.timeoutMs - 1_000);
  if (profile.noOutputTimeoutMs !== undefined) {
    return Math.min(profile.noOutputTimeoutMs, cap);
  }
  const computed = Math.floor(params.timeoutMs * profile.noOutputTimeoutRatio);
  const bounded = Math.min(profile.maxMs, Math.max(profile.minMs, computed));
  return Math.min(bounded, cap);
}
|
||||
|
||||
/**
 * Builds the scope key string for a CLI session.
 *
 * The key has the shape `cli:<backendId>:<command basename>:<session id>`,
 * where the backend id and command basename are trimmed and lower-cased and
 * the session id is trimmed but keeps its case.
 *
 * @param params.backend - Backend config; only `command` is read (a missing
 *   command yields an empty command segment).
 * @param params.backendId - Backend identifier.
 * @param params.cliSessionId - CLI session id, if any.
 * @returns The scope key, or `undefined` when the session id is missing or blank.
 */
export function buildCliSupervisorScopeKey(params: {
  backend: CliBackendConfig;
  backendId: string;
  cliSessionId?: string;
}): string | undefined {
  const session = params.cliSessionId?.trim();
  if (!session) {
    return undefined;
  }

  const executable = path.basename(params.backend.command ?? "");
  const segments = [
    "cli",
    params.backendId.trim().toLowerCase(),
    executable.trim().toLowerCase(),
    session,
  ];
  return segments.join(":");
}
|
||||
Reference in New Issue
Block a user