fix(process): harden graceful kill-tree cancellation semantics

This commit is contained in:
Sebastian
2026-02-16 20:35:00 -05:00
parent 7b172d61cd
commit fb996031bc
4 changed files with 255 additions and 37 deletions

View File

@@ -0,0 +1,135 @@
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
// Vitest hoists `vi.mock` calls above the imports, so the mock function that the
// factory below refers to is created through `vi.hoisted` to exist by that time.
const { spawnMock } = vi.hoisted(() => ({
spawnMock: vi.fn(),
}));
// Replace `spawn` from node:child_process with a thin wrapper that forwards every
// call to `spawnMock`, so tests can assert on the commands that would be spawned.
vi.mock("node:child_process", () => ({
spawn: (...args: unknown[]) => spawnMock(...args),
}));
/**
 * Runs `run` while `process.platform` reports `platform`, then puts the original
 * property descriptor back, whether `run` returned, resolved, threw or rejected.
 *
 * @param platform - Value `process.platform` should report during the callback.
 * @param run - Synchronous or asynchronous callback executed under the override.
 * @returns Whatever `run` returns or resolves to.
 */
async function withPlatform<T>(platform: NodeJS.Platform, run: () => Promise<T> | T): Promise<T> {
  const savedDescriptor = Object.getOwnPropertyDescriptor(process, "platform");
  const restorePlatform = (): void => {
    // Without a captured descriptor there is nothing to restore.
    if (savedDescriptor !== undefined) {
      Object.defineProperty(process, "platform", savedDescriptor);
    }
  };

  // `configurable: true` keeps the property redefinable so it can be restored.
  Object.defineProperty(process, "platform", { configurable: true, value: platform });
  try {
    const outcome = await run();
    return outcome;
  } finally {
    restorePlatform();
  }
}
// Behavioural tests for `killProcessTree`.
//
// Each test overrides `process.platform`, imports `./kill-tree.js` dynamically under
// that override, and drives the grace period with fake timers. `process.kill` is spied
// on so signal-0 liveness probes and real signals can be observed without touching any
// actual process; `spawn` is replaced by `spawnMock` at module level.
describe("killProcessTree", () => {
  let killSpy: ReturnType<typeof vi.spyOn>;

  beforeEach(() => {
    spawnMock.mockReset();
    killSpy = vi.spyOn(process, "kill");
    vi.useFakeTimers();
  });

  afterEach(() => {
    killSpy.mockRestore();
    vi.useRealTimers();
    // Drop the module cache so the next test re-imports kill-tree from scratch.
    vi.resetModules();
    vi.clearAllMocks();
  });

  it("on Windows skips delayed force-kill when PID is already gone", async () => {
    // The signal-0 probe for the target pid fails, i.e. the process has exited.
    killSpy.mockImplementation(((pid: number, signal?: NodeJS.Signals | number) => {
      if (pid === 4242 && signal === 0) {
        throw new Error("ESRCH");
      }
      return true;
    }) as typeof process.kill);

    await withPlatform("win32", async () => {
      const { killProcessTree } = await import("./kill-tree.js");
      killProcessTree(4242, { graceMs: 25 });

      // The graceful taskkill (no /F) is issued immediately.
      expect(spawnMock).toHaveBeenCalledTimes(1);
      expect(spawnMock).toHaveBeenNthCalledWith(
        1,
        "taskkill",
        ["/T", "/PID", "4242"],
        expect.objectContaining({ detached: true, stdio: "ignore" }),
      );

      // After the grace period no second (forced) taskkill may be spawned.
      await vi.advanceTimersByTimeAsync(25);
      expect(spawnMock).toHaveBeenCalledTimes(1);
    });
  });

  it("on Windows force-kills after grace period only when PID still exists", async () => {
    // Every probe and signal succeeds: the process is still alive after the grace period.
    killSpy.mockImplementation((() => true) as typeof process.kill);

    await withPlatform("win32", async () => {
      const { killProcessTree } = await import("./kill-tree.js");
      killProcessTree(5252, { graceMs: 10 });

      // Only the graceful request may exist before the grace period elapses; without
      // this check the test would also pass if /F were sent immediately.
      expect(spawnMock).toHaveBeenCalledTimes(1);

      await vi.advanceTimersByTimeAsync(10);
      expect(spawnMock).toHaveBeenCalledTimes(2);
      expect(spawnMock).toHaveBeenNthCalledWith(
        1,
        "taskkill",
        ["/T", "/PID", "5252"],
        expect.objectContaining({ detached: true, stdio: "ignore" }),
      );
      expect(spawnMock).toHaveBeenNthCalledWith(
        2,
        "taskkill",
        ["/F", "/T", "/PID", "5252"],
        expect.objectContaining({ detached: true, stdio: "ignore" }),
      );
    });
  });

  it("on Unix sends SIGTERM first and skips SIGKILL when process exits", async () => {
    // Both the process-group probe (-pid) and the direct probe (pid) report "gone".
    killSpy.mockImplementation(((pid: number, signal?: NodeJS.Signals | number) => {
      if (signal === 0 && (pid === -3333 || pid === 3333)) {
        throw new Error("ESRCH");
      }
      return true;
    }) as typeof process.kill);

    await withPlatform("linux", async () => {
      const { killProcessTree } = await import("./kill-tree.js");
      killProcessTree(3333, { graceMs: 10 });
      await vi.advanceTimersByTimeAsync(10);

      expect(killSpy).toHaveBeenCalledWith(-3333, "SIGTERM");
      expect(killSpy).not.toHaveBeenCalledWith(-3333, "SIGKILL");
      expect(killSpy).not.toHaveBeenCalledWith(3333, "SIGKILL");
    });
  });

  it("on Unix sends SIGKILL after grace period when process is still alive", async () => {
    // Every probe and signal succeeds: the process group survives the SIGTERM.
    killSpy.mockImplementation((() => true) as typeof process.kill);

    await withPlatform("linux", async () => {
      const { killProcessTree } = await import("./kill-tree.js");
      killProcessTree(4444, { graceMs: 5 });
      await vi.advanceTimersByTimeAsync(5);

      expect(killSpy).toHaveBeenCalledWith(-4444, "SIGTERM");
      expect(killSpy).toHaveBeenCalledWith(-4444, "SIGKILL");
    });
  });
});

View File

@@ -1,5 +1,8 @@
import { spawn } from "node:child_process"; import { spawn } from "node:child_process";
// Grace period (ms) used when the caller passes no finite numeric `graceMs`.
const DEFAULT_GRACE_MS = 3000;
// Upper bound (ms) that a caller-supplied grace period is clamped to.
const MAX_GRACE_MS = 60_000;
/** /**
* Best-effort process-tree termination with graceful shutdown. * Best-effort process-tree termination with graceful shutdown.
* - Windows: use taskkill /T to include descendants. Sends SIGTERM-equivalent * - Windows: use taskkill /T to include descendants. Sends SIGTERM-equivalent
@@ -14,7 +17,7 @@ export function killProcessTree(pid: number, opts?: { graceMs?: number }): void
return; return;
} }
const graceMs = opts?.graceMs ?? 3000; const graceMs = normalizeGraceMs(opts?.graceMs);
if (process.platform === "win32") { if (process.platform === "win32") {
killProcessTreeWindows(pid, graceMs); killProcessTreeWindows(pid, graceMs);
@@ -24,6 +27,22 @@ export function killProcessTree(pid: number, opts?: { graceMs?: number }): void
killProcessTreeUnix(pid, graceMs); killProcessTreeUnix(pid, graceMs);
} }
/**
 * Turns a caller-supplied grace period into a whole number of milliseconds in
 * the range [0, MAX_GRACE_MS].
 *
 * @param value - Requested grace period; may be missing, NaN or infinite.
 * @returns DEFAULT_GRACE_MS when `value` is not a finite number; otherwise
 *   `value` rounded down and clamped to the allowed range.
 */
function normalizeGraceMs(value?: number): number {
  if (typeof value === "number" && Number.isFinite(value)) {
    const wholeMs = Math.floor(value);
    if (wholeMs <= 0) {
      // Negative and sub-millisecond requests collapse to "no grace period".
      return 0;
    }
    return wholeMs >= MAX_GRACE_MS ? MAX_GRACE_MS : wholeMs;
  }
  return DEFAULT_GRACE_MS;
}
/**
 * Reports whether a process (or, for a negative `pid`, a process group) still
 * exists, by probing it with signal 0, which delivers nothing.
 *
 * @param pid - Process id, or a negated process-group id.
 * @returns `true` when the probe succeeds or fails with `EPERM` (the target
 *   exists but this process may not signal it); `false` for any other failure,
 *   e.g. `ESRCH` when the target is gone.
 */
function isProcessAlive(pid: number): boolean {
  try {
    process.kill(pid, 0);
    return true;
  } catch (err: unknown) {
    // EPERM means the target exists but is not ours to signal; reporting it as
    // dead would make callers skip the follow-up kill of a live process.
    const code =
      typeof err === "object" && err !== null ? (err as { code?: unknown }).code : undefined;
    return code === "EPERM";
  }
}
function killProcessTreeUnix(pid: number, graceMs: number): void { function killProcessTreeUnix(pid: number, graceMs: number): void {
// Step 1: Try graceful SIGTERM to process group // Step 1: Try graceful SIGTERM to process group
try { try {
@@ -40,55 +59,46 @@ function killProcessTreeUnix(pid: number, graceMs: number): void {
// Step 2: Wait grace period, then SIGKILL if still alive // Step 2: Wait grace period, then SIGKILL if still alive
setTimeout(() => { setTimeout(() => {
try { if (isProcessAlive(-pid)) {
// Check if still alive by sending signal 0
process.kill(-pid, 0);
// Still alive - hard kill
try { try {
process.kill(-pid, "SIGKILL"); process.kill(-pid, "SIGKILL");
return;
} catch { } catch {
try { // Fall through to direct pid kill
process.kill(pid, "SIGKILL");
} catch {
// Gone now
}
} }
}
if (!isProcessAlive(pid)) {
return;
}
try {
process.kill(pid, "SIGKILL");
} catch { } catch {
// Process group gone - check direct // Process exited between liveness check and kill
try {
process.kill(pid, 0);
try {
process.kill(pid, "SIGKILL");
} catch {
// Gone
}
} catch {
// Already terminated
}
} }
}, graceMs).unref(); // Don't block event loop exit }, graceMs).unref(); // Don't block event loop exit
} }
function killProcessTreeWindows(pid: number, graceMs: number): void { function runTaskkill(args: string[]): void {
// Step 1: Try graceful termination (taskkill without /F)
try { try {
spawn("taskkill", ["/T", "/PID", String(pid)], { spawn("taskkill", args, {
stdio: "ignore", stdio: "ignore",
detached: true, detached: true,
}); });
} catch { } catch {
// Ignore spawn failures // Ignore taskkill spawn failures
} }
}
// Step 2: Wait grace period, then force kill if still alive function killProcessTreeWindows(pid: number, graceMs: number): void {
// Step 1: Try graceful termination (taskkill without /F)
runTaskkill(["/T", "/PID", String(pid)]);
// Step 2: Wait grace period, then force kill only if pid still exists.
// This avoids unconditional delayed /F kills after graceful shutdown.
setTimeout(() => { setTimeout(() => {
try { if (!isProcessAlive(pid)) {
spawn("taskkill", ["/F", "/T", "/PID", String(pid)], { return;
stdio: "ignore",
detached: true,
});
} catch {
// Ignore taskkill failures
} }
runTaskkill(["/F", "/T", "/PID", String(pid)]);
}, graceMs).unref(); // Don't block event loop exit }, graceMs).unref(); // Don't block event loop exit
} }

View File

@@ -36,9 +36,11 @@ describe("createPtyAdapter", () => {
spawnMock.mockReset(); spawnMock.mockReset();
ptyKillMock.mockReset(); ptyKillMock.mockReset();
killProcessTreeMock.mockReset(); killProcessTreeMock.mockReset();
vi.useRealTimers();
}); });
afterEach(() => { afterEach(() => {
vi.useRealTimers();
vi.resetModules(); vi.resetModules();
vi.clearAllMocks(); vi.clearAllMocks();
}); });
@@ -79,6 +81,53 @@ describe("createPtyAdapter", () => {
expect(ptyKillMock).not.toHaveBeenCalled(); expect(ptyKillMock).not.toHaveBeenCalled();
}); });
// Verifies that kill() no longer resolves wait() right away: with no exit event from
// the stub PTY, wait() must stay pending until the delayed fallback fires.
it("wait does not settle immediately on SIGKILL", async () => {
// Fake timers let the test step through the fallback delay deterministically.
vi.useFakeTimers();
spawnMock.mockReturnValue(createStubPty());
const { createPtyAdapter } = await import("./pty.js");
const adapter = await createPtyAdapter({
shell: "bash",
args: ["-lc", "sleep 10"],
});
const waitPromise = adapter.wait();
// `settled` records whether waitPromise has resolved at each checkpoint.
const settled = vi.fn();
void waitPromise.then(() => settled());
// kill() without arguments presumably defaults to SIGKILL (see the expected result below).
adapter.kill();
// Flush one microtask turn so an immediate settle would already be visible.
await Promise.resolve();
expect(settled).not.toHaveBeenCalled();
// One millisecond short of the 4000 ms total: still pending.
await vi.advanceTimersByTimeAsync(3999);
expect(settled).not.toHaveBeenCalled();
// Reaching 4000 ms resolves wait() with the fallback value carrying the kill signal.
await vi.advanceTimersByTimeAsync(1);
await expect(waitPromise).resolves.toEqual({ code: null, signal: "SIGKILL" });
});
// Verifies that an exit event emitted after kill() decides the wait() result, and that
// the fallback timer does not overwrite that result later.
it("prefers real PTY exit over SIGKILL fallback settle", async () => {
vi.useFakeTimers();
const stub = createStubPty();
spawnMock.mockReturnValue(stub);
const { createPtyAdapter } = await import("./pty.js");
const adapter = await createPtyAdapter({
shell: "bash",
args: ["-lc", "sleep 10"],
});
const waitPromise = adapter.wait();
adapter.kill();
// Simulate the PTY reporting its exit before any timer has advanced.
stub.emitExit({ exitCode: 0, signal: 9 });
await expect(waitPromise).resolves.toEqual({ code: 0, signal: 9 });
// Advance well past the point where the first test sees the fallback fire (4000 ms);
// a later wait() must still return the exit result.
await vi.advanceTimersByTimeAsync(10_000);
await expect(adapter.wait()).resolves.toEqual({ code: 0, signal: 9 });
});
it("resolves wait when exit fires before wait is called", async () => { it("resolves wait when exit fires before wait is called", async () => {
const stub = createStubPty(); const stub = createStubPty();
spawnMock.mockReturnValue(stub); spawnMock.mockReturnValue(stub);

View File

@@ -1,7 +1,9 @@
import { killProcessTree } from "../../kill-tree.js";
import type { ManagedRunStdin } from "../types.js"; import type { ManagedRunStdin } from "../types.js";
import { killProcessTree } from "../../kill-tree.js";
import { toStringEnv } from "./env.js"; import { toStringEnv } from "./env.js";
// Delay (ms) after a SIGKILL request before wait() is settled with a fallback result
// if no earlier settle (e.g. a PTY exit event) has happened.
const FORCE_KILL_WAIT_FALLBACK_MS = 4000;
type PtyExitEvent = { exitCode: number; signal?: number }; type PtyExitEvent = { exitCode: number; signal?: number };
type PtyDisposable = { dispose: () => void }; type PtyDisposable = { dispose: () => void };
type PtySpawnHandle = { type PtySpawnHandle = {
@@ -70,11 +72,21 @@ export async function createPtyAdapter(params: {
| null = null; | null = null;
let waitPromise: Promise<{ code: number | null; signal: NodeJS.Signals | number | null }> | null = let waitPromise: Promise<{ code: number | null; signal: NodeJS.Signals | number | null }> | null =
null; null;
// Handle of the pending force-kill fallback timer; null when none is scheduled.
let forceKillWaitFallbackTimer: NodeJS.Timeout | null = null;

// Cancels the pending fallback timer, if any, and forgets its handle.
const clearForceKillWaitFallback = () => {
  if (forceKillWaitFallbackTimer) {
    clearTimeout(forceKillWaitFallbackTimer);
    forceKillWaitFallbackTimer = null;
  }
};
const settleWait = (value: { code: number | null; signal: NodeJS.Signals | number | null }) => { const settleWait = (value: { code: number | null; signal: NodeJS.Signals | number | null }) => {
if (waitResult) { if (waitResult) {
return; return;
} }
clearForceKillWaitFallback();
waitResult = value; waitResult = value;
if (resolveWait) { if (resolveWait) {
const resolve = resolveWait; const resolve = resolveWait;
@@ -83,6 +95,16 @@ export async function createPtyAdapter(params: {
} }
}; };
// Arms (or re-arms) the delayed fallback that settles wait() with `signal` when the
// PTY host never reports an exit after a forced kill. Settling is deferred instead of
// immediate so a real exit event arriving in the meantime can win.
const scheduleForceKillWaitFallback = (signal: NodeJS.Signals) => {
  // Any previously scheduled fallback is replaced.
  clearForceKillWaitFallback();
  const fallbackTimer = setTimeout(
    () => settleWait({ code: null, signal }),
    FORCE_KILL_WAIT_FALLBACK_MS,
  );
  // The timer must not keep the event loop alive on its own.
  fallbackTimer.unref();
  forceKillWaitFallbackTimer = fallbackTimer;
};
exitListener = exitListener =
pty.onExit((event) => { pty.onExit((event) => {
const signal = event.signal && event.signal !== 0 ? event.signal : null; const signal = event.signal && event.signal !== 0 ? event.signal : null;
@@ -151,9 +173,10 @@ export async function createPtyAdapter(params: {
} catch { } catch {
// ignore kill errors // ignore kill errors
} }
// Some PTY hosts do not emit `onExit` reliably after kill.
// Ensure waiters can progress on forced termination. if (signal === "SIGKILL") {
settleWait({ code: null, signal }); scheduleForceKillWaitFallback(signal);
}
}; };
const dispose = () => { const dispose = () => {
@@ -167,6 +190,7 @@ export async function createPtyAdapter(params: {
} catch { } catch {
// ignore disposal errors // ignore disposal errors
} }
clearForceKillWaitFallback();
dataListener = null; dataListener = null;
exitListener = null; exitListener = null;
settleWait({ code: null, signal: null }); settleWait({ code: null, signal: null });