From bd9521a376f48d41e7f42ab59a050d71b33ff8d4 Mon Sep 17 00:00:00 2001 From: Gustavo Madeira Santana Date: Mon, 23 Feb 2026 13:28:14 -0500 Subject: [PATCH] CLI: fix restart health ownership for child listener pids --- src/cli/daemon-cli/restart-health.test.ts | 57 +++++++++++++++++++++++ src/cli/daemon-cli/restart-health.ts | 25 ++++++++-- src/infra/ports-inspect.ts | 16 ++++++- src/infra/ports-types.ts | 1 + 4 files changed, 94 insertions(+), 5 deletions(-) create mode 100644 src/cli/daemon-cli/restart-health.test.ts diff --git a/src/cli/daemon-cli/restart-health.test.ts b/src/cli/daemon-cli/restart-health.test.ts new file mode 100644 index 00000000000..c438e9a0ea8 --- /dev/null +++ b/src/cli/daemon-cli/restart-health.test.ts @@ -0,0 +1,57 @@ +import { beforeEach, describe, expect, it, vi } from "vitest"; +import type { GatewayService } from "../../daemon/service.js"; + +const inspectPortUsage = vi.hoisted(() => vi.fn()); +const classifyPortListener = vi.hoisted(() => vi.fn(() => "gateway")); + +vi.mock("../../infra/ports.js", () => ({ + classifyPortListener: (...args: unknown[]) => classifyPortListener(...args), + formatPortDiagnostics: vi.fn(() => []), + inspectPortUsage: (...args: unknown[]) => inspectPortUsage(...args), +})); + +describe("inspectGatewayRestart", () => { + beforeEach(() => { + inspectPortUsage.mockReset(); + classifyPortListener.mockReset(); + classifyPortListener.mockReturnValue("gateway"); + }); + + it("treats a gateway listener child pid as healthy ownership", async () => { + const service = { + readRuntime: vi.fn(async () => ({ status: "running", pid: 7000 })), + } as unknown as GatewayService; + + inspectPortUsage.mockResolvedValue({ + port: 18789, + status: "busy", + listeners: [{ pid: 7001, ppid: 7000, commandLine: "openclaw-gateway" }], + hints: [], + }); + + const { inspectGatewayRestart } = await import("./restart-health.js"); + const snapshot = await inspectGatewayRestart({ service, port: 18789 }); + + expect(snapshot.healthy).toBe(true); + expect(snapshot.staleGatewayPids).toEqual([]); + }); + + it("marks non-owned gateway listener pids as stale while runtime is running", async () => { + const service = { + readRuntime: vi.fn(async () => ({ status: "running", pid: 8000 })), + } as unknown as GatewayService; + + inspectPortUsage.mockResolvedValue({ + port: 18789, + status: "busy", + listeners: [{ pid: 9000, ppid: 8999, commandLine: "openclaw-gateway" }], + hints: [], + }); + + const { inspectGatewayRestart } = await import("./restart-health.js"); + const snapshot = await inspectGatewayRestart({ service, port: 18789 }); + + expect(snapshot.healthy).toBe(false); + expect(snapshot.staleGatewayPids).toEqual([9000]); + }); +}); diff --git a/src/cli/daemon-cli/restart-health.ts b/src/cli/daemon-cli/restart-health.ts index 4a0d5bcf4bb..575fdee7ec9 100644 --- a/src/cli/daemon-cli/restart-health.ts +++ b/src/cli/daemon-cli/restart-health.ts @@ -21,6 +21,13 @@ export type GatewayRestartSnapshot = { staleGatewayPids: number[]; }; +function listenerOwnedByRuntimePid(params: { + listener: PortUsage["listeners"][number]; + runtimePid: number; +}): boolean { + return params.listener.pid === params.runtimePid || params.listener.ppid === params.runtimePid; +} + export async function inspectGatewayRestart(params: { service: GatewayService; port: number; @@ -56,16 +63,26 @@ export async function inspectGatewayRestart(params: { const running = runtime.status === "running"; const ownsPort = runtime.pid != null - ? portUsage.listeners.some((listener) => listener.pid === runtime.pid) + ? portUsage.listeners.some((listener) => + listenerOwnedByRuntimePid({ listener, runtimePid: runtime.pid }), + ) : gatewayListeners.length > 0 || (portUsage.status === "busy" && portUsage.listeners.length === 0); const healthy = running && ownsPort; const staleGatewayPids = Array.from( new Set( gatewayListeners - .map((listener) => listener.pid) - .filter((pid): pid is number => Number.isFinite(pid)) - .filter((pid) => runtime.pid == null || pid !== runtime.pid || !running), + .filter((listener) => Number.isFinite(listener.pid)) + .filter((listener) => { + if (!running) { + return true; + } + if (runtime.pid == null) { + return true; + } + return !listenerOwnedByRuntimePid({ listener, runtimePid: runtime.pid }); + }) + .map((listener) => listener.pid as number), ), ); diff --git a/src/infra/ports-inspect.ts b/src/infra/ports-inspect.ts index d6c172a7bd9..344086ae14a 100644 --- a/src/infra/ports-inspect.ts +++ b/src/infra/ports-inspect.ts @@ -75,6 +75,16 @@ async function resolveUnixUser(pid: number): Promise { return line || undefined; } +async function resolveUnixParentPid(pid: number): Promise { + const res = await runCommandSafe(["ps", "-p", String(pid), "-o", "ppid="]); + if (res.code !== 0) { + return undefined; + } + const line = res.stdout.trim(); + const parentPid = Number.parseInt(line, 10); + return Number.isFinite(parentPid) && parentPid > 0 ? parentPid : undefined; +} + async function readUnixListeners( port: number, ): Promise<{ listeners: PortListener[]; detail?: string; errors: string[] }> { @@ -88,9 +98,10 @@ async function readUnixListeners( if (!listener.pid) { return; } - const [commandLine, user] = await Promise.all([ + const [commandLine, user, parentPid] = await Promise.all([ resolveUnixCommandLine(listener.pid), resolveUnixUser(listener.pid), + resolveUnixParentPid(listener.pid), ]); if (commandLine) { listener.commandLine = commandLine; @@ -98,6 +109,9 @@ async function readUnixListeners( if (user) { listener.user = user; } + if (parentPid !== undefined) { + listener.ppid = parentPid; + } }), ); return { listeners, detail: res.stdout.trim() || undefined, errors }; diff --git a/src/infra/ports-types.ts b/src/infra/ports-types.ts index 56accc93aff..827a5b3ade9 100644 --- a/src/infra/ports-types.ts +++ b/src/infra/ports-types.ts @@ -1,5 +1,6 @@ export type PortListener = { pid?: number; + ppid?: number; command?: string; commandLine?: string; user?: string;