fix(infra): actively kickstart launchd on supervised gateway restart

When an agent triggers a gateway restart in supervised mode, the process
exits expecting launchd KeepAlive to respawn it. But ThrottleInterval
(default 10s, or 60s on older installs) can delay or prevent restart.

Now calls schedulelaunchdKickstart() to spawn a detached launchctl
kickstart before exiting, ensuring immediate respawn. The kickstart is
best-effort: if it fails, the restart still reports "supervised" and
relies on launchd KeepAlive.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Cathryn Lavery
2026-02-27 13:19:40 -06:00
committed by Peter Steinberger
parent ee2eaddeb3
commit db67492a00
2 changed files with 98 additions and 1 deletions

View File

@@ -13,12 +13,26 @@ import { restartGatewayProcessWithFreshPid } from "./process-respawn.js";
// Copies of the launch arguments taken when this module loads; afterEach()
// assigns fresh copies back so a test that mutates them cannot affect the next.
const originalArgv = [...process.argv];
const originalExecArgv = [...process.execArgv];
// Environment snapshot; its restore() is invoked from afterEach().
const envSnapshot = captureFullEnv();
// Descriptor of the real `process.platform`, captured once so tests can
// override the property and later put the original back.
const originalPlatformDescriptor = Object.getOwnPropertyDescriptor(process, "platform");

/**
 * Override `process.platform` for the current test.
 *
 * The property is re-defined from the captured descriptor with only `value`
 * swapped out. Does nothing when no descriptor could be captured.
 */
function setPlatform(platform: string) {
  if (originalPlatformDescriptor) {
    const overridden = { ...originalPlatformDescriptor, value: platform };
    Object.defineProperty(process, "platform", overridden);
  }
}
// Undo the global state a test may have touched: environment, argv/execArgv,
// recorded spawn calls and the process.platform override.
afterEach(() => {
  envSnapshot.restore();
  process.argv = originalArgv.slice();
  process.execArgv = originalExecArgv.slice();
  // mockClear() only forgets recorded calls; stubbed return values or
  // implementations set by a test are left in place.
  spawnMock.mockClear();
  const platformDescriptor = originalPlatformDescriptor;
  if (platformDescriptor !== undefined) {
    Object.defineProperty(process, "platform", platformDescriptor);
  }
});
function clearSupervisorHints() {
@@ -42,6 +56,53 @@ describe("restartGatewayProcessWithFreshPid", () => {
expect(spawnMock).not.toHaveBeenCalled();
});
it("schedules detached launchctl kickstart on macOS when launchd label is set", () => {
  // Arrange: look like a launchd-managed gateway running on macOS.
  const label = "ai.openclaw.gateway";
  setPlatform("darwin");
  process.env.LAUNCH_JOB_LABEL = label;
  process.env.OPENCLAW_LAUNCHD_LABEL = label;
  const unref = vi.fn();
  spawnMock.mockReturnValue({ unref, on: vi.fn() });

  // Act
  const { mode } = restartGatewayProcessWithFreshPid();

  // Assert: supervised mode, plus one detached, unref'd launchctl kickstart.
  expect(mode).toBe("supervised");
  const expectedArgs = ["kickstart", "-k", expect.stringContaining(label)];
  const expectedOptions = expect.objectContaining({ detached: true, stdio: "ignore" });
  expect(spawnMock).toHaveBeenCalledWith("launchctl", expectedArgs, expectedOptions);
  expect(unref).toHaveBeenCalledOnce();
});
it("still returns supervised even if kickstart spawn throws", () => {
  setPlatform("darwin");
  process.env.LAUNCH_JOB_LABEL = "ai.openclaw.gateway";
  process.env.OPENCLAW_LAUNCHD_LABEL = "ai.openclaw.gateway";
  // One-shot implementation: afterEach() only calls mockClear(), which would
  // leave a persistent throwing implementation installed for later tests.
  spawnMock.mockImplementationOnce((...args: unknown[]) => {
    const [cmd] = args;
    if (cmd === "launchctl") {
      throw new Error("spawn failed");
    }
    return { unref: vi.fn(), on: vi.fn() };
  });

  const result = restartGatewayProcessWithFreshPid();

  // Kickstart is best-effort; failure should not block supervised exit
  expect(result.mode).toBe("supervised");
  // Prove the throwing path was actually exercised; without this the test
  // would also pass if the kickstart were never attempted.
  expect(spawnMock).toHaveBeenCalledWith("launchctl", expect.any(Array), expect.any(Object));
});
it("does not schedule kickstart on non-darwin platforms", () => {
  // A launchd label is present, but the platform is not macOS.
  // NOTE(review): INVOCATION_ID presumably acts as the supervisor hint here
  // (systemd sets it for units) — confirm against hasSupervisorHint().
  setPlatform("linux");
  process.env.INVOCATION_ID = "abc123";
  process.env.OPENCLAW_LAUNCHD_LABEL = "ai.openclaw.gateway";

  const { mode } = restartGatewayProcessWithFreshPid();

  expect(mode).toBe("supervised");
  // No launchctl (or any other) child process may be spawned.
  expect(spawnMock).not.toHaveBeenCalled();
});
it("spawns detached child with current exec argv", () => {
delete process.env.OPENCLAW_NO_RESPAWN;
clearSupervisorHints();
@@ -64,10 +125,18 @@ describe("restartGatewayProcessWithFreshPid", () => {
it("returns supervised when OPENCLAW_LAUNCHD_LABEL is set (stock launchd plist)", () => {
  // Only OPENCLAW_LAUNCHD_LABEL is set; every other supervisor hint is cleared.
  clearSupervisorHints();
  setPlatform("darwin");
  process.env.OPENCLAW_LAUNCHD_LABEL = "ai.openclaw.gateway";
  const unrefMock = vi.fn();
  spawnMock.mockReturnValue({ unref: unrefMock, on: vi.fn() });

  const result = restartGatewayProcessWithFreshPid();

  expect(result.mode).toBe("supervised");
  // On macOS the supervised path now spawns a detached launchctl kickstart,
  // so the former "spawn not called" expectation no longer applies.
  expect(spawnMock).toHaveBeenCalledWith(
    "launchctl",
    expect.arrayContaining(["kickstart", "-k"]),
    expect.objectContaining({ detached: true }),
  );
  expect(unrefMock).toHaveBeenCalledOnce();
});
it("returns supervised when OPENCLAW_SYSTEMD_UNIT is set", () => {

View File

@@ -21,6 +21,29 @@ function isLikelySupervisedProcess(env: NodeJS.ProcessEnv = process.env): boolea
return hasSupervisorHint(env);
}
/**
 * Fire-and-forget `launchctl kickstart -k <target>` so launchd restarts the
 * gateway immediately instead of waiting out ThrottleInterval.
 *
 * `-k` tells launchd to kill the already-running instance — this very
 * process — before starting the replacement. The command is therefore
 * spawned detached and never waited on (spawn, not spawnSync): blocking on
 * it would deadlock, because the gateway must stay free to handle the
 * signal and exit so launchd can start the replacement.
 *
 * @param label launchd job label; surrounding whitespace is ignored.
 * @returns `true` when the spawn call was issued, `false` when the label is
 *   blank or `spawn` threw synchronously. Failures reported later through
 *   the child's "error" event are deliberately ignored (best-effort).
 */
function schedulelaunchdKickstart(label: string): boolean {
  const service = label.trim();
  if (service.length === 0) {
    // Without a label the target would be the malformed `gui/<uid>/`.
    return false;
  }
  // process.getuid does not exist on every platform; fall back to the bare label.
  const uid = process.getuid?.();
  const target = uid === undefined ? service : `gui/${uid}/${service}`;
  try {
    const child = spawn("launchctl", ["kickstart", "-k", target], {
      detached: true,
      stdio: "ignore",
    });
    // Swallow asynchronous spawn failures so they cannot surface as an
    // unhandled "error" event.
    child.on("error", () => {});
    // Do not let the child keep this process's event loop alive.
    child.unref();
    return true;
  } catch {
    return false;
  }
}
/**
* Attempt to restart this process with a fresh PID.
* - supervised environments (launchd/systemd): caller should exit and let supervisor restart
@@ -32,6 +55,11 @@ export function restartGatewayProcessWithFreshPid(): GatewayRespawnResult {
return { mode: "disabled" };
}
if (isLikelySupervisedProcess(process.env)) {
// On macOS under launchd, fire a detached kickstart so launchd restarts
// us immediately instead of waiting for ThrottleInterval (up to 60s).
if (process.platform === "darwin" && process.env.OPENCLAW_LAUNCHD_LABEL?.trim()) {
schedulelaunchdKickstart(process.env.OPENCLAW_LAUNCHD_LABEL.trim());
}
return { mode: "supervised" };
}