From b9757114290f1939f07718b18727417cee3cfaa2 Mon Sep 17 00:00:00 2001 From: Frank Yang Date: Wed, 25 Feb 2026 23:40:48 -0800 Subject: [PATCH] fix(daemon): stabilize LaunchAgent restart and proxy env passthrough (#27276) Merged via /review-pr -> /prepare-pr -> /merge-pr. Prepared head SHA: b08797a99561f3d849443f77fda4fe086c508b49 Co-authored-by: frankekn <4488090+frankekn@users.noreply.github.com> Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com> Reviewed-by: @gumadeiras --- CHANGELOG.md | 1 + src/daemon/launchd-plist.ts | 4 +- src/daemon/launchd.integration.test.ts | 112 +++++++++++++++++++++++++ src/daemon/launchd.test.ts | 86 +++++++++++++++++++ src/daemon/launchd.ts | 70 +++++++++++++++- src/daemon/service-env.test.ts | 33 ++++++++ src/daemon/service-env.ts | 33 ++++++++ 7 files changed, 334 insertions(+), 5 deletions(-) create mode 100644 src/daemon/launchd.integration.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index b890896f0d3..21e75f9ab6d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ Docs: https://docs.openclaw.ai ### Fixes +- Daemon/macOS launchd: forward proxy env vars into supervised service environments, switch LaunchAgent keepalive policy to crash-only with throttling, and harden restart sequencing to `print -> bootout -> wait old pid exit -> bootstrap -> kickstart`. (#27276) thanks @frankekn. - Android/Node invoke: remove native gateway WebSocket `Origin` header to avoid false origin rejections, unify invoke command registry/policy/error parsing paths, and keep command availability checks centralized to reduce dispatcher/advertisement drift. (#27257) Thanks @obviyus. - CI/Windows: shard the Windows `checks-windows` test lane into two matrix jobs and honor explicit shard index overrides in `scripts/test-parallel.mjs` to reduce CI critical-path wall time. (#27234) Thanks @joshavant. 
/** Upper bound for the synchronous `launchctl print` capability probe. */
const LAUNCHCTL_PROBE_TIMEOUT_MS = 5_000;

/**
 * Reports whether the launchd integration tests can run on this host.
 *
 * Requirements checked, in order:
 * 1. the platform is macOS (`darwin`);
 * 2. `process.getuid` exists, so a `gui/<uid>` launchd domain can be named;
 * 3. `launchctl print gui/<uid>` can be spawned and exits with status 0.
 *
 * @returns `true` only when all three checks pass; `false` otherwise (never throws
 *   for a missing or failing `launchctl`).
 */
function canRunLaunchdIntegration(): boolean {
  if (process.platform !== "darwin" || typeof process.getuid !== "function") {
    return false;
  }
  const domain = `gui/${process.getuid()}`;
  // spawnSync blocks the whole process; without a timeout a hung launchctl would
  // stall the caller forever instead of simply disabling the integration suite.
  const probe = spawnSync("launchctl", ["print", domain], {
    encoding: "utf8",
    timeout: LAUNCHCTL_PROBE_TIMEOUT_MS,
  });
  // `probe.error` is set both when the binary cannot be spawned and when the
  // timeout fires, so either case reports "cannot run".
  return !probe.error && probe.status === 0;
}
"none"})`, + ); +} + +describeLaunchdIntegration("launchd integration", () => { + let env: GatewayServiceEnv | undefined; + let homeDir = ""; + const stdout = new PassThrough(); + + beforeAll(async () => { + const testId = randomUUID().slice(0, 8); + homeDir = await fs.mkdtemp(path.join(os.tmpdir(), `openclaw-launchd-int-${testId}-`)); + env = { + HOME: homeDir, + OPENCLAW_LAUNCHD_LABEL: `ai.openclaw.launchd-int-${testId}`, + OPENCLAW_LOG_PREFIX: `gateway-launchd-int-${testId}`, + }; + await installLaunchAgent({ + env, + stdout, + programArguments: [process.execPath, "-e", "setInterval(() => {}, 1000);"], + }); + await waitForRunningRuntime({ env }); + }, 30_000); + + afterAll(async () => { + if (env) { + try { + await uninstallLaunchAgent({ env, stdout }); + } catch { + // Best-effort cleanup in case launchctl state already changed. + } + } + if (homeDir) { + await fs.rm(homeDir, { recursive: true, force: true }); + } + }, 30_000); + + it("restarts launchd service and keeps it running with a new pid", async () => { + if (!env) { + throw new Error("launchd integration env was not initialized"); + } + const before = await waitForRunningRuntime({ env }); + await restartLaunchAgent({ env, stdout }); + const after = await waitForRunningRuntime({ env, pidNot: before.pid }); + expect(after.pid).toBeGreaterThan(1); + expect(after.pid).not.toBe(before.pid); + await fs.access(resolveLaunchAgentPlistPath(env)); + }, 30_000); +}); diff --git a/src/daemon/launchd.test.ts b/src/daemon/launchd.test.ts index b68774cb19f..7465666a158 100644 --- a/src/daemon/launchd.test.ts +++ b/src/daemon/launchd.test.ts @@ -5,12 +5,14 @@ import { isLaunchAgentListed, parseLaunchctlPrint, repairLaunchAgentBootstrap, + restartLaunchAgent, resolveLaunchAgentPlistPath, } from "./launchd.js"; const state = vi.hoisted(() => ({ launchctlCalls: [] as string[][], listOutput: "", + printOutput: "", bootstrapError: "", dirs: new Set(), files: new Map(), @@ -35,6 +37,9 @@ vi.mock("./exec-file.js", () => ({ 
if (call[0] === "list") { return { stdout: state.listOutput, stderr: "", code: 0 }; } + if (call[0] === "print") { + return { stdout: state.printOutput, stderr: "", code: 0 }; + } if (call[0] === "bootstrap" && state.bootstrapError) { return { stdout: "", stderr: state.bootstrapError, code: 1 }; } @@ -71,6 +76,7 @@ vi.mock("node:fs/promises", async (importOriginal) => { beforeEach(() => { state.launchctlCalls.length = 0; state.listOutput = ""; + state.printOutput = ""; state.bootstrapError = ""; state.dirs.clear(); state.files.clear(); @@ -179,6 +185,86 @@ describe("launchd install", () => { expect(plist).toContain(`${tmpDir}`); }); + it("writes crash-only KeepAlive policy with throttle interval", async () => { + const env = createDefaultLaunchdEnv(); + await installLaunchAgent({ + env, + stdout: new PassThrough(), + programArguments: defaultProgramArguments, + }); + + const plistPath = resolveLaunchAgentPlistPath(env); + const plist = state.files.get(plistPath) ?? ""; + expect(plist).toContain("KeepAlive"); + expect(plist).toContain("SuccessfulExit"); + expect(plist).toContain(""); + expect(plist).toContain("ThrottleInterval"); + expect(plist).toContain("5"); + }); + + it("restarts LaunchAgent with bootout-bootstrap-kickstart order", async () => { + const env = createDefaultLaunchdEnv(); + await restartLaunchAgent({ + env, + stdout: new PassThrough(), + }); + + const domain = typeof process.getuid === "function" ? 
`gui/${process.getuid()}` : "gui/501"; + const label = "ai.openclaw.gateway"; + const plistPath = resolveLaunchAgentPlistPath(env); + const bootoutIndex = state.launchctlCalls.findIndex( + (c) => c[0] === "bootout" && c[1] === `${domain}/${label}`, + ); + const bootstrapIndex = state.launchctlCalls.findIndex( + (c) => c[0] === "bootstrap" && c[1] === domain && c[2] === plistPath, + ); + const kickstartIndex = state.launchctlCalls.findIndex( + (c) => c[0] === "kickstart" && c[1] === "-k" && c[2] === `${domain}/${label}`, + ); + + expect(bootoutIndex).toBeGreaterThanOrEqual(0); + expect(bootstrapIndex).toBeGreaterThanOrEqual(0); + expect(kickstartIndex).toBeGreaterThanOrEqual(0); + expect(bootoutIndex).toBeLessThan(bootstrapIndex); + expect(bootstrapIndex).toBeLessThan(kickstartIndex); + }); + + it("waits for previous launchd pid to exit before bootstrapping", async () => { + const env = createDefaultLaunchdEnv(); + state.printOutput = ["state = running", "pid = 4242"].join("\n"); + const killSpy = vi.spyOn(process, "kill"); + killSpy + .mockImplementationOnce(() => true) + .mockImplementationOnce(() => { + const err = new Error("no such process") as NodeJS.ErrnoException; + err.code = "ESRCH"; + throw err; + }); + + vi.useFakeTimers(); + try { + const restartPromise = restartLaunchAgent({ + env, + stdout: new PassThrough(), + }); + await vi.advanceTimersByTimeAsync(250); + await restartPromise; + expect(killSpy).toHaveBeenCalledWith(4242, 0); + const domain = typeof process.getuid === "function" ? 
/** Default upper bound for waiting on the previous service process to exit. */
const RESTART_PID_WAIT_TIMEOUT_MS = 10_000;
/** Default delay between liveness probes while waiting. */
const RESTART_PID_WAIT_INTERVAL_MS = 200;

/** Resolves after roughly `ms` milliseconds. */
async function sleepMs(ms: number): Promise<void> {
  await new Promise<void>((resolve) => {
    setTimeout(resolve, ms);
  });
}

/**
 * Polls a process id with signal 0 until it stops being signalable or the
 * timeout elapses.
 *
 * @param pid - Process id to watch. Non-integer values and pids `<= 1` are
 *   treated as "nothing to wait for".
 * @param timeoutMs - Maximum total wait; defaults to `RESTART_PID_WAIT_TIMEOUT_MS`.
 * @param intervalMs - Delay between probes; defaults to `RESTART_PID_WAIT_INTERVAL_MS`.
 * @returns `true` when there was nothing to wait for or the probe failed (the
 *   process is considered gone); `false` when the pid was still signalable at
 *   the deadline. Callers that ignore the result behave as before.
 */
async function waitForPidExit(
  pid: number,
  timeoutMs: number = RESTART_PID_WAIT_TIMEOUT_MS,
  intervalMs: number = RESTART_PID_WAIT_INTERVAL_MS,
): Promise<boolean> {
  if (!Number.isInteger(pid) || pid <= 1) {
    return true;
  }
  const deadline = Date.now() + timeoutMs;
  while (Date.now() < deadline) {
    try {
      // Signal 0 performs the existence/permission check without delivering a signal.
      process.kill(pid, 0);
    } catch {
      // Every probe failure ends the wait. ESRCH means the pid no longer exists.
      // NOTE(review): EPERM is also treated as "exited" — presumably because a pid
      // we may not signal cannot be the service we just booted out; confirm.
      return true;
    }
    // Never sleep past the deadline, so the total wait stays within `timeoutMs`.
    const remainingMs = Math.max(0, deadline - Date.now());
    await sleepMs(Math.min(intervalMs, remainingMs));
  }
  return false;
}
(process.env as GatewayServiceEnv); const domain = resolveGuiDomain(); - const label = resolveLaunchAgentLabel({ env }); - const res = await execLaunchctl(["kickstart", "-k", `${domain}/${label}`]); - if (res.code !== 0) { - throw new Error(`launchctl kickstart failed: ${res.stderr || res.stdout}`.trim()); + const label = resolveLaunchAgentLabel({ env: serviceEnv }); + const plistPath = resolveLaunchAgentPlistPath(serviceEnv); + + const runtime = await execLaunchctl(["print", `${domain}/${label}`]); + const previousPid = + runtime.code === 0 + ? parseLaunchctlPrint(runtime.stdout || runtime.stderr || "").pid + : undefined; + + const stop = await execLaunchctl(["bootout", `${domain}/${label}`]); + if (stop.code !== 0 && !isLaunchctlNotLoaded(stop)) { + throw new Error(`launchctl bootout failed: ${stop.stderr || stop.stdout}`.trim()); + } + if (typeof previousPid === "number") { + await waitForPidExit(previousPid); + } + + const boot = await execLaunchctl(["bootstrap", domain, plistPath]); + if (boot.code !== 0) { + const detail = (boot.stderr || boot.stdout).trim(); + if (isUnsupportedGuiDomain(detail)) { + throw new Error( + [ + `launchctl bootstrap failed: ${detail}`, + `LaunchAgent restart requires a logged-in macOS GUI session for this user (${domain}).`, + "This usually means you are running from SSH/headless context or as the wrong user (including sudo).", + "Fix: sign in to the macOS desktop as the target user and rerun `openclaw gateway restart`.", + "Headless deployments should use a dedicated logged-in user session or a custom LaunchDaemon (not shipped): https://docs.openclaw.ai/gateway", + ].join("\n"), + ); + } + throw new Error(`launchctl bootstrap failed: ${detail}`); + } + + const start = await execLaunchctl(["kickstart", "-k", `${domain}/${label}`]); + if (start.code !== 0) { + throw new Error(`launchctl kickstart failed: ${start.stderr || start.stdout}`.trim()); } try { stdout.write(`${formatLine("Restarted LaunchAgent", `${domain}/${label}`)}\n`); 
/**
 * Proxy-related variables copied into supervised service environments.
 * Both upper- and lower-case spellings are listed, and each is forwarded under
 * the exact name it was found with.
 */
const SERVICE_PROXY_ENV_KEYS = [
  "HTTP_PROXY",
  "HTTPS_PROXY",
  "NO_PROXY",
  "ALL_PROXY",
  "http_proxy",
  "https_proxy",
  "no_proxy",
  "all_proxy",
] as const;

/**
 * Extracts the proxy settings from `env` that should be passed to a service.
 *
 * @param env - Source environment; entries may be missing or `undefined`.
 * @returns A new object holding only the keys in `SERVICE_PROXY_ENV_KEYS` whose
 *   value is a string that is non-empty after trimming. Values are stored trimmed;
 *   `env` itself is not modified.
 */
function readServiceProxyEnvironment(
  env: Record<string, string | undefined>,
): Record<string, string> {
  const forwarded: Record<string, string> = {};
  for (const key of SERVICE_PROXY_ENV_KEYS) {
    const raw = env[key];
    // Runtime guard: skip unset entries (and anything that is not a string).
    if (typeof raw !== "string") {
      continue;
    }
    const value = raw.trim();
    // Whitespace-only values are dropped rather than forwarded as empty strings.
    if (value) {
      forwarded[key] = value;
    }
  }
  return forwarded;
}