fix: kill stuck ACP child processes on startup and harden sessions in discord threads (#33699)

* Gateway: resolve agent.wait for chat.send runs

* Discord: harden ACP thread binding + listener timeout

* ACPX: handle already-exited child wait

* Gateway/Discord: address PR review findings

* Discord: keep ACP error-state thread bindings on startup

* gateway: make agent.wait dedupe bridge event-driven

* discord: harden ACP probe classification and cap startup fan-out

* discord: add cooperative timeout cancellation

* discord: fix startup probe concurrency helper typing

* plugin-sdk: avoid Windows root-alias shard timeout

* plugin-sdk: keep root alias reflection path non-blocking

* discord+gateway: resolve remaining PR review findings

* gateway+discord: fix codex review regressions

* Discord/Gateway: address Codex review findings

* Gateway: keep agent.wait lifecycle active with shared run IDs

* Discord: clean up status reactions on aborted runs

* fix: add changelog note for ACP/Discord startup hardening (#33699) (thanks @dutifulbob)

---------

Co-authored-by: Onur <2453968+osolmaz@users.noreply.github.com>
This commit is contained in:
Bob
2026-03-04 10:52:28 +01:00
committed by GitHub
parent bd25182d5a
commit 61f7cea48b
30 changed files with 2568 additions and 180 deletions

View File

@@ -12,6 +12,15 @@ describe("buildSystemdUnit", () => {
expect(execStart).toBe('ExecStart=/usr/bin/openclaw gateway --name "My Bot"');
});
it("renders control-group kill mode for child-process cleanup", () => {
  // Unit options handed to the builder for this case.
  const options = {
    description: "OpenClaw Gateway",
    programArguments: ["/usr/bin/openclaw", "gateway", "run"],
    environment: {},
  };
  const renderedUnit = buildSystemdUnit(options);
  // The rendered unit text must include the control-group kill mode directive.
  expect(renderedUnit).toContain("KillMode=control-group");
});
it("rejects environment values with line breaks", () => {
expect(() =>
buildSystemdUnit({

View File

@@ -59,10 +59,9 @@ export function buildSystemdUnit({
`ExecStart=${execStart}`,
"Restart=always",
"RestartSec=5",
// KillMode=process ensures systemd only waits for the main process to exit.
// Without this, podman's conmon (container monitor) processes block shutdown
// since they run as children of the gateway and stay in the same cgroup.
"KillMode=process",
// Keep service children in the same lifecycle so restarts do not leave
// orphan ACP/runtime workers behind.
"KillMode=control-group",
workingDirLine,
...envLines,
"",