diff --git a/CHANGELOG.md b/CHANGELOG.md index c110e2f612f..c7252c469cf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,6 +34,7 @@ Docs: https://docs.openclaw.ai - Android/Nodes: harden `app.update` by requiring HTTPS and gateway-host URL matching plus SHA-256 verification, stream URL camera downloads to disk with size guards to avoid memory spikes, and stop signing release builds with debug keys. (#13541) Thanks @smartprogrammer93. - Auto-reply/Threading: auto-inject implicit reply threading so `replyToMode` works without requiring model-emitted `[[reply_to_current]]`, while preserving `replyToMode: "off"` behavior for implicit Slack replies and keeping block-streaming chunk coalescing stable under `replyToMode: "first"`. (#14976) Thanks @Diaspar4u. - Sandbox: pass configured `sandbox.docker.env` variables to sandbox containers at `docker create` time. (#15138) Thanks @stevebot-alive. +- Gateway/Restart: clear stale command-queue and heartbeat wake runtime state after SIGUSR1 in-process restarts to prevent zombie gateway behavior where queued work stops draining. (#15195) Thanks @joeykrug. - Onboarding/CLI: restore terminal state without resuming paused `stdin`, so onboarding exits cleanly after choosing Web UI and the installer returns instead of appearing stuck. - Auth/OpenAI Codex: share OAuth login handling across onboarding and `models auth login --provider openai-codex`, keep onboarding alive when OAuth fails, and surface a direct OAuth help note instead of terminating the wizard. (#15406, follow-up to #14552) Thanks @zhiluo20. - Onboarding/Providers: add vLLM as an onboarding provider with model discovery, auth profile wiring, and non-interactive auth-choice validation. (#12577) Thanks @gejifeng. 
// src/cli/gateway-cli/run-loop.test.ts (new file)
import { describe, expect, it, vi } from "vitest";

/** Listener shape used when snapshotting and restoring process signal handlers. */
type SignalListener = (...args: unknown[]) => void;

// Shared mock handles. The `vi.mock` factories below delegate to these through
// arrow wrappers so each test can reconfigure return values per call.
const acquireGatewayLock = vi.fn(async () => ({
  release: vi.fn(async () => {}),
}));
const consumeGatewaySigusr1RestartAuthorization = vi.fn(() => true);
const isGatewaySigusr1RestartExternallyAllowed = vi.fn(() => false);
const getActiveTaskCount = vi.fn(() => 0);
// The timeout parameter is declared so the mock's signature matches the
// one-argument call made by the command-queue mock factory.
const waitForActiveTasks = vi.fn(async (_timeoutMs: number) => ({ drained: true }));
const resetAllLanes = vi.fn();
const gatewayLog = {
  info: vi.fn(),
  warn: vi.fn(),
  error: vi.fn(),
};

vi.mock("../../infra/gateway-lock.js", () => ({
  acquireGatewayLock: () => acquireGatewayLock(),
}));

vi.mock("../../infra/restart.js", () => ({
  consumeGatewaySigusr1RestartAuthorization: () => consumeGatewaySigusr1RestartAuthorization(),
  isGatewaySigusr1RestartExternallyAllowed: () => isGatewaySigusr1RestartExternallyAllowed(),
}));

vi.mock("../../process/command-queue.js", () => ({
  getActiveTaskCount: () => getActiveTaskCount(),
  waitForActiveTasks: (timeoutMs: number) => waitForActiveTasks(timeoutMs),
  resetAllLanes: () => resetAllLanes(),
}));

vi.mock("../../logging/subsystem.js", () => ({
  createSubsystemLogger: () => gatewayLog,
}));

/**
 * Snapshots the listeners currently registered on `process` for `signal`.
 *
 * @param signal - Signal name whose listeners are captured.
 * @returns A set holding the listeners present at call time.
 */
function captureSignalListeners(signal: NodeJS.Signals): Set<SignalListener> {
  return new Set(process.listeners(signal) as SignalListener[]);
}

/**
 * Removes every listener for `signal` that is not in the `existing` snapshot,
 * so handlers installed during a test do not leak into later tests.
 *
 * @param signal - Signal name to clean up.
 * @param existing - Listeners that were registered before the test started.
 */
function removeNewSignalListeners(signal: NodeJS.Signals, existing: Set<SignalListener>) {
  for (const listener of process.listeners(signal)) {
    const fn = listener as SignalListener;
    if (!existing.has(fn)) {
      process.removeListener(signal, fn);
    }
  }
}

describe("runGatewayLoop", () => {
  it("restarts after SIGUSR1 even when drain times out, and resets lanes for the new iteration", async () => {
    vi.clearAllMocks();
    // First restart sees active tasks and a drain that does not finish; the
    // second restart sees no active tasks.
    getActiveTaskCount.mockReturnValueOnce(2).mockReturnValueOnce(0);
    waitForActiveTasks.mockResolvedValueOnce({ drained: false });

    const closeFirst = vi.fn(async () => {});
    const closeSecond = vi.fn(async () => {});
    // Two successful starts followed by a rejection: the rejection is how this
    // test gets `runGatewayLoop` to settle.
    const start = vi
      .fn<
        () => Promise<{
          close: (opts: { reason: string; restartExpectedMs: number | null }) => Promise<void>;
        }>
      >()
      .mockResolvedValueOnce({ close: closeFirst })
      .mockResolvedValueOnce({ close: closeSecond })
      .mockRejectedValueOnce(new Error("stop-loop"));

    const beforeSigterm = captureSignalListeners("SIGTERM");
    const beforeSigint = captureSignalListeners("SIGINT");
    const beforeSigusr1 = captureSignalListeners("SIGUSR1");

    // Import lazily so the module is loaded with the mocks above in place.
    const loopPromise = import("./run-loop.js").then(({ runGatewayLoop }) =>
      runGatewayLoop({
        start,
        runtime: {
          exit: vi.fn(),
        } as { exit: (code: number) => never },
      }),
    );

    try {
      await vi.waitFor(() => {
        expect(start).toHaveBeenCalledTimes(1);
      });

      process.emit("SIGUSR1");

      await vi.waitFor(() => {
        expect(start).toHaveBeenCalledTimes(2);
      });

      expect(waitForActiveTasks).toHaveBeenCalledWith(30_000);
      expect(gatewayLog.warn).toHaveBeenCalledWith(
        "drain timeout reached; proceeding with restart",
      );
      expect(closeFirst).toHaveBeenCalledWith({
        reason: "gateway restarting",
        restartExpectedMs: 1500,
      });
      expect(resetAllLanes).toHaveBeenCalledTimes(1);

      process.emit("SIGUSR1");

      await expect(loopPromise).rejects.toThrow("stop-loop");
      expect(closeSecond).toHaveBeenCalledWith({
        reason: "gateway restarting",
        restartExpectedMs: 1500,
      });
      expect(resetAllLanes).toHaveBeenCalledTimes(2);
    } finally {
      // Always drop handlers the loop installed, even when an assertion fails.
      removeNewSignalListeners("SIGTERM", beforeSigterm);
      removeNewSignalListeners("SIGINT", beforeSigint);
      removeNewSignalListeners("SIGUSR1", beforeSigusr1);
    }
  });
});