mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-08 12:51:24 +00:00
fix: defer gateway restart until all replies are sent (#12970)
* fix: defer gateway restart until all replies are sent

  Fixes a race condition where gateway config changes (e.g., enabling plugins via iMessage) trigger an immediate SIGUSR1 restart, killing the iMessage RPC connection before replies are delivered.

  Both restart paths (config watcher and RPC-triggered) now defer until all queued operations, pending replies, and embedded agent runs complete (polling every 500ms, 30s timeout). A shared emitGatewayRestart() guard prevents double SIGUSR1 when both paths fire simultaneously.

  Key changes:
  - Dispatcher registry tracks active reply dispatchers globally
  - markComplete() called in finally block for guaranteed cleanup
  - Pre-restart deferral hook registered at gateway startup
  - Centralized extractDeliveryInfo() for session key parsing
  - Post-restart sentinel messages delivered directly (not via agent)
  - config-patch distinguished from config-apply in sentinel kind

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: single-source gateway restart authorization

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
Co-authored-by: Peter Steinberger <steipete@gmail.com>
This commit is contained in:
@@ -9,6 +9,7 @@ import {
|
||||
isGatewaySigusr1RestartExternallyAllowed,
|
||||
scheduleGatewaySigusr1Restart,
|
||||
setGatewaySigusr1RestartPolicy,
|
||||
setPreRestartDeferralCheck,
|
||||
} from "./restart.js";
|
||||
import { createTelegramRetryRunner } from "./retry-policy.js";
|
||||
import { getShellPathFromLoginShell, resetShellPathCacheForTests } from "./shell-env.js";
|
||||
@@ -79,11 +80,15 @@ describe("infra runtime", () => {
|
||||
__testing.resetSigusr1State();
|
||||
});
|
||||
|
||||
it("consumes a scheduled authorization once", async () => {
|
||||
it("authorizes exactly once when scheduled restart emits", async () => {
|
||||
expect(consumeGatewaySigusr1RestartAuthorization()).toBe(false);
|
||||
|
||||
scheduleGatewaySigusr1Restart({ delayMs: 0 });
|
||||
|
||||
// No pre-authorization before the scheduled emission fires.
|
||||
expect(consumeGatewaySigusr1RestartAuthorization()).toBe(false);
|
||||
await vi.advanceTimersByTimeAsync(0);
|
||||
|
||||
expect(consumeGatewaySigusr1RestartAuthorization()).toBe(true);
|
||||
expect(consumeGatewaySigusr1RestartAuthorization()).toBe(false);
|
||||
|
||||
@@ -97,6 +102,110 @@ describe("infra runtime", () => {
|
||||
});
|
||||
});
|
||||
|
||||
// Exercises scheduleGatewaySigusr1Restart() together with the optional
// callback installed through setPreRestartDeferralCheck(). Each case runs on
// fake timers and inspects calls to process.emit.
describe("pre-restart deferral check", () => {
  /**
   * Spies on `process.emit` and attaches a no-op SIGUSR1 listener.
   *
   * @returns the emit spy and a `detach` callback that removes the listener
   *   this helper attached.
   */
  const watchSigusr1 = () => {
    const emitSpy = vi.spyOn(process, "emit");
    const noop = () => {};
    process.on("SIGUSR1", noop);
    return {
      emitSpy,
      detach: () => {
        process.removeListener("SIGUSR1", noop);
      },
    };
  };

  beforeEach(() => {
    __testing.resetSigusr1State();
    vi.useFakeTimers();
    // Replace process.kill with a stub that just returns true.
    vi.spyOn(process, "kill").mockImplementation(() => true);
  });

  afterEach(async () => {
    // Run whatever timers are still queued before switching back to real timers.
    await vi.runOnlyPendingTimersAsync();
    vi.useRealTimers();
    vi.restoreAllMocks();
    __testing.resetSigusr1State();
  });

  it("emits SIGUSR1 immediately when no deferral check is registered", async () => {
    const { emitSpy, detach } = watchSigusr1();
    try {
      scheduleGatewaySigusr1Restart({ delayMs: 0 });
      await vi.advanceTimersByTimeAsync(0);

      expect(emitSpy).toHaveBeenCalledWith("SIGUSR1");
    } finally {
      detach();
    }
  });

  it("emits SIGUSR1 immediately when deferral check returns 0", async () => {
    const { emitSpy, detach } = watchSigusr1();
    try {
      setPreRestartDeferralCheck(() => 0);
      scheduleGatewaySigusr1Restart({ delayMs: 0 });
      await vi.advanceTimersByTimeAsync(0);

      expect(emitSpy).toHaveBeenCalledWith("SIGUSR1");
    } finally {
      detach();
    }
  });

  it("defers SIGUSR1 until deferral check returns 0", async () => {
    const { emitSpy, detach } = watchSigusr1();
    try {
      // The check reads this variable on every call, so the test can change
      // the reported amount of pending work between timer advances.
      let outstanding = 2;
      setPreRestartDeferralCheck(() => outstanding);
      scheduleGatewaySigusr1Restart({ delayMs: 0 });

      // Initial delay has elapsed, but the check still reports 2: no emit yet.
      await vi.advanceTimersByTimeAsync(0);
      expect(emitSpy).not.toHaveBeenCalledWith("SIGUSR1");

      // One poll interval (500ms) later the work is still pending.
      await vi.advanceTimersByTimeAsync(500);
      expect(emitSpy).not.toHaveBeenCalledWith("SIGUSR1");

      // Once the pending work has drained, the next poll should emit.
      outstanding = 0;
      await vi.advanceTimersByTimeAsync(500);
      expect(emitSpy).toHaveBeenCalledWith("SIGUSR1");
    } finally {
      detach();
    }
  });

  it("emits SIGUSR1 after deferral timeout even if still pending", async () => {
    const { emitSpy, detach } = watchSigusr1();
    try {
      // The check never reports 0, so only the timeout can release the restart.
      setPreRestartDeferralCheck(() => 5);
      scheduleGatewaySigusr1Restart({ delayMs: 0 });

      // Let the initial delay elapse.
      await vi.advanceTimersByTimeAsync(0);
      expect(emitSpy).not.toHaveBeenCalledWith("SIGUSR1");

      // Move past the 30s maximum deferral wait.
      await vi.advanceTimersByTimeAsync(30_000);
      expect(emitSpy).toHaveBeenCalledWith("SIGUSR1");
    } finally {
      detach();
    }
  });

  it("emits SIGUSR1 if deferral check throws", async () => {
    const { emitSpy, detach } = watchSigusr1();
    try {
      setPreRestartDeferralCheck(() => {
        throw new Error("boom");
      });
      scheduleGatewaySigusr1Restart({ delayMs: 0 });
      await vi.advanceTimersByTimeAsync(0);

      expect(emitSpy).toHaveBeenCalledWith("SIGUSR1");
    } finally {
      detach();
    }
  });
});
|
||||
|
||||
describe("getShellPathFromLoginShell", () => {
|
||||
afterEach(() => resetShellPathCacheForTests());
|
||||
|
||||
|
||||
Reference in New Issue
Block a user