mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-07 17:01:22 +00:00
refactor: unify gateway restart deferral and dispatcher cleanup
This commit is contained in:
@@ -6,7 +6,9 @@ import { ensureBinary } from "./binaries.js";
|
||||
import {
|
||||
__testing,
|
||||
consumeGatewaySigusr1RestartAuthorization,
|
||||
emitGatewayRestart,
|
||||
isGatewaySigusr1RestartExternallyAllowed,
|
||||
markGatewaySigusr1RestartHandled,
|
||||
scheduleGatewaySigusr1Restart,
|
||||
setGatewaySigusr1RestartPolicy,
|
||||
setPreRestartDeferralCheck,
|
||||
@@ -100,6 +102,25 @@ describe("infra runtime", () => {
|
||||
setGatewaySigusr1RestartPolicy({ allowExternal: true });
|
||||
expect(isGatewaySigusr1RestartExternallyAllowed()).toBe(true);
|
||||
});
|
||||
|
||||
it("suppresses duplicate emit until the restart cycle is marked handled", () => {
|
||||
const emitSpy = vi.spyOn(process, "emit");
|
||||
const handler = () => {};
|
||||
process.on("SIGUSR1", handler);
|
||||
try {
|
||||
expect(emitGatewayRestart()).toBe(true);
|
||||
expect(emitGatewayRestart()).toBe(false);
|
||||
expect(consumeGatewaySigusr1RestartAuthorization()).toBe(true);
|
||||
|
||||
markGatewaySigusr1RestartHandled();
|
||||
|
||||
expect(emitGatewayRestart()).toBe(true);
|
||||
const sigusr1Emits = emitSpy.mock.calls.filter((args) => args[0] === "SIGUSR1");
|
||||
expect(sigusr1Emits.length).toBe(2);
|
||||
} finally {
|
||||
process.removeListener("SIGUSR1", handler);
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
describe("pre-restart deferral check", () => {
|
||||
|
||||
@@ -13,12 +13,20 @@ export type RestartAttempt = {
|
||||
|
||||
const SPAWN_TIMEOUT_MS = 2000;
|
||||
const SIGUSR1_AUTH_GRACE_MS = 5000;
|
||||
const DEFAULT_DEFERRAL_POLL_MS = 500;
|
||||
const DEFAULT_DEFERRAL_MAX_WAIT_MS = 30_000;
|
||||
|
||||
let sigusr1AuthorizedCount = 0;
|
||||
let sigusr1AuthorizedUntil = 0;
|
||||
let sigusr1ExternalAllowed = false;
|
||||
let preRestartCheck: (() => number) | null = null;
|
||||
let sigusr1Emitted = false;
|
||||
let restartCycleToken = 0;
|
||||
let emittedRestartToken = 0;
|
||||
let consumedRestartToken = 0;
|
||||
|
||||
function hasUnconsumedRestartSignal(): boolean {
|
||||
return emittedRestartToken > consumedRestartToken;
|
||||
}
|
||||
|
||||
/**
|
||||
* Register a callback that scheduleGatewaySigusr1Restart checks before emitting SIGUSR1.
|
||||
@@ -35,10 +43,11 @@ export function setPreRestartDeferralCheck(fn: () => number): void {
|
||||
* to ensure only one restart fires.
|
||||
*/
|
||||
export function emitGatewayRestart(): boolean {
|
||||
if (sigusr1Emitted) {
|
||||
if (hasUnconsumedRestartSignal()) {
|
||||
return false;
|
||||
}
|
||||
sigusr1Emitted = true;
|
||||
const cycleToken = ++restartCycleToken;
|
||||
emittedRestartToken = cycleToken;
|
||||
authorizeGatewaySigusr1Restart();
|
||||
try {
|
||||
if (process.listenerCount("SIGUSR1") > 0) {
|
||||
@@ -47,7 +56,9 @@ export function emitGatewayRestart(): boolean {
|
||||
process.kill(process.pid, "SIGUSR1");
|
||||
}
|
||||
} catch {
|
||||
/* ignore */
|
||||
// Roll back the cycle marker so future restart requests can still proceed.
|
||||
emittedRestartToken = consumedRestartToken;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@@ -85,10 +96,6 @@ export function consumeGatewaySigusr1RestartAuthorization(): boolean {
|
||||
if (sigusr1AuthorizedCount <= 0) {
|
||||
return false;
|
||||
}
|
||||
// Reset the emission guard so the next restart cycle can fire.
|
||||
// The run loop re-enters startGatewayServer() after close(), which
|
||||
// re-registers setPreRestartDeferralCheck and can schedule new restarts.
|
||||
sigusr1Emitted = false;
|
||||
sigusr1AuthorizedCount -= 1;
|
||||
if (sigusr1AuthorizedCount <= 0) {
|
||||
sigusr1AuthorizedUntil = 0;
|
||||
@@ -96,6 +103,80 @@ export function consumeGatewaySigusr1RestartAuthorization(): boolean {
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Mark the currently emitted SIGUSR1 restart cycle as consumed by the run loop.
|
||||
* This explicitly advances the cycle state instead of resetting emit guards inside
|
||||
* consumeGatewaySigusr1RestartAuthorization().
|
||||
*/
|
||||
export function markGatewaySigusr1RestartHandled(): void {
|
||||
if (hasUnconsumedRestartSignal()) {
|
||||
consumedRestartToken = emittedRestartToken;
|
||||
}
|
||||
}
|
||||
|
||||
export type RestartDeferralHooks = {
|
||||
onDeferring?: (pending: number) => void;
|
||||
onReady?: () => void;
|
||||
onTimeout?: (pending: number, elapsedMs: number) => void;
|
||||
onCheckError?: (err: unknown) => void;
|
||||
};
|
||||
|
||||
/**
|
||||
* Poll pending work until it drains (or times out), then emit one restart signal.
|
||||
* Shared by both the direct RPC restart path and the config watcher path.
|
||||
*/
|
||||
export function deferGatewayRestartUntilIdle(opts: {
|
||||
getPendingCount: () => number;
|
||||
hooks?: RestartDeferralHooks;
|
||||
pollMs?: number;
|
||||
maxWaitMs?: number;
|
||||
}): void {
|
||||
const pollMsRaw = opts.pollMs ?? DEFAULT_DEFERRAL_POLL_MS;
|
||||
const pollMs = Math.max(10, Math.floor(pollMsRaw));
|
||||
const maxWaitMsRaw = opts.maxWaitMs ?? DEFAULT_DEFERRAL_MAX_WAIT_MS;
|
||||
const maxWaitMs = Math.max(pollMs, Math.floor(maxWaitMsRaw));
|
||||
|
||||
let pending: number;
|
||||
try {
|
||||
pending = opts.getPendingCount();
|
||||
} catch (err) {
|
||||
opts.hooks?.onCheckError?.(err);
|
||||
emitGatewayRestart();
|
||||
return;
|
||||
}
|
||||
if (pending <= 0) {
|
||||
opts.hooks?.onReady?.();
|
||||
emitGatewayRestart();
|
||||
return;
|
||||
}
|
||||
|
||||
opts.hooks?.onDeferring?.(pending);
|
||||
const startedAt = Date.now();
|
||||
const poll = setInterval(() => {
|
||||
let current: number;
|
||||
try {
|
||||
current = opts.getPendingCount();
|
||||
} catch (err) {
|
||||
clearInterval(poll);
|
||||
opts.hooks?.onCheckError?.(err);
|
||||
emitGatewayRestart();
|
||||
return;
|
||||
}
|
||||
if (current <= 0) {
|
||||
clearInterval(poll);
|
||||
opts.hooks?.onReady?.();
|
||||
emitGatewayRestart();
|
||||
return;
|
||||
}
|
||||
const elapsedMs = Date.now() - startedAt;
|
||||
if (elapsedMs >= maxWaitMs) {
|
||||
clearInterval(poll);
|
||||
opts.hooks?.onTimeout?.(current, elapsedMs);
|
||||
emitGatewayRestart();
|
||||
}
|
||||
}, pollMs);
|
||||
}
|
||||
|
||||
function formatSpawnDetail(result: {
|
||||
error?: unknown;
|
||||
status?: number | null;
|
||||
@@ -227,40 +308,14 @@ export function scheduleGatewaySigusr1Restart(opts?: {
|
||||
typeof opts?.reason === "string" && opts.reason.trim()
|
||||
? opts.reason.trim().slice(0, 200)
|
||||
: undefined;
|
||||
const DEFERRAL_POLL_MS = 500;
|
||||
const DEFERRAL_MAX_WAIT_MS = 30_000;
|
||||
|
||||
setTimeout(() => {
|
||||
if (!preRestartCheck) {
|
||||
const pendingCheck = preRestartCheck;
|
||||
if (!pendingCheck) {
|
||||
emitGatewayRestart();
|
||||
return;
|
||||
}
|
||||
let pending: number;
|
||||
try {
|
||||
pending = preRestartCheck();
|
||||
} catch {
|
||||
emitGatewayRestart();
|
||||
return;
|
||||
}
|
||||
if (pending <= 0) {
|
||||
emitGatewayRestart();
|
||||
return;
|
||||
}
|
||||
// Poll until pending work drains or timeout
|
||||
let waited = 0;
|
||||
const poll = setInterval(() => {
|
||||
waited += DEFERRAL_POLL_MS;
|
||||
let current: number;
|
||||
try {
|
||||
current = preRestartCheck!();
|
||||
} catch {
|
||||
current = 0;
|
||||
}
|
||||
if (current <= 0 || waited >= DEFERRAL_MAX_WAIT_MS) {
|
||||
clearInterval(poll);
|
||||
emitGatewayRestart();
|
||||
}
|
||||
}, DEFERRAL_POLL_MS);
|
||||
deferGatewayRestartUntilIdle({ getPendingCount: pendingCheck });
|
||||
}, delayMs);
|
||||
return {
|
||||
ok: true,
|
||||
@@ -278,6 +333,8 @@ export const __testing = {
|
||||
sigusr1AuthorizedUntil = 0;
|
||||
sigusr1ExternalAllowed = false;
|
||||
preRestartCheck = null;
|
||||
sigusr1Emitted = false;
|
||||
restartCycleToken = 0;
|
||||
emittedRestartToken = 0;
|
||||
consumedRestartToken = 0;
|
||||
},
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user