fix: defer gateway restart until all replies are sent (#12970)

* fix: defer gateway restart until all replies are sent

Fixes a race condition where gateway config changes (e.g., enabling
plugins via iMessage) trigger an immediate SIGUSR1 restart, killing the
iMessage RPC connection before replies are delivered.

Both restart paths (config watcher and RPC-triggered) now defer until
all queued operations, pending replies, and embedded agent runs complete
(polling every 500ms, 30s timeout). A shared emitGatewayRestart() guard
prevents double SIGUSR1 when both paths fire simultaneously.

Key changes:
- Dispatcher registry tracks active reply dispatchers globally
- markComplete() called in finally block for guaranteed cleanup
- Pre-restart deferral hook registered at gateway startup
- Centralized extractDeliveryInfo() for session key parsing
- Post-restart sentinel messages delivered directly (not via agent)
- config-patch distinguished from config-apply in sentinel kind

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: single-source gateway restart authorization

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
Co-authored-by: Peter Steinberger <steipete@gmail.com>
This commit is contained in:
Bridgerz
2026-02-13 15:29:29 -08:00
committed by GitHub
parent dc507f3dec
commit ab4a08a82a
21 changed files with 976 additions and 76 deletions

View File

@@ -18,6 +18,7 @@ import {
restoreRedactedValues,
} from "../../config/redact-snapshot.js";
import { buildConfigSchema, type ConfigSchemaResponse } from "../../config/schema.js";
import { extractDeliveryInfo } from "../../config/sessions.js";
import {
formatDoctorNonInteractiveHint,
type RestartSentinelPayload,
@@ -315,11 +316,17 @@ export const configHandlers: GatewayRequestHandlers = {
? Math.max(0, Math.floor(restartDelayMsRaw))
: undefined;
// Extract deliveryContext + threadId for routing after restart
// Supports both :thread: (most channels) and :topic: (Telegram)
const { deliveryContext, threadId } = extractDeliveryInfo(sessionKey);
const payload: RestartSentinelPayload = {
kind: "config-apply",
kind: "config-patch",
status: "ok",
ts: Date.now(),
sessionKey,
deliveryContext,
threadId,
message: note ?? null,
doctorHint: formatDoctorNonInteractiveHint(),
stats: {
@@ -422,11 +429,18 @@ export const configHandlers: GatewayRequestHandlers = {
? Math.max(0, Math.floor(restartDelayMsRaw))
: undefined;
// Extract deliveryContext + threadId for routing after restart
// Supports both :thread: (most channels) and :topic: (Telegram)
const { deliveryContext: deliveryContextApply, threadId: threadIdApply } =
extractDeliveryInfo(sessionKey);
const payload: RestartSentinelPayload = {
kind: "config-apply",
status: "ok",
ts: Date.now(),
sessionKey,
deliveryContext: deliveryContextApply,
threadId: threadIdApply,
message: note ?? null,
doctorHint: formatDoctorNonInteractiveHint(),
stats: {

View File

@@ -2,15 +2,14 @@ import type { CliDeps } from "../cli/deps.js";
import type { loadConfig } from "../config/config.js";
import type { HeartbeatRunner } from "../infra/heartbeat-runner.js";
import type { ChannelKind, GatewayReloadPlan } from "./config-reload.js";
import { getActiveEmbeddedRunCount } from "../agents/pi-embedded-runner/runs.js";
import { getTotalPendingReplies } from "../auto-reply/reply/dispatcher-registry.js";
import { resolveAgentMaxConcurrent, resolveSubagentMaxConcurrent } from "../config/agent-limits.js";
import { startGmailWatcher, stopGmailWatcher } from "../hooks/gmail-watcher.js";
import { isTruthyEnvValue } from "../infra/env.js";
import { resetDirectoryCache } from "../infra/outbound/target-resolver.js";
import {
authorizeGatewaySigusr1Restart,
setGatewaySigusr1RestartPolicy,
} from "../infra/restart.js";
import { setCommandLaneConcurrency } from "../process/command-queue.js";
import { emitGatewayRestart, setGatewaySigusr1RestartPolicy } from "../infra/restart.js";
import { setCommandLaneConcurrency, getTotalQueueSize } from "../process/command-queue.js";
import { CommandLane } from "../process/lanes.js";
import { resolveHooksConfig } from "./hooks.js";
import { startBrowserControlServerIfEnabled } from "./server-browser.js";
@@ -140,6 +139,8 @@ export function createGatewayReloadHandlers(params: {
params.setState(nextState);
};
let restartPending = false;
const requestGatewayRestart = (
plan: GatewayReloadPlan,
nextConfig: ReturnType<typeof loadConfig>,
@@ -148,13 +149,85 @@ export function createGatewayReloadHandlers(params: {
const reasons = plan.restartReasons.length
? plan.restartReasons.join(", ")
: plan.changedPaths.join(", ");
params.logReload.warn(`config change requires gateway restart (${reasons})`);
if (process.listenerCount("SIGUSR1") === 0) {
params.logReload.warn("no SIGUSR1 listener found; restart skipped");
return;
}
authorizeGatewaySigusr1Restart();
process.emit("SIGUSR1");
// Check if there are active operations (commands in queue, pending replies, or embedded runs)
const queueSize = getTotalQueueSize();
const pendingReplies = getTotalPendingReplies();
const embeddedRuns = getActiveEmbeddedRunCount();
const totalActive = queueSize + pendingReplies + embeddedRuns;
if (totalActive > 0) {
// Avoid spinning up duplicate polling loops from repeated config changes.
if (restartPending) {
params.logReload.info(
`config change requires gateway restart (${reasons}) — already waiting for operations to complete`,
);
return;
}
restartPending = true;
const details = [];
if (queueSize > 0) {
details.push(`${queueSize} queued operation(s)`);
}
if (pendingReplies > 0) {
details.push(`${pendingReplies} pending reply(ies)`);
}
if (embeddedRuns > 0) {
details.push(`${embeddedRuns} embedded run(s)`);
}
params.logReload.warn(
`config change requires gateway restart (${reasons}) — deferring until ${details.join(", ")} complete`,
);
// Wait for all operations and replies to complete before restarting (max 30 seconds)
const maxWaitMs = 30_000;
const checkIntervalMs = 500;
const startTime = Date.now();
const checkAndRestart = () => {
const currentQueueSize = getTotalQueueSize();
const currentPendingReplies = getTotalPendingReplies();
const currentEmbeddedRuns = getActiveEmbeddedRunCount();
const currentTotalActive = currentQueueSize + currentPendingReplies + currentEmbeddedRuns;
const elapsed = Date.now() - startTime;
if (currentTotalActive === 0) {
restartPending = false;
params.logReload.info("all operations and replies completed; restarting gateway now");
emitGatewayRestart();
} else if (elapsed >= maxWaitMs) {
const remainingDetails = [];
if (currentQueueSize > 0) {
remainingDetails.push(`${currentQueueSize} operation(s)`);
}
if (currentPendingReplies > 0) {
remainingDetails.push(`${currentPendingReplies} reply(ies)`);
}
if (currentEmbeddedRuns > 0) {
remainingDetails.push(`${currentEmbeddedRuns} embedded run(s)`);
}
restartPending = false;
params.logReload.warn(
`restart timeout after ${elapsed}ms with ${remainingDetails.join(", ")} still active; restarting anyway`,
);
emitGatewayRestart();
} else {
// Check again soon
setTimeout(checkAndRestart, checkIntervalMs);
}
};
setTimeout(checkAndRestart, checkIntervalMs);
} else {
// No active operations or pending replies, restart immediately
params.logReload.warn(`config change requires gateway restart (${reasons})`);
emitGatewayRestart();
}
};
return { applyHotReload, requestGatewayRestart };

View File

@@ -0,0 +1,151 @@
/**
* E2E test for config reload during active reply sending.
* Tests that gateway restart is properly deferred until replies are sent.
*/
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import {
clearAllDispatchers,
getTotalPendingReplies,
} from "../auto-reply/reply/dispatcher-registry.js";
// Helper to flush all pending microtasks
async function flushMicrotasks() {
for (let i = 0; i < 10; i++) {
await Promise.resolve();
}
}
describe("gateway config reload during reply", () => {
beforeEach(() => {
vi.clearAllMocks();
});
afterEach(async () => {
vi.restoreAllMocks();
// Wait for any pending microtasks (from markComplete()) to complete
await flushMicrotasks();
clearAllDispatchers();
});
it("should defer restart until reply dispatcher completes", async () => {
const { createReplyDispatcher } = await import("../auto-reply/reply/reply-dispatcher.js");
const { getTotalQueueSize } = await import("../process/command-queue.js");
// Create a dispatcher (simulating message handling)
let deliveredReplies: string[] = [];
const dispatcher = createReplyDispatcher({
deliver: async (payload) => {
// Simulate async reply delivery
await new Promise((resolve) => setTimeout(resolve, 100));
deliveredReplies.push(payload.text ?? "");
},
onError: (err) => {
throw err;
},
});
// Initially: pending=1 (reservation)
expect(getTotalPendingReplies()).toBe(1);
// Simulate command finishing and enqueuing reply
dispatcher.sendFinalReply({ text: "Configuration updated successfully!" });
// Now: pending=2 (reservation + 1 enqueued reply)
expect(getTotalPendingReplies()).toBe(2);
// Mark dispatcher complete (flags reservation for cleanup on last delivery)
dispatcher.markComplete();
// Reservation is still counted until the delivery .finally() clears it,
// but the important invariant is pending > 0 while delivery is in flight.
expect(getTotalPendingReplies()).toBeGreaterThan(0);
// At this point, if gateway restart was requested, it should defer
// because getTotalPendingReplies() > 0
// Wait for reply to be delivered
await dispatcher.waitForIdle();
// Now: pending=0 (reply sent)
expect(getTotalPendingReplies()).toBe(0);
expect(deliveredReplies).toEqual(["Configuration updated successfully!"]);
// Now restart can proceed safely
expect(getTotalQueueSize()).toBe(0);
expect(getTotalPendingReplies()).toBe(0);
});
it("should handle dispatcher reservation correctly when no replies sent", async () => {
const { createReplyDispatcher } = await import("../auto-reply/reply/reply-dispatcher.js");
let deliverCalled = false;
const dispatcher = createReplyDispatcher({
deliver: async () => {
deliverCalled = true;
},
});
// Initially: pending=1 (reservation)
expect(getTotalPendingReplies()).toBe(1);
// Mark complete without sending any replies
dispatcher.markComplete();
// Reservation is cleared via microtask — flush it
await flushMicrotasks();
// Now: pending=0 (reservation cleared, no replies were enqueued)
expect(getTotalPendingReplies()).toBe(0);
// Wait for idle (should resolve immediately since no replies)
await dispatcher.waitForIdle();
expect(deliverCalled).toBe(false);
expect(getTotalPendingReplies()).toBe(0);
});
it("should integrate dispatcher reservation with concurrent dispatchers", async () => {
const { createReplyDispatcher } = await import("../auto-reply/reply/reply-dispatcher.js");
const { getTotalQueueSize } = await import("../process/command-queue.js");
const deliveredReplies: string[] = [];
const dispatcher = createReplyDispatcher({
deliver: async (payload) => {
await new Promise((resolve) => setTimeout(resolve, 50));
deliveredReplies.push(payload.text ?? "");
},
});
// Dispatcher has reservation (pending=1)
expect(getTotalPendingReplies()).toBe(1);
// Total active = queue + pending
const totalActive = getTotalQueueSize() + getTotalPendingReplies();
expect(totalActive).toBe(1); // 0 queue + 1 pending
// Command finishes, replies enqueued
dispatcher.sendFinalReply({ text: "Reply 1" });
dispatcher.sendFinalReply({ text: "Reply 2" });
// Now: pending=3 (reservation + 2 replies)
expect(getTotalPendingReplies()).toBe(3);
// Mark complete (flags reservation for cleanup on last delivery)
dispatcher.markComplete();
// Reservation still counted until delivery .finally() clears it,
// but the important invariant is pending > 0 while deliveries are in flight.
expect(getTotalPendingReplies()).toBeGreaterThan(0);
// Wait for replies
await dispatcher.waitForIdle();
// Replies sent, pending=0
expect(getTotalPendingReplies()).toBe(0);
expect(deliveredReplies).toEqual(["Reply 1", "Reply 2"]);
// Now everything is idle
expect(getTotalPendingReplies()).toBe(0);
expect(getTotalQueueSize()).toBe(0);
});
});

View File

@@ -0,0 +1,199 @@
/**
* Integration test simulating full message handling + config change + reply flow.
* This tests the complete scenario where a user configures an adapter via chat
* and ensures they get a reply before the gateway restarts.
*/
import { describe, expect, it, vi, beforeEach, afterEach } from "vitest";
describe("gateway restart deferral integration", () => {
beforeEach(() => {
vi.clearAllMocks();
});
afterEach(async () => {
vi.restoreAllMocks();
// Wait for any pending microtasks (from markComplete()) to complete
await Promise.resolve();
const { clearAllDispatchers } = await import("../auto-reply/reply/dispatcher-registry.js");
clearAllDispatchers();
});
it("should defer restart until dispatcher completes with reply", async () => {
const { createReplyDispatcher } = await import("../auto-reply/reply/reply-dispatcher.js");
const { getTotalPendingReplies } = await import("../auto-reply/reply/dispatcher-registry.js");
const { getTotalQueueSize } = await import("../process/command-queue.js");
const events: string[] = [];
// T=0: Message received — dispatcher created (pending=1 reservation)
events.push("message-received");
const deliveredReplies: Array<{ text: string; timestamp: number }> = [];
const dispatcher = createReplyDispatcher({
deliver: async (payload) => {
// Simulate network delay
await new Promise((resolve) => setTimeout(resolve, 100));
deliveredReplies.push({
text: payload.text ?? "",
timestamp: Date.now(),
});
events.push(`reply-delivered: ${payload.text}`);
},
});
events.push("dispatcher-created");
// T=1: Config change detected
events.push("config-change-detected");
// Check if restart should be deferred
const queueSize = getTotalQueueSize();
const pendingReplies = getTotalPendingReplies();
const totalActive = queueSize + pendingReplies;
events.push(`defer-check: queue=${queueSize} pending=${pendingReplies} total=${totalActive}`);
// Should defer because dispatcher has reservation
expect(totalActive).toBeGreaterThan(0);
expect(pendingReplies).toBe(1); // reservation
if (totalActive > 0) {
events.push("restart-deferred");
}
// T=2: Command finishes, enqueue replies
dispatcher.sendFinalReply({ text: "Adapter configured successfully!" });
dispatcher.sendFinalReply({ text: "Gateway will restart to apply changes." });
events.push("replies-enqueued");
// Now pending should be 3 (reservation + 2 replies)
expect(getTotalPendingReplies()).toBe(3);
// Mark command complete (flags reservation for cleanup on last delivery)
dispatcher.markComplete();
events.push("command-complete");
// Reservation still counted until delivery .finally() clears it,
// but the important invariant is pending > 0 while deliveries are in flight.
expect(getTotalPendingReplies()).toBeGreaterThan(0);
// T=3: Wait for replies to be delivered
await dispatcher.waitForIdle();
events.push("dispatcher-idle");
// Replies should be delivered
expect(deliveredReplies).toHaveLength(2);
expect(deliveredReplies[0].text).toBe("Adapter configured successfully!");
expect(deliveredReplies[1].text).toBe("Gateway will restart to apply changes.");
// Pending should be 0
expect(getTotalPendingReplies()).toBe(0);
// T=4: Check if restart can proceed
const finalQueueSize = getTotalQueueSize();
const finalPendingReplies = getTotalPendingReplies();
const finalTotalActive = finalQueueSize + finalPendingReplies;
events.push(
`restart-check: queue=${finalQueueSize} pending=${finalPendingReplies} total=${finalTotalActive}`,
);
// Everything should be idle now
expect(finalTotalActive).toBe(0);
events.push("restart-can-proceed");
// Verify event sequence
expect(events).toEqual([
"message-received",
"dispatcher-created",
"config-change-detected",
"defer-check: queue=0 pending=1 total=1",
"restart-deferred",
"replies-enqueued",
"command-complete",
"reply-delivered: Adapter configured successfully!",
"reply-delivered: Gateway will restart to apply changes.",
"dispatcher-idle",
"restart-check: queue=0 pending=0 total=0",
"restart-can-proceed",
]);
});
it("should handle concurrent dispatchers with config changes", async () => {
const { createReplyDispatcher } = await import("../auto-reply/reply/reply-dispatcher.js");
const { getTotalPendingReplies } = await import("../auto-reply/reply/dispatcher-registry.js");
// Simulate two messages being processed concurrently
const deliveredReplies: string[] = [];
// Message 1 — dispatcher created
const dispatcher1 = createReplyDispatcher({
deliver: async (payload) => {
await new Promise((resolve) => setTimeout(resolve, 50));
deliveredReplies.push(`msg1: ${payload.text}`);
},
});
// Message 2 — dispatcher created
const dispatcher2 = createReplyDispatcher({
deliver: async (payload) => {
await new Promise((resolve) => setTimeout(resolve, 50));
deliveredReplies.push(`msg2: ${payload.text}`);
},
});
// Both dispatchers have reservations
expect(getTotalPendingReplies()).toBe(2);
// Config change detected - should defer
const totalActive = getTotalPendingReplies();
expect(totalActive).toBe(2); // 2 dispatcher reservations
// Messages process and send replies
dispatcher1.sendFinalReply({ text: "Reply from message 1" });
dispatcher1.markComplete();
dispatcher2.sendFinalReply({ text: "Reply from message 2" });
dispatcher2.markComplete();
// Wait for both
await Promise.all([dispatcher1.waitForIdle(), dispatcher2.waitForIdle()]);
// All idle
expect(getTotalPendingReplies()).toBe(0);
// Replies delivered
expect(deliveredReplies).toHaveLength(2);
});
it("should handle rapid config changes without losing replies", async () => {
const { createReplyDispatcher } = await import("../auto-reply/reply/reply-dispatcher.js");
const { getTotalPendingReplies } = await import("../auto-reply/reply/dispatcher-registry.js");
const deliveredReplies: string[] = [];
// Message received — dispatcher created
const dispatcher = createReplyDispatcher({
deliver: async (payload) => {
await new Promise((resolve) => setTimeout(resolve, 200)); // Slow network
deliveredReplies.push(payload.text ?? "");
},
});
// Config change 1, 2, 3 (rapid changes)
// All should be deferred because dispatcher has pending replies
// Send replies
dispatcher.sendFinalReply({ text: "Processing..." });
dispatcher.sendFinalReply({ text: "Almost done..." });
dispatcher.sendFinalReply({ text: "Complete!" });
dispatcher.markComplete();
// Wait for all replies
await dispatcher.waitForIdle();
// All replies should be delivered
expect(deliveredReplies).toEqual(["Processing...", "Almost done...", "Complete!"]);
// Now restart can proceed
expect(getTotalPendingReplies()).toBe(0);
});
});

View File

@@ -0,0 +1,121 @@
/**
* REAL scenario test - simulates actual message handling with config changes.
* This test MUST fail if "imsg rpc not running" would occur in production.
*/
import { describe, expect, it, vi, beforeEach, afterEach } from "vitest";
describe("real scenario: config change during message processing", () => {
let replyErrors: string[] = [];
beforeEach(() => {
vi.clearAllMocks();
replyErrors = [];
});
afterEach(async () => {
vi.restoreAllMocks();
// Wait for any pending microtasks (from markComplete()) to complete
await Promise.resolve();
const { clearAllDispatchers } = await import("../auto-reply/reply/dispatcher-registry.js");
clearAllDispatchers();
});
it("should NOT restart gateway while reply delivery is in flight", async () => {
const { createReplyDispatcher } = await import("../auto-reply/reply/reply-dispatcher.js");
const { getTotalPendingReplies } = await import("../auto-reply/reply/dispatcher-registry.js");
let rpcConnected = true;
const deliveredReplies: string[] = [];
// Create dispatcher with slow delivery (simulates real network delay)
const dispatcher = createReplyDispatcher({
deliver: async (payload) => {
if (!rpcConnected) {
const error = "Error: imsg rpc not running";
replyErrors.push(error);
throw new Error(error);
}
// Slow delivery — restart checks will run during this window
await new Promise((resolve) => setTimeout(resolve, 500));
deliveredReplies.push(payload.text ?? "");
},
onError: () => {
// Swallow delivery errors so the test can assert on replyErrors
},
});
// Enqueue reply and immediately clear the reservation.
// This is the critical sequence: after markComplete(), the ONLY thing
// keeping pending > 0 is the in-flight delivery itself.
dispatcher.sendFinalReply({ text: "Configuration updated!" });
dispatcher.markComplete();
// At this point: markComplete flagged, delivery is in flight.
// pending > 0 because the in-flight delivery keeps it alive.
const pendingDuringDelivery = getTotalPendingReplies();
expect(pendingDuringDelivery).toBeGreaterThan(0);
// Simulate restart checks while delivery is in progress.
// If the tracking is broken, pending would be 0 and we'd restart.
let restartTriggered = false;
for (let i = 0; i < 3; i++) {
await new Promise((resolve) => setTimeout(resolve, 100));
const pending = getTotalPendingReplies();
if (pending === 0) {
restartTriggered = true;
rpcConnected = false;
break;
}
}
// Wait for delivery to complete
await dispatcher.waitForIdle();
// Now pending should be 0 — restart can proceed
expect(getTotalPendingReplies()).toBe(0);
// CRITICAL: delivery must have succeeded without RPC being killed
expect(restartTriggered).toBe(false);
expect(replyErrors).toEqual([]);
expect(deliveredReplies).toEqual(["Configuration updated!"]);
});
it("should keep pending > 0 until reply is actually enqueued", async () => {
const { createReplyDispatcher } = await import("../auto-reply/reply/reply-dispatcher.js");
const { getTotalPendingReplies } = await import("../auto-reply/reply/dispatcher-registry.js");
const dispatcher = createReplyDispatcher({
deliver: async (_payload) => {
await new Promise((resolve) => setTimeout(resolve, 50));
},
});
// Initially: pending=1 (reservation)
expect(getTotalPendingReplies()).toBe(1);
// Simulate command processing delay BEFORE reply is enqueued
await new Promise((resolve) => setTimeout(resolve, 100));
// During this delay, pending should STILL be 1 (reservation active)
expect(getTotalPendingReplies()).toBe(1);
// Now enqueue reply
dispatcher.sendFinalReply({ text: "Reply" });
// Now pending should be 2 (reservation + reply)
expect(getTotalPendingReplies()).toBe(2);
// Mark complete
dispatcher.markComplete();
// After markComplete, pending should still be > 0 if reply hasn't sent yet
const pendingAfterMarkComplete = getTotalPendingReplies();
expect(pendingAfterMarkComplete).toBeGreaterThan(0);
// Wait for reply to send
await dispatcher.waitForIdle();
// Now pending should be 0
expect(getTotalPendingReplies()).toBe(0);
});
});

View File

@@ -1,8 +1,8 @@
import type { CliDeps } from "../cli/deps.js";
import { resolveAnnounceTargetFromKey } from "../agents/tools/sessions-send-helpers.js";
import { normalizeChannelId } from "../channels/plugins/index.js";
import { agentCommand } from "../commands/agent.js";
import { resolveMainSessionKeyFromConfig } from "../config/sessions.js";
import { deliverOutboundPayloads } from "../infra/outbound/deliver.js";
import { resolveOutboundTarget } from "../infra/outbound/targets.js";
import {
consumeRestartSentinel,
@@ -10,11 +10,10 @@ import {
summarizeRestartSentinel,
} from "../infra/restart-sentinel.js";
import { enqueueSystemEvent } from "../infra/system-events.js";
import { defaultRuntime } from "../runtime.js";
import { deliveryContextFromSession, mergeDeliveryContext } from "../utils/delivery-context.js";
import { loadSessionEntry } from "./session-utils.js";
export async function scheduleRestartSentinelWake(params: { deps: CliDeps }) {
export async function scheduleRestartSentinelWake(_params: { deps: CliDeps }) {
const sentinel = await consumeRestartSentinel();
if (!sentinel) {
return;
@@ -86,20 +85,15 @@ export async function scheduleRestartSentinelWake(params: { deps: CliDeps }) {
(origin?.threadId != null ? String(origin.threadId) : undefined);
try {
await agentCommand(
{
message,
sessionKey,
to: resolved.to,
channel,
deliver: true,
bestEffortDeliver: true,
messageChannel: channel,
threadId,
},
defaultRuntime,
params.deps,
);
await deliverOutboundPayloads({
cfg,
channel,
to: resolved.to,
accountId: origin?.accountId,
threadId,
payloads: [{ text: message }],
bestEffort: true,
});
} catch (err) {
enqueueSystemEvent(`${summary}\n${String(err)}`, { sessionKey });
}

View File

@@ -5,8 +5,10 @@ import type { RuntimeEnv } from "../runtime.js";
import type { ControlUiRootState } from "./control-ui.js";
import type { startBrowserControlServerIfEnabled } from "./server-browser.js";
import { resolveAgentWorkspaceDir, resolveDefaultAgentId } from "../agents/agent-scope.js";
import { getActiveEmbeddedRunCount } from "../agents/pi-embedded-runner/runs.js";
import { registerSkillsChangeListener } from "../agents/skills/refresh.js";
import { initSubagentRegistry } from "../agents/subagent-registry.js";
import { getTotalPendingReplies } from "../auto-reply/reply/dispatcher-registry.js";
import { type ChannelId, listChannelPlugins } from "../channels/plugins/index.js";
import { formatCliCommand } from "../cli/command-format.js";
import { createDefaultDeps } from "../cli/deps.js";
@@ -32,7 +34,7 @@ import { onHeartbeatEvent } from "../infra/heartbeat-events.js";
import { startHeartbeatRunner } from "../infra/heartbeat-runner.js";
import { getMachineDisplayName } from "../infra/machine-name.js";
import { ensureOpenClawCliOnPath } from "../infra/path-env.js";
import { setGatewaySigusr1RestartPolicy } from "../infra/restart.js";
import { setGatewaySigusr1RestartPolicy, setPreRestartDeferralCheck } from "../infra/restart.js";
import {
primeRemoteSkillsCache,
refreshRemoteBinsForConnectedNodes,
@@ -42,6 +44,7 @@ import { scheduleGatewayUpdateCheck } from "../infra/update-startup.js";
import { startDiagnosticHeartbeat, stopDiagnosticHeartbeat } from "../logging/diagnostic.js";
import { createSubsystemLogger, runtimeForLogger } from "../logging/subsystem.js";
import { getGlobalHookRunner } from "../plugins/hook-runner-global.js";
import { getTotalQueueSize } from "../process/command-queue.js";
import { runOnboardingWizard } from "../wizard/onboarding.js";
import { createAuthRateLimiter, type AuthRateLimiter } from "./auth-rate-limit.js";
import { startGatewayConfigReloader } from "./config-reload.js";
@@ -225,6 +228,9 @@ export async function startGatewayServer(
startDiagnosticHeartbeat();
}
setGatewaySigusr1RestartPolicy({ allowExternal: cfgAtStart.commands?.restart === true });
setPreRestartDeferralCheck(
() => getTotalQueueSize() + getTotalPendingReplies() + getActiveEmbeddedRunCount(),
);
initSubagentRegistry();
const defaultAgentId = resolveDefaultAgentId(cfgAtStart);
const defaultWorkspaceDir = resolveAgentWorkspaceDir(cfgAtStart, defaultAgentId);