fix: defer gateway restart until all replies are sent (#12970)

* fix: defer gateway restart until all replies are sent Fixes a race condition where gateway config changes (e.g., enabling plugins via iMessage) trigger an immediate SIGUSR1 restart, killing the iMessage RPC connection before replies are delivered. Both restart paths (config watcher and RPC-triggered) now defer until all queued operations, pending replies, and embedded agent runs complete (polling every 500ms, 30s timeout). A shared emitGatewayRestart() guard prevents double SIGUSR1 when both paths fire simultaneously. Key changes: - Dispatcher registry tracks active reply dispatchers globally - markComplete() called in finally block for guaranteed cleanup - Pre-restart deferral hook registered at gateway startup - Centralized extractDeliveryInfo() for session key parsing - Post-restart sentinel messages delivered directly (not via agent) - config-patch distinguished from config-apply in sentinel kind Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix: single-source gateway restart authorization --------- Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com> Co-authored-by: Peter Steinberger <steipete@gmail.com>
2026-05-08 04:51:25 +00:00 · 2026-02-13 15:29:29 -08:00
parent dc507f3dec
commit ab4a08a82a
21 changed files with 976 additions and 76 deletions
--- a/src/gateway/server-methods/config.ts
+++ b/src/gateway/server-methods/config.ts
@@ -18,6 +18,7 @@ import {
  restoreRedactedValues,
 } from "../../config/redact-snapshot.js";
 import { buildConfigSchema, type ConfigSchemaResponse } from "../../config/schema.js";
+import { extractDeliveryInfo } from "../../config/sessions.js";
 import {
  formatDoctorNonInteractiveHint,
  type RestartSentinelPayload,
@@ -315,11 +316,17 @@ export const configHandlers: GatewayRequestHandlers = {
        ? Math.max(0, Math.floor(restartDelayMsRaw))
        : undefined;

+    // Extract deliveryContext + threadId for routing after restart
+    // Supports both :thread: (most channels) and :topic: (Telegram)
+    const { deliveryContext, threadId } = extractDeliveryInfo(sessionKey);
+
    const payload: RestartSentinelPayload = {
-      kind: "config-apply",
+      kind: "config-patch",
      status: "ok",
      ts: Date.now(),
      sessionKey,
+      deliveryContext,
+      threadId,
      message: note ?? null,
      doctorHint: formatDoctorNonInteractiveHint(),
      stats: {
@@ -422,11 +429,18 @@ export const configHandlers: GatewayRequestHandlers = {
        ? Math.max(0, Math.floor(restartDelayMsRaw))
        : undefined;

+    // Extract deliveryContext + threadId for routing after restart
+    // Supports both :thread: (most channels) and :topic: (Telegram)
+    const { deliveryContext: deliveryContextApply, threadId: threadIdApply } =
+      extractDeliveryInfo(sessionKey);
+
    const payload: RestartSentinelPayload = {
      kind: "config-apply",
      status: "ok",
      ts: Date.now(),
      sessionKey,
+      deliveryContext: deliveryContextApply,
+      threadId: threadIdApply,
      message: note ?? null,
      doctorHint: formatDoctorNonInteractiveHint(),
      stats: {
--- a/src/gateway/server-reload-handlers.ts
+++ b/src/gateway/server-reload-handlers.ts
@@ -2,15 +2,14 @@ import type { CliDeps } from "../cli/deps.js";
 import type { loadConfig } from "../config/config.js";
 import type { HeartbeatRunner } from "../infra/heartbeat-runner.js";
 import type { ChannelKind, GatewayReloadPlan } from "./config-reload.js";
+import { getActiveEmbeddedRunCount } from "../agents/pi-embedded-runner/runs.js";
+import { getTotalPendingReplies } from "../auto-reply/reply/dispatcher-registry.js";
 import { resolveAgentMaxConcurrent, resolveSubagentMaxConcurrent } from "../config/agent-limits.js";
 import { startGmailWatcher, stopGmailWatcher } from "../hooks/gmail-watcher.js";
 import { isTruthyEnvValue } from "../infra/env.js";
 import { resetDirectoryCache } from "../infra/outbound/target-resolver.js";
-import {
-  authorizeGatewaySigusr1Restart,
-  setGatewaySigusr1RestartPolicy,
-} from "../infra/restart.js";
-import { setCommandLaneConcurrency } from "../process/command-queue.js";
+import { emitGatewayRestart, setGatewaySigusr1RestartPolicy } from "../infra/restart.js";
+import { setCommandLaneConcurrency, getTotalQueueSize } from "../process/command-queue.js";
 import { CommandLane } from "../process/lanes.js";
 import { resolveHooksConfig } from "./hooks.js";
 import { startBrowserControlServerIfEnabled } from "./server-browser.js";
@@ -140,6 +139,8 @@ export function createGatewayReloadHandlers(params: {
    params.setState(nextState);
  };

+  let restartPending = false;
+
  const requestGatewayRestart = (
    plan: GatewayReloadPlan,
    nextConfig: ReturnType<typeof loadConfig>,
@@ -148,13 +149,85 @@ export function createGatewayReloadHandlers(params: {
    const reasons = plan.restartReasons.length
      ? plan.restartReasons.join(", ")
      : plan.changedPaths.join(", ");
-    params.logReload.warn(`config change requires gateway restart (${reasons})`);
+
    if (process.listenerCount("SIGUSR1") === 0) {
      params.logReload.warn("no SIGUSR1 listener found; restart skipped");
      return;
    }
-    authorizeGatewaySigusr1Restart();
-    process.emit("SIGUSR1");
+
+    // Check if there are active operations (commands in queue, pending replies, or embedded runs)
+    const queueSize = getTotalQueueSize();
+    const pendingReplies = getTotalPendingReplies();
+    const embeddedRuns = getActiveEmbeddedRunCount();
+    const totalActive = queueSize + pendingReplies + embeddedRuns;
+
+    if (totalActive > 0) {
+      // Avoid spinning up duplicate polling loops from repeated config changes.
+      if (restartPending) {
+        params.logReload.info(
+          `config change requires gateway restart (${reasons}) — already waiting for operations to complete`,
+        );
+        return;
+      }
+      restartPending = true;
+      const details = [];
+      if (queueSize > 0) {
+        details.push(`${queueSize} queued operation(s)`);
+      }
+      if (pendingReplies > 0) {
+        details.push(`${pendingReplies} pending reply(ies)`);
+      }
+      if (embeddedRuns > 0) {
+        details.push(`${embeddedRuns} embedded run(s)`);
+      }
+      params.logReload.warn(
+        `config change requires gateway restart (${reasons}) — deferring until ${details.join(", ")} complete`,
+      );
+
+      // Wait for all operations and replies to complete before restarting (max 30 seconds)
+      const maxWaitMs = 30_000;
+      const checkIntervalMs = 500;
+      const startTime = Date.now();
+
+      const checkAndRestart = () => {
+        const currentQueueSize = getTotalQueueSize();
+        const currentPendingReplies = getTotalPendingReplies();
+        const currentEmbeddedRuns = getActiveEmbeddedRunCount();
+        const currentTotalActive = currentQueueSize + currentPendingReplies + currentEmbeddedRuns;
+        const elapsed = Date.now() - startTime;
+
+        if (currentTotalActive === 0) {
+          restartPending = false;
+          params.logReload.info("all operations and replies completed; restarting gateway now");
+          emitGatewayRestart();
+        } else if (elapsed >= maxWaitMs) {
+          const remainingDetails = [];
+          if (currentQueueSize > 0) {
+            remainingDetails.push(`${currentQueueSize} operation(s)`);
+          }
+          if (currentPendingReplies > 0) {
+            remainingDetails.push(`${currentPendingReplies} reply(ies)`);
+          }
+          if (currentEmbeddedRuns > 0) {
+            remainingDetails.push(`${currentEmbeddedRuns} embedded run(s)`);
+          }
+          restartPending = false;
+          params.logReload.warn(
+            `restart timeout after ${elapsed}ms with ${remainingDetails.join(", ")} still active; restarting anyway`,
+          );
+          emitGatewayRestart();
+        } else {
+          // Check again soon
+          setTimeout(checkAndRestart, checkIntervalMs);
+        }
+      };
+
+      setTimeout(checkAndRestart, checkIntervalMs);
+    } else {
+      // No active operations or pending replies, restart immediately
+      params.logReload.warn(`config change requires gateway restart (${reasons})`);
+      emitGatewayRestart();
+    }
  };

  return { applyHotReload, requestGatewayRestart };
--- a/src/gateway/server-reload.config-during-reply.test.ts
+++ b/src/gateway/server-reload.config-during-reply.test.ts
@@ -0,0 +1,151 @@
+/**
+ * E2E test for config reload during active reply sending.
+ * Tests that gateway restart is properly deferred until replies are sent.
+ */
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+import {
+  clearAllDispatchers,
+  getTotalPendingReplies,
+} from "../auto-reply/reply/dispatcher-registry.js";
+
+// Helper to flush all pending microtasks
+async function flushMicrotasks() {
+  for (let i = 0; i < 10; i++) {
+    await Promise.resolve();
+  }
+}
+
+describe("gateway config reload during reply", () => {
+  beforeEach(() => {
+    vi.clearAllMocks();
+  });
+
+  afterEach(async () => {
+    vi.restoreAllMocks();
+    // Wait for any pending microtasks (from markComplete()) to complete
+    await flushMicrotasks();
+    clearAllDispatchers();
+  });
+
+  it("should defer restart until reply dispatcher completes", async () => {
+    const { createReplyDispatcher } = await import("../auto-reply/reply/reply-dispatcher.js");
+    const { getTotalQueueSize } = await import("../process/command-queue.js");
+
+    // Create a dispatcher (simulating message handling)
+    let deliveredReplies: string[] = [];
+    const dispatcher = createReplyDispatcher({
+      deliver: async (payload) => {
+        // Simulate async reply delivery
+        await new Promise((resolve) => setTimeout(resolve, 100));
+        deliveredReplies.push(payload.text ?? "");
+      },
+      onError: (err) => {
+        throw err;
+      },
+    });
+
+    // Initially: pending=1 (reservation)
+    expect(getTotalPendingReplies()).toBe(1);
+
+    // Simulate command finishing and enqueuing reply
+    dispatcher.sendFinalReply({ text: "Configuration updated successfully!" });
+
+    // Now: pending=2 (reservation + 1 enqueued reply)
+    expect(getTotalPendingReplies()).toBe(2);
+
+    // Mark dispatcher complete (flags reservation for cleanup on last delivery)
+    dispatcher.markComplete();
+
+    // Reservation is still counted until the delivery .finally() clears it,
+    // but the important invariant is pending > 0 while delivery is in flight.
+    expect(getTotalPendingReplies()).toBeGreaterThan(0);
+
+    // At this point, if gateway restart was requested, it should defer
+    // because getTotalPendingReplies() > 0
+
+    // Wait for reply to be delivered
+    await dispatcher.waitForIdle();
+
+    // Now: pending=0 (reply sent)
+    expect(getTotalPendingReplies()).toBe(0);
+    expect(deliveredReplies).toEqual(["Configuration updated successfully!"]);
+
+    // Now restart can proceed safely
+    expect(getTotalQueueSize()).toBe(0);
+    expect(getTotalPendingReplies()).toBe(0);
+  });
+
+  it("should handle dispatcher reservation correctly when no replies sent", async () => {
+    const { createReplyDispatcher } = await import("../auto-reply/reply/reply-dispatcher.js");
+
+    let deliverCalled = false;
+    const dispatcher = createReplyDispatcher({
+      deliver: async () => {
+        deliverCalled = true;
+      },
+    });
+
+    // Initially: pending=1 (reservation)
+    expect(getTotalPendingReplies()).toBe(1);
+
+    // Mark complete without sending any replies
+    dispatcher.markComplete();
+
+    // Reservation is cleared via microtask — flush it
+    await flushMicrotasks();
+
+    // Now: pending=0 (reservation cleared, no replies were enqueued)
+    expect(getTotalPendingReplies()).toBe(0);
+
+    // Wait for idle (should resolve immediately since no replies)
+    await dispatcher.waitForIdle();
+
+    expect(deliverCalled).toBe(false);
+    expect(getTotalPendingReplies()).toBe(0);
+  });
+
+  it("should integrate dispatcher reservation with concurrent dispatchers", async () => {
+    const { createReplyDispatcher } = await import("../auto-reply/reply/reply-dispatcher.js");
+    const { getTotalQueueSize } = await import("../process/command-queue.js");
+
+    const deliveredReplies: string[] = [];
+    const dispatcher = createReplyDispatcher({
+      deliver: async (payload) => {
+        await new Promise((resolve) => setTimeout(resolve, 50));
+        deliveredReplies.push(payload.text ?? "");
+      },
+    });
+
+    // Dispatcher has reservation (pending=1)
+    expect(getTotalPendingReplies()).toBe(1);
+
+    // Total active = queue + pending
+    const totalActive = getTotalQueueSize() + getTotalPendingReplies();
+    expect(totalActive).toBe(1); // 0 queue + 1 pending
+
+    // Command finishes, replies enqueued
+    dispatcher.sendFinalReply({ text: "Reply 1" });
+    dispatcher.sendFinalReply({ text: "Reply 2" });
+
+    // Now: pending=3 (reservation + 2 replies)
+    expect(getTotalPendingReplies()).toBe(3);
+
+    // Mark complete (flags reservation for cleanup on last delivery)
+    dispatcher.markComplete();
+
+    // Reservation still counted until delivery .finally() clears it,
+    // but the important invariant is pending > 0 while deliveries are in flight.
+    expect(getTotalPendingReplies()).toBeGreaterThan(0);
+
+    // Wait for replies
+    await dispatcher.waitForIdle();
+
+    // Replies sent, pending=0
+    expect(getTotalPendingReplies()).toBe(0);
+    expect(deliveredReplies).toEqual(["Reply 1", "Reply 2"]);
+
+    // Now everything is idle
+    expect(getTotalPendingReplies()).toBe(0);
+    expect(getTotalQueueSize()).toBe(0);
+  });
+});
--- a/src/gateway/server-reload.integration.test.ts
+++ b/src/gateway/server-reload.integration.test.ts
@@ -0,0 +1,199 @@
+/**
+ * Integration test simulating full message handling + config change + reply flow.
+ * This tests the complete scenario where a user configures an adapter via chat
+ * and ensures they get a reply before the gateway restarts.
+ */
+import { describe, expect, it, vi, beforeEach, afterEach } from "vitest";
+
+describe("gateway restart deferral integration", () => {
+  beforeEach(() => {
+    vi.clearAllMocks();
+  });
+
+  afterEach(async () => {
+    vi.restoreAllMocks();
+    // Wait for any pending microtasks (from markComplete()) to complete
+    await Promise.resolve();
+    const { clearAllDispatchers } = await import("../auto-reply/reply/dispatcher-registry.js");
+    clearAllDispatchers();
+  });
+
+  it("should defer restart until dispatcher completes with reply", async () => {
+    const { createReplyDispatcher } = await import("../auto-reply/reply/reply-dispatcher.js");
+    const { getTotalPendingReplies } = await import("../auto-reply/reply/dispatcher-registry.js");
+    const { getTotalQueueSize } = await import("../process/command-queue.js");
+
+    const events: string[] = [];
+
+    // T=0: Message received — dispatcher created (pending=1 reservation)
+    events.push("message-received");
+    const deliveredReplies: Array<{ text: string; timestamp: number }> = [];
+    const dispatcher = createReplyDispatcher({
+      deliver: async (payload) => {
+        // Simulate network delay
+        await new Promise((resolve) => setTimeout(resolve, 100));
+        deliveredReplies.push({
+          text: payload.text ?? "",
+          timestamp: Date.now(),
+        });
+        events.push(`reply-delivered: ${payload.text}`);
+      },
+    });
+    events.push("dispatcher-created");
+
+    // T=1: Config change detected
+    events.push("config-change-detected");
+
+    // Check if restart should be deferred
+    const queueSize = getTotalQueueSize();
+    const pendingReplies = getTotalPendingReplies();
+    const totalActive = queueSize + pendingReplies;
+
+    events.push(`defer-check: queue=${queueSize} pending=${pendingReplies} total=${totalActive}`);
+
+    // Should defer because dispatcher has reservation
+    expect(totalActive).toBeGreaterThan(0);
+    expect(pendingReplies).toBe(1); // reservation
+
+    if (totalActive > 0) {
+      events.push("restart-deferred");
+    }
+
+    // T=2: Command finishes, enqueue replies
+    dispatcher.sendFinalReply({ text: "Adapter configured successfully!" });
+    dispatcher.sendFinalReply({ text: "Gateway will restart to apply changes." });
+    events.push("replies-enqueued");
+
+    // Now pending should be 3 (reservation + 2 replies)
+    expect(getTotalPendingReplies()).toBe(3);
+
+    // Mark command complete (flags reservation for cleanup on last delivery)
+    dispatcher.markComplete();
+    events.push("command-complete");
+
+    // Reservation still counted until delivery .finally() clears it,
+    // but the important invariant is pending > 0 while deliveries are in flight.
+    expect(getTotalPendingReplies()).toBeGreaterThan(0);
+
+    // T=3: Wait for replies to be delivered
+    await dispatcher.waitForIdle();
+    events.push("dispatcher-idle");
+
+    // Replies should be delivered
+    expect(deliveredReplies).toHaveLength(2);
+    expect(deliveredReplies[0].text).toBe("Adapter configured successfully!");
+    expect(deliveredReplies[1].text).toBe("Gateway will restart to apply changes.");
+
+    // Pending should be 0
+    expect(getTotalPendingReplies()).toBe(0);
+
+    // T=4: Check if restart can proceed
+    const finalQueueSize = getTotalQueueSize();
+    const finalPendingReplies = getTotalPendingReplies();
+    const finalTotalActive = finalQueueSize + finalPendingReplies;
+
+    events.push(
+      `restart-check: queue=${finalQueueSize} pending=${finalPendingReplies} total=${finalTotalActive}`,
+    );
+
+    // Everything should be idle now
+    expect(finalTotalActive).toBe(0);
+    events.push("restart-can-proceed");
+
+    // Verify event sequence
+    expect(events).toEqual([
+      "message-received",
+      "dispatcher-created",
+      "config-change-detected",
+      "defer-check: queue=0 pending=1 total=1",
+      "restart-deferred",
+      "replies-enqueued",
+      "command-complete",
+      "reply-delivered: Adapter configured successfully!",
+      "reply-delivered: Gateway will restart to apply changes.",
+      "dispatcher-idle",
+      "restart-check: queue=0 pending=0 total=0",
+      "restart-can-proceed",
+    ]);
+  });
+
+  it("should handle concurrent dispatchers with config changes", async () => {
+    const { createReplyDispatcher } = await import("../auto-reply/reply/reply-dispatcher.js");
+    const { getTotalPendingReplies } = await import("../auto-reply/reply/dispatcher-registry.js");
+
+    // Simulate two messages being processed concurrently
+    const deliveredReplies: string[] = [];
+
+    // Message 1 — dispatcher created
+    const dispatcher1 = createReplyDispatcher({
+      deliver: async (payload) => {
+        await new Promise((resolve) => setTimeout(resolve, 50));
+        deliveredReplies.push(`msg1: ${payload.text}`);
+      },
+    });
+
+    // Message 2 — dispatcher created
+    const dispatcher2 = createReplyDispatcher({
+      deliver: async (payload) => {
+        await new Promise((resolve) => setTimeout(resolve, 50));
+        deliveredReplies.push(`msg2: ${payload.text}`);
+      },
+    });
+
+    // Both dispatchers have reservations
+    expect(getTotalPendingReplies()).toBe(2);
+
+    // Config change detected - should defer
+    const totalActive = getTotalPendingReplies();
+    expect(totalActive).toBe(2); // 2 dispatcher reservations
+
+    // Messages process and send replies
+    dispatcher1.sendFinalReply({ text: "Reply from message 1" });
+    dispatcher1.markComplete();
+
+    dispatcher2.sendFinalReply({ text: "Reply from message 2" });
+    dispatcher2.markComplete();
+
+    // Wait for both
+    await Promise.all([dispatcher1.waitForIdle(), dispatcher2.waitForIdle()]);
+
+    // All idle
+    expect(getTotalPendingReplies()).toBe(0);
+
+    // Replies delivered
+    expect(deliveredReplies).toHaveLength(2);
+  });
+
+  it("should handle rapid config changes without losing replies", async () => {
+    const { createReplyDispatcher } = await import("../auto-reply/reply/reply-dispatcher.js");
+    const { getTotalPendingReplies } = await import("../auto-reply/reply/dispatcher-registry.js");
+
+    const deliveredReplies: string[] = [];
+
+    // Message received — dispatcher created
+    const dispatcher = createReplyDispatcher({
+      deliver: async (payload) => {
+        await new Promise((resolve) => setTimeout(resolve, 200)); // Slow network
+        deliveredReplies.push(payload.text ?? "");
+      },
+    });
+
+    // Config change 1, 2, 3 (rapid changes)
+    // All should be deferred because dispatcher has pending replies
+
+    // Send replies
+    dispatcher.sendFinalReply({ text: "Processing..." });
+    dispatcher.sendFinalReply({ text: "Almost done..." });
+    dispatcher.sendFinalReply({ text: "Complete!" });
+    dispatcher.markComplete();
+
+    // Wait for all replies
+    await dispatcher.waitForIdle();
+
+    // All replies should be delivered
+    expect(deliveredReplies).toEqual(["Processing...", "Almost done...", "Complete!"]);
+
+    // Now restart can proceed
+    expect(getTotalPendingReplies()).toBe(0);
+  });
+});
--- a/src/gateway/server-reload.real-scenario.test.ts
+++ b/src/gateway/server-reload.real-scenario.test.ts
@@ -0,0 +1,121 @@
+/**
+ * REAL scenario test - simulates actual message handling with config changes.
+ * This test MUST fail if "imsg rpc not running" would occur in production.
+ */
+import { describe, expect, it, vi, beforeEach, afterEach } from "vitest";
+
+describe("real scenario: config change during message processing", () => {
+  let replyErrors: string[] = [];
+
+  beforeEach(() => {
+    vi.clearAllMocks();
+    replyErrors = [];
+  });
+
+  afterEach(async () => {
+    vi.restoreAllMocks();
+    // Wait for any pending microtasks (from markComplete()) to complete
+    await Promise.resolve();
+    const { clearAllDispatchers } = await import("../auto-reply/reply/dispatcher-registry.js");
+    clearAllDispatchers();
+  });
+
+  it("should NOT restart gateway while reply delivery is in flight", async () => {
+    const { createReplyDispatcher } = await import("../auto-reply/reply/reply-dispatcher.js");
+    const { getTotalPendingReplies } = await import("../auto-reply/reply/dispatcher-registry.js");
+
+    let rpcConnected = true;
+    const deliveredReplies: string[] = [];
+
+    // Create dispatcher with slow delivery (simulates real network delay)
+    const dispatcher = createReplyDispatcher({
+      deliver: async (payload) => {
+        if (!rpcConnected) {
+          const error = "Error: imsg rpc not running";
+          replyErrors.push(error);
+          throw new Error(error);
+        }
+        // Slow delivery — restart checks will run during this window
+        await new Promise((resolve) => setTimeout(resolve, 500));
+        deliveredReplies.push(payload.text ?? "");
+      },
+      onError: () => {
+        // Swallow delivery errors so the test can assert on replyErrors
+      },
+    });
+
+    // Enqueue reply and immediately clear the reservation.
+    // This is the critical sequence: after markComplete(), the ONLY thing
+    // keeping pending > 0 is the in-flight delivery itself.
+    dispatcher.sendFinalReply({ text: "Configuration updated!" });
+    dispatcher.markComplete();
+
+    // At this point: markComplete flagged, delivery is in flight.
+    // pending > 0 because the in-flight delivery keeps it alive.
+    const pendingDuringDelivery = getTotalPendingReplies();
+    expect(pendingDuringDelivery).toBeGreaterThan(0);
+
+    // Simulate restart checks while delivery is in progress.
+    // If the tracking is broken, pending would be 0 and we'd restart.
+    let restartTriggered = false;
+    for (let i = 0; i < 3; i++) {
+      await new Promise((resolve) => setTimeout(resolve, 100));
+      const pending = getTotalPendingReplies();
+      if (pending === 0) {
+        restartTriggered = true;
+        rpcConnected = false;
+        break;
+      }
+    }
+
+    // Wait for delivery to complete
+    await dispatcher.waitForIdle();
+
+    // Now pending should be 0 — restart can proceed
+    expect(getTotalPendingReplies()).toBe(0);
+
+    // CRITICAL: delivery must have succeeded without RPC being killed
+    expect(restartTriggered).toBe(false);
+    expect(replyErrors).toEqual([]);
+    expect(deliveredReplies).toEqual(["Configuration updated!"]);
+  });
+
+  it("should keep pending > 0 until reply is actually enqueued", async () => {
+    const { createReplyDispatcher } = await import("../auto-reply/reply/reply-dispatcher.js");
+    const { getTotalPendingReplies } = await import("../auto-reply/reply/dispatcher-registry.js");
+
+    const dispatcher = createReplyDispatcher({
+      deliver: async (_payload) => {
+        await new Promise((resolve) => setTimeout(resolve, 50));
+      },
+    });
+
+    // Initially: pending=1 (reservation)
+    expect(getTotalPendingReplies()).toBe(1);
+
+    // Simulate command processing delay BEFORE reply is enqueued
+    await new Promise((resolve) => setTimeout(resolve, 100));
+
+    // During this delay, pending should STILL be 1 (reservation active)
+    expect(getTotalPendingReplies()).toBe(1);
+
+    // Now enqueue reply
+    dispatcher.sendFinalReply({ text: "Reply" });
+
+    // Now pending should be 2 (reservation + reply)
+    expect(getTotalPendingReplies()).toBe(2);
+
+    // Mark complete
+    dispatcher.markComplete();
+
+    // After markComplete, pending should still be > 0 if reply hasn't sent yet
+    const pendingAfterMarkComplete = getTotalPendingReplies();
+    expect(pendingAfterMarkComplete).toBeGreaterThan(0);
+
+    // Wait for reply to send
+    await dispatcher.waitForIdle();
+
+    // Now pending should be 0
+    expect(getTotalPendingReplies()).toBe(0);
+  });
+});
--- a/src/gateway/server-restart-sentinel.ts
+++ b/src/gateway/server-restart-sentinel.ts
@@ -1,8 +1,8 @@
 import type { CliDeps } from "../cli/deps.js";
 import { resolveAnnounceTargetFromKey } from "../agents/tools/sessions-send-helpers.js";
 import { normalizeChannelId } from "../channels/plugins/index.js";
-import { agentCommand } from "../commands/agent.js";
 import { resolveMainSessionKeyFromConfig } from "../config/sessions.js";
+import { deliverOutboundPayloads } from "../infra/outbound/deliver.js";
 import { resolveOutboundTarget } from "../infra/outbound/targets.js";
 import {
  consumeRestartSentinel,
@@ -10,11 +10,10 @@ import {
  summarizeRestartSentinel,
 } from "../infra/restart-sentinel.js";
 import { enqueueSystemEvent } from "../infra/system-events.js";
-import { defaultRuntime } from "../runtime.js";
 import { deliveryContextFromSession, mergeDeliveryContext } from "../utils/delivery-context.js";
 import { loadSessionEntry } from "./session-utils.js";

-export async function scheduleRestartSentinelWake(params: { deps: CliDeps }) {
+export async function scheduleRestartSentinelWake(_params: { deps: CliDeps }) {
  const sentinel = await consumeRestartSentinel();
  if (!sentinel) {
    return;
@@ -86,20 +85,15 @@ export async function scheduleRestartSentinelWake(params: { deps: CliDeps }) {
    (origin?.threadId != null ? String(origin.threadId) : undefined);

  try {
-    await agentCommand(
-      {
-        message,
-        sessionKey,
-        to: resolved.to,
-        channel,
-        deliver: true,
-        bestEffortDeliver: true,
-        messageChannel: channel,
-        threadId,
-      },
-      defaultRuntime,
-      params.deps,
-    );
+    await deliverOutboundPayloads({
+      cfg,
+      channel,
+      to: resolved.to,
+      accountId: origin?.accountId,
+      threadId,
+      payloads: [{ text: message }],
+      bestEffort: true,
+    });
  } catch (err) {
    enqueueSystemEvent(`${summary}\n${String(err)}`, { sessionKey });
  }
--- a/src/gateway/server.impl.ts
+++ b/src/gateway/server.impl.ts
@@ -5,8 +5,10 @@ import type { RuntimeEnv } from "../runtime.js";
 import type { ControlUiRootState } from "./control-ui.js";
 import type { startBrowserControlServerIfEnabled } from "./server-browser.js";
 import { resolveAgentWorkspaceDir, resolveDefaultAgentId } from "../agents/agent-scope.js";
+import { getActiveEmbeddedRunCount } from "../agents/pi-embedded-runner/runs.js";
 import { registerSkillsChangeListener } from "../agents/skills/refresh.js";
 import { initSubagentRegistry } from "../agents/subagent-registry.js";
+import { getTotalPendingReplies } from "../auto-reply/reply/dispatcher-registry.js";
 import { type ChannelId, listChannelPlugins } from "../channels/plugins/index.js";
 import { formatCliCommand } from "../cli/command-format.js";
 import { createDefaultDeps } from "../cli/deps.js";
@@ -32,7 +34,7 @@ import { onHeartbeatEvent } from "../infra/heartbeat-events.js";
 import { startHeartbeatRunner } from "../infra/heartbeat-runner.js";
 import { getMachineDisplayName } from "../infra/machine-name.js";
 import { ensureOpenClawCliOnPath } from "../infra/path-env.js";
-import { setGatewaySigusr1RestartPolicy } from "../infra/restart.js";
+import { setGatewaySigusr1RestartPolicy, setPreRestartDeferralCheck } from "../infra/restart.js";
 import {
  primeRemoteSkillsCache,
  refreshRemoteBinsForConnectedNodes,
@@ -42,6 +44,7 @@ import { scheduleGatewayUpdateCheck } from "../infra/update-startup.js";
 import { startDiagnosticHeartbeat, stopDiagnosticHeartbeat } from "../logging/diagnostic.js";
 import { createSubsystemLogger, runtimeForLogger } from "../logging/subsystem.js";
 import { getGlobalHookRunner } from "../plugins/hook-runner-global.js";
+import { getTotalQueueSize } from "../process/command-queue.js";
 import { runOnboardingWizard } from "../wizard/onboarding.js";
 import { createAuthRateLimiter, type AuthRateLimiter } from "./auth-rate-limit.js";
 import { startGatewayConfigReloader } from "./config-reload.js";
@@ -225,6 +228,9 @@ export async function startGatewayServer(
    startDiagnosticHeartbeat();
  }
  setGatewaySigusr1RestartPolicy({ allowExternal: cfgAtStart.commands?.restart === true });
+  setPreRestartDeferralCheck(
+    () => getTotalQueueSize() + getTotalPendingReplies() + getActiveEmbeddedRunCount(),
+  );
  initSubagentRegistry();
  const defaultAgentId = resolveDefaultAgentId(cfgAtStart);
  const defaultWorkspaceDir = resolveAgentWorkspaceDir(cfgAtStart, defaultAgentId);