mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-08 12:41:23 +00:00
refactor: unify gateway restart deferral and dispatcher cleanup
This commit is contained in:
@@ -6,7 +6,7 @@ import type { GatewayRequestContext, GatewayRequestHandlers } from "./types.js";
|
||||
import { resolveSessionAgentId } from "../../agents/agent-scope.js";
|
||||
import { resolveThinkingDefault } from "../../agents/model-selection.js";
|
||||
import { resolveAgentTimeoutMs } from "../../agents/timeout.js";
|
||||
import { dispatchInboundMessage } from "../../auto-reply/dispatch.js";
|
||||
import { dispatchInboundMessage, withReplyDispatcher } from "../../auto-reply/dispatch.js";
|
||||
import { createReplyDispatcher } from "../../auto-reply/reply/reply-dispatcher.js";
|
||||
import { createReplyPrefixOptions } from "../../channels/reply-prefix.js";
|
||||
import { resolveSessionFilePath } from "../../config/sessions.js";
|
||||
@@ -524,36 +524,40 @@ export const chatHandlers: GatewayRequestHandlers = {
|
||||
});
|
||||
|
||||
let agentRunStarted = false;
|
||||
void dispatchInboundMessage({
|
||||
ctx,
|
||||
cfg,
|
||||
void withReplyDispatcher({
|
||||
dispatcher,
|
||||
replyOptions: {
|
||||
runId: clientRunId,
|
||||
abortSignal: abortController.signal,
|
||||
images: parsedImages.length > 0 ? parsedImages : undefined,
|
||||
disableBlockStreaming: true,
|
||||
onAgentRunStart: (runId) => {
|
||||
agentRunStarted = true;
|
||||
const connId = typeof client?.connId === "string" ? client.connId : undefined;
|
||||
const wantsToolEvents = hasGatewayClientCap(
|
||||
client?.connect?.caps,
|
||||
GATEWAY_CLIENT_CAPS.TOOL_EVENTS,
|
||||
);
|
||||
if (connId && wantsToolEvents) {
|
||||
context.registerToolEventRecipient(runId, connId);
|
||||
// Register for any other active runs *in the same session* so
|
||||
// late-joining clients (e.g. page refresh mid-response) receive
|
||||
// in-progress tool events without leaking cross-session data.
|
||||
for (const [activeRunId, active] of context.chatAbortControllers) {
|
||||
if (activeRunId !== runId && active.sessionKey === p.sessionKey) {
|
||||
context.registerToolEventRecipient(activeRunId, connId);
|
||||
run: () =>
|
||||
dispatchInboundMessage({
|
||||
ctx,
|
||||
cfg,
|
||||
dispatcher,
|
||||
replyOptions: {
|
||||
runId: clientRunId,
|
||||
abortSignal: abortController.signal,
|
||||
images: parsedImages.length > 0 ? parsedImages : undefined,
|
||||
disableBlockStreaming: true,
|
||||
onAgentRunStart: (runId) => {
|
||||
agentRunStarted = true;
|
||||
const connId = typeof client?.connId === "string" ? client.connId : undefined;
|
||||
const wantsToolEvents = hasGatewayClientCap(
|
||||
client?.connect?.caps,
|
||||
GATEWAY_CLIENT_CAPS.TOOL_EVENTS,
|
||||
);
|
||||
if (connId && wantsToolEvents) {
|
||||
context.registerToolEventRecipient(runId, connId);
|
||||
// Register for any other active runs *in the same session* so
|
||||
// late-joining clients (e.g. page refresh mid-response) receive
|
||||
// in-progress tool events without leaking cross-session data.
|
||||
for (const [activeRunId, active] of context.chatAbortControllers) {
|
||||
if (activeRunId !== runId && active.sessionKey === p.sessionKey) {
|
||||
context.registerToolEventRecipient(activeRunId, connId);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
onModelSelected,
|
||||
},
|
||||
},
|
||||
onModelSelected,
|
||||
},
|
||||
}),
|
||||
})
|
||||
.then(() => {
|
||||
if (!agentRunStarted) {
|
||||
|
||||
@@ -8,7 +8,11 @@ import { resolveAgentMaxConcurrent, resolveSubagentMaxConcurrent } from "../conf
|
||||
import { startGmailWatcher, stopGmailWatcher } from "../hooks/gmail-watcher.js";
|
||||
import { isTruthyEnvValue } from "../infra/env.js";
|
||||
import { resetDirectoryCache } from "../infra/outbound/target-resolver.js";
|
||||
import { emitGatewayRestart, setGatewaySigusr1RestartPolicy } from "../infra/restart.js";
|
||||
import {
|
||||
deferGatewayRestartUntilIdle,
|
||||
emitGatewayRestart,
|
||||
setGatewaySigusr1RestartPolicy,
|
||||
} from "../infra/restart.js";
|
||||
import { setCommandLaneConcurrency, getTotalQueueSize } from "../process/command-queue.js";
|
||||
import { CommandLane } from "../process/lanes.js";
|
||||
import { resolveHooksConfig } from "./hooks.js";
|
||||
@@ -155,13 +159,33 @@ export function createGatewayReloadHandlers(params: {
|
||||
return;
|
||||
}
|
||||
|
||||
// Check if there are active operations (commands in queue, pending replies, or embedded runs)
|
||||
const queueSize = getTotalQueueSize();
|
||||
const pendingReplies = getTotalPendingReplies();
|
||||
const embeddedRuns = getActiveEmbeddedRunCount();
|
||||
const totalActive = queueSize + pendingReplies + embeddedRuns;
|
||||
const getActiveCounts = () => {
|
||||
const queueSize = getTotalQueueSize();
|
||||
const pendingReplies = getTotalPendingReplies();
|
||||
const embeddedRuns = getActiveEmbeddedRunCount();
|
||||
return {
|
||||
queueSize,
|
||||
pendingReplies,
|
||||
embeddedRuns,
|
||||
totalActive: queueSize + pendingReplies + embeddedRuns,
|
||||
};
|
||||
};
|
||||
const formatActiveDetails = (counts: ReturnType<typeof getActiveCounts>) => {
|
||||
const details = [];
|
||||
if (counts.queueSize > 0) {
|
||||
details.push(`${counts.queueSize} operation(s)`);
|
||||
}
|
||||
if (counts.pendingReplies > 0) {
|
||||
details.push(`${counts.pendingReplies} reply(ies)`);
|
||||
}
|
||||
if (counts.embeddedRuns > 0) {
|
||||
details.push(`${counts.embeddedRuns} embedded run(s)`);
|
||||
}
|
||||
return details;
|
||||
};
|
||||
const active = getActiveCounts();
|
||||
|
||||
if (totalActive > 0) {
|
||||
if (active.totalActive > 0) {
|
||||
// Avoid spinning up duplicate polling loops from repeated config changes.
|
||||
if (restartPending) {
|
||||
params.logReload.info(
|
||||
@@ -170,63 +194,40 @@ export function createGatewayReloadHandlers(params: {
|
||||
return;
|
||||
}
|
||||
restartPending = true;
|
||||
const details = [];
|
||||
if (queueSize > 0) {
|
||||
details.push(`${queueSize} queued operation(s)`);
|
||||
}
|
||||
if (pendingReplies > 0) {
|
||||
details.push(`${pendingReplies} pending reply(ies)`);
|
||||
}
|
||||
if (embeddedRuns > 0) {
|
||||
details.push(`${embeddedRuns} embedded run(s)`);
|
||||
}
|
||||
const initialDetails = formatActiveDetails(active);
|
||||
params.logReload.warn(
|
||||
`config change requires gateway restart (${reasons}) — deferring until ${details.join(", ")} complete`,
|
||||
`config change requires gateway restart (${reasons}) — deferring until ${initialDetails.join(", ")} complete`,
|
||||
);
|
||||
|
||||
// Wait for all operations and replies to complete before restarting (max 30 seconds)
|
||||
const maxWaitMs = 30_000;
|
||||
const checkIntervalMs = 500;
|
||||
const startTime = Date.now();
|
||||
|
||||
const checkAndRestart = () => {
|
||||
const currentQueueSize = getTotalQueueSize();
|
||||
const currentPendingReplies = getTotalPendingReplies();
|
||||
const currentEmbeddedRuns = getActiveEmbeddedRunCount();
|
||||
const currentTotalActive = currentQueueSize + currentPendingReplies + currentEmbeddedRuns;
|
||||
const elapsed = Date.now() - startTime;
|
||||
|
||||
if (currentTotalActive === 0) {
|
||||
restartPending = false;
|
||||
params.logReload.info("all operations and replies completed; restarting gateway now");
|
||||
emitGatewayRestart();
|
||||
} else if (elapsed >= maxWaitMs) {
|
||||
const remainingDetails = [];
|
||||
if (currentQueueSize > 0) {
|
||||
remainingDetails.push(`${currentQueueSize} operation(s)`);
|
||||
}
|
||||
if (currentPendingReplies > 0) {
|
||||
remainingDetails.push(`${currentPendingReplies} reply(ies)`);
|
||||
}
|
||||
if (currentEmbeddedRuns > 0) {
|
||||
remainingDetails.push(`${currentEmbeddedRuns} embedded run(s)`);
|
||||
}
|
||||
restartPending = false;
|
||||
params.logReload.warn(
|
||||
`restart timeout after ${elapsed}ms with ${remainingDetails.join(", ")} still active; restarting anyway`,
|
||||
);
|
||||
emitGatewayRestart();
|
||||
} else {
|
||||
// Check again soon
|
||||
setTimeout(checkAndRestart, checkIntervalMs);
|
||||
}
|
||||
};
|
||||
|
||||
setTimeout(checkAndRestart, checkIntervalMs);
|
||||
deferGatewayRestartUntilIdle({
|
||||
getPendingCount: () => getActiveCounts().totalActive,
|
||||
hooks: {
|
||||
onReady: () => {
|
||||
restartPending = false;
|
||||
params.logReload.info("all operations and replies completed; restarting gateway now");
|
||||
},
|
||||
onTimeout: (_pending, elapsedMs) => {
|
||||
const remaining = formatActiveDetails(getActiveCounts());
|
||||
restartPending = false;
|
||||
params.logReload.warn(
|
||||
`restart timeout after ${elapsedMs}ms with ${remaining.join(", ")} still active; restarting anyway`,
|
||||
);
|
||||
},
|
||||
onCheckError: (err) => {
|
||||
restartPending = false;
|
||||
params.logReload.warn(
|
||||
`restart deferral check failed (${String(err)}); restarting gateway now`,
|
||||
);
|
||||
},
|
||||
},
|
||||
});
|
||||
} else {
|
||||
// No active operations or pending replies, restart immediately
|
||||
params.logReload.warn(`config change requires gateway restart (${reasons})`);
|
||||
emitGatewayRestart();
|
||||
const emitted = emitGatewayRestart();
|
||||
if (!emitted) {
|
||||
params.logReload.info("gateway restart already scheduled; skipping duplicate signal");
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user