fix(queue): harden drain/abort/timeout race handling

- reject new lane enqueues once gateway drain begins
- always reset lane draining state and isolate onWait callback failures
- persist per-session abort cutoff and skip stale queued messages
- avoid false 600s agentTurn timeout in isolated cron jobs

Fixes #27407
Fixes #27332
Fixes #27427

Co-authored-by: Kevin Shenghui <shenghuikevin@github.com>
Co-authored-by: zjmy <zhangjunmengyang@gmail.com>
Co-authored-by: suko <miha.sukic@gmail.com>
This commit is contained in:
Peter Steinberger
2026-02-26 13:43:30 +01:00
parent 1aef45bc06
commit c397a02c9a
13 changed files with 551 additions and 42 deletions

View File

@@ -10,8 +10,10 @@ import {
isAbortRequestText,
isAbortTrigger,
resetAbortMemoryForTest,
resolveAbortCutoffFromContext,
resolveSessionEntryForKey,
setAbortMemory,
shouldSkipMessageByAbortCutoff,
tryFastAbortFromMessage,
} from "./abort.js";
import { enqueueFollowupRun, getFollowupQueueDepth, type FollowupRun } from "./queue.js";
@@ -80,6 +82,9 @@ describe("abort detection", () => {
sessionKey: string;
from: string;
to: string;
targetSessionKey?: string;
messageSid?: string;
timestamp?: number;
}) {
return tryFastAbortFromMessage({
ctx: buildTestCtx({
@@ -91,6 +96,9 @@ describe("abort detection", () => {
Surface: "telegram",
From: params.from,
To: params.to,
...(params.targetSessionKey ? { CommandTargetSessionKey: params.targetSessionKey } : {}),
...(params.messageSid ? { MessageSid: params.messageSid } : {}),
...(typeof params.timestamp === "number" ? { Timestamp: params.timestamp } : {}),
}),
cfg: params.cfg,
});
@@ -221,6 +229,62 @@ describe("abort detection", () => {
expect(getAbortMemory("session-2104")).toBe(true);
});
it("extracts abort cutoff metadata from context", () => {
  // A context carrying both a message id and a timestamp should yield both fields unchanged.
  const ctx = buildTestCtx({ MessageSid: "42", Timestamp: 123 });
  const cutoff = resolveAbortCutoffFromContext(ctx);
  expect(cutoff).toEqual({ messageSid: "42", timestamp: 123 });
});
it("treats numeric message IDs at or before cutoff as stale", () => {
  // Cutoff id is fixed at "200": ids below or equal to it are stale, the next id is not.
  const cutoffMessageSid = "200";
  const expectations: Array<[messageSid: string, stale: boolean]> = [
    ["199", true],
    ["200", true],
    ["201", false],
  ];
  for (const [messageSid, stale] of expectations) {
    expect(shouldSkipMessageByAbortCutoff({ cutoffMessageSid, messageSid })).toBe(stale);
  }
});
it("falls back to timestamp cutoff when message IDs are unavailable", () => {
  // With no message ids supplied, only the timestamps decide: at or before 2000 is stale.
  const cutoffTimestamp = 2000;
  const expectations: Array<[timestamp: number, stale: boolean]> = [
    [1999, true],
    [2000, true],
    [2001, false],
  ];
  for (const [timestamp, stale] of expectations) {
    expect(shouldSkipMessageByAbortCutoff({ cutoffTimestamp, timestamp })).toBe(stale);
  }
});
it("resolves session entry when key exists in store", () => {
const store = {
"session-1": { sessionId: "abc", updatedAt: 0 },
@@ -291,6 +355,64 @@ describe("abort detection", () => {
expect(commandQueueMocks.clearCommandLane).toHaveBeenCalledWith(`session:${sessionKey}`);
});
it("persists abort cutoff metadata on /stop when command and target session match", async () => {
  const sessionKey = "telegram:123";
  const { storePath, cfg } = await createAbortConfig({
    sessionIdsByKey: { [sessionKey]: "session-123" },
  });

  // The command is issued from the same session it targets, so the cutoff should be stored.
  const outcome = await runStopCommand({
    cfg,
    sessionKey,
    from: "telegram:123",
    to: "telegram:123",
    messageSid: "55",
    timestamp: 1234567890000,
  });
  expect(outcome.handled).toBe(true);

  // Inspect what was actually written to the session store file.
  const rawStore = await fs.readFile(storePath, "utf8");
  const persisted = JSON.parse(rawStore) as Record<string, unknown>;
  const saved = persisted[sessionKey] as {
    abortedLastRun?: boolean;
    abortCutoffMessageSid?: string;
    abortCutoffTimestamp?: number;
  };
  expect(saved.abortedLastRun).toBe(true);
  expect(saved.abortCutoffMessageSid).toBe("55");
  expect(saved.abortCutoffTimestamp).toBe(1234567890000);
});
it("does not persist cutoff metadata when native /stop targets a different session", async () => {
  const targetSessionKey = "agent:main:telegram:group:123";
  const { storePath, cfg } = await createAbortConfig({
    sessionIdsByKey: { [targetSessionKey]: "session-target" },
  });

  // The command arrives on a slash session key that differs from the target session key.
  const outcome = await runStopCommand({
    cfg,
    sessionKey: "telegram:slash:123",
    from: "telegram:123",
    to: "telegram:123",
    targetSessionKey,
    messageSid: "999",
    timestamp: 1234567890000,
  });
  expect(outcome.handled).toBe(true);

  // The target entry is marked aborted, but no cutoff id/timestamp is recorded on it.
  const rawStore = await fs.readFile(storePath, "utf8");
  const persisted = JSON.parse(rawStore) as Record<string, unknown>;
  const saved = persisted[targetSessionKey] as {
    abortedLastRun?: boolean;
    abortCutoffMessageSid?: string;
    abortCutoffTimestamp?: number;
  };
  expect(saved.abortedLastRun).toBe(true);
  expect(saved.abortCutoffMessageSid).toBeUndefined();
  expect(saved.abortCutoffTimestamp).toBeUndefined();
});
it("fast-abort stops active subagent runs for requester session", async () => {
const sessionKey = "telegram:parent";
const childKey = "agent:main:subagent:child-1";

View File

@@ -113,6 +113,80 @@ export function getAbortMemory(key: string): boolean | undefined {
return ABORT_MEMORY.get(normalized);
}
/**
 * Message id and/or timestamp extracted from an inbound message context by
 * `resolveAbortCutoffFromContext`. Either field may be absent.
 */
export type AbortCutoff = {
/** Trimmed message id, taken from `MessageSidFull` or else `MessageSid` of the context. */
messageSid?: string;
/** Finite `Timestamp` of the context (unit is not visible from this definition). */
timestamp?: number;
};
/**
 * Builds abort cutoff metadata from an inbound message context.
 *
 * @param ctx - Message context; `MessageSidFull`, `MessageSid` and `Timestamp` are read.
 * @returns The cutoff (both keys always present, possibly `undefined`), or
 *   `undefined` when the context has neither a non-blank id nor a finite timestamp.
 */
export function resolveAbortCutoffFromContext(ctx: MsgContext): AbortCutoff | undefined {
  // Blank or non-string ids count as missing.
  const pickSid = (value: unknown): string | undefined =>
    typeof value === "string" ? value.trim() || undefined : undefined;

  // The full id wins; the short id is only consulted when the full one is unusable.
  const messageSid = pickSid(ctx.MessageSidFull) ?? pickSid(ctx.MessageSid);

  const rawTimestamp = ctx.Timestamp;
  const timestamp =
    typeof rawTimestamp === "number" && Number.isFinite(rawTimestamp) ? rawTimestamp : undefined;

  return messageSid === undefined && timestamp === undefined
    ? undefined
    : { messageSid, timestamp };
}
/**
 * Parses a message id made up solely of decimal digits into a `bigint`.
 *
 * @param value - Candidate id; surrounding whitespace is ignored.
 * @returns The numeric id, or `undefined` when the value is missing, blank,
 *   or contains anything other than decimal digits.
 */
function toNumericMessageSid(value: string | undefined): bigint | undefined {
  const candidate = (value ?? "").trim();
  const isDecimal = candidate.length > 0 && /^\d+$/.test(candidate);
  if (!isDecimal) {
    return undefined;
  }
  // bigint keeps ids beyond Number.MAX_SAFE_INTEGER exact.
  try {
    return BigInt(candidate);
  } catch {
    return undefined;
  }
}
/**
 * Decides whether a message is at or before a stored abort cutoff.
 *
 * Message ids are compared first: numerically when both ids are all-digit,
 * otherwise only for exact equality. When the ids give no answer, finite
 * timestamps are compared instead.
 *
 * @param params.cutoffMessageSid - Message id stored as the cutoff.
 * @param params.cutoffTimestamp - Timestamp stored as the cutoff.
 * @param params.messageSid - Id of the message being evaluated.
 * @param params.timestamp - Timestamp of the message being evaluated.
 * @returns `true` when the message is at or before the cutoff, `false`
 *   otherwise (including when nothing is comparable).
 */
export function shouldSkipMessageByAbortCutoff(params: {
  cutoffMessageSid?: string;
  cutoffTimestamp?: number;
  messageSid?: string;
  timestamp?: number;
}): boolean {
  const cutoffId = params.cutoffMessageSid?.trim();
  const incomingId = params.messageSid?.trim();

  if (cutoffId && incomingId) {
    const cutoffValue = toNumericMessageSid(cutoffId);
    const incomingValue = toNumericMessageSid(incomingId);
    if (cutoffValue === undefined || incomingValue === undefined) {
      // Non-numeric ids carry no ordering; only an exact match is conclusive.
      if (incomingId === cutoffId) {
        return true;
      }
    } else {
      return incomingValue <= cutoffValue;
    }
  }

  // Ids were missing or inconclusive: fall back to timestamps when both are finite numbers.
  const isFiniteNumber = (value: number | undefined): value is number =>
    typeof value === "number" && Number.isFinite(value);
  const { cutoffTimestamp, timestamp } = params;
  if (!isFiniteNumber(cutoffTimestamp) || !isFiniteNumber(timestamp)) {
    return false;
  }
  return timestamp <= cutoffTimestamp;
}
/**
 * Tells whether cutoff metadata from the command's own context may be stored
 * for the targeted session.
 *
 * @param params.commandSessionKey - Session key the command was issued from.
 * @param params.targetSessionKey - Session key the abort applies to.
 * @returns `true` when either key is missing or blank, or when both keys are
 *   equal after trimming; `false` when they differ.
 */
function shouldPersistAbortCutoff(params: {
  commandSessionKey?: string;
  targetSessionKey?: string;
}): boolean {
  const source = params.commandSessionKey?.trim();
  const target = params.targetSessionKey?.trim();
  // Native targeted /stop can run from a slash/session-control key while the
  // actual target session uses different message id/timestamp spaces.
  // Persist cutoff only when command source and target are the same session
  // (a missing key on either side is treated as "same").
  return !source || !target || source === target;
}
function pruneAbortMemory(): void {
if (ABORT_MEMORY.size <= ABORT_MEMORY_MAX) {
return;
@@ -302,8 +376,16 @@ export async function tryFastAbortFromMessage(params: {
`abort: cleared followups=${cleared.followupCleared} lane=${cleared.laneCleared} keys=${cleared.keys.join(",")}`,
);
}
const abortCutoff = shouldPersistAbortCutoff({
commandSessionKey: ctx.SessionKey,
targetSessionKey: key ?? targetKey,
})
? resolveAbortCutoffFromContext(ctx)
: undefined;
if (entry && key) {
entry.abortedLastRun = true;
entry.abortCutoffMessageSid = abortCutoff?.messageSid;
entry.abortCutoffTimestamp = abortCutoff?.timestamp;
entry.updatedAt = Date.now();
store[key] = entry;
await updateSessionStore(storePath, (nextStore) => {
@@ -312,6 +394,8 @@ export async function tryFastAbortFromMessage(params: {
return;
}
nextEntry.abortedLastRun = true;
nextEntry.abortCutoffMessageSid = abortCutoff?.messageSid;
nextEntry.abortCutoffTimestamp = abortCutoff?.timestamp;
nextEntry.updatedAt = Date.now();
nextStore[key] = nextEntry;
});

View File

@@ -19,6 +19,7 @@ import { normalizeUsageDisplay, resolveResponseUsageMode } from "../thinking.js"
import {
formatAbortReplyText,
isAbortTrigger,
resolveAbortCutoffFromContext,
resolveSessionEntryForKey,
setAbortMemory,
stopSubagentsForRequester,
@@ -99,6 +100,7 @@ async function applyAbortTarget(params: {
sessionStore?: Record<string, SessionEntry>;
storePath?: string;
abortKey?: string;
abortCutoff?: { messageSid?: string; timestamp?: number };
}) {
const { abortTarget } = params;
if (abortTarget.sessionId) {
@@ -106,11 +108,19 @@ async function applyAbortTarget(params: {
}
if (abortTarget.entry && params.sessionStore && abortTarget.key) {
abortTarget.entry.abortedLastRun = true;
abortTarget.entry.abortCutoffMessageSid = params.abortCutoff?.messageSid;
abortTarget.entry.abortCutoffTimestamp = params.abortCutoff?.timestamp;
abortTarget.entry.updatedAt = Date.now();
params.sessionStore[abortTarget.key] = abortTarget.entry;
if (params.storePath) {
await updateSessionStore(params.storePath, (store) => {
store[abortTarget.key] = abortTarget.entry;
store[abortTarget.key] = {
...abortTarget.entry,
abortedLastRun: true,
abortCutoffMessageSid: params.abortCutoff?.messageSid,
abortCutoffTimestamp: params.abortCutoff?.timestamp,
updatedAt: Date.now(),
};
});
}
} else if (params.abortKey) {
@@ -503,6 +513,12 @@ export const handleStopCommand: CommandHandler = async (params, allowTextCommand
sessionStore: params.sessionStore,
storePath: params.storePath,
abortKey: params.command.abortKey,
abortCutoff:
params.sessionKey?.trim() &&
abortTarget.key?.trim() &&
params.sessionKey.trim() === abortTarget.key.trim()
? resolveAbortCutoffFromContext(params.ctx)
: undefined,
});
// Trigger internal hook for stop command
@@ -545,6 +561,12 @@ export const handleAbortTrigger: CommandHandler = async (params, allowTextComman
sessionStore: params.sessionStore,
storePath: params.storePath,
abortKey: params.command.abortKey,
abortCutoff:
params.sessionKey?.trim() &&
abortTarget.key?.trim() &&
params.sessionKey.trim() === abortTarget.key.trim()
? resolveAbortCutoffFromContext(params.ctx)
: undefined,
});
return { shouldContinue: false, reply: { text: "⚙️ Agent was aborted." } };
};

View File

@@ -1,4 +1,5 @@
import { beforeEach, describe, expect, it, vi } from "vitest";
import type { SessionEntry } from "../../config/sessions.js";
import type { TemplateContext } from "../templating.js";
import { clearInlineDirectives } from "./get-reply-directives-utils.js";
import { buildTestCtx } from "./test-ctx.js";
@@ -146,4 +147,78 @@ describe("handleInlineActions", () => {
}),
);
});
it("skips stale queued messages that are at or before the /stop cutoff", async () => {
  const body = "old queued message";
  const typing = createTypingController();
  // Stored cutoff id is "42"; the inbound message below carries the earlier id "41".
  const sessionEntry: SessionEntry = {
    sessionId: "session-1",
    updatedAt: Date.now(),
    abortCutoffMessageSid: "42",
    abortedLastRun: true,
  };
  const input = createHandleInlineActionsInput({
    ctx: buildTestCtx({ Body: body, CommandBody: body, MessageSid: "41" }),
    typing,
    cleanedBody: body,
    command: { rawBodyNormalized: body, commandBodyNormalized: body },
    overrides: { sessionEntry, sessionStore: { "s:main": sessionEntry } },
  });

  const result = await handleInlineActions(input);

  // The message is dropped: empty reply, typing cleaned up, command handling never reached.
  expect(result).toEqual({ kind: "reply", reply: undefined });
  expect(typing.cleanup).toHaveBeenCalled();
  expect(handleCommandsMock).not.toHaveBeenCalled();
});
it("clears /stop cutoff when a newer message arrives", async () => {
  const body = "new message";
  const typing = createTypingController();
  // Stored cutoff id is "42"; the inbound message below carries the later id "43".
  const sessionEntry: SessionEntry = {
    sessionId: "session-2",
    updatedAt: Date.now(),
    abortCutoffMessageSid: "42",
    abortedLastRun: true,
  };
  const sessionStore = { "s:main": sessionEntry };
  handleCommandsMock.mockResolvedValue({ shouldContinue: false, reply: { text: "ok" } });

  const input = createHandleInlineActionsInput({
    ctx: buildTestCtx({ Body: body, CommandBody: body, MessageSid: "43" }),
    typing,
    cleanedBody: body,
    command: { rawBodyNormalized: body, commandBodyNormalized: body },
    overrides: { sessionEntry, sessionStore },
  });
  const result = await handleInlineActions(input);

  // The message is processed normally and the stored cutoff fields are wiped.
  expect(result).toEqual({ kind: "reply", reply: { text: "ok" } });
  const stored = sessionStore["s:main"];
  expect(stored?.abortCutoffMessageSid).toBeUndefined();
  expect(stored?.abortCutoffTimestamp).toBeUndefined();
  expect(handleCommandsMock).toHaveBeenCalledTimes(1);
});
});

View File

@@ -5,6 +5,7 @@ import { applyOwnerOnlyToolPolicy } from "../../agents/tool-policy.js";
import { getChannelDock } from "../../channels/dock.js";
import type { OpenClawConfig } from "../../config/config.js";
import type { SessionEntry } from "../../config/sessions.js";
import { updateSessionStore } from "../../config/sessions.js";
import { logVerbose } from "../../globals.js";
import { generateSecureToken } from "../../infra/secure-random.js";
import { resolveGatewayMessageChannel } from "../../utils/message-channel.js";
@@ -16,7 +17,7 @@ import {
import type { MsgContext, TemplateContext } from "../templating.js";
import type { ElevatedLevel, ReasoningLevel, ThinkLevel, VerboseLevel } from "../thinking.js";
import type { GetReplyOptions, ReplyPayload } from "../types.js";
import { getAbortMemory } from "./abort.js";
import { getAbortMemory, isAbortRequestText, shouldSkipMessageByAbortCutoff } from "./abort.js";
import { buildStatusReply, handleCommands } from "./commands.js";
import type { InlineDirectives } from "./directive-handling.js";
import { isDirectiveOnly } from "./directive-handling.js";
@@ -252,6 +253,57 @@ export async function handleInlineActions(params: {
await opts.onBlockReply(reply);
};
// Removes any stored abort cutoff from the in-memory session entry and, when a
// store path is available, from the persisted session store as well.
const clearAbortCutoff = async () => {
  if (!sessionEntry || !sessionStore || !sessionKey) {
    return;
  }
  const hasCutoff =
    sessionEntry.abortCutoffMessageSid !== undefined ||
    sessionEntry.abortCutoffTimestamp !== undefined;
  if (!hasCutoff) {
    // Nothing recorded, so avoid touching updatedAt or the store.
    return;
  }

  sessionEntry.abortCutoffMessageSid = undefined;
  sessionEntry.abortCutoffTimestamp = undefined;
  sessionEntry.updatedAt = Date.now();
  sessionStore[sessionKey] = sessionEntry;

  if (!storePath) {
    return;
  }
  await updateSessionStore(storePath, (store) => {
    // Prefer the entry already in the store; otherwise write the in-memory one.
    const persisted = store[sessionKey] ?? sessionEntry;
    if (!persisted) {
      return;
    }
    persisted.abortCutoffMessageSid = undefined;
    persisted.abortCutoffTimestamp = undefined;
    persisted.updatedAt = Date.now();
    store[sessionKey] = persisted;
  });
};
// Abort-cutoff gate: decide whether this inbound message should be dropped
// because it is at or before a previously stored cutoff.
// Messages that isAbortRequestText accepts bypass the gate entirely.
const isStopLikeInbound = isAbortRequestText(command.rawBodyNormalized);
if (!isStopLikeInbound && sessionEntry) {
const shouldSkip = shouldSkipMessageByAbortCutoff({
cutoffMessageSid: sessionEntry.abortCutoffMessageSid,
cutoffTimestamp: sessionEntry.abortCutoffTimestamp,
// Prefer the full message id, then the short one; blank strings count as missing.
messageSid:
(typeof ctx.MessageSidFull === "string" && ctx.MessageSidFull.trim()) ||
(typeof ctx.MessageSid === "string" && ctx.MessageSid.trim()) ||
undefined,
timestamp: typeof ctx.Timestamp === "number" ? ctx.Timestamp : undefined,
});
if (shouldSkip) {
// Stale message: release the typing controller and return an empty reply.
typing.cleanup();
return { kind: "reply", reply: undefined };
}
// The message was not skipped, so any cutoff still stored on the entry is cleared.
if (
sessionEntry.abortCutoffMessageSid !== undefined ||
sessionEntry.abortCutoffTimestamp !== undefined
) {
await clearAbortCutoff();
}
}
const inlineCommand =
allowTextCommands && command.isAuthorizedSender
? extractInlineSimpleCommand(cleanedBody)