mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-08 02:31:22 +00:00
fix(queue): harden drain/abort/timeout race handling
- reject new lane enqueues once gateway drain begins
- always reset lane draining state and isolate onWait callback failures
- persist per-session abort cutoff and skip stale queued messages
- avoid false 600s agentTurn timeout in isolated cron jobs

Fixes #27407
Fixes #27332
Fixes #27427

Co-authored-by: Kevin Shenghui <shenghuikevin@github.com>
Co-authored-by: zjmy <zhangjunmengyang@gmail.com>
Co-authored-by: suko <miha.sukic@gmail.com>
This commit is contained in:
@@ -9,7 +9,7 @@ import { CronService } from "./service.js";
|
||||
import { createDeferred, createRunningCronServiceState } from "./service.test-harness.js";
|
||||
import { computeJobNextRunAtMs } from "./service/jobs.js";
|
||||
import { createCronServiceState, type CronEvent } from "./service/state.js";
|
||||
import { executeJobCore, onTimer, runMissedJobs } from "./service/timer.js";
|
||||
import { DEFAULT_JOB_TIMEOUT_MS, executeJobCore, onTimer, runMissedJobs } from "./service/timer.js";
|
||||
import type { CronJob, CronJobState } from "./types.js";
|
||||
|
||||
const noopLogger = {
|
||||
@@ -838,6 +838,58 @@ describe("Cron issue regressions", () => {
|
||||
expect(job?.state.lastStatus).toBe("ok");
|
||||
});
|
||||
|
||||
it("does not time out agentTurn jobs at the default 10-minute safety window", async () => {
|
||||
const store = await makeStorePath();
|
||||
const scheduledAt = Date.parse("2026-02-15T13:00:00.000Z");
|
||||
|
||||
const cronJob = createIsolatedRegressionJob({
|
||||
id: "agentturn-default-safety-window",
|
||||
name: "agentturn default safety window",
|
||||
scheduledAt,
|
||||
schedule: { kind: "at", at: new Date(scheduledAt).toISOString() },
|
||||
payload: { kind: "agentTurn", message: "work" },
|
||||
state: { nextRunAtMs: scheduledAt },
|
||||
});
|
||||
await writeCronJobs(store.storePath, [cronJob]);
|
||||
|
||||
let now = scheduledAt;
|
||||
const deferredRun = createDeferred<{ status: "ok"; summary: string }>();
|
||||
const runIsolatedAgentJob = vi.fn(async ({ abortSignal }: { abortSignal?: AbortSignal }) => {
|
||||
const result = await deferredRun.promise;
|
||||
if (abortSignal?.aborted) {
|
||||
return { status: "error" as const, error: String(abortSignal.reason) };
|
||||
}
|
||||
now += 5;
|
||||
return result;
|
||||
});
|
||||
const state = createCronServiceState({
|
||||
cronEnabled: true,
|
||||
storePath: store.storePath,
|
||||
log: noopLogger,
|
||||
nowMs: () => now,
|
||||
enqueueSystemEvent: vi.fn(),
|
||||
requestHeartbeatNow: vi.fn(),
|
||||
runIsolatedAgentJob,
|
||||
});
|
||||
|
||||
const timerPromise = onTimer(state);
|
||||
let settled = false;
|
||||
void timerPromise.finally(() => {
|
||||
settled = true;
|
||||
});
|
||||
|
||||
await vi.advanceTimersByTimeAsync(DEFAULT_JOB_TIMEOUT_MS + 1_000);
|
||||
await Promise.resolve();
|
||||
expect(settled).toBe(false);
|
||||
|
||||
deferredRun.resolve({ status: "ok", summary: "done" });
|
||||
await timerPromise;
|
||||
|
||||
const job = state.store?.jobs.find((entry) => entry.id === "agentturn-default-safety-window");
|
||||
expect(job?.state.lastStatus).toBe("ok");
|
||||
expect(job?.state.lastError).toBeUndefined();
|
||||
});
|
||||
|
||||
it("aborts isolated runs when cron timeout fires", async () => {
|
||||
vi.useRealTimers();
|
||||
const store = await makeStorePath();
|
||||
|
||||
@@ -36,6 +36,7 @@ const MIN_REFIRE_GAP_MS = 2_000;
|
||||
* from wedging the entire cron lane.
|
||||
*/
|
||||
export const DEFAULT_JOB_TIMEOUT_MS = 10 * 60_000; // 10 minutes
|
||||
const AGENT_TURN_SAFETY_TIMEOUT_MS = 60 * 60_000; // 60 minutes
|
||||
|
||||
type TimedCronRunOutcome = CronRunOutcome &
|
||||
CronRunTelemetry & {
|
||||
@@ -52,7 +53,7 @@ function resolveCronJobTimeoutMs(job: CronJob): number | undefined {
|
||||
? Math.floor(job.payload.timeoutSeconds * 1_000)
|
||||
: undefined;
|
||||
if (configuredTimeoutMs === undefined) {
|
||||
return DEFAULT_JOB_TIMEOUT_MS;
|
||||
return job.payload.kind === "agentTurn" ? AGENT_TURN_SAFETY_TIMEOUT_MS : DEFAULT_JOB_TIMEOUT_MS;
|
||||
}
|
||||
return configuredTimeoutMs <= 0 ? undefined : configuredTimeoutMs;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user