fix(cron): prevent spin loop when job completes within scheduled second (#17821)

When a cron job fires and completes within the same wall-clock second it
was scheduled for, the next-run computation could return undefined or the
same second, causing the scheduler to re-trigger the job hundreds of
times in a tight loop.

Two-layer fix:

1. computeJobNextRunAtMs: When computeNextRunAtMs returns undefined for a
   cron-kind schedule (edge case where floored nowSecondMs matches the
   schedule), retry with the ceiling (next second) as reference time.
   This ensures we always get the next valid occurrence.

2. applyJobResult: Add MIN_REFIRE_GAP_MS (2s) safety net for cron-kind
   jobs.  After a successful run, nextRunAtMs is guaranteed to be at
   least 2s in the future.  This breaks any remaining spin-loop edge
   cases without affecting normal daily/hourly schedules (where the
   natural next run is hours/days away).

Fixes #17821
This commit is contained in:
Marcus Widing
2026-02-16 14:09:16 +01:00
committed by Peter Steinberger
parent eed806ce58
commit 8af4712c40
3 changed files with 96 additions and 2 deletions

View File

@@ -514,6 +514,69 @@ describe("Cron issue regressions", () => {
}
});
it("prevents spin loop when cron job completes within the scheduled second (#17821)", async () => {
  // Regression for #17821: a daily cron job ("0 13 * * *", UTC) is due at exactly
  // 13:00:00.000 and its run finishes 7ms later, i.e. inside the very second it
  // was scheduled for. After that run the job must be rescheduled for the next
  // day, and a follow-up timer tick must not execute it again.
  const DAY_MS = 86_400_000;
  const JOB_ID = "spin-loop-17821";
  const firedAtMs = Date.parse("2026-02-15T13:00:00.000Z");
  const followingRunMs = firedAtMs + DAY_MS;
  const seededAtMs = firedAtMs - DAY_MS;

  const { storePath } = await makeStorePath();

  // Seed the on-disk store with a single job that is due right now.
  // (`name` is only a label; the schedule below fires at 13:00 UTC.)
  const dailyJob: CronJob = {
    id: JOB_ID,
    name: "daily noon",
    enabled: true,
    createdAtMs: seededAtMs,
    updatedAtMs: seededAtMs,
    schedule: { kind: "cron", expr: "0 13 * * *", tz: "UTC" },
    sessionTarget: "isolated",
    wakeMode: "next-heartbeat",
    payload: { kind: "agentTurn", message: "briefing" },
    delivery: { mode: "announce" },
    state: { nextRunAtMs: firedAtMs },
  };
  const persisted = JSON.stringify({ version: 1, jobs: [dailyJob] }, null, 2);
  await fs.writeFile(storePath, persisted, "utf-8");

  // Fake clock driven by the test; it only moves when the job runner advances it.
  let clockMs = firedAtMs;
  let runCount = 0;
  const emitted: CronEvent[] = [];

  const state = createCronServiceState({
    cronEnabled: true,
    storePath,
    log: noopLogger,
    nowMs: () => clockMs,
    enqueueSystemEvent: vi.fn(),
    requestHeartbeatNow: vi.fn(),
    onEvent: (event) => {
      emitted.push(event);
    },
    runIsolatedAgentJob: vi.fn(async () => {
      // The run takes 7ms, so the clock is still inside the scheduled second.
      runCount += 1;
      clockMs += 7;
      return { status: "ok" as const, summary: "done" };
    }),
  });

  // Tick #1: the due job runs exactly once.
  await onTimer(state);
  expect(runCount).toBe(1);

  // The stored job must now point at the next day's occurrence (or later),
  // never at the second that has just been handled.
  const rescheduled = state.store?.jobs.find((candidate) => candidate.id === JOB_ID);
  expect(rescheduled).toBeDefined();
  const upcomingRunMs = rescheduled?.state.nextRunAtMs;
  expect(upcomingRunMs).toBeDefined();
  expect(upcomingRunMs).toBeGreaterThanOrEqual(followingRunMs);

  // Tick #2 (stands in for the timer re-arming): nothing is due, so no second run.
  await onTimer(state);
  expect(runCount).toBe(1);
});
it("records per-job start time and duration for batched due jobs", async () => {
const store = await makeStorePath();
const dueAt = Date.parse("2026-02-06T10:05:01.000Z");