mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-09 11:37:38 +00:00
fix(cron): prevent spin loop when job completes within scheduled second (#17821)
When a cron job fires and completes within the same wall-clock second it was scheduled for, the next-run computation could return undefined or the same second, causing the scheduler to re-trigger the job hundreds of times in a tight loop. Two-layer fix: 1. computeJobNextRunAtMs: When computeNextRunAtMs returns undefined for a cron-kind schedule (edge case where floored nowSecondMs matches the schedule), retry with the ceiling (next second) as reference time. This ensures we always get the next valid occurrence. 2. applyJobResult: Add MIN_REFIRE_GAP_MS (2s) safety net for cron-kind jobs. After a successful run, nextRunAtMs is guaranteed to be at least 2s in the future. This breaks any remaining spin-loop edge cases without affecting normal daily/hourly schedules (where the natural next run is hours/days away). Fixes #17821
This commit is contained in:
committed by
Peter Steinberger
parent
eed806ce58
commit
8af4712c40
@@ -514,6 +514,69 @@ describe("Cron issue regressions", () => {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("prevents spin loop when cron job completes within the scheduled second (#17821)", async () => {
|
||||||
|
const store = await makeStorePath();
|
||||||
|
// Simulate a cron job "0 13 * * *" (daily 13:00 UTC) that fires exactly
|
||||||
|
// at 13:00:00.000 and completes 7ms later (still in the same second).
|
||||||
|
const scheduledAt = Date.parse("2026-02-15T13:00:00.000Z");
|
||||||
|
const nextDay = scheduledAt + 86_400_000;
|
||||||
|
|
||||||
|
const cronJob: CronJob = {
|
||||||
|
id: "spin-loop-17821",
|
||||||
|
name: "daily noon",
|
||||||
|
enabled: true,
|
||||||
|
createdAtMs: scheduledAt - 86_400_000,
|
||||||
|
updatedAtMs: scheduledAt - 86_400_000,
|
||||||
|
schedule: { kind: "cron", expr: "0 13 * * *", tz: "UTC" },
|
||||||
|
sessionTarget: "isolated",
|
||||||
|
wakeMode: "next-heartbeat",
|
||||||
|
payload: { kind: "agentTurn", message: "briefing" },
|
||||||
|
delivery: { mode: "announce" },
|
||||||
|
state: { nextRunAtMs: scheduledAt },
|
||||||
|
};
|
||||||
|
await fs.writeFile(
|
||||||
|
store.storePath,
|
||||||
|
JSON.stringify({ version: 1, jobs: [cronJob] }, null, 2),
|
||||||
|
"utf-8",
|
||||||
|
);
|
||||||
|
|
||||||
|
let now = scheduledAt;
|
||||||
|
let fireCount = 0;
|
||||||
|
const events: CronEvent[] = [];
|
||||||
|
const state = createCronServiceState({
|
||||||
|
cronEnabled: true,
|
||||||
|
storePath: store.storePath,
|
||||||
|
log: noopLogger,
|
||||||
|
nowMs: () => now,
|
||||||
|
enqueueSystemEvent: vi.fn(),
|
||||||
|
requestHeartbeatNow: vi.fn(),
|
||||||
|
onEvent: (evt) => {
|
||||||
|
events.push(evt);
|
||||||
|
},
|
||||||
|
runIsolatedAgentJob: vi.fn(async () => {
|
||||||
|
// Job completes very quickly (7ms) — still within the same second
|
||||||
|
now += 7;
|
||||||
|
fireCount++;
|
||||||
|
return { status: "ok" as const, summary: "done" };
|
||||||
|
}),
|
||||||
|
});
|
||||||
|
|
||||||
|
// First timer tick — should fire the job exactly once
|
||||||
|
await onTimer(state);
|
||||||
|
|
||||||
|
expect(fireCount).toBe(1);
|
||||||
|
|
||||||
|
const job = state.store?.jobs.find((j) => j.id === "spin-loop-17821");
|
||||||
|
expect(job).toBeDefined();
|
||||||
|
// nextRunAtMs MUST be in the future (next day), not the same second
|
||||||
|
expect(job!.state.nextRunAtMs).toBeDefined();
|
||||||
|
expect(job!.state.nextRunAtMs).toBeGreaterThanOrEqual(nextDay);
|
||||||
|
|
||||||
|
// Second timer tick (simulating the timer re-arm) — should NOT fire again
|
||||||
|
await onTimer(state);
|
||||||
|
expect(fireCount).toBe(1);
|
||||||
|
});
|
||||||
|
|
||||||
it("records per-job start time and duration for batched due jobs", async () => {
|
it("records per-job start time and duration for batched due jobs", async () => {
|
||||||
const store = await makeStorePath();
|
const store = await makeStorePath();
|
||||||
const dueAt = Date.parse("2026-02-06T10:05:01.000Z");
|
const dueAt = Date.parse("2026-02-06T10:05:01.000Z");
|
||||||
|
|||||||
@@ -96,7 +96,18 @@ export function computeJobNextRunAtMs(job: CronJob, nowMs: number): number | und
|
|||||||
: null;
|
: null;
|
||||||
return atMs !== null ? atMs : undefined;
|
return atMs !== null ? atMs : undefined;
|
||||||
}
|
}
|
||||||
return computeNextRunAtMs(job.schedule, nowMs);
|
const next = computeNextRunAtMs(job.schedule, nowMs);
|
||||||
|
// Guard against the scheduler returning a time within the same second as
|
||||||
|
// nowMs. When a cron job completes within the same wall-clock second it
|
||||||
|
// was scheduled for, some croner versions/timezone combinations may return
|
||||||
|
// the current second (or computeNextRunAtMs may return undefined, which
|
||||||
|
// triggers recomputation). Advancing to the next second and retrying
|
||||||
|
// ensures we always land on the *next* occurrence. (See #17821)
|
||||||
|
if (next === undefined && job.schedule.kind === "cron") {
|
||||||
|
const nextSecondMs = (Math.floor(nowMs / 1000) + 1) * 1000;
|
||||||
|
return computeNextRunAtMs(job.schedule, nextSecondMs);
|
||||||
|
}
|
||||||
|
return next;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Maximum consecutive schedule errors before auto-disabling a job. */
|
/** Maximum consecutive schedule errors before auto-disabling a job. */
|
||||||
|
|||||||
@@ -15,6 +15,15 @@ import { ensureLoaded, persist } from "./store.js";
|
|||||||
|
|
||||||
const MAX_TIMER_DELAY_MS = 60_000;
|
const MAX_TIMER_DELAY_MS = 60_000;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Minimum gap between consecutive fires of the same cron job. This is a
|
||||||
|
* safety net that prevents spin-loops when `computeJobNextRunAtMs` returns
|
||||||
|
* a value within the same second as the just-completed run. The guard
|
||||||
|
* is intentionally generous (2 s) so it never masks a legitimate schedule
|
||||||
|
* but always breaks an infinite re-trigger cycle. (See #17821)
|
||||||
|
*/
|
||||||
|
const MIN_REFIRE_GAP_MS = 2_000;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Maximum wall-clock time for a single job execution. Acts as a safety net
|
* Maximum wall-clock time for a single job execution. Acts as a safety net
|
||||||
* on top of the per-provider / per-agent timeouts to prevent one stuck job
|
* on top of the per-provider / per-agent timeouts to prevent one stuck job
|
||||||
@@ -107,7 +116,18 @@ function applyJobResult(
|
|||||||
"cron: applying error backoff",
|
"cron: applying error backoff",
|
||||||
);
|
);
|
||||||
} else if (job.enabled) {
|
} else if (job.enabled) {
|
||||||
job.state.nextRunAtMs = computeJobNextRunAtMs(job, result.endedAt);
|
const naturalNext = computeJobNextRunAtMs(job, result.endedAt);
|
||||||
|
if (job.schedule.kind === "cron") {
|
||||||
|
// Safety net: ensure the next fire is at least MIN_REFIRE_GAP_MS
|
||||||
|
// after the current run ended. Prevents spin-loops when the
|
||||||
|
// schedule computation lands in the same second due to
|
||||||
|
// timezone/croner edge cases (see #17821).
|
||||||
|
const minNext = result.endedAt + MIN_REFIRE_GAP_MS;
|
||||||
|
job.state.nextRunAtMs =
|
||||||
|
naturalNext !== undefined ? Math.max(naturalNext, minNext) : minNext;
|
||||||
|
} else {
|
||||||
|
job.state.nextRunAtMs = naturalNext;
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
job.state.nextRunAtMs = undefined;
|
job.state.nextRunAtMs = undefined;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user