fix(cron): keep watchdog timer armed during ticks

This commit is contained in:
Peter Steinberger
2026-02-22 19:50:34 +01:00
parent 5db1ee4ec6
commit 8bf3c37c6c
3 changed files with 88 additions and 9 deletions

View File

@@ -43,6 +43,7 @@ Docs: https://docs.openclaw.ai
- Cron: honor `cron.maxConcurrentRuns` in the timer loop so due jobs can execute up to the configured parallelism instead of always running serially. (#11595) Thanks @Takhoffman.
- Cron/Isolation: force fresh session IDs for isolated cron runs so `sessionTarget="isolated"` executions never reuse prior run context. (#23470) Thanks @echoVic.
- Cron/Service: execute manual `cron.run` jobs outside the cron lock (while still persisting started/finished state atomically) so `cron.list` and `cron.status` remain responsive during long forced runs. (#23628) Thanks @dsgraves.
- Cron/Timer: keep a watchdog recheck timer armed while `onTimer` is actively executing so the scheduler continues polling even if a due-run tick stalls for an extended period. (#23628) Thanks @dsgraves.
- Agents/Compaction: restore embedded compaction safeguard/context-pruning extension loading in production by wiring bundled extension factories into the resource loader instead of runtime file-path resolution. (#22349) Thanks @Glucksberg.
- Feishu/Media: for inbound video messages that include both `file_key` (video) and `image_key` (thumbnail), prefer `file_key` when downloading media so video attachments are saved instead of silently failing on thumbnail keys. (#23633)
- Hooks/Cron: suppress duplicate main-session events for delivered hook turns and mark `SILENT_REPLY_TOKEN` (`NO_REPLY`) early exits as delivered to prevent hook context pollution. (#20678) Thanks @JonathanWorks.

View File

@@ -1,9 +1,12 @@
import fs from "node:fs/promises";
import path from "node:path";
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import {
createCronStoreHarness,
createNoopLogger,
createCronStoreHarness,
createRunningCronServiceState,
} from "./service.test-harness.js";
import { createCronServiceState } from "./service/state.js";
import { onTimer } from "./service/timer.js";
import type { CronJob } from "./types.js";
@@ -31,6 +34,14 @@ function createDueRecurringJob(params: {
};
}
/**
 * Builds a promise together with the function that fulfils it, so a test can
 * decide exactly when an awaited operation completes.
 *
 * @returns An object holding the pending `promise` and its `resolve` function.
 */
function createDeferred<T>() {
  // Definitely assigned: the Promise executor below runs synchronously.
  let settle!: (value: T) => void;
  const pending = new Promise<T>((fulfil) => {
    settle = fulfil;
  });
  return { promise: pending, resolve: settle };
}
describe("CronService - timer re-arm when running (#12025)", () => {
beforeEach(() => {
noopLogger.debug.mockClear();
@@ -81,4 +92,64 @@ describe("CronService - timer re-arm when running (#12025)", () => {
timeoutSpy.mockRestore();
await store.cleanup();
});
it("arms a watchdog timer while a timer tick is still executing", async () => {
  // Create the store before installing the spy so a failure here cannot leave
  // the global setTimeout spy installed.
  const store = await makeStorePath();
  const timeoutSpy = vi.spyOn(globalThis, "setTimeout");
  const now = Date.parse("2026-02-06T10:05:00.000Z");
  // The job run stays pending until the test resolves it, which keeps the
  // tick in flight while the assertions below inspect the scheduler state.
  const deferredRun = createDeferred<{ status: "ok"; summary: string }>();
  let timerPromise: ReturnType<typeof onTimer> | undefined;
  try {
    await fs.mkdir(path.dirname(store.storePath), { recursive: true });
    await fs.writeFile(
      store.storePath,
      JSON.stringify(
        {
          version: 1,
          jobs: [
            createDueRecurringJob({
              id: "long-running-job",
              nowMs: now,
              nextRunAtMs: now,
            }),
          ],
        },
        null,
        2,
      ),
      "utf-8",
    );
    const state = createCronServiceState({
      storePath: store.storePath,
      cronEnabled: true,
      log: noopLogger,
      nowMs: () => now,
      enqueueSystemEvent: vi.fn(),
      requestHeartbeatNow: vi.fn(),
      runIsolatedAgentJob: vi.fn(async () => await deferredRun.promise),
    });
    let settled = false;
    const markSettled = () => {
      settled = true;
    };
    timerPromise = onTimer(state);
    // Observe settlement on both outcomes without creating a derived promise
    // that could reject unhandled.
    void timerPromise.then(markSettled, markSettled);
    await Promise.resolve();
    expect(settled).toBe(false);
    expect(state.running).toBe(true);
    expect(state.timer).not.toBeNull();
    const delays = timeoutSpy.mock.calls
      .map(([, delay]) => delay)
      .filter((d): d is number => typeof d === "number");
    expect(delays).toContain(60_000);
    deferredRun.resolve({ status: "ok", summary: "done" });
    await timerPromise;
    expect(state.running).toBe(false);
  } finally {
    // Release the in-flight run (a no-op when already resolved) and wait for
    // the tick to settle, so nothing is still running when the store is
    // removed, even if an expectation above failed.
    deferredRun.resolve({ status: "ok", summary: "done" });
    await timerPromise?.catch(() => undefined);
    timeoutSpy.mockRestore();
    await store.cleanup();
  }
});
});

View File

@@ -221,6 +221,17 @@ export function armTimer(state: CronServiceState) {
);
}
/**
 * Replaces any timer currently stored on `state.timer` with a new one that
 * calls `onTimer` again after `MAX_TIMER_DELAY_MS`.
 *
 * A rejection from that later `onTimer` call is logged through
 * `state.deps.log.error` rather than propagated.
 */
function armRunningRecheckTimer(state: CronServiceState) {
  const pending = state.timer;
  if (pending) {
    clearTimeout(pending);
  }
  const recheck = () => {
    // Fire-and-forget: the timer callback has no caller to report failures to.
    void onTimer(state).catch((err) => {
      state.deps.log.error({ err: String(err) }, "cron: timer tick failed");
    });
  };
  state.timer = setTimeout(recheck, MAX_TIMER_DELAY_MS);
}
export async function onTimer(state: CronServiceState) {
if (state.running) {
// Re-arm the timer so the scheduler keeps ticking even when a job is
@@ -233,17 +244,13 @@ export async function onTimer(state: CronServiceState) {
// zero-delay hot-loop when past-due jobs are waiting for the current
// execution to finish.
// See: https://github.com/openclaw/openclaw/issues/12025
if (state.timer) {
clearTimeout(state.timer);
}
state.timer = setTimeout(() => {
void onTimer(state).catch((err) => {
state.deps.log.error({ err: String(err) }, "cron: timer tick failed");
});
}, MAX_TIMER_DELAY_MS);
armRunningRecheckTimer(state);
return;
}
state.running = true;
// Keep a watchdog timer armed while a tick is executing. If execution hangs
// (for example in a provider call), the scheduler still wakes to re-check.
armRunningRecheckTimer(state);
try {
const dueJobs = await locked(state, async () => {
await ensureLoaded(state, { forceReload: true, skipRecompute: true });