fix(cron): cancel timed-out runs before side effects (openclaw#22411) thanks @Takhoffman

Verified:
- pnpm check
- pnpm vitest run src/memory/qmd-manager.test.ts src/cron/service.issue-regressions.test.ts src/cron/isolated-agent.delivers-response-has-heartbeat-ok-but-includes.test.ts --maxWorkers=1

Co-authored-by: Takhoffman <781889+Takhoffman@users.noreply.github.com>
Co-authored-by: Tak Hoffman <781889+Takhoffman@users.noreply.github.com>
This commit is contained in:
Tak Hoffman
2026-02-22 15:45:27 -06:00
committed by GitHub
parent 64b273a71c
commit 556af3f08b
5 changed files with 195 additions and 8 deletions

View File

@@ -72,8 +72,8 @@ export async function executeJobCoreWithTimeout(
executeJobCore(state, job, runAbortController.signal),
new Promise<never>((_, reject) => {
timeoutId = setTimeout(() => {
runAbortController.abort(new Error("cron: job execution timed out"));
reject(new Error("cron: job execution timed out"));
runAbortController.abort(timeoutErrorMessage());
reject(new Error(timeoutErrorMessage()));
}, jobTimeoutMs);
}),
]);
@@ -91,6 +91,16 @@ function resolveRunConcurrency(state: CronServiceState): number {
}
return Math.max(1, Math.floor(raw));
}
/**
 * Single source of truth for the text reported when a cron job run
 * exceeds its time budget.
 *
 * Kept as a function so every call site (abort reason, rejection error,
 * run result) produces the exact same string and can be compared by value.
 *
 * @returns The canonical timeout message.
 */
function timeoutErrorMessage(): string {
  const message = "cron: job execution timed out";
  return message;
}
/**
 * Reports whether a caught value represents the cron timeout/abort condition.
 *
 * Recognised shapes:
 * - an `Error` whose `name` is `"AbortError"`;
 * - an `Error` whose `message` equals the canonical timeout message;
 * - the bare timeout message string. An AbortController may be aborted with
 *   a plain string as its reason, and code that rethrows `signal.reason`
 *   then surfaces that string rather than an `Error`.
 *
 * @param err - Value caught from a job run; may be of any type.
 * @returns `true` when `err` should be reported as a timeout, else `false`.
 */
function isAbortError(err: unknown): boolean {
  const timeoutMessage = timeoutErrorMessage();
  if (typeof err === "string") {
    // Abort reasons are not required to be Error instances.
    return err === timeoutMessage;
  }
  if (!(err instanceof Error)) {
    return false;
  }
  return err.name === "AbortError" || err.message === timeoutMessage;
}
/**
* Exponential backoff delays (in ms) indexed by consecutive error count.
* After the last entry the delay stays constant.
@@ -354,14 +364,15 @@ export async function onTimer(state: CronServiceState) {
const result = await executeJobCoreWithTimeout(state, job);
return { jobId: id, ...result, startedAt, endedAt: state.deps.nowMs() };
} catch (err) {
const errorText = isAbortError(err) ? timeoutErrorMessage() : String(err);
state.deps.log.warn(
{ jobId: id, jobName: job.name, timeoutMs: jobTimeoutMs ?? null },
`cron: job failed: ${String(err)}`,
`cron: job failed: ${errorText}`,
);
return {
jobId: id,
status: "error",
error: String(err),
error: errorText,
startedAt,
endedAt: state.deps.nowMs(),
};
@@ -596,6 +607,9 @@ export async function executeJobCore(
job: CronJob,
abortSignal?: AbortSignal,
): Promise<CronRunOutcome & CronRunTelemetry & { delivered?: boolean }> {
if (abortSignal?.aborted) {
return { status: "error", error: timeoutErrorMessage() };
}
if (job.sessionTarget === "main") {
const text = resolveJobPayloadTextForMain(job);
if (!text) {
@@ -622,6 +636,9 @@ export async function executeJobCore(
let heartbeatResult: HeartbeatRunResult;
for (;;) {
if (abortSignal?.aborted) {
return { status: "error", error: timeoutErrorMessage() };
}
heartbeatResult = await state.deps.runHeartbeatOnce({
reason,
agentId: job.agentId,
@@ -665,7 +682,7 @@ export async function executeJobCore(
return { status: "skipped", error: "isolated job requires payload.kind=agentTurn" };
}
if (abortSignal?.aborted) {
return { status: "error", error: "cron: job execution aborted" };
return { status: "error", error: timeoutErrorMessage() };
}
const res = await state.deps.runIsolatedAgentJob({
@@ -674,6 +691,10 @@ export async function executeJobCore(
abortSignal,
});
if (abortSignal?.aborted) {
return { status: "error", error: timeoutErrorMessage() };
}
// Post a short summary back to the main session — but only when the
// isolated run did NOT already deliver its output to the target channel.
// When `res.delivered` is true the announce flow (or direct outbound