mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-10 16:04:59 +00:00
fix(cron): isolate schedule errors to prevent one bad job from breaking all jobs (#14385)
Previously, if one cron job had a malformed schedule expression (e.g. invalid cron syntax), the error would propagate up and break the entire scheduler loop. This meant one misconfigured job could prevent ALL cron jobs from running. Changes: - Wrap per-job schedule computation in try/catch in recomputeNextRuns() - Track consecutive schedule errors via new scheduleErrorCount field - Log warnings for schedule errors with job ID and name - Auto-disable jobs after 3 consecutive schedule errors (with error-level log) - Clear error count when schedule computation succeeds - Continue processing other jobs even when one fails This ensures the scheduler is resilient to individual job misconfigurations while still providing visibility into problems through logging. Co-authored-by: Marvin <numegilagent@gmail.com>
This commit is contained in:
@@ -87,6 +87,9 @@ export function computeJobNextRunAtMs(job: CronJob, nowMs: number): number | und
|
||||
return computeNextRunAtMs(job.schedule, nowMs);
|
||||
}
|
||||
|
||||
/** Maximum consecutive schedule errors before auto-disabling a job. */
|
||||
const MAX_SCHEDULE_ERRORS = 3;
|
||||
|
||||
export function recomputeNextRuns(state: CronServiceState): boolean {
|
||||
if (!state.store) {
|
||||
return false;
|
||||
@@ -124,10 +127,36 @@ export function recomputeNextRuns(state: CronServiceState): boolean {
|
||||
const nextRun = job.state.nextRunAtMs;
|
||||
const isDueOrMissing = nextRun === undefined || now >= nextRun;
|
||||
if (isDueOrMissing) {
|
||||
const newNext = computeJobNextRunAtMs(job, now);
|
||||
if (job.state.nextRunAtMs !== newNext) {
|
||||
job.state.nextRunAtMs = newNext;
|
||||
try {
|
||||
const newNext = computeJobNextRunAtMs(job, now);
|
||||
if (job.state.nextRunAtMs !== newNext) {
|
||||
job.state.nextRunAtMs = newNext;
|
||||
changed = true;
|
||||
}
|
||||
// Clear schedule error count on successful computation.
|
||||
if (job.state.scheduleErrorCount) {
|
||||
job.state.scheduleErrorCount = undefined;
|
||||
changed = true;
|
||||
}
|
||||
} catch (err) {
|
||||
const errorCount = (job.state.scheduleErrorCount ?? 0) + 1;
|
||||
job.state.scheduleErrorCount = errorCount;
|
||||
job.state.nextRunAtMs = undefined;
|
||||
job.state.lastError = `schedule error: ${String(err)}`;
|
||||
changed = true;
|
||||
|
||||
if (errorCount >= MAX_SCHEDULE_ERRORS) {
|
||||
job.enabled = false;
|
||||
state.deps.log.error(
|
||||
{ jobId: job.id, name: job.name, errorCount, err: String(err) },
|
||||
"cron: auto-disabled job after repeated schedule errors",
|
||||
);
|
||||
} else {
|
||||
state.deps.log.warn(
|
||||
{ jobId: job.id, name: job.name, errorCount, err: String(err) },
|
||||
"cron: failed to compute next run for job (skipping)",
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user