fix(stability): patch regex retries and timeout abort handling

This commit is contained in:
Frank Yang
2026-02-22 00:12:22 -08:00
committed by Peter Steinberger
parent 99a2f5379e
commit 1051f42f96
15 changed files with 294 additions and 19 deletions

View File

@@ -154,6 +154,7 @@ export async function runCronIsolatedAgentTurn(params: {
deps: CliDeps;
job: CronJob;
message: string;
abortSignal?: AbortSignal;
sessionKey: string;
agentId?: string;
lane?: string;
@@ -454,6 +455,9 @@ export async function runCronIsolatedAgentTurn(params: {
agentDir,
fallbacksOverride: resolveAgentModelFallbacksOverride(params.cfg, agentId),
run: (providerOverride, modelOverride) => {
if (params.abortSignal?.aborted) {
throw new Error("cron: isolated run aborted");
}
if (isCliProvider(providerOverride, cfgWithAgentDefaults)) {
const cliSessionId = getCliSessionId(cronSession.sessionEntry, providerOverride);
return runCliAgent({
@@ -492,6 +496,7 @@ export async function runCronIsolatedAgentTurn(params: {
runId: cronSession.sessionEntry.sessionId,
requireExplicitMessageTarget: true,
disableMessageTool: deliveryRequested,
abortSignal: params.abortSignal,
});
},
});

View File

@@ -683,6 +683,55 @@ describe("Cron issue regressions", () => {
expect(job?.state.lastStatus).toBe("ok");
});
it("aborts isolated runs when cron timeout fires", async () => {
  // Regression: once the cron-level timeout elapses, the isolated runner must
  // observe its AbortSignal firing and the job must be recorded as an error.
  vi.useRealTimers();
  const { storePath } = await makeStorePath();
  const dueAtMs = Date.parse("2026-02-15T13:00:00.000Z");

  // Persist a single one-shot isolated job with a very small timeout so the
  // timeout path is exercised without slowing the suite down.
  await writeCronJobs(storePath, [
    createIsolatedRegressionJob({
      id: "abort-on-timeout",
      name: "abort timeout",
      scheduledAt: dueAtMs,
      schedule: { kind: "at", at: new Date(dueAtMs).toISOString() },
      payload: { kind: "agentTurn", message: "work", timeoutSeconds: 0.01 },
      state: { nextRunAtMs: dueAtMs },
    }),
  ]);

  // Controllable clock handed to the service, plus a slot that captures the
  // signal the service passes to the isolated runner.
  let clockMs = dueAtMs;
  let seenSignal: AbortSignal | undefined;

  // Settles only once `signal` is (or becomes) aborted. When no signal is
  // supplied the promise deliberately stays pending.
  const settleOnAbort = (signal: AbortSignal | undefined): Promise<void> =>
    new Promise<void>((resolve) => {
      if (signal?.aborted) {
        resolve();
        return;
      }
      signal?.addEventListener("abort", () => resolve(), { once: true });
    });

  const state = createCronServiceState({
    cronEnabled: true,
    storePath,
    log: noopLogger,
    nowMs: () => clockMs,
    enqueueSystemEvent: vi.fn(),
    requestHeartbeatNow: vi.fn(),
    // Fake runner: remember the signal, block until it aborts, then report a
    // (too late) successful result.
    runIsolatedAgentJob: vi.fn(async ({ abortSignal }) => {
      seenSignal = abortSignal;
      await settleOnAbort(abortSignal);
      clockMs += 5;
      return { status: "ok" as const, summary: "late" };
    }),
  });

  await onTimer(state);

  // The runner must have been handed a signal, and that signal must have fired.
  expect(seenSignal).toBeDefined();
  expect(seenSignal?.aborted).toBe(true);

  // The late "ok" result must not win: the job is recorded as a timeout error.
  const finishedJob = state.store?.jobs.find((entry) => entry.id === "abort-on-timeout");
  expect(finishedJob?.state.lastStatus).toBe("error");
  expect(finishedJob?.state.lastError).toContain("timed out");
});
it("retries cron schedule computation from the next second when the first attempt returns undefined (#17821)", () => {
const scheduledAt = Date.parse("2026-02-15T13:00:00.000Z");
const cronJob = createIsolatedRegressionJob({

View File

@@ -62,7 +62,11 @@ export type CronServiceDeps = {
wakeNowHeartbeatBusyMaxWaitMs?: number;
/** WakeMode=now: delay between runHeartbeatOnce retries while busy. */
wakeNowHeartbeatBusyRetryDelayMs?: number;
runIsolatedAgentJob: (params: { job: CronJob; message: string }) => Promise<
runIsolatedAgentJob: (params: {
job: CronJob;
message: string;
abortSignal?: AbortSignal;
}) => Promise<
{
summary?: string;
/** Last non-empty agent text output (not truncated). */

View File

@@ -267,18 +267,20 @@ export async function onTimer(state: CronServiceState) {
: DEFAULT_JOB_TIMEOUT_MS;
try {
const runAbortController =
typeof jobTimeoutMs === "number" ? new AbortController() : undefined;
const result =
typeof jobTimeoutMs === "number"
? await (async () => {
let timeoutId: NodeJS.Timeout | undefined;
try {
return await Promise.race([
executeJobCore(state, job),
executeJobCore(state, job, runAbortController?.signal),
new Promise<never>((_, reject) => {
timeoutId = setTimeout(
() => reject(new Error("cron: job execution timed out")),
jobTimeoutMs,
);
timeoutId = setTimeout(() => {
runAbortController?.abort(new Error("cron: job execution timed out"));
reject(new Error("cron: job execution timed out"));
}, jobTimeoutMs);
}),
]);
} finally {
@@ -565,6 +567,7 @@ export async function runDueJobs(state: CronServiceState) {
async function executeJobCore(
state: CronServiceState,
job: CronJob,
abortSignal?: AbortSignal,
): Promise<CronRunOutcome & CronRunTelemetry & { delivered?: boolean }> {
if (job.sessionTarget === "main") {
const text = resolveJobPayloadTextForMain(job);
@@ -634,10 +637,14 @@ async function executeJobCore(
if (job.payload.kind !== "agentTurn") {
return { status: "skipped", error: "isolated job requires payload.kind=agentTurn" };
}
if (abortSignal?.aborted) {
return { status: "error", error: "cron: job execution aborted" };
}
const res = await state.deps.runIsolatedAgentJob({
job,
message: job.payload.message,
abortSignal,
});
// Post a short summary back to the main session — but only when the