mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-19 05:57:28 +00:00
feat(cron): configurable failure alerts for repeated job errors (openclaw#24789) thanks @0xbrak
Verified: - pnpm install --frozen-lockfile - pnpm check - pnpm test -- --run src/cron/service.failure-alert.test.ts src/cli/cron-cli.test.ts src/gateway/protocol/cron-validators.test.ts Co-authored-by: 0xbrak <181251288+0xbrak@users.noreply.github.com> Co-authored-by: Tak Hoffman <781889+Takhoffman@users.noreply.github.com>
This commit is contained in:
@@ -80,6 +80,7 @@ Docs: https://docs.openclaw.ai
|
|||||||
|
|
||||||
### Fixes
|
### Fixes
|
||||||
|
|
||||||
|
- Cron/Failure alerts: add configurable repeated-failure alerting with per-job overrides and Web UI cron editor support (`inherit|disabled|custom` with threshold/cooldown/channel/target fields). (#24789) Thanks @0xbrak.
|
||||||
- Cron/Isolated model defaults: resolve isolated cron `subagents.model` (including object-form `primary`) through allowlist-aware model selection so isolated cron runs honor subagent model defaults unless explicitly overridden by job payload model. (#11474) Thanks @AnonO6.
|
- Cron/Isolated model defaults: resolve isolated cron `subagents.model` (including object-form `primary`) through allowlist-aware model selection so isolated cron runs honor subagent model defaults unless explicitly overridden by job payload model. (#11474) Thanks @AnonO6.
|
||||||
- Cron/Isolated sessions list: persist the intended pre-run model/provider on isolated cron session entries so `sessions_list` reflects payload/session model overrides even when runs fail before post-run telemetry persistence. (#21279) Thanks @altaywtf.
|
- Cron/Isolated sessions list: persist the intended pre-run model/provider on isolated cron session entries so `sessions_list` reflects payload/session model overrides even when runs fail before post-run telemetry persistence. (#21279) Thanks @altaywtf.
|
||||||
- Cron/One-shot reliability: retry transient one-shot failures with bounded backoff and configurable retry policy before disabling. (#24435) Thanks .
|
- Cron/One-shot reliability: retry transient one-shot failures with bounded backoff and configurable retry policy before disabling. (#24435) Thanks .
|
||||||
|
|||||||
@@ -551,4 +551,53 @@ describe("cron cli", () => {
|
|||||||
it("rejects --exact on edit when existing job is not cron", async () => {
|
it("rejects --exact on edit when existing job is not cron", async () => {
|
||||||
await expectCronEditWithScheduleLookupExit({ kind: "every", everyMs: 60_000 }, ["--exact"]);
|
await expectCronEditWithScheduleLookupExit({ kind: "every", everyMs: 60_000 }, ["--exact"]);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Runs `cron edit` with every --failure-alert-* flag and checks that the values
// arrive as a single `failureAlert` object on the patch given to `cron.update`.
it("patches failure alert settings on cron edit", async () => {
  callGatewayFromCli.mockClear();

  const argv = [
    "cron",
    "edit",
    "job-1",
    "--failure-alert-after",
    "3",
    "--failure-alert-cooldown",
    "1h",
    "--failure-alert-channel",
    "telegram",
    "--failure-alert-to",
    "19098680",
  ];
  await buildProgram().parseAsync(argv, { from: "user" });

  type FailureAlertPatch = { after?: number; cooldownMs?: number; channel?: string; to?: string };
  const updateCall = callGatewayFromCli.mock.calls.find((args) => args[0] === "cron.update");
  // The third argument of the gateway call carries the request params.
  const params = updateCall?.[2] as { patch?: { failureAlert?: FailureAlertPatch } };

  expect(params?.patch?.failureAlert?.after).toBe(3);
  // The "1h" duration string is expected to be sent as milliseconds.
  expect(params?.patch?.failureAlert?.cooldownMs).toBe(3_600_000);
  expect(params?.patch?.failureAlert?.channel).toBe("telegram");
  expect(params?.patch?.failureAlert?.to).toBe("19098680");
});
|
||||||
|
|
||||||
|
// `--no-failure-alert` on its own must be sent as `failureAlert: false` in the patch.
it("supports --no-failure-alert on cron edit", async () => {
  callGatewayFromCli.mockClear();

  const argv = ["cron", "edit", "job-1", "--no-failure-alert"];
  await buildProgram().parseAsync(argv, { from: "user" });

  const updateCall = callGatewayFromCli.mock.calls.find((args) => args[0] === "cron.update");
  const params = updateCall?.[2] as { patch?: { failureAlert?: boolean } };

  expect(params?.patch?.failureAlert).toBe(false);
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -62,6 +62,15 @@ export function registerCronEditCommand(cron: Command) {
|
|||||||
.option("--account <id>", "Channel account id for delivery (multi-account setups)")
|
.option("--account <id>", "Channel account id for delivery (multi-account setups)")
|
||||||
.option("--best-effort-deliver", "Do not fail job if delivery fails")
|
.option("--best-effort-deliver", "Do not fail job if delivery fails")
|
||||||
.option("--no-best-effort-deliver", "Fail job when delivery fails")
|
.option("--no-best-effort-deliver", "Fail job when delivery fails")
|
||||||
|
.option("--failure-alert", "Enable failure alerts for this job")
|
||||||
|
.option("--no-failure-alert", "Disable failure alerts for this job")
|
||||||
|
.option("--failure-alert-after <n>", "Alert after N consecutive job errors")
|
||||||
|
.option(
|
||||||
|
"--failure-alert-channel <channel>",
|
||||||
|
`Failure alert channel (${getCronChannelOptions()})`,
|
||||||
|
)
|
||||||
|
.option("--failure-alert-to <dest>", "Failure alert destination")
|
||||||
|
.option("--failure-alert-cooldown <duration>", "Minimum time between alerts (e.g. 1h, 30m)")
|
||||||
.action(async (id, opts) => {
|
.action(async (id, opts) => {
|
||||||
try {
|
try {
|
||||||
if (opts.session === "main" && opts.message) {
|
if (opts.session === "main" && opts.message) {
|
||||||
@@ -264,6 +273,49 @@ export function registerCronEditCommand(cron: Command) {
|
|||||||
patch.delivery = delivery;
|
patch.delivery = delivery;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// --- Failure alert flags -> patch.failureAlert ---
// A string value on a --failure-alert-* option is treated as "the flag was supplied".
const hasFailureAlertAfter = typeof opts.failureAlertAfter === "string";
const hasFailureAlertChannel = typeof opts.failureAlertChannel === "string";
const hasFailureAlertTo = typeof opts.failureAlertTo === "string";
const hasFailureAlertCooldown = typeof opts.failureAlertCooldown === "string";
const hasFailureAlertFields =
  hasFailureAlertAfter ||
  hasFailureAlertChannel ||
  hasFailureAlertTo ||
  hasFailureAlertCooldown;
// --failure-alert / --no-failure-alert yield a boolean; anything else means "not specified".
const failureAlertFlag =
  typeof opts.failureAlert === "boolean" ? opts.failureAlert : undefined;
if (failureAlertFlag === false && hasFailureAlertFields) {
  throw new Error("Use --no-failure-alert alone (without failure-alert-* options).");
}
if (failureAlertFlag === false) {
  // Explicitly disable alerts for this job.
  patch.failureAlert = false;
} else if (failureAlertFlag === true || hasFailureAlertFields) {
  // Only the fields that were actually supplied are included in the patch.
  const failureAlert: Record<string, unknown> = {};
  if (hasFailureAlertAfter) {
    const rawAfter = String(opts.failureAlertAfter).trim();
    // Number.parseInt would silently accept inputs such as "3abc" or "1.5";
    // require a plain run of digits so the value really is a positive integer.
    const after = /^\+?\d+$/.test(rawAfter) ? Number(rawAfter) : Number.NaN;
    if (!Number.isSafeInteger(after) || after <= 0) {
      throw new Error("Invalid --failure-alert-after (must be a positive integer).");
    }
    failureAlert.after = after;
  }
  if (hasFailureAlertChannel) {
    const channel = String(opts.failureAlertChannel).trim().toLowerCase();
    failureAlert.channel = channel ? channel : undefined;
  }
  if (hasFailureAlertTo) {
    const to = String(opts.failureAlertTo).trim();
    failureAlert.to = to ? to : undefined;
  }
  if (hasFailureAlertCooldown) {
    const cooldownMs = parseDurationMs(String(opts.failureAlertCooldown));
    // Any falsy result other than 0 (e.g. NaN, undefined) is rejected; 0 is allowed.
    if (!cooldownMs && cooldownMs !== 0) {
      throw new Error("Invalid --failure-alert-cooldown.");
    }
    failureAlert.cooldownMs = cooldownMs;
  }
  patch.failureAlert = failureAlert;
}
|
||||||
|
|
||||||
const res = await callGatewayFromCli("cron.update", opts, {
|
const res = await callGatewayFromCli("cron.update", opts, {
|
||||||
id,
|
id,
|
||||||
patch,
|
patch,
|
||||||
|
|||||||
@@ -10,6 +10,12 @@ export type CronRetryConfig = {
|
|||||||
retryOn?: CronRetryOn[];
|
retryOn?: CronRetryOn[];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/** Global failure-alert settings, exposed on `CronConfig` as `failureAlert`. */
export type CronFailureAlertConfig = {
|
||||||
|
/** Global switch for failure alerts. */
enabled?: boolean;
|
||||||
|
/** Threshold count for alerting; the config schema requires an integer >= 1. */
after?: number;
|
||||||
|
/** Cooldown in milliseconds; the config schema requires an integer >= 0. */
cooldownMs?: number;
|
||||||
|
};
|
||||||
|
|
||||||
export type CronConfig = {
|
export type CronConfig = {
|
||||||
enabled?: boolean;
|
enabled?: boolean;
|
||||||
store?: string;
|
store?: string;
|
||||||
@@ -37,4 +43,5 @@ export type CronConfig = {
|
|||||||
maxBytes?: number | string;
|
maxBytes?: number | string;
|
||||||
keepLines?: number;
|
keepLines?: number;
|
||||||
};
|
};
|
||||||
|
failureAlert?: CronFailureAlertConfig;
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -395,6 +395,14 @@ export const OpenClawSchema = z
|
|||||||
})
|
})
|
||||||
.strict()
|
.strict()
|
||||||
.optional(),
|
.optional(),
|
||||||
|
failureAlert: z
|
||||||
|
.object({
|
||||||
|
enabled: z.boolean().optional(),
|
||||||
|
after: z.number().int().min(1).optional(),
|
||||||
|
cooldownMs: z.number().int().min(0).optional(),
|
||||||
|
})
|
||||||
|
.strict()
|
||||||
|
.optional(),
|
||||||
})
|
})
|
||||||
.strict()
|
.strict()
|
||||||
.superRefine((val, ctx) => {
|
.superRefine((val, ctx) => {
|
||||||
|
|||||||
198
src/cron/service.failure-alert.test.ts
Normal file
198
src/cron/service.failure-alert.test.ts
Normal file
@@ -0,0 +1,198 @@
|
|||||||
|
import fs from "node:fs/promises";
|
||||||
|
import os from "node:os";
|
||||||
|
import path from "node:path";
|
||||||
|
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
||||||
|
import { CronService } from "./service.js";
|
||||||
|
|
||||||
|
// Logger stub passed as `log` to CronService; every level is a vitest mock so calls
// are recorded but nothing is printed. The mocks are cleared in `beforeEach`.
const noopLogger = {
|
||||||
|
debug: vi.fn(),
|
||||||
|
info: vi.fn(),
|
||||||
|
warn: vi.fn(),
|
||||||
|
error: vi.fn(),
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
 * Creates a fresh temporary directory for one test and returns the path the cron
 * store should use inside it (`<tmp>/cron/jobs.json`; the `cron` folder itself is
 * not created here) together with a `cleanup` callback that removes the directory.
 */
async function makeStorePath() {
  const prefix = path.join(os.tmpdir(), "openclaw-cron-failure-alert-");
  const root = await fs.mkdtemp(prefix);

  const cleanup = async () => {
    // `force` keeps the removal from failing if the directory is already gone.
    await fs.rm(root, { recursive: true, force: true });
  };

  return { storePath: path.join(root, "cron", "jobs.json"), cleanup };
}
|
||||||
|
|
||||||
|
// End-to-end checks of CronService failure alerting: threshold + cooldown handling,
// per-job overrides while global alerts are disabled, and per-job opt-out.
describe("CronService failure alerts", () => {
  beforeEach(() => {
    // Freeze the clock so cooldown windows can be advanced deterministically.
    vi.useFakeTimers();
    vi.setSystemTime(new Date("2026-01-01T00:00:00.000Z"));
    noopLogger.debug.mockClear();
    noopLogger.info.mockClear();
    noopLogger.warn.mockClear();
    noopLogger.error.mockClear();
  });

  afterEach(() => {
    vi.useRealTimers();
  });

  /**
   * Builds a CronService whose isolated agent runs always fail with `error`, wired
   * to a mock alert sender so tests can inspect which alerts were emitted.
   */
  function createFailingCron(
    storePath: string,
    failureAlert: { enabled?: boolean; after?: number; cooldownMs?: number },
    error: string,
  ) {
    const sendCronFailureAlert = vi.fn(async () => undefined);
    const runIsolatedAgentJob = vi.fn(async () => ({
      status: "error" as const,
      error,
    }));
    const cron = new CronService({
      storePath,
      cronEnabled: true,
      cronConfig: { failureAlert },
      log: noopLogger,
      enqueueSystemEvent: vi.fn(),
      requestHeartbeatNow: vi.fn(),
      runIsolatedAgentJob,
      sendCronFailureAlert,
    });
    return { cron, sendCronFailureAlert };
  }

  it("alerts after configured consecutive failures and honors cooldown", async () => {
    const store = await makeStorePath();
    const { cron, sendCronFailureAlert } = createFailingCron(
      store.storePath,
      { enabled: true, after: 2, cooldownMs: 60_000 },
      "wrong model id",
    );

    // try/finally so the service is stopped and the temp dir removed even when an
    // assertion fails part-way through the test.
    try {
      await cron.start();
      const job = await cron.add({
        name: "daily report",
        enabled: true,
        schedule: { kind: "every", everyMs: 60_000 },
        sessionTarget: "isolated",
        wakeMode: "next-heartbeat",
        payload: { kind: "agentTurn", message: "run report" },
        delivery: { mode: "announce", channel: "telegram", to: "19098680" },
      });

      // First failure: below the threshold of 2, no alert yet.
      await cron.run(job.id, "force");
      expect(sendCronFailureAlert).not.toHaveBeenCalled();

      // Second failure reaches the threshold; the alert goes to the job's delivery target.
      await cron.run(job.id, "force");
      expect(sendCronFailureAlert).toHaveBeenCalledTimes(1);
      expect(sendCronFailureAlert).toHaveBeenLastCalledWith(
        expect.objectContaining({
          job: expect.objectContaining({ id: job.id }),
          channel: "telegram",
          to: "19098680",
          text: expect.stringContaining('Cron job "daily report" failed 2 times'),
        }),
      );

      // Third failure falls inside the 60s cooldown and is suppressed.
      await cron.run(job.id, "force");
      expect(sendCronFailureAlert).toHaveBeenCalledTimes(1);

      // Once the cooldown has elapsed the next failure alerts again.
      vi.advanceTimersByTime(60_000);
      await cron.run(job.id, "force");
      expect(sendCronFailureAlert).toHaveBeenCalledTimes(2);
      expect(sendCronFailureAlert).toHaveBeenLastCalledWith(
        expect.objectContaining({
          text: expect.stringContaining('Cron job "daily report" failed 4 times'),
        }),
      );
    } finally {
      cron.stop();
      await store.cleanup();
    }
  });

  it("supports per-job failure alert override when global alerts are disabled", async () => {
    const store = await makeStorePath();
    const { cron, sendCronFailureAlert } = createFailingCron(
      store.storePath,
      { enabled: false },
      "timeout",
    );

    try {
      await cron.start();
      const job = await cron.add({
        name: "job with override",
        enabled: true,
        schedule: { kind: "every", everyMs: 60_000 },
        sessionTarget: "isolated",
        wakeMode: "next-heartbeat",
        payload: { kind: "agentTurn", message: "run report" },
        failureAlert: {
          after: 1,
          channel: "telegram",
          to: "12345",
          cooldownMs: 1,
        },
      });

      // The job-level config alone is enough to alert on the very first failure.
      await cron.run(job.id, "force");
      expect(sendCronFailureAlert).toHaveBeenCalledTimes(1);
      expect(sendCronFailureAlert).toHaveBeenLastCalledWith(
        expect.objectContaining({
          channel: "telegram",
          to: "12345",
        }),
      );
    } finally {
      cron.stop();
      await store.cleanup();
    }
  });

  it("respects per-job failureAlert=false and suppresses alerts", async () => {
    const store = await makeStorePath();
    const { cron, sendCronFailureAlert } = createFailingCron(
      store.storePath,
      { enabled: true, after: 1 },
      "auth error",
    );

    try {
      await cron.start();
      const job = await cron.add({
        name: "disabled alert job",
        enabled: true,
        schedule: { kind: "every", everyMs: 60_000 },
        sessionTarget: "isolated",
        wakeMode: "next-heartbeat",
        payload: { kind: "agentTurn", message: "run report" },
        failureAlert: false,
      });

      // Global alerts are on with a threshold of 1, but the job opted out.
      await cron.run(job.id, "force");
      await cron.run(job.id, "force");
      expect(sendCronFailureAlert).not.toHaveBeenCalled();
    } finally {
      cron.stop();
      await store.cleanup();
    }
  });
});
|
||||||
@@ -9,6 +9,7 @@ import {
|
|||||||
import type {
|
import type {
|
||||||
CronDelivery,
|
CronDelivery,
|
||||||
CronDeliveryPatch,
|
CronDeliveryPatch,
|
||||||
|
CronFailureAlert,
|
||||||
CronJob,
|
CronJob,
|
||||||
CronJobCreate,
|
CronJobCreate,
|
||||||
CronJobPatch,
|
CronJobPatch,
|
||||||
@@ -419,6 +420,7 @@ export function createJob(state: CronServiceState, input: CronJobCreate): CronJo
|
|||||||
wakeMode: input.wakeMode,
|
wakeMode: input.wakeMode,
|
||||||
payload: input.payload,
|
payload: input.payload,
|
||||||
delivery: input.delivery,
|
delivery: input.delivery,
|
||||||
|
failureAlert: input.failureAlert,
|
||||||
state: {
|
state: {
|
||||||
...input.state,
|
...input.state,
|
||||||
},
|
},
|
||||||
@@ -483,6 +485,9 @@ export function applyJobPatch(job: CronJob, patch: CronJobPatch) {
|
|||||||
if (patch.delivery) {
|
if (patch.delivery) {
|
||||||
job.delivery = mergeCronDelivery(job.delivery, patch.delivery);
|
job.delivery = mergeCronDelivery(job.delivery, patch.delivery);
|
||||||
}
|
}
|
||||||
|
if ("failureAlert" in patch) {
|
||||||
|
job.failureAlert = mergeCronFailureAlert(job.failureAlert, patch.failureAlert);
|
||||||
|
}
|
||||||
if (job.sessionTarget === "main" && job.delivery?.mode !== "webhook") {
|
if (job.sessionTarget === "main" && job.delivery?.mode !== "webhook") {
|
||||||
job.delivery = undefined;
|
job.delivery = undefined;
|
||||||
}
|
}
|
||||||
@@ -648,6 +653,42 @@ function mergeCronDelivery(
|
|||||||
return next;
|
return next;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Merges a failure-alert patch into a job's existing failure-alert setting.
 *
 * - `patch === false` disables alerts and returns `false`.
 * - `patch === undefined` leaves the existing value untouched.
 * - Otherwise the patch is applied field by field on top of a copy of the existing
 *   object (or an empty object when alerts were disabled or unset). Only keys that
 *   are present in the patch are touched; a present key with an invalid value
 *   resets that field to `undefined`.
 *
 * @param existing Current job-level setting.
 * @param patch Requested change.
 * @returns The new job-level setting; `existing` is never mutated.
 */
function mergeCronFailureAlert(
  existing: CronFailureAlert | false | undefined,
  patch: CronFailureAlert | false | undefined,
): CronFailureAlert | false | undefined {
  if (patch === false) {
    return false;
  }
  if (patch === undefined) {
    return existing;
  }
  const base = existing === false || existing === undefined ? {} : existing;
  const next: CronFailureAlert = { ...base };

  if ("after" in patch) {
    // Floor before range-checking: a fractional value in (0, 1) would otherwise
    // pass the "> 0" test and be stored as 0, which is not a valid threshold.
    const after =
      typeof patch.after === "number" && Number.isFinite(patch.after)
        ? Math.floor(patch.after)
        : 0;
    next.after = after >= 1 ? after : undefined;
  }
  if ("channel" in patch) {
    const channel = typeof patch.channel === "string" ? patch.channel.trim() : "";
    next.channel = channel ? channel : undefined;
  }
  if ("to" in patch) {
    const to = typeof patch.to === "string" ? patch.to.trim() : "";
    next.to = to ? to : undefined;
  }
  if ("cooldownMs" in patch) {
    // -1 marks "invalid"; a cooldown of 0 is accepted.
    const cooldownMs =
      typeof patch.cooldownMs === "number" && Number.isFinite(patch.cooldownMs)
        ? patch.cooldownMs
        : -1;
    next.cooldownMs = cooldownMs >= 0 ? Math.floor(cooldownMs) : undefined;
  }

  return next;
}
|
||||||
|
|
||||||
export function isJobDue(job: CronJob, nowMs: number, opts: { forced: boolean }) {
|
export function isJobDue(job: CronJob, nowMs: number, opts: { forced: boolean }) {
|
||||||
if (!job.state) {
|
if (!job.state) {
|
||||||
job.state = {};
|
job.state = {};
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ import type {
|
|||||||
CronJob,
|
CronJob,
|
||||||
CronJobCreate,
|
CronJobCreate,
|
||||||
CronJobPatch,
|
CronJobPatch,
|
||||||
|
CronMessageChannel,
|
||||||
CronRunOutcome,
|
CronRunOutcome,
|
||||||
CronRunStatus,
|
CronRunStatus,
|
||||||
CronRunTelemetry,
|
CronRunTelemetry,
|
||||||
@@ -90,6 +91,12 @@ export type CronServiceDeps = {
|
|||||||
} & CronRunOutcome &
|
} & CronRunOutcome &
|
||||||
CronRunTelemetry
|
CronRunTelemetry
|
||||||
>;
|
>;
|
||||||
|
sendCronFailureAlert?: (params: {
|
||||||
|
job: CronJob;
|
||||||
|
text: string;
|
||||||
|
channel: CronMessageChannel;
|
||||||
|
to?: string;
|
||||||
|
}) => Promise<void>;
|
||||||
onEvent?: (evt: CronEvent) => void;
|
onEvent?: (evt: CronEvent) => void;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ import { sweepCronRunSessions } from "../session-reaper.js";
|
|||||||
import type {
|
import type {
|
||||||
CronDeliveryStatus,
|
CronDeliveryStatus,
|
||||||
CronJob,
|
CronJob,
|
||||||
|
CronMessageChannel,
|
||||||
CronRunOutcome,
|
CronRunOutcome,
|
||||||
CronRunStatus,
|
CronRunStatus,
|
||||||
CronRunTelemetry,
|
CronRunTelemetry,
|
||||||
@@ -33,6 +34,8 @@ const MAX_TIMER_DELAY_MS = 60_000;
|
|||||||
* but always breaks an infinite re-trigger cycle. (See #17821)
|
* but always breaks an infinite re-trigger cycle. (See #17821)
|
||||||
*/
|
*/
|
||||||
const MIN_REFIRE_GAP_MS = 2_000;
|
const MIN_REFIRE_GAP_MS = 2_000;
|
||||||
|
// Fallback number of consecutive errors before a failure alert is emitted; used by
// resolveFailureAlert when no valid `after` is configured.
const DEFAULT_FAILURE_ALERT_AFTER = 2;
|
||||||
|
// Fallback minimum gap between two failure alerts for the same job.
const DEFAULT_FAILURE_ALERT_COOLDOWN_MS = 60 * 60_000; // 1 hour
|
||||||
|
|
||||||
type TimedCronRunOutcome = CronRunOutcome &
|
type TimedCronRunOutcome = CronRunOutcome &
|
||||||
CronRunTelemetry & {
|
CronRunTelemetry & {
|
||||||
@@ -149,6 +152,106 @@ function resolveDeliveryStatus(params: { job: CronJob; delivered?: boolean }): C
|
|||||||
return resolveCronDeliveryPlan(params.job).requested ? "unknown" : "not-requested";
|
return resolveCronDeliveryPlan(params.job).requested ? "unknown" : "not-requested";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Normalizes a channel value by trimming and lower-casing it.
 *
 * @returns The normalized channel, or `undefined` when `input` is not a string or
 *   is blank after trimming.
 */
function normalizeCronMessageChannel(input: unknown): CronMessageChannel | undefined {
  const normalized = typeof input === "string" ? input.trim().toLowerCase() : "";
  return normalized === "" ? undefined : (normalized as CronMessageChannel);
}
|
||||||
|
|
||||||
|
/**
 * Normalizes an alert destination by trimming surrounding whitespace.
 *
 * @returns The trimmed string, or `undefined` when `input` is not a string or is
 *   blank after trimming.
 */
function normalizeTo(input: unknown): string | undefined {
  if (typeof input === "string") {
    const trimmed = input.trim();
    if (trimmed.length > 0) {
      return trimmed;
    }
  }
  return undefined;
}
|
||||||
|
|
||||||
|
/**
 * Coerces `value` to an integer >= 1.
 *
 * @param value Candidate value; anything that is not a finite number is rejected.
 * @param fallback Returned when `value` is rejected or floors to less than 1.
 * @returns `Math.floor(value)` when that is at least 1, otherwise `fallback`.
 */
function clampPositiveInt(value: unknown, fallback: number): number {
  // NaN fails the comparison below, so it doubles as the "rejected" marker.
  const whole =
    typeof value === "number" && Number.isFinite(value) ? Math.floor(value) : Number.NaN;
  return whole >= 1 ? whole : fallback;
}
|
||||||
|
|
||||||
|
/**
 * Coerces `value` to an integer >= 0.
 *
 * @param value Candidate value; anything that is not a finite number is rejected.
 * @param fallback Returned when `value` is rejected or floors to a negative number.
 * @returns `Math.floor(value)` when that is at least 0, otherwise `fallback`.
 */
function clampNonNegativeInt(value: unknown, fallback: number): number {
  // NaN fails the comparison below, so it doubles as the "rejected" marker.
  const whole =
    typeof value === "number" && Number.isFinite(value) ? Math.floor(value) : Number.NaN;
  return whole >= 0 ? whole : fallback;
}
|
||||||
|
|
||||||
|
/**
 * Resolves the effective failure-alert settings for a job.
 *
 * Alerts are off (returns `null`) when the job sets `failureAlert: false`, or when
 * the job has no failure-alert config and the global config is not `enabled: true`.
 * A job-level config object enables alerts for that job regardless of the global
 * switch.
 *
 * Precedence for `after` and `cooldownMs` is job value, then global value, then the
 * built-in default; an invalid value at one level falls through to the next level.
 * `channel` and `to` come from the job's failure-alert config, then from the job's
 * delivery settings; `channel` finally defaults to "last".
 *
 * @param state Service state; only `state.deps.cronConfig?.failureAlert` is read.
 * @param job Job whose alert settings are being resolved.
 * @returns The resolved settings, or `null` when no alert should be sent.
 */
function resolveFailureAlert(
  state: CronServiceState,
  job: CronJob,
): { after: number; cooldownMs: number; channel: CronMessageChannel; to?: string } | null {
  const globalConfig = state.deps.cronConfig?.failureAlert;
  const jobConfig = job.failureAlert;

  if (jobConfig === false) {
    return null;
  }
  if (!jobConfig && globalConfig?.enabled !== true) {
    return null;
  }

  // Clamp each level separately: `job ?? global` would only skip null/undefined,
  // so an invalid job value used to bypass a valid global value and land on the
  // hard-coded default.
  const globalAfter = clampPositiveInt(globalConfig?.after, DEFAULT_FAILURE_ALERT_AFTER);
  const globalCooldownMs = clampNonNegativeInt(
    globalConfig?.cooldownMs,
    DEFAULT_FAILURE_ALERT_COOLDOWN_MS,
  );

  return {
    after: clampPositiveInt(jobConfig?.after, globalAfter),
    cooldownMs: clampNonNegativeInt(jobConfig?.cooldownMs, globalCooldownMs),
    channel:
      normalizeCronMessageChannel(jobConfig?.channel) ??
      normalizeCronMessageChannel(job.delivery?.channel) ??
      "last",
    to: normalizeTo(jobConfig?.to) ?? normalizeTo(job.delivery?.to),
  };
}
|
||||||
|
|
||||||
|
/**
 * Sends a "job keeps failing" alert for `params.job`.
 *
 * When `state.deps.sendCronFailureAlert` is provided, the alert is handed to it
 * fire-and-forget; delivery problems are logged as warnings and never propagate to
 * the caller. Without that dependency the alert text is enqueued as a system event
 * for the job's agent, and a heartbeat is requested when the job's wake mode is "now".
 *
 * @param state Service state providing the `deps` used for delivery and logging.
 * @param params.job Job that failed.
 * @param params.error Last error message; blank/missing becomes "unknown error" and
 *   the text is cut to 200 characters.
 * @param params.consecutiveErrors Number of consecutive failures reported in the text.
 * @param params.channel Channel passed to the alert sender.
 * @param params.to Optional destination passed to the alert sender.
 */
function emitFailureAlert(
  state: CronServiceState,
  params: {
    job: CronJob;
    error?: string;
    consecutiveErrors: number;
    channel: CronMessageChannel;
    to?: string;
  },
) {
  const safeJobName = params.job.name || params.job.id;
  const truncatedError = (params.error?.trim() || "unknown error").slice(0, 200);
  // Avoid "failed 1 times" when the alert threshold is a single failure.
  const timesLabel = params.consecutiveErrors === 1 ? "time" : "times";
  const text = [
    `Cron job "${safeJobName}" failed ${params.consecutiveErrors} ${timesLabel}`,
    `Last error: ${truncatedError}`,
  ].join("\n");

  if (state.deps.sendCronFailureAlert) {
    const reportDeliveryFailure = (err: unknown) => {
      state.deps.log.warn(
        { jobId: params.job.id, err: String(err) },
        "cron: failure alert delivery failed",
      );
    };
    try {
      void state.deps
        .sendCronFailureAlert({
          job: params.job,
          text,
          channel: params.channel,
          to: params.to,
        })
        .catch(reportDeliveryFailure);
    } catch (err) {
      // A sender that throws synchronously must not break job-result handling either.
      reportDeliveryFailure(err);
    }
    return;
  }

  state.deps.enqueueSystemEvent(text, { agentId: params.job.agentId });
  if (params.job.wakeMode === "now") {
    state.deps.requestHeartbeatNow({ reason: `cron:${params.job.id}:failure-alert` });
  }
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Apply the result of a job execution to the job's state.
|
* Apply the result of a job execution to the job's state.
|
||||||
* Handles consecutive error tracking, exponential backoff, one-shot disable,
|
* Handles consecutive error tracking, exponential backoff, one-shot disable,
|
||||||
@@ -181,8 +284,26 @@ export function applyJobResult(
|
|||||||
// Track consecutive errors for backoff / auto-disable.
|
// Track consecutive errors for backoff / auto-disable.
|
||||||
if (result.status === "error") {
|
if (result.status === "error") {
|
||||||
job.state.consecutiveErrors = (job.state.consecutiveErrors ?? 0) + 1;
|
job.state.consecutiveErrors = (job.state.consecutiveErrors ?? 0) + 1;
|
||||||
|
const alertConfig = resolveFailureAlert(state, job);
|
||||||
|
if (alertConfig && job.state.consecutiveErrors >= alertConfig.after) {
|
||||||
|
const now = state.deps.nowMs();
|
||||||
|
const lastAlert = job.state.lastFailureAlertAtMs;
|
||||||
|
const inCooldown =
|
||||||
|
typeof lastAlert === "number" && now - lastAlert < Math.max(0, alertConfig.cooldownMs);
|
||||||
|
if (!inCooldown) {
|
||||||
|
emitFailureAlert(state, {
|
||||||
|
job,
|
||||||
|
error: result.error,
|
||||||
|
consecutiveErrors: job.state.consecutiveErrors,
|
||||||
|
channel: alertConfig.channel,
|
||||||
|
to: alertConfig.to,
|
||||||
|
});
|
||||||
|
job.state.lastFailureAlertAtMs = now;
|
||||||
|
}
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
job.state.consecutiveErrors = 0;
|
job.state.consecutiveErrors = 0;
|
||||||
|
job.state.lastFailureAlertAtMs = undefined;
|
||||||
}
|
}
|
||||||
|
|
||||||
const shouldDelete =
|
const shouldDelete =
|
||||||
|
|||||||
@@ -56,6 +56,13 @@ export type CronRunOutcome = {
|
|||||||
sessionKey?: string;
|
sessionKey?: string;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Per-job failure-alert settings. `CronJob.failureAlert` holds either this object
 * or `false`.
 */
export type CronFailureAlert = {
|
||||||
|
/** Threshold count for alerting; the gateway schema requires an integer >= 1. */
after?: number;
|
||||||
|
/** Channel the alert is sent on. */
channel?: CronMessageChannel;
|
||||||
|
/** Destination the alert is sent to. */
to?: string;
|
||||||
|
/** Cooldown in milliseconds; the gateway schema requires an integer >= 0. */
cooldownMs?: number;
|
||||||
|
};
|
||||||
|
|
||||||
export type CronPayload =
|
export type CronPayload =
|
||||||
| { kind: "systemEvent"; text: string }
|
| { kind: "systemEvent"; text: string }
|
||||||
| {
|
| {
|
||||||
@@ -102,6 +109,8 @@ export type CronJobState = {
|
|||||||
lastDurationMs?: number;
|
lastDurationMs?: number;
|
||||||
/** Number of consecutive execution errors (reset on success). Used for backoff. */
|
/** Number of consecutive execution errors (reset on success). Used for backoff. */
|
||||||
consecutiveErrors?: number;
|
consecutiveErrors?: number;
|
||||||
|
/** Last failure alert timestamp (ms since epoch) for cooldown gating. */
|
||||||
|
lastFailureAlertAtMs?: number;
|
||||||
/** Number of consecutive schedule computation errors. Auto-disables job after threshold. */
|
/** Number of consecutive schedule computation errors. Auto-disables job after threshold. */
|
||||||
scheduleErrorCount?: number;
|
scheduleErrorCount?: number;
|
||||||
/** Explicit delivery outcome, separate from execution outcome. */
|
/** Explicit delivery outcome, separate from execution outcome. */
|
||||||
@@ -128,6 +137,7 @@ export type CronJob = {
|
|||||||
wakeMode: CronWakeMode;
|
wakeMode: CronWakeMode;
|
||||||
payload: CronPayload;
|
payload: CronPayload;
|
||||||
delivery?: CronDelivery;
|
delivery?: CronDelivery;
|
||||||
|
failureAlert?: CronFailureAlert | false;
|
||||||
state: CronJobState;
|
state: CronJobState;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -187,6 +187,16 @@ export const CronDeliveryPatchSchema = Type.Object(
|
|||||||
{ additionalProperties: false },
|
{ additionalProperties: false },
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// Gateway protocol schema for a job's failure-alert object. Used (in a union with
// the literal `false`) for the `failureAlert` field of CronJobSchema,
// CronAddParamsSchema and CronJobPatchSchema.
export const CronFailureAlertSchema = Type.Object(
|
||||||
|
{
|
||||||
|
// Integer >= 1.
after: Type.Optional(Type.Integer({ minimum: 1 })),
|
||||||
|
// Either the literal "last" or a NonEmptyString.
channel: Type.Optional(Type.Union([Type.Literal("last"), NonEmptyString])),
|
||||||
|
// Plain string; no length constraint is declared here.
to: Type.Optional(Type.String()),
|
||||||
|
// Integer >= 0.
cooldownMs: Type.Optional(Type.Integer({ minimum: 0 })),
|
||||||
|
},
|
||||||
|
// Declares that no properties beyond the four above are allowed.
{ additionalProperties: false },
|
||||||
|
);
|
||||||
|
|
||||||
export const CronJobStateSchema = Type.Object(
|
export const CronJobStateSchema = Type.Object(
|
||||||
{
|
{
|
||||||
nextRunAtMs: Type.Optional(Type.Integer({ minimum: 0 })),
|
nextRunAtMs: Type.Optional(Type.Integer({ minimum: 0 })),
|
||||||
@@ -200,6 +210,7 @@ export const CronJobStateSchema = Type.Object(
|
|||||||
lastDelivered: Type.Optional(Type.Boolean()),
|
lastDelivered: Type.Optional(Type.Boolean()),
|
||||||
lastDeliveryStatus: Type.Optional(CronDeliveryStatusSchema),
|
lastDeliveryStatus: Type.Optional(CronDeliveryStatusSchema),
|
||||||
lastDeliveryError: Type.Optional(Type.String()),
|
lastDeliveryError: Type.Optional(Type.String()),
|
||||||
|
lastFailureAlertAtMs: Type.Optional(Type.Integer({ minimum: 0 })),
|
||||||
},
|
},
|
||||||
{ additionalProperties: false },
|
{ additionalProperties: false },
|
||||||
);
|
);
|
||||||
@@ -220,6 +231,7 @@ export const CronJobSchema = Type.Object(
|
|||||||
wakeMode: CronWakeModeSchema,
|
wakeMode: CronWakeModeSchema,
|
||||||
payload: CronPayloadSchema,
|
payload: CronPayloadSchema,
|
||||||
delivery: Type.Optional(CronDeliverySchema),
|
delivery: Type.Optional(CronDeliverySchema),
|
||||||
|
failureAlert: Type.Optional(Type.Union([Type.Literal(false), CronFailureAlertSchema])),
|
||||||
state: CronJobStateSchema,
|
state: CronJobStateSchema,
|
||||||
},
|
},
|
||||||
{ additionalProperties: false },
|
{ additionalProperties: false },
|
||||||
@@ -249,6 +261,7 @@ export const CronAddParamsSchema = Type.Object(
|
|||||||
wakeMode: CronWakeModeSchema,
|
wakeMode: CronWakeModeSchema,
|
||||||
payload: CronPayloadSchema,
|
payload: CronPayloadSchema,
|
||||||
delivery: Type.Optional(CronDeliverySchema),
|
delivery: Type.Optional(CronDeliverySchema),
|
||||||
|
failureAlert: Type.Optional(Type.Union([Type.Literal(false), CronFailureAlertSchema])),
|
||||||
},
|
},
|
||||||
{ additionalProperties: false },
|
{ additionalProperties: false },
|
||||||
);
|
);
|
||||||
@@ -262,6 +275,7 @@ export const CronJobPatchSchema = Type.Object(
|
|||||||
wakeMode: Type.Optional(CronWakeModeSchema),
|
wakeMode: Type.Optional(CronWakeModeSchema),
|
||||||
payload: Type.Optional(CronPayloadPatchSchema),
|
payload: Type.Optional(CronPayloadPatchSchema),
|
||||||
delivery: Type.Optional(CronDeliveryPatchSchema),
|
delivery: Type.Optional(CronDeliveryPatchSchema),
|
||||||
|
failureAlert: Type.Optional(Type.Union([Type.Literal(false), CronFailureAlertSchema])),
|
||||||
state: Type.Optional(Type.Partial(CronJobStateSchema)),
|
state: Type.Optional(Type.Partial(CronJobStateSchema)),
|
||||||
},
|
},
|
||||||
{ additionalProperties: false },
|
{ additionalProperties: false },
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
import { resolveDefaultAgentId } from "../agents/agent-scope.js";
|
import { resolveDefaultAgentId } from "../agents/agent-scope.js";
|
||||||
import type { CliDeps } from "../cli/deps.js";
|
import type { CliDeps } from "../cli/deps.js";
|
||||||
|
import { createOutboundSendDeps } from "../cli/outbound-send-deps.js";
|
||||||
import { loadConfig } from "../config/config.js";
|
import { loadConfig } from "../config/config.js";
|
||||||
import {
|
import {
|
||||||
canonicalizeMainSessionAlias,
|
canonicalizeMainSessionAlias,
|
||||||
@@ -8,6 +9,7 @@ import {
|
|||||||
} from "../config/sessions.js";
|
} from "../config/sessions.js";
|
||||||
import { resolveStorePath } from "../config/sessions/paths.js";
|
import { resolveStorePath } from "../config/sessions/paths.js";
|
||||||
import { runCronIsolatedAgentTurn } from "../cron/isolated-agent.js";
|
import { runCronIsolatedAgentTurn } from "../cron/isolated-agent.js";
|
||||||
|
import { resolveDeliveryTarget } from "../cron/isolated-agent/delivery-target.js";
|
||||||
import {
|
import {
|
||||||
appendCronRunLog,
|
appendCronRunLog,
|
||||||
resolveCronRunLogPath,
|
resolveCronRunLogPath,
|
||||||
@@ -21,6 +23,7 @@ import { runHeartbeatOnce } from "../infra/heartbeat-runner.js";
|
|||||||
import { requestHeartbeatNow } from "../infra/heartbeat-wake.js";
|
import { requestHeartbeatNow } from "../infra/heartbeat-wake.js";
|
||||||
import { fetchWithSsrFGuard } from "../infra/net/fetch-guard.js";
|
import { fetchWithSsrFGuard } from "../infra/net/fetch-guard.js";
|
||||||
import { SsrFBlockedError } from "../infra/net/ssrf.js";
|
import { SsrFBlockedError } from "../infra/net/ssrf.js";
|
||||||
|
import { deliverOutboundPayloads } from "../infra/outbound/deliver.js";
|
||||||
import { enqueueSystemEvent } from "../infra/system-events.js";
|
import { enqueueSystemEvent } from "../infra/system-events.js";
|
||||||
import { getChildLogger } from "../logging.js";
|
import { getChildLogger } from "../logging.js";
|
||||||
import { normalizeAgentId, toAgentStoreSessionKey } from "../routing/session-key.js";
|
import { normalizeAgentId, toAgentStoreSessionKey } from "../routing/session-key.js";
|
||||||
@@ -223,6 +226,25 @@ export function buildGatewayCronService(params: {
|
|||||||
lane: "cron",
|
lane: "cron",
|
||||||
});
|
});
|
||||||
},
|
},
|
||||||
|
/**
 * Sends a failure-alert message for a cron job.
 *
 * Looks up the agent id and runtime config for the job via
 * `resolveCronAgent`, resolves the requested channel/recipient into a
 * delivery target, and hands a single text payload to the outbound
 * delivery pipeline. Rethrows the resolver's error when the target cannot
 * be resolved.
 */
sendCronFailureAlert: async ({ job, text, channel, to }) => {
  const resolvedAgent = resolveCronAgent(job.agentId);
  const deliveryConfig = resolvedAgent.cfg;
  const resolution = await resolveDeliveryTarget(deliveryConfig, resolvedAgent.agentId, {
    channel,
    to,
  });
  // Surface the resolver's own error instead of attempting a send with no target.
  if (!resolution.ok) {
    throw resolution.error;
  }
  const outboundDeps = createOutboundSendDeps(params.deps);
  await deliverOutboundPayloads({
    cfg: deliveryConfig,
    channel: resolution.channel,
    to: resolution.to,
    accountId: resolution.accountId,
    threadId: resolution.threadId,
    payloads: [{ text }],
    deps: outboundDeps,
  });
},
|
||||||
log: getChildLogger({ module: "cron", storePath }),
|
log: getChildLogger({ module: "cron", storePath }),
|
||||||
onEvent: (evt) => {
|
onEvent: (evt) => {
|
||||||
params.broadcast("cron", evt, { dropIfSlow: true });
|
params.broadcast("cron", evt, { dropIfSlow: true });
|
||||||
|
|||||||
@@ -36,5 +36,10 @@ export const DEFAULT_CRON_FORM: CronFormState = {
|
|||||||
deliveryChannel: "last",
|
deliveryChannel: "last",
|
||||||
deliveryTo: "",
|
deliveryTo: "",
|
||||||
deliveryBestEffort: false,
|
deliveryBestEffort: false,
|
||||||
|
failureAlertMode: "inherit",
|
||||||
|
failureAlertAfter: "2",
|
||||||
|
failureAlertCooldownSeconds: "3600",
|
||||||
|
failureAlertChannel: "last",
|
||||||
|
failureAlertTo: "",
|
||||||
timeoutSeconds: "",
|
timeoutSeconds: "",
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -298,6 +298,87 @@ describe("cron controller", () => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("includes custom failureAlert fields in cron.update patch", async () => {
|
||||||
|
const request = vi.fn(async (method: string, _payload?: unknown) => {
|
||||||
|
if (method === "cron.update") {
|
||||||
|
return { id: "job-alert" };
|
||||||
|
}
|
||||||
|
if (method === "cron.list") {
|
||||||
|
return { jobs: [{ id: "job-alert" }] };
|
||||||
|
}
|
||||||
|
if (method === "cron.status") {
|
||||||
|
return { enabled: true, jobs: 1, nextWakeAtMs: null };
|
||||||
|
}
|
||||||
|
return {};
|
||||||
|
});
|
||||||
|
const state = createState({
|
||||||
|
client: { request } as unknown as CronState["client"],
|
||||||
|
cronEditingJobId: "job-alert",
|
||||||
|
cronForm: {
|
||||||
|
...DEFAULT_CRON_FORM,
|
||||||
|
name: "alert job",
|
||||||
|
payloadKind: "agentTurn",
|
||||||
|
payloadText: "run it",
|
||||||
|
failureAlertMode: "custom",
|
||||||
|
failureAlertAfter: "3",
|
||||||
|
failureAlertCooldownSeconds: "120",
|
||||||
|
failureAlertChannel: "telegram",
|
||||||
|
failureAlertTo: "123456",
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
await addCronJob(state);
|
||||||
|
|
||||||
|
const updateCall = request.mock.calls.find(([method]) => method === "cron.update");
|
||||||
|
expect(updateCall).toBeDefined();
|
||||||
|
expect(updateCall?.[1]).toMatchObject({
|
||||||
|
id: "job-alert",
|
||||||
|
patch: {
|
||||||
|
failureAlert: {
|
||||||
|
after: 3,
|
||||||
|
cooldownMs: 120_000,
|
||||||
|
channel: "telegram",
|
||||||
|
to: "123456",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it("includes failureAlert=false when disabled per job", async () => {
|
||||||
|
const request = vi.fn(async (method: string, _payload?: unknown) => {
|
||||||
|
if (method === "cron.update") {
|
||||||
|
return { id: "job-no-alert" };
|
||||||
|
}
|
||||||
|
if (method === "cron.list") {
|
||||||
|
return { jobs: [{ id: "job-no-alert" }] };
|
||||||
|
}
|
||||||
|
if (method === "cron.status") {
|
||||||
|
return { enabled: true, jobs: 1, nextWakeAtMs: null };
|
||||||
|
}
|
||||||
|
return {};
|
||||||
|
});
|
||||||
|
const state = createState({
|
||||||
|
client: { request } as unknown as CronState["client"],
|
||||||
|
cronEditingJobId: "job-no-alert",
|
||||||
|
cronForm: {
|
||||||
|
...DEFAULT_CRON_FORM,
|
||||||
|
name: "alert off",
|
||||||
|
payloadKind: "agentTurn",
|
||||||
|
payloadText: "run it",
|
||||||
|
failureAlertMode: "disabled",
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
await addCronJob(state);
|
||||||
|
|
||||||
|
const updateCall = request.mock.calls.find(([method]) => method === "cron.update");
|
||||||
|
expect(updateCall).toBeDefined();
|
||||||
|
expect(updateCall?.[1]).toMatchObject({
|
||||||
|
id: "job-no-alert",
|
||||||
|
patch: { failureAlert: false },
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
it("maps cron stagger, model, thinking, and best effort into form", () => {
|
it("maps cron stagger, model, thinking, and best effort into form", () => {
|
||||||
const state = createState();
|
const state = createState();
|
||||||
const job = {
|
const job = {
|
||||||
@@ -331,6 +412,36 @@ describe("cron controller", () => {
|
|||||||
expect(state.cronForm.deliveryBestEffort).toBe(true);
|
expect(state.cronForm.deliveryBestEffort).toBe(true);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("maps failureAlert overrides into form fields", () => {
|
||||||
|
const state = createState();
|
||||||
|
const job = {
|
||||||
|
id: "job-11",
|
||||||
|
name: "Failure alerts",
|
||||||
|
enabled: true,
|
||||||
|
createdAtMs: 0,
|
||||||
|
updatedAtMs: 0,
|
||||||
|
schedule: { kind: "every" as const, everyMs: 60_000 },
|
||||||
|
sessionTarget: "isolated" as const,
|
||||||
|
wakeMode: "next-heartbeat" as const,
|
||||||
|
payload: { kind: "agentTurn" as const, message: "hello" },
|
||||||
|
failureAlert: {
|
||||||
|
after: 4,
|
||||||
|
cooldownMs: 30_000,
|
||||||
|
channel: "telegram",
|
||||||
|
to: "999",
|
||||||
|
},
|
||||||
|
state: {},
|
||||||
|
};
|
||||||
|
|
||||||
|
startCronEdit(state, job);
|
||||||
|
|
||||||
|
expect(state.cronForm.failureAlertMode).toBe("custom");
|
||||||
|
expect(state.cronForm.failureAlertAfter).toBe("4");
|
||||||
|
expect(state.cronForm.failureAlertCooldownSeconds).toBe("30");
|
||||||
|
expect(state.cronForm.failureAlertChannel).toBe("telegram");
|
||||||
|
expect(state.cronForm.failureAlertTo).toBe("999");
|
||||||
|
});
|
||||||
|
|
||||||
it("validates key cron form errors", () => {
|
it("validates key cron form errors", () => {
|
||||||
const errors = validateCronForm({
|
const errors = validateCronForm({
|
||||||
...DEFAULT_CRON_FORM,
|
...DEFAULT_CRON_FORM,
|
||||||
|
|||||||
@@ -29,7 +29,9 @@ export type CronFieldKey =
|
|||||||
| "payloadModel"
|
| "payloadModel"
|
||||||
| "payloadThinking"
|
| "payloadThinking"
|
||||||
| "timeoutSeconds"
|
| "timeoutSeconds"
|
||||||
| "deliveryTo";
|
| "deliveryTo"
|
||||||
|
| "failureAlertAfter"
|
||||||
|
| "failureAlertCooldownSeconds";
|
||||||
|
|
||||||
export type CronFieldErrors = Partial<Record<CronFieldKey, string>>;
|
export type CronFieldErrors = Partial<Record<CronFieldKey, string>>;
|
||||||
|
|
||||||
@@ -145,6 +147,22 @@ export function validateCronForm(form: CronFormState): CronFieldErrors {
|
|||||||
errors.deliveryTo = "cron.errors.webhookUrlInvalid";
|
errors.deliveryTo = "cron.errors.webhookUrlInvalid";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (form.failureAlertMode === "custom") {
|
||||||
|
const afterRaw = form.failureAlertAfter.trim();
|
||||||
|
if (afterRaw) {
|
||||||
|
const after = toNumber(afterRaw, 0);
|
||||||
|
if (!Number.isFinite(after) || after <= 0) {
|
||||||
|
errors.failureAlertAfter = "Failure alert threshold must be greater than 0.";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
const cooldownRaw = form.failureAlertCooldownSeconds.trim();
|
||||||
|
if (cooldownRaw) {
|
||||||
|
const cooldown = toNumber(cooldownRaw, -1);
|
||||||
|
if (!Number.isFinite(cooldown) || cooldown < 0) {
|
||||||
|
errors.failureAlertCooldownSeconds = "Cooldown must be 0 or greater.";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
return errors;
|
return errors;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -374,6 +392,7 @@ function parseStaggerSchedule(
|
|||||||
}
|
}
|
||||||
|
|
||||||
function jobToForm(job: CronJob, prev: CronFormState): CronFormState {
|
function jobToForm(job: CronJob, prev: CronFormState): CronFormState {
|
||||||
|
const failureAlert = job.failureAlert;
|
||||||
const next: CronFormState = {
|
const next: CronFormState = {
|
||||||
...prev,
|
...prev,
|
||||||
name: job.name,
|
name: job.name,
|
||||||
@@ -401,6 +420,27 @@ function jobToForm(job: CronJob, prev: CronFormState): CronFormState {
|
|||||||
deliveryChannel: job.delivery?.channel ?? CRON_CHANNEL_LAST,
|
deliveryChannel: job.delivery?.channel ?? CRON_CHANNEL_LAST,
|
||||||
deliveryTo: job.delivery?.to ?? "",
|
deliveryTo: job.delivery?.to ?? "",
|
||||||
deliveryBestEffort: job.delivery?.bestEffort ?? false,
|
deliveryBestEffort: job.delivery?.bestEffort ?? false,
|
||||||
|
failureAlertMode:
|
||||||
|
failureAlert === false
|
||||||
|
? "disabled"
|
||||||
|
: failureAlert && typeof failureAlert === "object"
|
||||||
|
? "custom"
|
||||||
|
: "inherit",
|
||||||
|
failureAlertAfter:
|
||||||
|
failureAlert && typeof failureAlert === "object" && typeof failureAlert.after === "number"
|
||||||
|
? String(failureAlert.after)
|
||||||
|
: DEFAULT_CRON_FORM.failureAlertAfter,
|
||||||
|
failureAlertCooldownSeconds:
|
||||||
|
failureAlert &&
|
||||||
|
typeof failureAlert === "object" &&
|
||||||
|
typeof failureAlert.cooldownMs === "number"
|
||||||
|
? String(Math.floor(failureAlert.cooldownMs / 1000))
|
||||||
|
: DEFAULT_CRON_FORM.failureAlertCooldownSeconds,
|
||||||
|
failureAlertChannel:
|
||||||
|
failureAlert && typeof failureAlert === "object"
|
||||||
|
? (failureAlert.channel ?? CRON_CHANNEL_LAST)
|
||||||
|
: CRON_CHANNEL_LAST,
|
||||||
|
failureAlertTo: failureAlert && typeof failureAlert === "object" ? (failureAlert.to ?? "") : "",
|
||||||
timeoutSeconds:
|
timeoutSeconds:
|
||||||
job.payload.kind === "agentTurn" && typeof job.payload.timeoutSeconds === "number"
|
job.payload.kind === "agentTurn" && typeof job.payload.timeoutSeconds === "number"
|
||||||
? String(job.payload.timeoutSeconds)
|
? String(job.payload.timeoutSeconds)
|
||||||
@@ -495,6 +535,26 @@ export function buildCronPayload(form: CronFormState) {
|
|||||||
return payload;
|
return payload;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Converts the failure-alert fields of the cron form into the
 * `failureAlert` value submitted with the job.
 *
 * @param form - Current cron form state.
 * @returns `false` when alerts are disabled for this job, `undefined` when
 *   the job inherits the global setting, otherwise an object carrying the
 *   custom threshold, channel, recipient and cooldown. Individual fields
 *   are `undefined` when the user left them blank or entered an unusable
 *   value.
 */
function buildFailureAlert(form: CronFormState) {
  if (form.failureAlertMode === "disabled") {
    return false as const;
  }
  if (form.failureAlertMode !== "custom") {
    return undefined;
  }

  const afterRaw = form.failureAlertAfter.trim();
  const cooldownRaw = form.failureAlertCooldownSeconds.trim();

  // Floor BEFORE the range check: a fractional entry such as "0.5" would
  // otherwise pass a `> 0` test and then be emitted as a threshold of 0.
  const after = afterRaw ? Math.floor(toNumber(afterRaw, 0)) : 0;

  // A blank cooldown means "not provided" (validateCronForm skips blank
  // input), so it must be omitted rather than sent as a 0 ms cooldown.
  // NOTE(review): toNumber is defined elsewhere; presumably it returns the
  // fallback for unparseable input — the -1 fallback mirrors validateCronForm.
  const cooldownSeconds = cooldownRaw ? toNumber(cooldownRaw, -1) : -1;

  return {
    after: Number.isFinite(after) && after >= 1 ? after : undefined,
    channel: form.failureAlertChannel.trim() || CRON_CHANNEL_LAST,
    to: form.failureAlertTo.trim() || undefined,
    cooldownMs:
      Number.isFinite(cooldownSeconds) && cooldownSeconds >= 0
        ? Math.floor(cooldownSeconds * 1000)
        : undefined,
  };
}
|
||||||
|
|
||||||
export async function addCronJob(state: CronState) {
|
export async function addCronJob(state: CronState) {
|
||||||
if (!state.client || !state.connected || state.cronBusy) {
|
if (!state.client || !state.connected || state.cronBusy) {
|
||||||
return;
|
return;
|
||||||
@@ -527,6 +587,7 @@ export async function addCronJob(state: CronState) {
|
|||||||
bestEffort: form.deliveryBestEffort,
|
bestEffort: form.deliveryBestEffort,
|
||||||
}
|
}
|
||||||
: undefined;
|
: undefined;
|
||||||
|
const failureAlert = buildFailureAlert(form);
|
||||||
const agentId = form.clearAgent ? null : form.agentId.trim();
|
const agentId = form.clearAgent ? null : form.agentId.trim();
|
||||||
const job = {
|
const job = {
|
||||||
name: form.name.trim(),
|
name: form.name.trim(),
|
||||||
@@ -539,6 +600,7 @@ export async function addCronJob(state: CronState) {
|
|||||||
wakeMode: form.wakeMode,
|
wakeMode: form.wakeMode,
|
||||||
payload,
|
payload,
|
||||||
delivery,
|
delivery,
|
||||||
|
failureAlert,
|
||||||
};
|
};
|
||||||
if (!job.name) {
|
if (!job.name) {
|
||||||
throw new Error(t("cron.errors.nameRequiredShort"));
|
throw new Error(t("cron.errors.nameRequiredShort"));
|
||||||
|
|||||||
@@ -491,6 +491,13 @@ export type CronDelivery = {
|
|||||||
bestEffort?: boolean;
|
bestEffort?: boolean;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Per-job override for repeated-failure alerting. Every field is optional;
 * a job may also set `failureAlert` to `false` to turn alerts off.
 */
export type CronFailureAlert = {
  /** Number of consecutive errors before an alert is sent. */
  after?: number;
  /** Minimum time between alerts, in milliseconds. */
  cooldownMs?: number;
  /** Channel used to deliver the alert. */
  channel?: string;
  /** Optional recipient override for the alert. */
  to?: string;
};
|
||||||
|
|
||||||
export type CronJobState = {
|
export type CronJobState = {
|
||||||
nextRunAtMs?: number;
|
nextRunAtMs?: number;
|
||||||
runningAtMs?: number;
|
runningAtMs?: number;
|
||||||
@@ -498,6 +505,7 @@ export type CronJobState = {
|
|||||||
lastStatus?: "ok" | "error" | "skipped";
|
lastStatus?: "ok" | "error" | "skipped";
|
||||||
lastError?: string;
|
lastError?: string;
|
||||||
lastDurationMs?: number;
|
lastDurationMs?: number;
|
||||||
|
lastFailureAlertAtMs?: number;
|
||||||
};
|
};
|
||||||
|
|
||||||
export type CronJob = {
|
export type CronJob = {
|
||||||
@@ -514,6 +522,7 @@ export type CronJob = {
|
|||||||
wakeMode: CronWakeMode;
|
wakeMode: CronWakeMode;
|
||||||
payload: CronPayload;
|
payload: CronPayload;
|
||||||
delivery?: CronDelivery;
|
delivery?: CronDelivery;
|
||||||
|
failureAlert?: CronFailureAlert | false;
|
||||||
state?: CronJobState;
|
state?: CronJobState;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -40,5 +40,10 @@ export type CronFormState = {
|
|||||||
deliveryChannel: string;
|
deliveryChannel: string;
|
||||||
deliveryTo: string;
|
deliveryTo: string;
|
||||||
deliveryBestEffort: boolean;
|
deliveryBestEffort: boolean;
|
||||||
|
failureAlertMode: "inherit" | "disabled" | "custom";
|
||||||
|
failureAlertAfter: string;
|
||||||
|
failureAlertCooldownSeconds: string;
|
||||||
|
failureAlertChannel: string;
|
||||||
|
failureAlertTo: string;
|
||||||
timeoutSeconds: string;
|
timeoutSeconds: string;
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -239,6 +239,12 @@ function inputIdForField(key: CronFieldKey) {
|
|||||||
if (key === "timeoutSeconds") {
|
if (key === "timeoutSeconds") {
|
||||||
return "cron-timeout-seconds";
|
return "cron-timeout-seconds";
|
||||||
}
|
}
|
||||||
|
if (key === "failureAlertAfter") {
|
||||||
|
return "cron-failure-alert-after";
|
||||||
|
}
|
||||||
|
if (key === "failureAlertCooldownSeconds") {
|
||||||
|
return "cron-failure-alert-cooldown-seconds";
|
||||||
|
}
|
||||||
return "cron-delivery-to";
|
return "cron-delivery-to";
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -266,6 +272,8 @@ function fieldLabelForKey(
|
|||||||
payloadThinking: t("cron.form.thinking"),
|
payloadThinking: t("cron.form.thinking"),
|
||||||
timeoutSeconds: t("cron.form.timeoutSeconds"),
|
timeoutSeconds: t("cron.form.timeoutSeconds"),
|
||||||
deliveryTo: t("cron.form.to"),
|
deliveryTo: t("cron.form.to"),
|
||||||
|
failureAlertAfter: "Failure alert after",
|
||||||
|
failureAlertCooldownSeconds: "Failure alert cooldown",
|
||||||
};
|
};
|
||||||
return labels[key];
|
return labels[key];
|
||||||
}
|
}
|
||||||
@@ -286,6 +294,8 @@ function collectBlockingFields(
|
|||||||
"payloadThinking",
|
"payloadThinking",
|
||||||
"timeoutSeconds",
|
"timeoutSeconds",
|
||||||
"deliveryTo",
|
"deliveryTo",
|
||||||
|
"failureAlertAfter",
|
||||||
|
"failureAlertCooldownSeconds",
|
||||||
];
|
];
|
||||||
const fields: BlockingField[] = [];
|
const fields: BlockingField[] = [];
|
||||||
for (const key of orderedKeys) {
|
for (const key of orderedKeys) {
|
||||||
@@ -1057,6 +1067,115 @@ export function renderCron(props: CronProps) {
|
|||||||
`
|
`
|
||||||
: nothing
|
: nothing
|
||||||
}
|
}
|
||||||
|
${
|
||||||
|
isAgentTurn
|
||||||
|
? html`
|
||||||
|
<label class="field cron-span-2">
|
||||||
|
${renderFieldLabel("Failure alerts")}
|
||||||
|
<select
|
||||||
|
.value=${props.form.failureAlertMode}
|
||||||
|
@change=${(e: Event) =>
|
||||||
|
props.onFormChange({
|
||||||
|
failureAlertMode: (e.target as HTMLSelectElement)
|
||||||
|
.value as CronFormState["failureAlertMode"],
|
||||||
|
})}
|
||||||
|
>
|
||||||
|
<option value="inherit">Inherit global setting</option>
|
||||||
|
<option value="disabled">Disable for this job</option>
|
||||||
|
<option value="custom">Custom per-job settings</option>
|
||||||
|
</select>
|
||||||
|
<div class="cron-help">
|
||||||
|
Control when this job sends repeated-failure alerts.
|
||||||
|
</div>
|
||||||
|
</label>
|
||||||
|
${
|
||||||
|
props.form.failureAlertMode === "custom"
|
||||||
|
? html`
|
||||||
|
<label class="field">
|
||||||
|
${renderFieldLabel("Alert after")}
|
||||||
|
<input
|
||||||
|
id="cron-failure-alert-after"
|
||||||
|
.value=${props.form.failureAlertAfter}
|
||||||
|
aria-invalid=${props.fieldErrors.failureAlertAfter ? "true" : "false"}
|
||||||
|
aria-describedby=${ifDefined(
|
||||||
|
props.fieldErrors.failureAlertAfter
|
||||||
|
? errorIdForField("failureAlertAfter")
|
||||||
|
: undefined,
|
||||||
|
)}
|
||||||
|
@input=${(e: Event) =>
|
||||||
|
props.onFormChange({
|
||||||
|
failureAlertAfter: (e.target as HTMLInputElement).value,
|
||||||
|
})}
|
||||||
|
placeholder="2"
|
||||||
|
/>
|
||||||
|
<div class="cron-help">Consecutive errors before alerting.</div>
|
||||||
|
${renderFieldError(
|
||||||
|
props.fieldErrors.failureAlertAfter,
|
||||||
|
errorIdForField("failureAlertAfter"),
|
||||||
|
)}
|
||||||
|
</label>
|
||||||
|
<label class="field">
|
||||||
|
${renderFieldLabel("Cooldown (seconds)")}
|
||||||
|
<input
|
||||||
|
id="cron-failure-alert-cooldown-seconds"
|
||||||
|
.value=${props.form.failureAlertCooldownSeconds}
|
||||||
|
aria-invalid=${props.fieldErrors.failureAlertCooldownSeconds ? "true" : "false"}
|
||||||
|
aria-describedby=${ifDefined(
|
||||||
|
props.fieldErrors.failureAlertCooldownSeconds
|
||||||
|
? errorIdForField("failureAlertCooldownSeconds")
|
||||||
|
: undefined,
|
||||||
|
)}
|
||||||
|
@input=${(e: Event) =>
|
||||||
|
props.onFormChange({
|
||||||
|
failureAlertCooldownSeconds: (e.target as HTMLInputElement)
|
||||||
|
.value,
|
||||||
|
})}
|
||||||
|
placeholder="3600"
|
||||||
|
/>
|
||||||
|
<div class="cron-help">Minimum seconds between alerts.</div>
|
||||||
|
${renderFieldError(
|
||||||
|
props.fieldErrors.failureAlertCooldownSeconds,
|
||||||
|
errorIdForField("failureAlertCooldownSeconds"),
|
||||||
|
)}
|
||||||
|
</label>
|
||||||
|
<label class="field">
|
||||||
|
${renderFieldLabel("Alert channel")}
|
||||||
|
<select
|
||||||
|
.value=${props.form.failureAlertChannel || "last"}
|
||||||
|
@change=${(e: Event) =>
|
||||||
|
props.onFormChange({
|
||||||
|
failureAlertChannel: (e.target as HTMLSelectElement).value,
|
||||||
|
})}
|
||||||
|
>
|
||||||
|
${channelOptions.map(
|
||||||
|
(channel) =>
|
||||||
|
html`<option value=${channel}>
|
||||||
|
${resolveChannelLabel(props, channel)}
|
||||||
|
</option>`,
|
||||||
|
)}
|
||||||
|
</select>
|
||||||
|
</label>
|
||||||
|
<label class="field">
|
||||||
|
${renderFieldLabel("Alert to")}
|
||||||
|
<input
|
||||||
|
.value=${props.form.failureAlertTo}
|
||||||
|
list="cron-delivery-to-suggestions"
|
||||||
|
@input=${(e: Event) =>
|
||||||
|
props.onFormChange({
|
||||||
|
failureAlertTo: (e.target as HTMLInputElement).value,
|
||||||
|
})}
|
||||||
|
placeholder="+1555... or chat id"
|
||||||
|
/>
|
||||||
|
<div class="cron-help">
|
||||||
|
Optional recipient override for failure alerts.
|
||||||
|
</div>
|
||||||
|
</label>
|
||||||
|
`
|
||||||
|
: nothing
|
||||||
|
}
|
||||||
|
`
|
||||||
|
: nothing
|
||||||
|
}
|
||||||
${
|
${
|
||||||
selectedDeliveryMode !== "none"
|
selectedDeliveryMode !== "none"
|
||||||
? html`
|
? html`
|
||||||
|
|||||||
Reference in New Issue
Block a user