mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-12 15:01:11 +00:00
feat(cron): add failure destination support to failed cron jobs (#31059)
* feat(cron): add failure destination support with webhook mode and bestEffort handling

  Extends PR #24789 failure alerts with features from PR #29145:
  - Add webhook delivery mode for failure alerts (mode: 'webhook')
  - Add accountId support for multi-account channel configurations
  - Add bestEffort handling to skip alerts when job has bestEffort=true
  - Add separate failureDestination config (global + per-job in delivery)
  - Add duplicate prevention (prevents sending to the same target as the primary delivery)
  - Add CLI flags: --failure-alert-mode, --failure-alert-account-id
  - Add UI fields for new options in web cron editor

* fix(cron): merge failureAlert mode/accountId and preserve failureDestination on updates
  - Fix mergeCronFailureAlert to merge mode and accountId fields
  - Fix mergeCronDelivery to preserve failureDestination on updates
  - Fix isSameDeliveryTarget to use 'announce' as default instead of 'none' to properly detect duplicates when delivery.mode is undefined

* fix(cron): validate webhook mode requires URL in resolveFailureDestination

  When mode is 'webhook' but no 'to' URL is provided, return null instead of creating an invalid plan that silently fails later.

* fix(cron): fail closed on webhook mode without URL and make failureDestination fields clearable
  - sendCronFailureAlert: fail closed when mode is webhook but URL is missing
  - mergeCronDelivery: use per-key presence checks so callers can clear nested failureDestination fields via cron.update

  Note: protocol:check shows missing internalEvents in Swift models - this is a pre-existing issue unrelated to these changes (upstream sync needed).

* fix(cron): use separate schema for failureDestination and fix type cast
  - Create CronFailureDestinationSchema excluding after/cooldownMs fields
  - Fix type cast in sendFailureNotificationAnnounce to use CronMessageChannel

* fix(cron): merge global failureDestination with partial job overrides

  When job has partial failureDestination config, fall back to global config for unset fields instead of treating it as a full override.

* fix(cron): avoid forcing announce mode and clear inherited to on mode change
  - UI: only include mode in patch if explicitly set to non-default
  - delivery.ts: clear inherited 'to' when job overrides mode, since URL semantics differ between announce and webhook modes

* fix(cron): preserve explicit to on mode override and always include mode in UI patches
  - delivery.ts: preserve job-level explicit 'to' when overriding mode
  - UI: always include mode in failureAlert patch so users can switch between announce/webhook

* fix(cron): allow clearing accountId and treat undefined global mode as announce
  - UI: always include accountId in patch so users can clear it
  - delivery.ts: treat undefined global mode as announce when comparing for clearing inherited 'to'

* Cron: harden failure destination routing and add regression coverage
* Cron: resolve failure destination review feedback
* Cron: drop unrelated timeout assertions from conflict resolution
* Cron: format cron CLI regression test
* Cron: align gateway cron test mock types

---------

Co-authored-by: Tak Hoffman <781889+Takhoffman@users.noreply.github.com>
This commit is contained in:
@@ -1,4 +1,14 @@
|
||||
import type { CronDeliveryMode, CronJob, CronMessageChannel } from "./types.js";
|
||||
import type { CliDeps } from "../cli/deps.js";
|
||||
import { createOutboundSendDeps } from "../cli/outbound-send-deps.js";
|
||||
import type { CronFailureDestinationConfig } from "../config/types.cron.js";
|
||||
import type { OpenClawConfig } from "../config/types.js";
|
||||
import { formatErrorMessage } from "../infra/errors.js";
|
||||
import { deliverOutboundPayloads } from "../infra/outbound/deliver.js";
|
||||
import { resolveAgentOutboundIdentity } from "../infra/outbound/identity.js";
|
||||
import { buildOutboundSessionContext } from "../infra/outbound/session-context.js";
|
||||
import { getChildLogger } from "../logging.js";
|
||||
import { resolveDeliveryTarget } from "./isolated-agent/delivery-target.js";
|
||||
import type { CronDelivery, CronDeliveryMode, CronJob, CronMessageChannel } from "./types.js";
|
||||
|
||||
export type CronDeliveryPlan = {
|
||||
mode: CronDeliveryMode;
|
||||
@@ -90,3 +100,202 @@ export function resolveCronDeliveryPlan(job: CronJob): CronDeliveryPlan {
|
||||
requested,
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Fully resolved description of where a cron failure notification goes.
 *
 * Produced by `resolveFailureDestination`; unlike the raw input shape,
 * `mode` is always present here.
 */
export type CronFailureDeliveryPlan = {
  /** Delivery mechanism for the failure notification. */
  mode: "announce" | "webhook";
  /** Recipient for announce mode, or the endpoint URL for webhook mode. */
  to?: string;
  /** Account to send from when several accounts are configured. */
  accountId?: string;
  /** Message channel to announce on; left unset for webhook plans. */
  channel?: CronMessageChannel;
};
|
||||
|
||||
/**
 * Raw, possibly partial failure-destination settings as they appear on a
 * job's delivery config. Every field is optional so a job can override only
 * some of the globally configured values.
 */
export type CronFailureDestinationInput = {
  /** Requested delivery mechanism; may be absent. */
  mode?: "announce" | "webhook";
  /** Message channel override. */
  channel?: CronMessageChannel;
  /** Recipient (announce) or endpoint URL (webhook) override. */
  to?: string;
  /** Account override. */
  accountId?: string;
};
|
||||
|
||||
/**
 * Coerces an arbitrary value into a supported failure delivery mode.
 *
 * Strings are trimmed and lower-cased before matching, so `" WebHook "`
 * is accepted as `"webhook"`.
 *
 * @param value - Candidate mode taken from untyped or partially typed config.
 * @returns `"announce"` or `"webhook"` when recognised, otherwise `undefined`
 *   (including for every non-string input).
 */
function normalizeFailureMode(value: unknown): "announce" | "webhook" | undefined {
  const candidate = typeof value === "string" ? value.trim().toLowerCase() : undefined;
  switch (candidate) {
    case "announce":
    case "webhook":
      return candidate;
    default:
      return undefined;
  }
}
|
||||
|
||||
/**
 * Resolves where a failure notification for `job` should be delivered.
 *
 * The global failure destination (when supplied) is used as the base, and
 * fields present on `job.delivery.failureDestination` override it key by key.
 * A key that is present on the job object but normalizes to `undefined`
 * clears the inherited value.
 *
 * @param job - Cron job whose `delivery.failureDestination` may carry overrides.
 * @param globalConfig - Optional global failure destination used as fallback.
 * @returns The resolved plan, or `null` when
 *   - nothing is configured at either level,
 *   - the resolved mode is `"webhook"` but no `to` URL is available, or
 *   - the plan points at the same target as the job's primary delivery.
 */
export function resolveFailureDestination(
  job: CronJob,
  globalConfig?: CronFailureDestinationConfig,
): CronFailureDeliveryPlan | null {
  const delivery = job.delivery;
  const jobFailureDest = delivery?.failureDestination as CronFailureDestinationInput | undefined;
  const hasJobFailureDest = jobFailureDest && typeof jobFailureDest === "object";

  let channel: CronMessageChannel | undefined;
  let to: string | undefined;
  let accountId: string | undefined;
  let mode: "announce" | "webhook" | undefined;

  // Start with global config as base
  if (globalConfig) {
    channel = normalizeChannel(globalConfig.channel);
    to = normalizeTo(globalConfig.to);
    accountId = normalizeAccountId(globalConfig.accountId);
    mode = normalizeFailureMode(globalConfig.mode);
  }

  // Override with job-level values if present
  if (hasJobFailureDest) {
    const jobChannel = normalizeChannel(jobFailureDest.channel);
    const jobTo = normalizeTo(jobFailureDest.to);
    const jobAccountId = normalizeAccountId(jobFailureDest.accountId);
    const jobMode = normalizeFailureMode(jobFailureDest.mode);
    const hasJobChannelField = "channel" in jobFailureDest;
    const hasJobToField = "to" in jobFailureDest;
    const hasJobAccountIdField = "accountId" in jobFailureDest;

    // Track if 'to' was explicitly set at job level
    const jobToExplicitValue = hasJobToField && jobTo !== undefined;

    // Respect explicit clears from partial patches.
    if (hasJobChannelField) {
      channel = jobChannel;
    }
    if (hasJobToField) {
      to = jobTo;
    }
    if (hasJobAccountIdField) {
      accountId = jobAccountId;
    }
    if (jobMode !== undefined) {
      // Mode was explicitly overridden - clear inherited 'to' since URL semantics differ
      // between announce (channel recipient) and webhook (HTTP endpoint).
      // But preserve explicit 'to' that was set at job level.
      //
      // Compare against the NORMALIZED inherited mode (still held in `mode` at
      // this point), treating an absent/unrecognised global mode as "announce".
      // Comparing the raw config value would treat e.g. "Webhook" as different
      // from the job's "webhook" and wrongly drop the inherited URL.
      const inheritedMode = mode ?? "announce";
      if (!jobToExplicitValue && inheritedMode !== jobMode) {
        to = undefined;
      }
      mode = jobMode;
    }
  }

  // Nothing configured at either level: no failure destination.
  if (!channel && !to && !accountId && !mode) {
    return null;
  }

  const resolvedMode = mode ?? "announce";

  // Webhook mode requires a URL
  if (resolvedMode === "webhook" && !to) {
    return null;
  }

  const result: CronFailureDeliveryPlan = {
    mode: resolvedMode,
    // Channel only applies to announce; default to the "last" channel there.
    channel: resolvedMode === "announce" ? (channel ?? "last") : undefined,
    to,
    accountId,
  };

  // Avoid sending the failure notice to the same place as the primary delivery.
  if (delivery && isSameDeliveryTarget(delivery, result)) {
    return null;
  }

  return result;
}
|
||||
|
||||
/**
 * Reports whether a failure plan would deliver to the same target as the
 * job's primary delivery, so the caller can skip a duplicate notification.
 *
 * @param delivery - The job's primary delivery config; an unset `mode` is
 *   treated as `"announce"`.
 * @param failurePlan - The resolved failure delivery plan.
 * @returns `true` only when both use the same mode and the same target:
 *   for webhook, an identical `to`; for announce, identical channel
 *   (defaulting to `"last"`), `to`, and `accountId`.
 */
function isSameDeliveryTarget(
  delivery: CronDelivery,
  failurePlan: CronFailureDeliveryPlan,
): boolean {
  const primaryMode = delivery.mode ?? "announce";
  if (primaryMode === "none") {
    // Primary delivery is disabled, so nothing can be duplicated.
    return false;
  }

  // A webhook endpoint and a channel recipient are never the same target,
  // even if their `to` strings happen to match textually.
  if (primaryMode !== failurePlan.mode) {
    return false;
  }

  if (failurePlan.mode === "webhook") {
    return delivery.to === failurePlan.to;
  }

  const primaryChannelNormalized = delivery.channel ?? "last";
  const failureChannelNormalized = failurePlan.channel ?? "last";

  return (
    failureChannelNormalized === primaryChannelNormalized &&
    failurePlan.to === delivery.to &&
    failurePlan.accountId === delivery.accountId
  );
}
|
||||
|
||||
/** Milliseconds to wait before aborting an in-flight failure notification (30 seconds). */
const FAILURE_NOTIFICATION_TIMEOUT_MS = 30 * 1_000;

/** Logger for this module, tagged with the `cron-delivery` subsystem. */
const cronDeliveryLogger = getChildLogger({ subsystem: "cron-delivery" });
|
||||
|
||||
/**
 * Announces a cron failure message to a channel target.
 *
 * The requested target is first resolved via `resolveDeliveryTarget`; if that
 * fails, a warning is logged and nothing is sent. Delivery is aborted through
 * an `AbortController` once `FAILURE_NOTIFICATION_TIMEOUT_MS` elapses. Errors
 * thrown by the delivery call are caught and logged as warnings rather than
 * propagated.
 *
 * @param deps - CLI dependencies used to build the outbound send deps.
 * @param cfg - Application configuration.
 * @param agentId - Agent on whose behalf the notification is sent.
 * @param jobId - Cron job id; used in the session key `cron:<jobId>:failure`.
 * @param target - Requested channel / recipient / account for the notice.
 * @param message - Text of the failure notification.
 */
export async function sendFailureNotificationAnnounce(
  deps: CliDeps,
  cfg: OpenClawConfig,
  agentId: string,
  jobId: string,
  target: { channel?: string; to?: string; accountId?: string },
  message: string,
): Promise<void> {
  const destination = await resolveDeliveryTarget(cfg, agentId, {
    channel: target.channel as CronMessageChannel | undefined,
    to: target.to,
    accountId: target.accountId,
  });

  if (!destination.ok) {
    const details = { error: destination.error.message };
    cronDeliveryLogger.warn(details, "cron: failed to resolve failure destination target");
    return;
  }

  const identity = resolveAgentOutboundIdentity(cfg, agentId);
  const sessionKey = `cron:${jobId}:failure`;
  const session = buildOutboundSessionContext({ cfg, agentId, sessionKey });

  // Abort the send if it has not completed within the timeout window.
  const aborter = new AbortController();
  const timer = setTimeout(() => aborter.abort(), FAILURE_NOTIFICATION_TIMEOUT_MS);

  try {
    await deliverOutboundPayloads({
      cfg,
      channel: destination.channel,
      to: destination.to,
      accountId: destination.accountId,
      threadId: destination.threadId,
      payloads: [{ text: message }],
      session,
      identity,
      bestEffort: false,
      deps: createOutboundSendDeps(deps),
      abortSignal: aborter.signal,
    });
  } catch (err) {
    // Log and swallow delivery errors so they do not reach the caller.
    const details = {
      err: formatErrorMessage(err),
      channel: destination.channel,
      to: destination.to,
    };
    cronDeliveryLogger.warn(details, "cron: failure destination announce failed");
  } finally {
    // Always release the timer so it cannot fire after we are done.
    clearTimeout(timer);
  }
}
|
||||
|
||||
Reference in New Issue
Block a user