mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-19 12:08:37 +00:00
Verified: - pnpm build - pnpm check - pnpm test:macmini Co-authored-by: ql-wade <262266039+ql-wade@users.noreply.github.com>
134 lines
4.5 KiB
TypeScript
134 lines
4.5 KiB
TypeScript
import type { ChannelId } from "../channels/plugins/types.js";
|
|
|
|
export type ChannelHealthSnapshot = {
|
|
running?: boolean;
|
|
connected?: boolean;
|
|
enabled?: boolean;
|
|
configured?: boolean;
|
|
restartPending?: boolean;
|
|
busy?: boolean;
|
|
activeRuns?: number;
|
|
lastRunActivityAt?: number | null;
|
|
lastEventAt?: number | null;
|
|
lastStartAt?: number | null;
|
|
reconnectAttempts?: number;
|
|
};
|
|
|
|
export type ChannelHealthEvaluationReason =
|
|
| "healthy"
|
|
| "unmanaged"
|
|
| "not-running"
|
|
| "busy"
|
|
| "stuck"
|
|
| "startup-connect-grace"
|
|
| "disconnected"
|
|
| "stale-socket";
|
|
|
|
export type ChannelHealthEvaluation = {
|
|
healthy: boolean;
|
|
reason: ChannelHealthEvaluationReason;
|
|
};
|
|
|
|
export type ChannelHealthPolicy = {
|
|
channelId: ChannelId;
|
|
now: number;
|
|
staleEventThresholdMs: number;
|
|
channelConnectGraceMs: number;
|
|
};
|
|
|
|
export type ChannelRestartReason = "gave-up" | "stopped" | "stale-socket" | "stuck";
|
|
|
|
function isManagedAccount(snapshot: ChannelHealthSnapshot): boolean {
|
|
return snapshot.enabled !== false && snapshot.configured !== false;
|
|
}
|
|
|
|
const BUSY_ACTIVITY_STALE_THRESHOLD_MS = 25 * 60_000;
|
|
// Keep these shared between the background health monitor and on-demand readiness
|
|
// probes so both surfaces evaluate channel lifecycle windows consistently.
|
|
export const DEFAULT_CHANNEL_STALE_EVENT_THRESHOLD_MS = 30 * 60_000;
|
|
export const DEFAULT_CHANNEL_CONNECT_GRACE_MS = 120_000;
|
|
|
|
export function evaluateChannelHealth(
|
|
snapshot: ChannelHealthSnapshot,
|
|
policy: ChannelHealthPolicy,
|
|
): ChannelHealthEvaluation {
|
|
if (!isManagedAccount(snapshot)) {
|
|
return { healthy: true, reason: "unmanaged" };
|
|
}
|
|
if (!snapshot.running) {
|
|
return { healthy: false, reason: "not-running" };
|
|
}
|
|
const activeRuns =
|
|
typeof snapshot.activeRuns === "number" && Number.isFinite(snapshot.activeRuns)
|
|
? Math.max(0, Math.trunc(snapshot.activeRuns))
|
|
: 0;
|
|
const isBusy = snapshot.busy === true || activeRuns > 0;
|
|
const lastStartAt =
|
|
typeof snapshot.lastStartAt === "number" && Number.isFinite(snapshot.lastStartAt)
|
|
? snapshot.lastStartAt
|
|
: null;
|
|
const lastRunActivityAt =
|
|
typeof snapshot.lastRunActivityAt === "number" && Number.isFinite(snapshot.lastRunActivityAt)
|
|
? snapshot.lastRunActivityAt
|
|
: null;
|
|
const busyStateInitializedForLifecycle =
|
|
lastStartAt == null || (lastRunActivityAt != null && lastRunActivityAt >= lastStartAt);
|
|
|
|
// Runtime snapshots are patch-merged, so a restarted lifecycle can temporarily
|
|
// inherit stale busy fields from the previous instance. Ignore busy short-circuit
|
|
// until run activity is known to belong to the current lifecycle.
|
|
if (isBusy) {
|
|
if (!busyStateInitializedForLifecycle) {
|
|
// Fall through to normal startup/disconnect checks below.
|
|
} else {
|
|
const runActivityAge =
|
|
lastRunActivityAt == null
|
|
? Number.POSITIVE_INFINITY
|
|
: Math.max(0, policy.now - lastRunActivityAt);
|
|
if (runActivityAge < BUSY_ACTIVITY_STALE_THRESHOLD_MS) {
|
|
return { healthy: true, reason: "busy" };
|
|
}
|
|
return { healthy: false, reason: "stuck" };
|
|
}
|
|
}
|
|
if (snapshot.lastStartAt != null) {
|
|
const upDuration = policy.now - snapshot.lastStartAt;
|
|
if (upDuration < policy.channelConnectGraceMs) {
|
|
return { healthy: true, reason: "startup-connect-grace" };
|
|
}
|
|
}
|
|
if (snapshot.connected === false) {
|
|
return { healthy: false, reason: "disconnected" };
|
|
}
|
|
// Skip stale-socket check for Telegram (long-polling mode). Each polling request
|
|
// acts as a heartbeat, so the half-dead WebSocket scenario this check is designed
|
|
// to catch does not apply to Telegram's long-polling architecture.
|
|
if (policy.channelId !== "telegram") {
|
|
if (snapshot.lastEventAt != null || snapshot.lastStartAt != null) {
|
|
const upSince = snapshot.lastStartAt ?? 0;
|
|
const upDuration = policy.now - upSince;
|
|
if (upDuration > policy.staleEventThresholdMs) {
|
|
const lastEvent = snapshot.lastEventAt ?? 0;
|
|
const eventAge = policy.now - lastEvent;
|
|
if (eventAge > policy.staleEventThresholdMs) {
|
|
return { healthy: false, reason: "stale-socket" };
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return { healthy: true, reason: "healthy" };
|
|
}
|
|
|
|
export function resolveChannelRestartReason(
|
|
snapshot: ChannelHealthSnapshot,
|
|
evaluation: ChannelHealthEvaluation,
|
|
): ChannelRestartReason {
|
|
if (evaluation.reason === "stale-socket") {
|
|
return "stale-socket";
|
|
}
|
|
if (evaluation.reason === "not-running") {
|
|
return snapshot.reconnectAttempts && snapshot.reconnectAttempts >= 10 ? "gave-up" : "stopped";
|
|
}
|
|
return "stuck";
|
|
}
|