mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-08 06:41:22 +00:00
fix(gateway): harden channel health monitor recovery
This commit is contained in:
@@ -1,13 +1,13 @@
|
||||
import { resolveChannelDefaultAccountId } from "../channels/plugins/helpers.js";
|
||||
import { type ChannelId, getChannelPlugin, listChannelPlugins } from "../channels/plugins/index.js";
|
||||
import type { ChannelAccountSnapshot } from "../channels/plugins/types.js";
|
||||
import type { OpenClawConfig } from "../config/config.js";
|
||||
import type { createSubsystemLogger } from "../logging/subsystem.js";
|
||||
import type { RuntimeEnv } from "../runtime.js";
|
||||
import { resolveChannelDefaultAccountId } from "../channels/plugins/helpers.js";
|
||||
import { type ChannelId, getChannelPlugin, listChannelPlugins } from "../channels/plugins/index.js";
|
||||
import { type BackoffPolicy, computeBackoff, sleepWithAbort } from "../infra/backoff.js";
|
||||
import { formatErrorMessage } from "../infra/errors.js";
|
||||
import { resetDirectoryCache } from "../infra/outbound/target-resolver.js";
|
||||
import type { createSubsystemLogger } from "../logging/subsystem.js";
|
||||
import { DEFAULT_ACCOUNT_ID } from "../routing/session-key.js";
|
||||
import type { RuntimeEnv } from "../runtime.js";
|
||||
|
||||
const CHANNEL_RESTART_POLICY: BackoffPolicy = {
|
||||
initialMs: 5_000,
|
||||
@@ -61,6 +61,11 @@ type ChannelManagerOptions = {
|
||||
channelRuntimeEnvs: Record<ChannelId, RuntimeEnv>;
|
||||
};
|
||||
|
||||
type StartChannelOptions = {
|
||||
preserveRestartAttempts?: boolean;
|
||||
preserveManualStop?: boolean;
|
||||
};
|
||||
|
||||
export type ChannelManager = {
|
||||
getRuntimeSnapshot: () => ChannelRuntimeSnapshot;
|
||||
startChannels: () => Promise<void>;
|
||||
@@ -110,12 +115,17 @@ export function createChannelManager(opts: ChannelManagerOptions): ChannelManage
|
||||
return next;
|
||||
};
|
||||
|
||||
const startChannel = async (channelId: ChannelId, accountId?: string) => {
|
||||
const startChannelInternal = async (
|
||||
channelId: ChannelId,
|
||||
accountId?: string,
|
||||
opts: StartChannelOptions = {},
|
||||
) => {
|
||||
const plugin = getChannelPlugin(channelId);
|
||||
const startAccount = plugin?.gateway?.startAccount;
|
||||
if (!startAccount) {
|
||||
return;
|
||||
}
|
||||
const { preserveRestartAttempts = false, preserveManualStop = false } = opts;
|
||||
const cfg = loadConfig();
|
||||
resetDirectoryCache({ channel: channelId, accountId });
|
||||
const store = getStore(channelId);
|
||||
@@ -136,6 +146,8 @@ export function createChannelManager(opts: ChannelManagerOptions): ChannelManage
|
||||
if (!enabled) {
|
||||
setRuntime(channelId, id, {
|
||||
accountId: id,
|
||||
enabled: false,
|
||||
configured: true,
|
||||
running: false,
|
||||
lastError: plugin.config.disabledReason?.(account, cfg) ?? "disabled",
|
||||
});
|
||||
@@ -149,6 +161,8 @@ export function createChannelManager(opts: ChannelManagerOptions): ChannelManage
|
||||
if (!configured) {
|
||||
setRuntime(channelId, id, {
|
||||
accountId: id,
|
||||
enabled: true,
|
||||
configured: false,
|
||||
running: false,
|
||||
lastError: plugin.config.unconfiguredReason?.(account, cfg) ?? "not configured",
|
||||
});
|
||||
@@ -156,17 +170,23 @@ export function createChannelManager(opts: ChannelManagerOptions): ChannelManage
|
||||
}
|
||||
|
||||
const rKey = restartKey(channelId, id);
|
||||
manuallyStopped.delete(rKey);
|
||||
if (!preserveManualStop) {
|
||||
manuallyStopped.delete(rKey);
|
||||
}
|
||||
|
||||
const abort = new AbortController();
|
||||
store.aborts.set(id, abort);
|
||||
restartAttempts.delete(rKey);
|
||||
if (!preserveRestartAttempts) {
|
||||
restartAttempts.delete(rKey);
|
||||
}
|
||||
setRuntime(channelId, id, {
|
||||
accountId: id,
|
||||
enabled: true,
|
||||
configured: true,
|
||||
running: true,
|
||||
lastStartAt: Date.now(),
|
||||
lastError: null,
|
||||
reconnectAttempts: 0,
|
||||
reconnectAttempts: preserveRestartAttempts ? (restartAttempts.get(rKey) ?? 0) : 0,
|
||||
});
|
||||
|
||||
const log = channelLogs[channelId];
|
||||
@@ -180,15 +200,13 @@ export function createChannelManager(opts: ChannelManagerOptions): ChannelManage
|
||||
getStatus: () => getRuntime(channelId, id),
|
||||
setStatus: (next) => setRuntime(channelId, id, next),
|
||||
});
|
||||
const tracked = Promise.resolve(task)
|
||||
const trackedPromise = Promise.resolve(task)
|
||||
.catch((err) => {
|
||||
const message = formatErrorMessage(err);
|
||||
setRuntime(channelId, id, { accountId: id, lastError: message });
|
||||
log.error?.(`[${id}] channel exited: ${message}`);
|
||||
})
|
||||
.finally(() => {
|
||||
store.aborts.delete(id);
|
||||
store.tasks.delete(id);
|
||||
setRuntime(channelId, id, {
|
||||
accountId: id,
|
||||
running: false,
|
||||
@@ -214,17 +232,41 @@ export function createChannelManager(opts: ChannelManagerOptions): ChannelManage
|
||||
reconnectAttempts: attempt,
|
||||
});
|
||||
try {
|
||||
await sleepWithAbort(delayMs);
|
||||
await startChannel(channelId, id);
|
||||
await sleepWithAbort(delayMs, abort.signal);
|
||||
if (manuallyStopped.has(rKey)) {
|
||||
return;
|
||||
}
|
||||
if (store.tasks.get(id) === trackedPromise) {
|
||||
store.tasks.delete(id);
|
||||
}
|
||||
if (store.aborts.get(id) === abort) {
|
||||
store.aborts.delete(id);
|
||||
}
|
||||
await startChannelInternal(channelId, id, {
|
||||
preserveRestartAttempts: true,
|
||||
preserveManualStop: true,
|
||||
});
|
||||
} catch {
|
||||
// abort or startup failure — next crash will retry
|
||||
}
|
||||
})
|
||||
.finally(() => {
|
||||
if (store.tasks.get(id) === trackedPromise) {
|
||||
store.tasks.delete(id);
|
||||
}
|
||||
if (store.aborts.get(id) === abort) {
|
||||
store.aborts.delete(id);
|
||||
}
|
||||
});
|
||||
store.tasks.set(id, tracked);
|
||||
store.tasks.set(id, trackedPromise);
|
||||
}),
|
||||
);
|
||||
};
|
||||
|
||||
const startChannel = async (channelId: ChannelId, accountId?: string) => {
|
||||
await startChannelInternal(channelId, accountId);
|
||||
};
|
||||
|
||||
const stopChannel = async (channelId: ChannelId, accountId?: string) => {
|
||||
const plugin = getChannelPlugin(channelId);
|
||||
const store = getStore(channelId);
|
||||
@@ -333,6 +375,8 @@ export function createChannelManager(opts: ChannelManagerOptions): ChannelManage
|
||||
const configured = described?.configured;
|
||||
const current = store.runtimes.get(id) ?? cloneDefaultRuntime(plugin.id, id);
|
||||
const next = { ...current, accountId: id };
|
||||
next.enabled = enabled;
|
||||
next.configured = typeof configured === "boolean" ? configured : (next.configured ?? true);
|
||||
if (!next.running) {
|
||||
if (!enabled) {
|
||||
next.lastError ??= plugin.config.disabledReason?.(account, cfg) ?? "disabled";
|
||||
|
||||
Reference in New Issue
Block a user