refactor(telegram): simplify polling restart flow

This commit is contained in:
Peter Steinberger
2026-02-26 03:32:36 +01:00
parent 069bbf9741
commit b786d11fea
2 changed files with 184 additions and 125 deletions

View File

@@ -45,9 +45,8 @@ export function createTelegramRunnerOptions(cfg: OpenClawConfig): RunOptions<unk
},
// Suppress grammY getUpdates stack traces; we log concise errors ourselves.
silent: true,
// Retry transient failures before surfacing errors. Use a generous
// window so the runner survives prolonged outages (e.g. scheduled
// internet downtime) without the outer loop needing to restart it.
// Keep grammY retrying for a long outage window. If polling still
// stops, the outer monitor loop restarts it with backoff.
maxRetryTime: 60 * 60 * 1000,
retryInterval: "exponential",
},
@@ -61,6 +60,8 @@ const TELEGRAM_POLL_RESTART_POLICY = {
jitter: 0.25,
};
// Bot instance type, derived from createTelegramBot's return type so the
// helpers below can name it without redeclaring its shape.
type TelegramBot = ReturnType<typeof createTelegramBot>;
const isGetUpdatesConflict = (err: unknown) => {
if (!err || typeof err !== "object") {
return false;
@@ -188,21 +189,11 @@ export async function monitorTelegramProvider(opts: MonitorTelegramOpts = {}) {
let restartAttempts = 0;
let webhookCleared = false;
const runnerOptions = createTelegramRunnerOptions(cfg);
const waitBeforeRetryOnRecoverableSetupError = async (
err: unknown,
logPrefix: string,
): Promise<boolean> => {
if (opts.abortSignal?.aborted) {
return false;
}
if (!isRecoverableTelegramNetworkError(err, { context: "unknown" })) {
throw err;
}
const waitBeforeRestart = async (buildLine: (delay: string) => string): Promise<boolean> => {
restartAttempts += 1;
const delayMs = computeBackoff(TELEGRAM_POLL_RESTART_POLICY, restartAttempts);
(opts.runtime?.error ?? console.error)(
`${logPrefix}: ${formatErrorMessage(err)}; retrying in ${formatDurationPrecise(delayMs)}.`,
);
const delay = formatDurationPrecise(delayMs);
log(buildLine(delay));
try {
await sleepWithAbort(delayMs, opts.abortSignal);
} catch (sleepErr) {
@@ -214,10 +205,24 @@ export async function monitorTelegramProvider(opts: MonitorTelegramOpts = {}) {
return true;
};
while (!opts.abortSignal?.aborted) {
let bot;
/**
 * Decides whether a failed setup step should be retried and, if so, waits
 * out the restart backoff first.
 *
 * @param err - The error raised by the setup step.
 * @param logPrefix - Text placed at the start of the retry log line.
 * @returns `false` straight away when the abort signal has already fired;
 *   otherwise whatever `waitBeforeRestart` resolves to.
 * @throws Rethrows `err` unchanged when `isRecoverableTelegramNetworkError`
 *   does not classify it as recoverable.
 */
const waitBeforeRetryOnRecoverableSetupError = async (
  err: unknown,
  logPrefix: string,
): Promise<boolean> => {
  // Abort wins over everything else: no classification, no waiting.
  if (opts.abortSignal?.aborted) {
    return false;
  }

  const recoverable = isRecoverableTelegramNetworkError(err, { context: "unknown" });
  if (!recoverable) {
    throw err;
  }

  // The line is built lazily: waitBeforeRestart supplies the formatted delay.
  const describeRetry = (delay: string): string =>
    `${logPrefix}: ${formatErrorMessage(err)}; retrying in ${delay}.`;
  return waitBeforeRestart(describeRetry);
};
const createPollingBot = async (): Promise<TelegramBot | undefined> => {
try {
bot = createTelegramBot({
return createTelegramBot({
token,
runtime: opts.runtime,
proxyFetch,
@@ -234,31 +239,34 @@ export async function monitorTelegramProvider(opts: MonitorTelegramOpts = {}) {
"Telegram setup network error",
);
if (!shouldRetry) {
return;
return undefined;
}
continue;
return undefined;
}
};
if (!webhookCleared) {
try {
await withTelegramApiErrorLogging({
operation: "deleteWebhook",
runtime: opts.runtime,
fn: () => bot.api.deleteWebhook({ drop_pending_updates: false }),
});
webhookCleared = true;
} catch (err) {
const shouldRetry = await waitBeforeRetryOnRecoverableSetupError(
err,
"Telegram webhook cleanup failed",
);
if (!shouldRetry) {
return;
}
continue;
}
/**
 * Makes sure the `deleteWebhook` call has succeeded once before polling.
 *
 * The `webhookCleared` flag is set after the first successful call, so
 * later invocations return "ready" without touching the API again.
 *
 * @param bot - Bot whose API client is used for the `deleteWebhook` call.
 * @returns "ready" when the webhook is (or already was) cleared, "retry"
 *   when the call failed and `waitBeforeRetryOnRecoverableSetupError`
 *   resolved `true`, "exit" when it resolved `false`.
 * @throws Whatever `waitBeforeRetryOnRecoverableSetupError` rethrows.
 */
const ensureWebhookCleanup = async (bot: TelegramBot): Promise<"ready" | "retry" | "exit"> => {
  if (!webhookCleared) {
    try {
      await withTelegramApiErrorLogging({
        operation: "deleteWebhook",
        runtime: opts.runtime,
        fn: () => bot.api.deleteWebhook({ drop_pending_updates: false }),
      });
    } catch (cleanupErr) {
      const retryAllowed = await waitBeforeRetryOnRecoverableSetupError(
        cleanupErr,
        "Telegram webhook cleanup failed",
      );
      if (retryAllowed) {
        return "retry";
      }
      return "exit";
    }
    // Only reached when deleteWebhook completed without throwing.
    webhookCleared = true;
  }
  return "ready";
};
const runPollingCycle = async (bot: TelegramBot): Promise<"continue" | "exit"> => {
const runner = run(bot, runnerOptions);
activeRunner = runner;
let stopPromise: Promise<void> | undefined;
@@ -280,23 +288,16 @@ export async function monitorTelegramProvider(opts: MonitorTelegramOpts = {}) {
// runner.task() returns a promise that resolves when the runner stops
await runner.task();
if (opts.abortSignal?.aborted) {
return;
return "exit";
}
// The runner stopped on its own. This can happen when grammY's
// maxRetryTime is exceeded (e.g. prolonged network outage).
// Instead of exiting permanently, restart with backoff so polling
// recovers once connectivity is restored.
restartAttempts += 1;
const delayMs = computeBackoff(TELEGRAM_POLL_RESTART_POLICY, restartAttempts);
const reason = forceRestarted
? "unhandled network error"
: "runner stopped (maxRetryTime exceeded or graceful stop)";
forceRestarted = false;
log(
`Telegram polling runner stopped (${reason}); restarting in ${formatDurationPrecise(delayMs)}.`,
const shouldRestart = await waitBeforeRestart(
(delay) => `Telegram polling runner stopped (${reason}); restarting in ${delay}.`,
);
await sleepWithAbort(delayMs, opts.abortSignal);
continue;
return shouldRestart ? "continue" : "exit";
} catch (err) {
forceRestarted = false;
if (opts.abortSignal?.aborted) {
@@ -307,25 +308,36 @@ export async function monitorTelegramProvider(opts: MonitorTelegramOpts = {}) {
if (!isConflict && !isRecoverable) {
throw err;
}
restartAttempts += 1;
const delayMs = computeBackoff(TELEGRAM_POLL_RESTART_POLICY, restartAttempts);
const reason = isConflict ? "getUpdates conflict" : "network error";
const errMsg = formatErrorMessage(err);
(opts.runtime?.error ?? console.error)(
`Telegram ${reason}: ${errMsg}; retrying in ${formatDurationPrecise(delayMs)}.`,
const shouldRestart = await waitBeforeRestart(
(delay) => `Telegram ${reason}: ${errMsg}; retrying in ${delay}.`,
);
try {
await sleepWithAbort(delayMs, opts.abortSignal);
} catch (sleepErr) {
if (opts.abortSignal?.aborted) {
return;
}
throw sleepErr;
}
return shouldRestart ? "continue" : "exit";
} finally {
opts.abortSignal?.removeEventListener("abort", stopOnAbort);
await stopRunner();
}
};
// Supervisor loop: each pass creates a bot, ensures the webhook is cleared,
// then runs one polling cycle. It keeps going until the abort signal fires
// or one of the steps reports "exit".
while (!opts.abortSignal?.aborted) {
  const pollingBot = await createPollingBot();
  // No bot this round: fall through so the loop condition is re-checked.
  if (pollingBot) {
    const webhookState = await ensureWebhookCleanup(pollingBot);
    if (webhookState === "exit") {
      return;
    }
    // "retry" skips the polling cycle and starts the next pass.
    if (webhookState === "ready") {
      const cycleOutcome = await runPollingCycle(pollingBot);
      if (cycleOutcome === "exit") {
        return;
      }
    }
  }
}
} finally {
unregisterHandler();