mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-19 11:08:37 +00:00
fix(gateway): skip stale-socket restarts for Telegram polling (openclaw#38405)
Verified with: pnpm build, pnpm check, pnpm test:macmini. Co-authored-by: ql-wade <262266039+ql-wade@users.noreply.github.com>
This commit is contained in:
@@ -226,6 +226,7 @@ Docs: https://docs.openclaw.ai
|
|||||||
- Feishu/reply delivery reliability: disable block streaming in Feishu reply options so plain-text auto-render replies are no longer silently dropped before final delivery. (#38258) Thanks @xinhuagu.
|
- Feishu/reply delivery reliability: disable block streaming in Feishu reply options so plain-text auto-render replies are no longer silently dropped before final delivery. (#38258) Thanks @xinhuagu.
|
||||||
- Agents/reply MEDIA delivery: normalize local assistant `MEDIA:` paths before block/final delivery, keep media dedupe aligned with message-tool sends, and contain malformed media normalization failures so generated files send reliably instead of falling back to empty responses. (#38572) Thanks @obviyus.
|
- Agents/reply MEDIA delivery: normalize local assistant `MEDIA:` paths before block/final delivery, keep media dedupe aligned with message-tool sends, and contain malformed media normalization failures so generated files send reliably instead of falling back to empty responses. (#38572) Thanks @obviyus.
|
||||||
- Sessions/bootstrap cache rollover invalidation: clear cached workspace bootstrap snapshots whenever an existing `sessionKey` rolls to a new `sessionId` across auto-reply, command, and isolated cron session resolvers, so `AGENTS.md`/`MEMORY.md`/`USER.md` updates are reloaded after daily, idle, or forced session resets instead of staying stale until gateway restart. (#38494) Thanks @LivingInDrm.
|
- Sessions/bootstrap cache rollover invalidation: clear cached workspace bootstrap snapshots whenever an existing `sessionKey` rolls to a new `sessionId` across auto-reply, command, and isolated cron session resolvers, so `AGENTS.md`/`MEMORY.md`/`USER.md` updates are reloaded after daily, idle, or forced session resets instead of staying stale until gateway restart. (#38494) Thanks @LivingInDrm.
|
||||||
|
- Gateway/Telegram polling health monitor: skip stale-socket restarts for Telegram long-polling channels and thread channel identity through shared health evaluation so polling connections are not restarted on the WebSocket stale-socket heuristic. (#38395) Thanks @ql-wade and @Takhoffman.
|
||||||
|
|
||||||
## 2026.3.2
|
## 2026.3.2
|
||||||
|
|
||||||
|
|||||||
@@ -122,6 +122,7 @@ export function startChannelHealthMonitor(deps: ChannelHealthMonitorDeps): Chann
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
const healthPolicy: ChannelHealthPolicy = {
|
const healthPolicy: ChannelHealthPolicy = {
|
||||||
|
channelId,
|
||||||
now,
|
now,
|
||||||
staleEventThresholdMs: timing.staleEventThresholdMs,
|
staleEventThresholdMs: timing.staleEventThresholdMs,
|
||||||
channelConnectGraceMs: timing.channelConnectGraceMs,
|
channelConnectGraceMs: timing.channelConnectGraceMs,
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ describe("evaluateChannelHealth", () => {
|
|||||||
configured: true,
|
configured: true,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
channelId: "discord",
|
||||||
now: 100_000,
|
now: 100_000,
|
||||||
channelConnectGraceMs: 10_000,
|
channelConnectGraceMs: 10_000,
|
||||||
staleEventThresholdMs: 30_000,
|
staleEventThresholdMs: 30_000,
|
||||||
@@ -28,6 +29,7 @@ describe("evaluateChannelHealth", () => {
|
|||||||
lastStartAt: 95_000,
|
lastStartAt: 95_000,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
channelId: "discord",
|
||||||
now: 100_000,
|
now: 100_000,
|
||||||
channelConnectGraceMs: 10_000,
|
channelConnectGraceMs: 10_000,
|
||||||
staleEventThresholdMs: 30_000,
|
staleEventThresholdMs: 30_000,
|
||||||
@@ -48,6 +50,7 @@ describe("evaluateChannelHealth", () => {
|
|||||||
lastRunActivityAt: now - 30_000,
|
lastRunActivityAt: now - 30_000,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
channelId: "discord",
|
||||||
now,
|
now,
|
||||||
channelConnectGraceMs: 10_000,
|
channelConnectGraceMs: 10_000,
|
||||||
staleEventThresholdMs: 30_000,
|
staleEventThresholdMs: 30_000,
|
||||||
@@ -68,6 +71,7 @@ describe("evaluateChannelHealth", () => {
|
|||||||
lastRunActivityAt: now - 26 * 60_000,
|
lastRunActivityAt: now - 26 * 60_000,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
channelId: "discord",
|
||||||
now,
|
now,
|
||||||
channelConnectGraceMs: 10_000,
|
channelConnectGraceMs: 10_000,
|
||||||
staleEventThresholdMs: 30_000,
|
staleEventThresholdMs: 30_000,
|
||||||
@@ -90,6 +94,7 @@ describe("evaluateChannelHealth", () => {
|
|||||||
lastRunActivityAt: now - 31_000,
|
lastRunActivityAt: now - 31_000,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
channelId: "discord",
|
||||||
now,
|
now,
|
||||||
channelConnectGraceMs: 10_000,
|
channelConnectGraceMs: 10_000,
|
||||||
staleEventThresholdMs: 30_000,
|
staleEventThresholdMs: 30_000,
|
||||||
@@ -109,6 +114,7 @@ describe("evaluateChannelHealth", () => {
|
|||||||
lastEventAt: null,
|
lastEventAt: null,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
channelId: "discord",
|
||||||
now: 100_000,
|
now: 100_000,
|
||||||
channelConnectGraceMs: 10_000,
|
channelConnectGraceMs: 10_000,
|
||||||
staleEventThresholdMs: 30_000,
|
staleEventThresholdMs: 30_000,
|
||||||
@@ -116,6 +122,26 @@ describe("evaluateChannelHealth", () => {
|
|||||||
);
|
);
|
||||||
expect(evaluation).toEqual({ healthy: false, reason: "stale-socket" });
|
expect(evaluation).toEqual({ healthy: false, reason: "stale-socket" });
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("skips stale-socket detection for telegram long-polling channels", () => {
|
||||||
|
const evaluation = evaluateChannelHealth(
|
||||||
|
{
|
||||||
|
running: true,
|
||||||
|
connected: true,
|
||||||
|
enabled: true,
|
||||||
|
configured: true,
|
||||||
|
lastStartAt: 0,
|
||||||
|
lastEventAt: null,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
channelId: "telegram",
|
||||||
|
now: 100_000,
|
||||||
|
channelConnectGraceMs: 10_000,
|
||||||
|
staleEventThresholdMs: 30_000,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
expect(evaluation).toEqual({ healthy: true, reason: "healthy" });
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("resolveChannelRestartReason", () => {
|
describe("resolveChannelRestartReason", () => {
|
||||||
|
|||||||
@@ -1,3 +1,5 @@
|
|||||||
|
import type { ChannelId } from "../channels/plugins/types.js";
|
||||||
|
|
||||||
export type ChannelHealthSnapshot = {
|
export type ChannelHealthSnapshot = {
|
||||||
running?: boolean;
|
running?: boolean;
|
||||||
connected?: boolean;
|
connected?: boolean;
|
||||||
@@ -28,6 +30,7 @@ export type ChannelHealthEvaluation = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
export type ChannelHealthPolicy = {
|
export type ChannelHealthPolicy = {
|
||||||
|
channelId: ChannelId;
|
||||||
now: number;
|
now: number;
|
||||||
staleEventThresholdMs: number;
|
staleEventThresholdMs: number;
|
||||||
channelConnectGraceMs: number;
|
channelConnectGraceMs: number;
|
||||||
@@ -97,14 +100,19 @@ export function evaluateChannelHealth(
|
|||||||
if (snapshot.connected === false) {
|
if (snapshot.connected === false) {
|
||||||
return { healthy: false, reason: "disconnected" };
|
return { healthy: false, reason: "disconnected" };
|
||||||
}
|
}
|
||||||
if (snapshot.lastEventAt != null || snapshot.lastStartAt != null) {
|
// Skip stale-socket check for Telegram (long-polling mode). Each polling request
|
||||||
const upSince = snapshot.lastStartAt ?? 0;
|
// acts as a heartbeat, so the half-dead WebSocket scenario this check is designed
|
||||||
const upDuration = policy.now - upSince;
|
// to catch does not apply to Telegram's long-polling architecture.
|
||||||
if (upDuration > policy.staleEventThresholdMs) {
|
if (policy.channelId !== "telegram") {
|
||||||
const lastEvent = snapshot.lastEventAt ?? 0;
|
if (snapshot.lastEventAt != null || snapshot.lastStartAt != null) {
|
||||||
const eventAge = policy.now - lastEvent;
|
const upSince = snapshot.lastStartAt ?? 0;
|
||||||
if (eventAge > policy.staleEventThresholdMs) {
|
const upDuration = policy.now - upSince;
|
||||||
return { healthy: false, reason: "stale-socket" };
|
if (upDuration > policy.staleEventThresholdMs) {
|
||||||
|
const lastEvent = snapshot.lastEventAt ?? 0;
|
||||||
|
const eventAge = policy.now - lastEvent;
|
||||||
|
if (eventAge > policy.staleEventThresholdMs) {
|
||||||
|
return { healthy: false, reason: "stale-socket" };
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -167,6 +167,28 @@ describe("createReadinessChecker", () => {
|
|||||||
vi.useRealTimers();
|
vi.useRealTimers();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("keeps telegram long-polling channels ready without stale-socket classification", () => {
|
||||||
|
vi.useFakeTimers();
|
||||||
|
vi.setSystemTime(new Date("2026-03-06T12:00:00Z"));
|
||||||
|
const startedAt = Date.now() - 31 * 60_000;
|
||||||
|
const manager = createManager(
|
||||||
|
snapshotWith({
|
||||||
|
telegram: {
|
||||||
|
running: true,
|
||||||
|
connected: true,
|
||||||
|
enabled: true,
|
||||||
|
configured: true,
|
||||||
|
lastStartAt: startedAt,
|
||||||
|
lastEventAt: null,
|
||||||
|
},
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
|
||||||
|
const readiness = createReadinessChecker({ channelManager: manager, startedAt });
|
||||||
|
expect(readiness()).toEqual({ ready: true, failing: [], uptimeMs: 1_860_000 });
|
||||||
|
vi.useRealTimers();
|
||||||
|
});
|
||||||
|
|
||||||
it("caches readiness snapshots briefly to keep repeated probes cheap", () => {
|
it("caches readiness snapshots briefly to keep repeated probes cheap", () => {
|
||||||
vi.useFakeTimers();
|
vi.useFakeTimers();
|
||||||
vi.setSystemTime(new Date("2026-03-06T12:00:00Z"));
|
vi.setSystemTime(new Date("2026-03-06T12:00:00Z"));
|
||||||
|
|||||||
@@ -50,11 +50,6 @@ export function createReadinessChecker(deps: {
|
|||||||
|
|
||||||
const snapshot = channelManager.getRuntimeSnapshot();
|
const snapshot = channelManager.getRuntimeSnapshot();
|
||||||
const failing: string[] = [];
|
const failing: string[] = [];
|
||||||
const policy: ChannelHealthPolicy = {
|
|
||||||
now,
|
|
||||||
staleEventThresholdMs: DEFAULT_CHANNEL_STALE_EVENT_THRESHOLD_MS,
|
|
||||||
channelConnectGraceMs: DEFAULT_CHANNEL_CONNECT_GRACE_MS,
|
|
||||||
};
|
|
||||||
|
|
||||||
for (const [channelId, accounts] of Object.entries(snapshot.channelAccounts)) {
|
for (const [channelId, accounts] of Object.entries(snapshot.channelAccounts)) {
|
||||||
if (!accounts) {
|
if (!accounts) {
|
||||||
@@ -64,6 +59,12 @@ export function createReadinessChecker(deps: {
|
|||||||
if (!accountSnapshot) {
|
if (!accountSnapshot) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
const policy: ChannelHealthPolicy = {
|
||||||
|
now,
|
||||||
|
staleEventThresholdMs: DEFAULT_CHANNEL_STALE_EVENT_THRESHOLD_MS,
|
||||||
|
channelConnectGraceMs: DEFAULT_CHANNEL_CONNECT_GRACE_MS,
|
||||||
|
channelId,
|
||||||
|
};
|
||||||
const health = evaluateChannelHealth(accountSnapshot, policy);
|
const health = evaluateChannelHealth(accountSnapshot, policy);
|
||||||
if (!health.healthy && !shouldIgnoreReadinessFailure(accountSnapshot, health)) {
|
if (!health.healthy && !shouldIgnoreReadinessFailure(accountSnapshot, health)) {
|
||||||
failing.push(channelId);
|
failing.push(channelId);
|
||||||
|
|||||||
Reference in New Issue
Block a user