feat: detect stale Slack sockets and auto-restart (#30153)

* feat: detect stale Slack sockets and auto-restart

Slack Socket Mode connections can silently stop delivering events while
still appearing connected (health checks pass, WebSocket stays open).
This "half-dead socket" problem causes messages to go unanswered.

This commit adds two layers of protection:

1. **Event liveness tracking**: Every inbound Slack event (messages,
   reactions, member joins/leaves, channel events, pins) now calls
   `setStatus({ lastEventAt, lastInboundAt })` to update the channel
   account snapshot with the timestamp of the last received event.

2. **Health monitor stale socket detection**: The channel health monitor
   now checks `lastEventAt` against a configurable threshold (default
   30 minutes). If a channel has been running longer than the threshold
   and hasn't received any events in that window, it is flagged as
   unhealthy and automatically restarted — the same way disconnected
   or crashed channels are already handled.

The restart reason is logged as "stale-socket" for observability, and
the existing cooldown/rate-limit logic (3 restarts/hour max) prevents
restart storms.

* Slack: gate liveness tracking to accepted events

---------

Co-authored-by: Tak Hoffman <781889+Takhoffman@users.noreply.github.com>
This commit is contained in:
Dennis Rankin
2026-03-01 08:58:21 -08:00
committed by GitHub
parent 43ddb41354
commit a28a4b1b61
16 changed files with 544 additions and 62 deletions

View File

@@ -0,0 +1,116 @@
import { beforeEach, describe, expect, it, vi } from "vitest";
import { createSlackMessageHandler } from "./message-handler.js";
/**
 * Spy that stands in for the inbound debouncer's `enqueue`.
 * It accepts any entry, does nothing with it, and resolves immediately, so
 * tests can assert purely on whether (and how often) it was invoked.
 */
const enqueueMock = vi.fn(async (_queued: unknown): Promise<void> => {
  // Intentionally empty: only the recorded calls matter.
});
/**
 * Spy that stands in for the thread-ts resolver's `resolve`.
 * It resolves to a shallow copy of the `message` it was given, leaving the
 * message contents untouched.
 */
const resolveThreadTsMock = vi.fn(async (entry: { message: Record<string, unknown> }) => {
  const copiedMessage = { ...entry.message };
  return copiedMessage;
});
// Replace the inbound debounce module: the debounce window is a fixed 10 and
// every debouncer simply forwards `enqueue` to `enqueueMock`.
vi.mock("../../auto-reply/inbound-debounce.js", () => {
  return {
    resolveInboundDebounceMs() {
      return 10;
    },
    createInboundDebouncer() {
      return {
        // Keep the wrapper: `vi.mock` is hoisted, so the spy must be looked up
        // when `enqueue` is called rather than captured when the factory runs.
        enqueue(entry: unknown) {
          return enqueueMock(entry);
        },
      };
    },
  };
});
// Replace the thread resolution module: every resolver created by
// `createSlackThreadTsResolver` forwards `resolve` to `resolveThreadTsMock`.
vi.mock("./thread-resolution.js", () => {
  return {
    createSlackThreadTsResolver() {
      return {
        // Keep the wrapper: `vi.mock` is hoisted, so the spy must be looked up
        // when `resolve` is called rather than captured when the factory runs.
        resolve(entry: { message: Record<string, unknown> }) {
          return resolveThreadTsMock(entry);
        },
      };
    },
  };
});
/**
 * Builds a minimal stand-in for the handler's `ctx` argument.
 *
 * @param overrides Optional replacement for `markMessageSeen`. When it is
 *   omitted (or the override returns `null`/`undefined`), the stub reports
 *   `false` for every message.
 * @returns A stub with empty `cfg`, `app.client` and `runtime` objects and the
 *   account id `"default"`, cast to the handler's `ctx` type.
 */
function createContext(overrides?: {
  markMessageSeen?: (channel: string | undefined, ts: string | undefined) => boolean;
}) {
  // Delegate to the override when one is supplied; otherwise fall back to false.
  const markMessageSeen = (channel: string | undefined, ts: string | undefined) => {
    const overridden = overrides?.markMessageSeen?.(channel, ts);
    return overridden ?? false;
  };

  return {
    cfg: {},
    accountId: "default",
    app: { client: {} },
    runtime: {},
    markMessageSeen,
  } as Parameters<typeof createSlackMessageHandler>[0]["ctx"];
}
// Verifies when the Slack message handler reports an event through `trackEvent`:
// only messages that pass validation and de-duplication should be tracked,
// thread-resolved and enqueued.
describe("createSlackMessageHandler", () => {
  type HandlerParams = Parameters<typeof createSlackMessageHandler>[0];
  type Handler = ReturnType<typeof createSlackMessageHandler>;

  /** Creates a fresh `trackEvent` spy and a handler wired to it. */
  const setup = (overrides?: Parameters<typeof createContext>[0]) => {
    const trackEvent = vi.fn();
    const handler = createSlackMessageHandler({
      ctx: createContext(overrides),
      account: { accountId: "default" } as HandlerParams["account"],
      trackEvent,
    });
    return { trackEvent, handler };
  };

  /** Feeds a raw event to the handler as if it arrived on the message stream. */
  const deliver = (handler: Handler, event: Record<string, unknown>) =>
    handler(event as never, { source: "message" });

  /** A new plain direct-message payload for each call. */
  const helloMessage = () => ({
    type: "message",
    channel: "D1",
    ts: "123.456",
    text: "hello",
  });

  /** Asserts that the event was dropped before any tracking or processing. */
  const expectIgnored = (trackEvent: ReturnType<typeof vi.fn>) => {
    expect(trackEvent).not.toHaveBeenCalled();
    expect(resolveThreadTsMock).not.toHaveBeenCalled();
    expect(enqueueMock).not.toHaveBeenCalled();
  };

  beforeEach(() => {
    // Reset recorded calls so each test's call counts start from zero.
    enqueueMock.mockClear();
    resolveThreadTsMock.mockClear();
  });

  it("does not track invalid non-message events from the message stream", async () => {
    const { trackEvent, handler } = setup();

    await deliver(handler, {
      type: "reaction_added",
      channel: "D1",
      ts: "123.456",
    });

    expectIgnored(trackEvent);
  });

  it("does not track duplicate messages that are already seen", async () => {
    // `markMessageSeen` returning true marks the message as already seen.
    const { trackEvent, handler } = setup({ markMessageSeen: () => true });

    await deliver(handler, helloMessage());

    expectIgnored(trackEvent);
  });

  it("tracks accepted non-duplicate messages", async () => {
    const { trackEvent, handler } = setup();

    await deliver(handler, helloMessage());

    expect(trackEvent).toHaveBeenCalledTimes(1);
    expect(resolveThreadTsMock).toHaveBeenCalledTimes(1);
    expect(enqueueMock).toHaveBeenCalledTimes(1);
  });
});