fix(telegram): include replied media files in reply context (#28488)

* fix(telegram): include replied media files in reply context

* fix(telegram): keep reply media fields nullable

* perf(telegram): defer reply-media fetch to debounce flush

* fix(telegram): gate and preserve reply media attachments

* fix(telegram): preserve cached-sticker reply media context

* fix: update changelog for telegram reply-media context fixes (#28488) (thanks @obviyus)
This commit is contained in:
Ayaan Zaidi
2026-02-27 15:16:21 +05:30
committed by GitHub
parent a7929abad8
commit aae90cb036
10 changed files with 376 additions and 30 deletions

View File

@@ -37,6 +37,7 @@ Docs: https://docs.openclaw.ai
- Typing/Cross-channel leakage: unify run-scoped typing suppression for cross-channel/internal-webchat routes, preserve current inbound origin as embedded run message channel context, harden shared typing keepalive with consecutive-failure circuit breaker edge-case handling, and enforce dispatcher completion/idle waits in extension dispatcher callsites (Feishu, Matrix, Mattermost, MSTeams) so typing indicators always clean up on success/error paths. Related: #27647, #27493, #27598. Supersedes/replaces draft PRs: #27640, #27593, #27540.
- Telegram/sendChatAction 401 handling: add bounded exponential backoff + temporary local typing suppression after repeated unauthorized failures to stop unbounded `sendChatAction` retry loops that can trigger Telegram abuse enforcement and bot deletion. (#27415) Thanks @widingmarcus-cyber.
- Telegram/Webhook startup: clarify webhook config guidance, allow `channels.telegram.webhookPort: 0` for ephemeral listener binding, and log both the local listener URL and Telegram-advertised webhook URL with the bound port. (#25732) Thanks @huntharo.
- Telegram/Reply media context: include replied media files in inbound context when replying to media, defer reply-media downloads to debounce flush, gate reply-media fetch behind DM authorization, and preserve replied media when non-vision sticker fallback runs (including cached-sticker paths). (#28488) Thanks @obviyus.
- Config/Doctor allowlist safety: reject `dmPolicy: "allowlist"` configs with empty `allowFrom`, add Telegram account-level inheritance-aware validation, and teach `openclaw doctor --fix` to restore missing `allowFrom` entries from pairing-store files when present, preventing silent DM drops after upgrades. (#27936) Thanks @widingmarcus-cyber.
- Browser/Chrome extension handshake: bind relay WS message handling before `onopen` and add non-blocking `connect.challenge` response handling for gateway-style handshake frames, avoiding stuck `…` badge states when challenge frames arrive immediately on connect. Landed from contributor PR #22571 by @pandego. (#22553)
- Browser/Extension relay init: dedupe concurrent same-port relay startup with shared in-flight initialization promises so callers await one startup lifecycle and receive consistent success/failure results. Landed from contributor PR #21277 by @HOYALIM. (Related #20688)

View File

@@ -89,6 +89,8 @@ export type MsgContext = {
MediaTypes?: string[];
/** Telegram sticker metadata (emoji, set name, file IDs, cached description). */
Sticker?: StickerMetadata;
/** True when current-turn sticker media is present in MediaPaths (false for cached-description path). */
StickerMediaIncluded?: boolean;
OutputDir?: string;
OutputBase?: string;
/** Remote host for SCP when media lives on a different machine (e.g., openclaw@192.168.64.3). */

View File

@@ -81,6 +81,24 @@ function hasInboundMedia(msg: Message): boolean {
);
}
/**
 * Reports whether the message being replied to carries inbound media.
 *
 * The reply target is `reply_to_message` when present, otherwise the
 * `external_reply` field (read through a widened type because it is not part
 * of the `Message` type used here).
 */
function hasReplyTargetMedia(msg: Message): boolean {
  const widened = msg as Message & { external_reply?: Message };
  const target = msg.reply_to_message ?? widened.external_reply;
  if (!target) {
    return false;
  }
  return Boolean(hasInboundMedia(target));
}
/**
 * Picks the file id of the first attachment found on a message.
 *
 * Precedence: sticker, last entry of `photo`, video, video note, document,
 * audio, then voice. Returns `undefined` when none of them is present.
 */
function resolveInboundMediaFileId(msg: Message): string | undefined {
  // Only the final element of the photo array is considered.
  const lastPhotoSize = msg.photo?.[msg.photo.length - 1];
  const higherPriority = [
    msg.sticker,
    lastPhotoSize,
    msg.video,
    msg.video_note,
    msg.document,
    msg.audio,
  ];
  for (const attachment of higherPriority) {
    const fileId = attachment?.file_id;
    if (fileId != null) {
      return fileId;
    }
  }
  // Voice is the last fallback; its value is returned as-is.
  return msg.voice?.file_id;
}
export const registerTelegramHandlers = ({
cfg,
accountId,
@@ -198,7 +216,8 @@ export const registerTelegramHandlers = ({
return;
}
if (entries.length === 1) {
await processMessage(last.ctx, last.allMedia, last.storeAllowFrom);
const replyMedia = await resolveReplyMediaForMessage(last.ctx, last.msg);
await processMessage(last.ctx, last.allMedia, last.storeAllowFrom, undefined, replyMedia);
return;
}
const combinedText = entries
@@ -217,11 +236,14 @@ export const registerTelegramHandlers = ({
date: last.msg.date ?? first.msg.date,
});
const messageIdOverride = last.msg.message_id ? String(last.msg.message_id) : undefined;
const syntheticCtx = buildSyntheticContext(baseCtx, syntheticMessage);
const replyMedia = await resolveReplyMediaForMessage(baseCtx, syntheticMessage);
await processMessage(
buildSyntheticContext(baseCtx, syntheticMessage),
syntheticCtx,
combinedMedia,
first.storeAllowFrom,
messageIdOverride ? { messageIdOverride } : undefined,
replyMedia,
);
},
onError: (err) => {
@@ -336,7 +358,8 @@ export const registerTelegramHandlers = ({
}
const storeAllowFrom = await loadStoreAllowFrom();
await processMessage(primaryEntry.ctx, allMedia, storeAllowFrom);
const replyMedia = await resolveReplyMediaForMessage(primaryEntry.ctx, primaryEntry.msg);
await processMessage(primaryEntry.ctx, allMedia, storeAllowFrom, undefined, replyMedia);
} catch (err) {
runtime.error?.(danger(`media group handler failed: ${String(err)}`));
}
@@ -398,6 +421,45 @@ export const registerTelegramHandlers = ({
const loadStoreAllowFrom = async () =>
readChannelAllowFromStore("telegram", process.env, accountId).catch(() => []);
/**
 * Resolves the media attached to the message that `msg` replies to.
 *
 * Only `msg.reply_to_message` is consulted here (`external_reply` is not).
 * Returns a single-element list on success and an empty list when there is no
 * reply target, the target has no media, no file id can be resolved, the
 * media resolver yields nothing, or the resolver throws (the failure is
 * logged as a warning rather than propagated).
 */
const resolveReplyMediaForMessage = async (
  ctx: TelegramContext,
  msg: Message,
): Promise<TelegramMediaRef[]> => {
  const target = msg.reply_to_message;
  if (!target) {
    return [];
  }
  if (!hasInboundMedia(target)) {
    return [];
  }
  const fileId = resolveInboundMediaFileId(target);
  if (!fileId) {
    return [];
  }
  try {
    // Synthetic context: the replied message plus a getFile bound to its file id.
    const replyCtx = {
      message: target,
      me: ctx.me,
      getFile: async () => await bot.api.getFile(fileId),
    };
    const resolved = await resolveMedia(replyCtx, mediaMaxBytes, opts.token, opts.proxyFetch);
    if (!resolved) {
      return [];
    }
    const { path, contentType, stickerMetadata } = resolved;
    return [{ path, contentType, stickerMetadata }];
  } catch (err) {
    // Best effort: a failed fetch must not block handling of the message itself.
    logger.warn({ chatId: msg.chat.id, error: String(err) }, "reply media fetch failed");
    return [];
  }
};
const isAllowlistAuthorized = (
allow: NormalizedAllowFrom,
senderId: string,
@@ -1301,7 +1363,7 @@ export const registerTelegramHandlers = ({
return;
}
if (!event.isGroup && hasInboundMedia(event.msg)) {
if (!event.isGroup && (hasInboundMedia(event.msg) || hasReplyTargetMedia(event.msg))) {
const dmAuthorized = await enforceTelegramDmAccess({
isGroup: event.isGroup,
dmPolicy,

View File

@@ -101,6 +101,7 @@ type ResolveGroupRequireMention = (chatId: string | number) => boolean;
export type BuildTelegramMessageContextParams = {
primaryCtx: TelegramContext;
allMedia: TelegramMediaRef[];
replyMedia?: TelegramMediaRef[];
storeAllowFrom: string[];
options?: TelegramMessageContextOptions;
bot: Bot;
@@ -143,6 +144,7 @@ async function resolveStickerVisionSupport(params: {
export const buildTelegramMessageContext = async ({
primaryCtx,
allMedia,
replyMedia = [],
storeAllowFrom,
options,
bot,
@@ -640,6 +642,8 @@ export const buildTelegramMessageContext = async ({
timestamp: entry.timestamp,
}))
: undefined;
const currentMediaForContext = stickerCacheHit ? [] : allMedia;
const contextMedia = [...currentMediaForContext, ...replyMedia];
const ctxPayload = finalizeInboundContext({
Body: combinedBody,
// Agent prompt should be the raw user text only; metadata/context is provided via system prompt.
@@ -685,26 +689,18 @@ export const buildTelegramMessageContext = async ({
ForwardedDate: forwardOrigin?.date ? forwardOrigin.date * 1000 : undefined,
Timestamp: msg.date ? msg.date * 1000 : undefined,
WasMentioned: isGroup ? effectiveWasMentioned : undefined,
// Filter out cached stickers from media - their description is already in the message body
MediaPath: stickerCacheHit ? undefined : allMedia[0]?.path,
MediaType: stickerCacheHit ? undefined : allMedia[0]?.contentType,
MediaUrl: stickerCacheHit ? undefined : allMedia[0]?.path,
MediaPaths: stickerCacheHit
? undefined
: allMedia.length > 0
? allMedia.map((m) => m.path)
: undefined,
MediaUrls: stickerCacheHit
? undefined
: allMedia.length > 0
? allMedia.map((m) => m.path)
: undefined,
MediaTypes: stickerCacheHit
? undefined
: allMedia.length > 0
? (allMedia.map((m) => m.contentType).filter(Boolean) as string[])
// Filter out cached stickers from current-message media; reply media is still valid context.
MediaPath: contextMedia.length > 0 ? contextMedia[0]?.path : undefined,
MediaType: contextMedia.length > 0 ? contextMedia[0]?.contentType : undefined,
MediaUrl: contextMedia.length > 0 ? contextMedia[0]?.path : undefined,
MediaPaths: contextMedia.length > 0 ? contextMedia.map((m) => m.path) : undefined,
MediaUrls: contextMedia.length > 0 ? contextMedia.map((m) => m.path) : undefined,
MediaTypes:
contextMedia.length > 0
? (contextMedia.map((m) => m.contentType).filter(Boolean) as string[])
: undefined,
Sticker: allMedia[0]?.stickerMetadata,
StickerMediaIncluded: allMedia[0]?.stickerMetadata ? !stickerCacheHit : undefined,
...(locationData ? toLocationContext(locationData) : undefined),
CommandAuthorized: commandAuthorized,
// For groups: use resolved forum topic id; for DMs: use raw messageThreadId

View File

@@ -0,0 +1,64 @@
import { describe, expect, it } from "vitest";
import { pruneStickerMediaFromContext } from "./bot-message-dispatch.js";
describe("pruneStickerMediaFromContext", () => {
  type MediaFields = {
    MediaPath?: string;
    MediaUrl?: string;
    MediaType?: string;
    MediaPaths?: string[];
    MediaUrls?: string[];
    MediaTypes?: string[];
  };
  type Attachment = { path: string; type: string };

  const sticker: Attachment = { path: "/tmp/sticker.webp", type: "image/webp" };
  const replied: Attachment = { path: "/tmp/replied.jpg", type: "image/jpeg" };

  // Builds a fresh payload whose singular fields mirror the first attachment.
  const contextFor = (...attachments: Attachment[]): MediaFields => {
    const paths = attachments.map((item) => item.path);
    const types = attachments.map((item) => item.type);
    return {
      MediaPath: paths[0],
      MediaUrl: paths[0],
      MediaType: types[0],
      MediaPaths: paths,
      MediaUrls: [...paths],
      MediaTypes: types,
    };
  };

  // Asserts that the payload carries exactly the replied image and nothing else.
  const expectOnlyRepliedMedia = (ctx: MediaFields) => {
    expect(ctx.MediaPath).toBe("/tmp/replied.jpg");
    expect(ctx.MediaUrl).toBe("/tmp/replied.jpg");
    expect(ctx.MediaType).toBe("image/jpeg");
    expect(ctx.MediaPaths).toEqual(["/tmp/replied.jpg"]);
    expect(ctx.MediaUrls).toEqual(["/tmp/replied.jpg"]);
    expect(ctx.MediaTypes).toEqual(["image/jpeg"]);
  };

  it("preserves appended reply media while removing primary sticker media", () => {
    const ctx = contextFor(sticker, replied);
    pruneStickerMediaFromContext(ctx);
    expectOnlyRepliedMedia(ctx);
  });

  it("clears media fields when sticker is the only media", () => {
    const ctx = contextFor(sticker);
    pruneStickerMediaFromContext(ctx);
    expect(ctx.MediaPath).toBeUndefined();
    expect(ctx.MediaUrl).toBeUndefined();
    expect(ctx.MediaType).toBeUndefined();
    expect(ctx.MediaPaths).toBeUndefined();
    expect(ctx.MediaUrls).toBeUndefined();
    expect(ctx.MediaTypes).toBeUndefined();
  });

  it("does not prune when sticker media is already omitted from context", () => {
    const ctx = contextFor(replied);
    pruneStickerMediaFromContext(ctx, { stickerMediaIncluded: false });
    expectOnlyRepliedMedia(ctx);
  });
});

View File

@@ -60,6 +60,37 @@ async function resolveStickerVisionSupport(cfg: OpenClawConfig, agentId: string)
}
}
/**
 * Removes the first media entry from a context payload, in place.
 *
 * The first element of `MediaPaths`, `MediaUrls` and `MediaTypes` is dropped;
 * a list that ends up empty (or was not an array) becomes `undefined`. The
 * singular `MediaPath` / `MediaUrl` / `MediaType` fields are then re-derived
 * from the head of the remaining lists, with `MediaUrl` falling back to the
 * new `MediaPath`.
 *
 * @param ctxPayload Payload whose media fields are mutated.
 * @param opts When `stickerMediaIncluded` is exactly `false`, the payload is
 *   left untouched. Any other value (including omission) prunes.
 */
export function pruneStickerMediaFromContext(
  ctxPayload: {
    MediaPath?: string;
    MediaUrl?: string;
    MediaType?: string;
    MediaPaths?: string[];
    MediaUrls?: string[];
    MediaTypes?: string[];
  },
  opts?: { stickerMediaIncluded?: boolean },
) {
  if (opts?.stickerMediaIncluded === false) {
    return;
  }

  // Drops the head of a list; yields undefined for non-arrays and empty tails.
  const withoutHead = (list: string[] | undefined): string[] | undefined => {
    if (!Array.isArray(list)) {
      return undefined;
    }
    const tail = list.slice(1);
    return tail.length > 0 ? tail : undefined;
  };

  // Compute all three tails before writing anything back.
  const remainingPaths = withoutHead(ctxPayload.MediaPaths);
  const remainingUrls = withoutHead(ctxPayload.MediaUrls);
  const remainingTypes = withoutHead(ctxPayload.MediaTypes);

  ctxPayload.MediaPaths = remainingPaths;
  ctxPayload.MediaUrls = remainingUrls;
  ctxPayload.MediaTypes = remainingTypes;

  const headPath = remainingPaths?.[0];
  ctxPayload.MediaPath = headPath;
  ctxPayload.MediaUrl = remainingUrls?.[0] ?? headPath;
  ctxPayload.MediaType = remainingTypes?.[0];
}
type DispatchTelegramMessageParams = {
context: TelegramMessageContext;
bot: Bot;
@@ -311,13 +342,10 @@ export const dispatchTelegramMessage = async ({
// Update context to use description instead of image
ctxPayload.Body = formattedDesc;
ctxPayload.BodyForAgent = formattedDesc;
// Clear media paths so native vision doesn't process the image again
ctxPayload.MediaPath = undefined;
ctxPayload.MediaType = undefined;
ctxPayload.MediaUrl = undefined;
ctxPayload.MediaPaths = undefined;
ctxPayload.MediaUrls = undefined;
ctxPayload.MediaTypes = undefined;
// Drop only the sticker attachment; keep replied media context if present.
pruneStickerMediaFromContext(ctxPayload, {
stickerMediaIncluded: ctxPayload.StickerMediaIncluded,
});
}
// Cache the description for future encounters

View File

@@ -52,10 +52,12 @@ export const createTelegramMessageProcessor = (deps: TelegramMessageProcessorDep
allMedia: TelegramMediaRef[],
storeAllowFrom: string[],
options?: { messageIdOverride?: string; forceWasMentioned?: boolean },
replyMedia?: TelegramMediaRef[],
) => {
const context = await buildTelegramMessageContext({
primaryCtx,
allMedia,
replyMedia,
storeAllowFrom,
options,
bot,

View File

@@ -42,6 +42,7 @@ import { resolveThreadSessionKeys } from "../routing/session-key.js";
import type { RuntimeEnv } from "../runtime.js";
import { withTelegramApiErrorLogging } from "./api-logging.js";
import { isSenderAllowed, normalizeDmAllowFromWithStore } from "./bot-access.js";
import type { TelegramMediaRef } from "./bot-message-context.js";
import {
buildCappedTelegramMenuCommands,
buildPluginTelegramMenuCommands,
@@ -101,12 +102,13 @@ export type RegisterTelegramHandlerParams = {
shouldSkipUpdate: (ctx: TelegramUpdateKeyContext) => boolean;
processMessage: (
ctx: TelegramContext,
allMedia: Array<{ path: string; contentType?: string }>,
allMedia: TelegramMediaRef[],
storeAllowFrom: string[],
options?: {
messageIdOverride?: string;
forceWasMentioned?: boolean;
},
replyMedia?: TelegramMediaRef[],
) => Promise<void>;
logger: ReturnType<typeof getChildLogger>;
};

View File

@@ -120,6 +120,7 @@ export const getMeSpy: AnyAsyncMock = vi.fn(async () => ({
export const sendMessageSpy: AnyAsyncMock = vi.fn(async () => ({ message_id: 77 }));
export const sendAnimationSpy: AnyAsyncMock = vi.fn(async () => ({ message_id: 78 }));
export const sendPhotoSpy: AnyAsyncMock = vi.fn(async () => ({ message_id: 79 }));
export const getFileSpy: AnyAsyncMock = vi.fn(async () => ({ file_path: "media/file.jpg" }));
type ApiStub = {
config: { use: (arg: unknown) => void };
@@ -132,6 +133,7 @@ type ApiStub = {
sendMessage: typeof sendMessageSpy;
sendAnimation: typeof sendAnimationSpy;
sendPhoto: typeof sendPhotoSpy;
getFile: typeof getFileSpy;
};
const apiStub: ApiStub = {
@@ -145,6 +147,7 @@ const apiStub: ApiStub = {
sendMessage: sendMessageSpy,
sendAnimation: sendAnimationSpy,
sendPhoto: sendPhotoSpy,
getFile: getFileSpy,
};
vi.mock("grammy", () => ({
@@ -290,6 +293,8 @@ beforeEach(() => {
sendPhotoSpy.mockResolvedValue({ message_id: 79 });
sendMessageSpy.mockReset();
sendMessageSpy.mockResolvedValue({ message_id: 77 });
getFileSpy.mockReset();
getFileSpy.mockResolvedValue({ file_path: "media/file.jpg" });
setMessageReactionSpy.mockReset();
setMessageReactionSpy.mockResolvedValue(undefined);

View File

@@ -11,6 +11,7 @@ import {
commandSpy,
editMessageTextSpy,
enqueueSystemEventSpy,
getFileSpy,
getLoadConfigMock,
getReadChannelAllowFromStoreMock,
getOnHandler,
@@ -404,6 +405,189 @@ describe("createTelegramBot", () => {
expect(payload.ReplyToSender).toBe("Ada");
});
// A text-only DM that replies to a photo should surface that photo as the
// single media entry of the inbound payload.
it("includes replied image media in inbound context for text replies", async () => {
onSpy.mockClear();
replySpy.mockClear();
getFileSpy.mockClear();
// Stub global fetch so any download returns a tiny body labelled image/png
// (the four bytes are the start of the PNG signature).
const fetchSpy = vi.spyOn(globalThis, "fetch").mockImplementation(
async () =>
new Response(new Uint8Array([0x89, 0x50, 0x4e, 0x47]), {
status: 200,
headers: { "content-type": "image/png" },
}),
);
try {
createTelegramBot({ token: "tok" });
const handler = getOnHandler("message") as (ctx: Record<string, unknown>) => Promise<void>;
// The current message has text only; the photo lives on reply_to_message.
await handler({
message: {
chat: { id: 7, type: "private" },
text: "what is in this image?",
date: 1736380800,
reply_to_message: {
message_id: 9001,
photo: [{ file_id: "reply-photo-1" }],
from: { first_name: "Ada" },
},
},
me: { username: "openclaw_bot" },
// Context-level getFile returns nothing useful; the final assertion checks
// that the bot API getFile spy was called with the replied photo's id.
getFile: async () => ({}),
});
expect(replySpy).toHaveBeenCalledTimes(1);
const payload = replySpy.mock.calls[0][0] as {
MediaPath?: string;
MediaPaths?: string[];
ReplyToBody?: string;
};
expect(payload.ReplyToBody).toBe("<media:image>");
// Exactly one media entry, and the singular field mirrors it.
expect(payload.MediaPaths).toHaveLength(1);
expect(payload.MediaPath).toBe(payload.MediaPaths?.[0]);
expect(getFileSpy).toHaveBeenCalledWith("reply-photo-1");
} finally {
// Always restore the real fetch so later tests are unaffected.
fetchSpy.mockRestore();
}
});
// A DM from a sender who is not on any allowlist must not trigger a download
// of the replied media, and must not reach the reply pipeline.
it("does not fetch reply media for unauthorized DM replies", async () => {
onSpy.mockClear();
replySpy.mockClear();
getFileSpy.mockClear();
sendMessageSpy.mockClear();
// Empty pairing store plus empty configured allowFrom under the "pairing" policy.
readChannelAllowFromStore.mockResolvedValue([]);
loadConfig.mockReturnValue({
channels: {
telegram: {
dmPolicy: "pairing",
allowFrom: [],
},
},
});
createTelegramBot({ token: "tok" });
const handler = getOnHandler("message") as (ctx: Record<string, unknown>) => Promise<void>;
// Text-only message from sender 999 replying to a message that carries a photo.
await handler({
message: {
chat: { id: 7, type: "private" },
text: "hey",
date: 1736380800,
from: { id: 999, first_name: "Eve" },
reply_to_message: {
message_id: 9001,
photo: [{ file_id: "reply-photo-1" }],
from: { first_name: "Ada" },
},
},
me: { username: "openclaw_bot" },
getFile: async () => ({}),
});
// No file lookup and no agent reply; exactly one outbound message is sent
// (presumably the pairing prompt — confirm against the DM access handler).
expect(getFileSpy).not.toHaveBeenCalled();
expect(replySpy).not.toHaveBeenCalled();
expect(sendMessageSpy).toHaveBeenCalledTimes(1);
});
// Two quick messages replying to the same photo should be held by the inbound
// debouncer; the replied media must only be fetched once, when the debounce
// timer fires.
it("defers reply media download until debounce flush", async () => {
// Distinctive delay so the debounce timer can be picked out of all setTimeout calls.
const DEBOUNCE_MS = 4321;
onSpy.mockClear();
replySpy.mockClear();
getFileSpy.mockClear();
loadConfig.mockReturnValue({
agents: {
defaults: {
envelopeTimezone: "utc",
},
},
messages: {
inbound: {
debounceMs: DEBOUNCE_MS,
},
},
channels: {
telegram: {
dmPolicy: "open",
allowFrom: ["*"],
},
},
});
// Stub global fetch so any download returns a tiny body labelled image/png.
const fetchSpy = vi.spyOn(globalThis, "fetch").mockImplementation(
async () =>
new Response(new Uint8Array([0x89, 0x50, 0x4e, 0x47]), {
status: 200,
headers: { "content-type": "image/png" },
}),
);
// Spy (without replacing) so scheduled callbacks and timer handles can be inspected.
const setTimeoutSpy = vi.spyOn(globalThis, "setTimeout");
try {
createTelegramBot({ token: "tok" });
const handler = getOnHandler("message") as (ctx: Record<string, unknown>) => Promise<void>;
// First message of the burst.
await handler({
message: {
chat: { id: 7, type: "private" },
text: "first",
date: 1736380800,
message_id: 101,
from: { id: 42, first_name: "Ada" },
reply_to_message: {
message_id: 9001,
photo: [{ file_id: "reply-photo-1" }],
from: { first_name: "Ada" },
},
},
me: { username: "openclaw_bot" },
getFile: async () => ({}),
});
// Second message from the same sender and chat, replying to the same photo.
await handler({
message: {
chat: { id: 7, type: "private" },
text: "second",
date: 1736380801,
message_id: 102,
from: { id: 42, first_name: "Ada" },
reply_to_message: {
message_id: 9001,
photo: [{ file_id: "reply-photo-1" }],
from: { first_name: "Ada" },
},
},
me: { username: "openclaw_bot" },
getFile: async () => ({}),
});
// Nothing has been dispatched or downloaded while the debounce is pending.
expect(replySpy).not.toHaveBeenCalled();
expect(getFileSpy).not.toHaveBeenCalled();
// Locate the most recent setTimeout call scheduled with the debounce delay.
const flushTimerCallIndex = setTimeoutSpy.mock.calls.findLastIndex(
(call) => call[1] === DEBOUNCE_MS,
);
const flushTimer =
flushTimerCallIndex >= 0
? (setTimeoutSpy.mock.calls[flushTimerCallIndex]?.[0] as (() => unknown) | undefined)
: undefined;
// Cancel the real timer so the callback only runs via the manual call below.
if (flushTimerCallIndex >= 0) {
clearTimeout(
setTimeoutSpy.mock.results[flushTimerCallIndex]?.value as ReturnType<typeof setTimeout>,
);
}
expect(flushTimer).toBeTypeOf("function");
// Trigger the flush by hand instead of waiting DEBOUNCE_MS.
await flushTimer?.();
await vi.waitFor(() => {
expect(replySpy).toHaveBeenCalledTimes(1);
});
// One combined dispatch, one file lookup for the replied photo.
expect(getFileSpy).toHaveBeenCalledTimes(1);
expect(getFileSpy).toHaveBeenCalledWith("reply-photo-1");
} finally {
setTimeoutSpy.mockRestore();
fetchSpy.mockRestore();
}
});
it("handles quote-only replies without reply metadata", async () => {
onSpy.mockClear();
sendMessageSpy.mockClear();