fix(telegram): include replied media files in reply context (#28488)

* fix(telegram): include replied media files in reply context
* fix(telegram): keep reply media fields nullable
* perf(telegram): defer reply-media fetch to debounce flush
* fix(telegram): gate and preserve reply media attachments
* fix(telegram): preserve cached-sticker reply media context
* fix: update changelog for telegram reply-media context fixes (#28488) (thanks @obviyus)
2026-05-18 01:11:04 +00:00 · 2026-02-27 15:16:21 +05:30
parent a7929abad8
commit aae90cb036
10 changed files with 376 additions and 30 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -37,6 +37,7 @@ Docs: https://docs.openclaw.ai
 - Typing/Cross-channel leakage: unify run-scoped typing suppression for cross-channel/internal-webchat routes, preserve current inbound origin as embedded run message channel context, harden shared typing keepalive with consecutive-failure circuit breaker edge-case handling, and enforce dispatcher completion/idle waits in extension dispatcher callsites (Feishu, Matrix, Mattermost, MSTeams) so typing indicators always clean up on success/error paths. Related: #27647, #27493, #27598. Supersedes/replaces draft PRs: #27640, #27593, #27540.
 - Telegram/sendChatAction 401 handling: add bounded exponential backoff + temporary local typing suppression after repeated unauthorized failures to stop unbounded `sendChatAction` retry loops that can trigger Telegram abuse enforcement and bot deletion. (#27415) Thanks @widingmarcus-cyber.
 - Telegram/Webhook startup: clarify webhook config guidance, allow `channels.telegram.webhookPort: 0` for ephemeral listener binding, and log both the local listener URL and Telegram-advertised webhook URL with the bound port. (#25732) thanks @huntharo.
+- Telegram/Reply media context: include replied media files in inbound context when replying to media, defer reply-media downloads to debounce flush, gate reply-media fetch behind DM authorization, and preserve replied media when non-vision sticker fallback runs (including cached-sticker paths). (#28488) Thanks @obviyus.
 - Config/Doctor allowlist safety: reject `dmPolicy: "allowlist"` configs with empty `allowFrom`, add Telegram account-level inheritance-aware validation, and teach `openclaw doctor --fix` to restore missing `allowFrom` entries from pairing-store files when present, preventing silent DM drops after upgrades. (#27936) Thanks @widingmarcus-cyber.
 - Browser/Chrome extension handshake: bind relay WS message handling before `onopen` and add non-blocking `connect.challenge` response handling for gateway-style handshake frames, avoiding stuck `…` badge states when challenge frames arrive immediately on connect. Landed from contributor PR #22571 by @pandego. (#22553)
 - Browser/Extension relay init: dedupe concurrent same-port relay startup with shared in-flight initialization promises so callers await one startup lifecycle and receive consistent success/failure results. Landed from contributor PR #21277 by @HOYALIM. (Related #20688)
--- a/src/auto-reply/templating.ts
+++ b/src/auto-reply/templating.ts
@@ -89,6 +89,8 @@ export type MsgContext = {
  MediaTypes?: string[];
  /** Telegram sticker metadata (emoji, set name, file IDs, cached description). */
  Sticker?: StickerMetadata;
+  /** True when current-turn sticker media is present in MediaPaths (false for cached-description path). */
+  StickerMediaIncluded?: boolean;
  OutputDir?: string;
  OutputBase?: string;
  /** Remote host for SCP when media lives on a different machine (e.g., openclaw@192.168.64.3). */
--- a/src/telegram/bot-handlers.ts
+++ b/src/telegram/bot-handlers.ts
@@ -81,6 +81,24 @@ function hasInboundMedia(msg: Message): boolean {
  );
 }

+function hasReplyTargetMedia(msg: Message): boolean {
+  const externalReply = (msg as Message & { external_reply?: Message }).external_reply;
+  const replyTarget = msg.reply_to_message ?? externalReply;
+  return Boolean(replyTarget && hasInboundMedia(replyTarget));
+}
+
+function resolveInboundMediaFileId(msg: Message): string | undefined {
+  return (
+    msg.sticker?.file_id ??
+    msg.photo?.[msg.photo.length - 1]?.file_id ??
+    msg.video?.file_id ??
+    msg.video_note?.file_id ??
+    msg.document?.file_id ??
+    msg.audio?.file_id ??
+    msg.voice?.file_id
+  );
+}
+
 export const registerTelegramHandlers = ({
  cfg,
  accountId,
@@ -198,7 +216,8 @@ export const registerTelegramHandlers = ({
        return;
      }
      if (entries.length === 1) {
-        await processMessage(last.ctx, last.allMedia, last.storeAllowFrom);
+        const replyMedia = await resolveReplyMediaForMessage(last.ctx, last.msg);
+        await processMessage(last.ctx, last.allMedia, last.storeAllowFrom, undefined, replyMedia);
        return;
      }
      const combinedText = entries
@@ -217,11 +236,14 @@ export const registerTelegramHandlers = ({
        date: last.msg.date ?? first.msg.date,
      });
      const messageIdOverride = last.msg.message_id ? String(last.msg.message_id) : undefined;
+      const syntheticCtx = buildSyntheticContext(baseCtx, syntheticMessage);
+      const replyMedia = await resolveReplyMediaForMessage(baseCtx, syntheticMessage);
      await processMessage(
-        buildSyntheticContext(baseCtx, syntheticMessage),
+        syntheticCtx,
        combinedMedia,
        first.storeAllowFrom,
        messageIdOverride ? { messageIdOverride } : undefined,
+        replyMedia,
      );
    },
    onError: (err) => {
@@ -336,7 +358,8 @@ export const registerTelegramHandlers = ({
      }

      const storeAllowFrom = await loadStoreAllowFrom();
-      await processMessage(primaryEntry.ctx, allMedia, storeAllowFrom);
+      const replyMedia = await resolveReplyMediaForMessage(primaryEntry.ctx, primaryEntry.msg);
+      await processMessage(primaryEntry.ctx, allMedia, storeAllowFrom, undefined, replyMedia);
    } catch (err) {
      runtime.error?.(danger(`media group handler failed: ${String(err)}`));
    }
@@ -398,6 +421,45 @@ export const registerTelegramHandlers = ({
  const loadStoreAllowFrom = async () =>
    readChannelAllowFromStore("telegram", process.env, accountId).catch(() => []);

+  const resolveReplyMediaForMessage = async (
+    ctx: TelegramContext,
+    msg: Message,
+  ): Promise<TelegramMediaRef[]> => {
+    const replyMessage = msg.reply_to_message;
+    if (!replyMessage || !hasInboundMedia(replyMessage)) {
+      return [];
+    }
+    const replyFileId = resolveInboundMediaFileId(replyMessage);
+    if (!replyFileId) {
+      return [];
+    }
+    try {
+      const media = await resolveMedia(
+        {
+          message: replyMessage,
+          me: ctx.me,
+          getFile: async () => await bot.api.getFile(replyFileId),
+        },
+        mediaMaxBytes,
+        opts.token,
+        opts.proxyFetch,
+      );
+      if (!media) {
+        return [];
+      }
+      return [
+        {
+          path: media.path,
+          contentType: media.contentType,
+          stickerMetadata: media.stickerMetadata,
+        },
+      ];
+    } catch (err) {
+      logger.warn({ chatId: msg.chat.id, error: String(err) }, "reply media fetch failed");
+      return [];
+    }
+  };
+
  const isAllowlistAuthorized = (
    allow: NormalizedAllowFrom,
    senderId: string,
@@ -1301,7 +1363,7 @@ export const registerTelegramHandlers = ({
        return;
      }

-      if (!event.isGroup && hasInboundMedia(event.msg)) {
+      if (!event.isGroup && (hasInboundMedia(event.msg) || hasReplyTargetMedia(event.msg))) {
        const dmAuthorized = await enforceTelegramDmAccess({
          isGroup: event.isGroup,
          dmPolicy,
--- a/src/telegram/bot-message-context.ts
+++ b/src/telegram/bot-message-context.ts
@@ -101,6 +101,7 @@ type ResolveGroupRequireMention = (chatId: string | number) => boolean;
 export type BuildTelegramMessageContextParams = {
  primaryCtx: TelegramContext;
  allMedia: TelegramMediaRef[];
+  replyMedia?: TelegramMediaRef[];
  storeAllowFrom: string[];
  options?: TelegramMessageContextOptions;
  bot: Bot;
@@ -143,6 +144,7 @@ async function resolveStickerVisionSupport(params: {
 export const buildTelegramMessageContext = async ({
  primaryCtx,
  allMedia,
+  replyMedia = [],
  storeAllowFrom,
  options,
  bot,
@@ -640,6 +642,8 @@ export const buildTelegramMessageContext = async ({
          timestamp: entry.timestamp,
        }))
      : undefined;
+  const currentMediaForContext = stickerCacheHit ? [] : allMedia;
+  const contextMedia = [...currentMediaForContext, ...replyMedia];
  const ctxPayload = finalizeInboundContext({
    Body: combinedBody,
    // Agent prompt should be the raw user text only; metadata/context is provided via system prompt.
@@ -685,26 +689,18 @@ export const buildTelegramMessageContext = async ({
    ForwardedDate: forwardOrigin?.date ? forwardOrigin.date * 1000 : undefined,
    Timestamp: msg.date ? msg.date * 1000 : undefined,
    WasMentioned: isGroup ? effectiveWasMentioned : undefined,
-    // Filter out cached stickers from media - their description is already in the message body
-    MediaPath: stickerCacheHit ? undefined : allMedia[0]?.path,
-    MediaType: stickerCacheHit ? undefined : allMedia[0]?.contentType,
-    MediaUrl: stickerCacheHit ? undefined : allMedia[0]?.path,
-    MediaPaths: stickerCacheHit
-      ? undefined
-      : allMedia.length > 0
-        ? allMedia.map((m) => m.path)
-        : undefined,
-    MediaUrls: stickerCacheHit
-      ? undefined
-      : allMedia.length > 0
-        ? allMedia.map((m) => m.path)
-        : undefined,
-    MediaTypes: stickerCacheHit
-      ? undefined
-      : allMedia.length > 0
-        ? (allMedia.map((m) => m.contentType).filter(Boolean) as string[])
+    // Filter out cached stickers from current-message media; reply media is still valid context.
+    MediaPath: contextMedia.length > 0 ? contextMedia[0]?.path : undefined,
+    MediaType: contextMedia.length > 0 ? contextMedia[0]?.contentType : undefined,
+    MediaUrl: contextMedia.length > 0 ? contextMedia[0]?.path : undefined,
+    MediaPaths: contextMedia.length > 0 ? contextMedia.map((m) => m.path) : undefined,
+    MediaUrls: contextMedia.length > 0 ? contextMedia.map((m) => m.path) : undefined,
+    MediaTypes:
+      contextMedia.length > 0
+        ? (contextMedia.map((m) => m.contentType).filter(Boolean) as string[])
        : undefined,
    Sticker: allMedia[0]?.stickerMetadata,
+    StickerMediaIncluded: allMedia[0]?.stickerMetadata ? !stickerCacheHit : undefined,
    ...(locationData ? toLocationContext(locationData) : undefined),
    CommandAuthorized: commandAuthorized,
    // For groups: use resolved forum topic id; for DMs: use raw messageThreadId
--- a/src/telegram/bot-message-dispatch.sticker-media.test.ts
+++ b/src/telegram/bot-message-dispatch.sticker-media.test.ts
@@ -0,0 +1,64 @@
+import { describe, expect, it } from "vitest";
+import { pruneStickerMediaFromContext } from "./bot-message-dispatch.js";
+
+describe("pruneStickerMediaFromContext", () => {
+  it("preserves appended reply media while removing primary sticker media", () => {
+    const ctx = {
+      MediaPath: "/tmp/sticker.webp",
+      MediaUrl: "/tmp/sticker.webp",
+      MediaType: "image/webp",
+      MediaPaths: ["/tmp/sticker.webp", "/tmp/replied.jpg"],
+      MediaUrls: ["/tmp/sticker.webp", "/tmp/replied.jpg"],
+      MediaTypes: ["image/webp", "image/jpeg"],
+    };
+
+    pruneStickerMediaFromContext(ctx);
+
+    expect(ctx.MediaPath).toBe("/tmp/replied.jpg");
+    expect(ctx.MediaUrl).toBe("/tmp/replied.jpg");
+    expect(ctx.MediaType).toBe("image/jpeg");
+    expect(ctx.MediaPaths).toEqual(["/tmp/replied.jpg"]);
+    expect(ctx.MediaUrls).toEqual(["/tmp/replied.jpg"]);
+    expect(ctx.MediaTypes).toEqual(["image/jpeg"]);
+  });
+
+  it("clears media fields when sticker is the only media", () => {
+    const ctx = {
+      MediaPath: "/tmp/sticker.webp",
+      MediaUrl: "/tmp/sticker.webp",
+      MediaType: "image/webp",
+      MediaPaths: ["/tmp/sticker.webp"],
+      MediaUrls: ["/tmp/sticker.webp"],
+      MediaTypes: ["image/webp"],
+    };
+
+    pruneStickerMediaFromContext(ctx);
+
+    expect(ctx.MediaPath).toBeUndefined();
+    expect(ctx.MediaUrl).toBeUndefined();
+    expect(ctx.MediaType).toBeUndefined();
+    expect(ctx.MediaPaths).toBeUndefined();
+    expect(ctx.MediaUrls).toBeUndefined();
+    expect(ctx.MediaTypes).toBeUndefined();
+  });
+
+  it("does not prune when sticker media is already omitted from context", () => {
+    const ctx = {
+      MediaPath: "/tmp/replied.jpg",
+      MediaUrl: "/tmp/replied.jpg",
+      MediaType: "image/jpeg",
+      MediaPaths: ["/tmp/replied.jpg"],
+      MediaUrls: ["/tmp/replied.jpg"],
+      MediaTypes: ["image/jpeg"],
+    };
+
+    pruneStickerMediaFromContext(ctx, { stickerMediaIncluded: false });
+
+    expect(ctx.MediaPath).toBe("/tmp/replied.jpg");
+    expect(ctx.MediaUrl).toBe("/tmp/replied.jpg");
+    expect(ctx.MediaType).toBe("image/jpeg");
+    expect(ctx.MediaPaths).toEqual(["/tmp/replied.jpg"]);
+    expect(ctx.MediaUrls).toEqual(["/tmp/replied.jpg"]);
+    expect(ctx.MediaTypes).toEqual(["image/jpeg"]);
+  });
+});
--- a/src/telegram/bot-message-dispatch.ts
+++ b/src/telegram/bot-message-dispatch.ts
@@ -60,6 +60,37 @@ async function resolveStickerVisionSupport(cfg: OpenClawConfig, agentId: string)
  }
 }

+export function pruneStickerMediaFromContext(
+  ctxPayload: {
+    MediaPath?: string;
+    MediaUrl?: string;
+    MediaType?: string;
+    MediaPaths?: string[];
+    MediaUrls?: string[];
+    MediaTypes?: string[];
+  },
+  opts?: { stickerMediaIncluded?: boolean },
+) {
+  if (opts?.stickerMediaIncluded === false) {
+    return;
+  }
+  const nextMediaPaths = Array.isArray(ctxPayload.MediaPaths)
+    ? ctxPayload.MediaPaths.slice(1)
+    : undefined;
+  const nextMediaUrls = Array.isArray(ctxPayload.MediaUrls)
+    ? ctxPayload.MediaUrls.slice(1)
+    : undefined;
+  const nextMediaTypes = Array.isArray(ctxPayload.MediaTypes)
+    ? ctxPayload.MediaTypes.slice(1)
+    : undefined;
+  ctxPayload.MediaPaths = nextMediaPaths && nextMediaPaths.length > 0 ? nextMediaPaths : undefined;
+  ctxPayload.MediaUrls = nextMediaUrls && nextMediaUrls.length > 0 ? nextMediaUrls : undefined;
+  ctxPayload.MediaTypes = nextMediaTypes && nextMediaTypes.length > 0 ? nextMediaTypes : undefined;
+  ctxPayload.MediaPath = ctxPayload.MediaPaths?.[0];
+  ctxPayload.MediaUrl = ctxPayload.MediaUrls?.[0] ?? ctxPayload.MediaPath;
+  ctxPayload.MediaType = ctxPayload.MediaTypes?.[0];
+}
+
 type DispatchTelegramMessageParams = {
  context: TelegramMessageContext;
  bot: Bot;
@@ -311,13 +342,10 @@ export const dispatchTelegramMessage = async ({
        // Update context to use description instead of image
        ctxPayload.Body = formattedDesc;
        ctxPayload.BodyForAgent = formattedDesc;
-        // Clear media paths so native vision doesn't process the image again
-        ctxPayload.MediaPath = undefined;
-        ctxPayload.MediaType = undefined;
-        ctxPayload.MediaUrl = undefined;
-        ctxPayload.MediaPaths = undefined;
-        ctxPayload.MediaUrls = undefined;
-        ctxPayload.MediaTypes = undefined;
+        // Drop only the sticker attachment; keep replied media context if present.
+        pruneStickerMediaFromContext(ctxPayload, {
+          stickerMediaIncluded: ctxPayload.StickerMediaIncluded,
+        });
      }

      // Cache the description for future encounters
--- a/src/telegram/bot-message.ts
+++ b/src/telegram/bot-message.ts
@@ -52,10 +52,12 @@ export const createTelegramMessageProcessor = (deps: TelegramMessageProcessorDep
    allMedia: TelegramMediaRef[],
    storeAllowFrom: string[],
    options?: { messageIdOverride?: string; forceWasMentioned?: boolean },
+    replyMedia?: TelegramMediaRef[],
  ) => {
    const context = await buildTelegramMessageContext({
      primaryCtx,
      allMedia,
+      replyMedia,
      storeAllowFrom,
      options,
      bot,
--- a/src/telegram/bot-native-commands.ts
+++ b/src/telegram/bot-native-commands.ts
@@ -42,6 +42,7 @@ import { resolveThreadSessionKeys } from "../routing/session-key.js";
 import type { RuntimeEnv } from "../runtime.js";
 import { withTelegramApiErrorLogging } from "./api-logging.js";
 import { isSenderAllowed, normalizeDmAllowFromWithStore } from "./bot-access.js";
+import type { TelegramMediaRef } from "./bot-message-context.js";
 import {
  buildCappedTelegramMenuCommands,
  buildPluginTelegramMenuCommands,
@@ -101,12 +102,13 @@ export type RegisterTelegramHandlerParams = {
  shouldSkipUpdate: (ctx: TelegramUpdateKeyContext) => boolean;
  processMessage: (
    ctx: TelegramContext,
-    allMedia: Array<{ path: string; contentType?: string }>,
+    allMedia: TelegramMediaRef[],
    storeAllowFrom: string[],
    options?: {
      messageIdOverride?: string;
      forceWasMentioned?: boolean;
    },
+    replyMedia?: TelegramMediaRef[],
  ) => Promise<void>;
  logger: ReturnType<typeof getChildLogger>;
 };
--- a/src/telegram/bot.create-telegram-bot.test-harness.ts
+++ b/src/telegram/bot.create-telegram-bot.test-harness.ts
@@ -120,6 +120,7 @@ export const getMeSpy: AnyAsyncMock = vi.fn(async () => ({
 export const sendMessageSpy: AnyAsyncMock = vi.fn(async () => ({ message_id: 77 }));
 export const sendAnimationSpy: AnyAsyncMock = vi.fn(async () => ({ message_id: 78 }));
 export const sendPhotoSpy: AnyAsyncMock = vi.fn(async () => ({ message_id: 79 }));
+export const getFileSpy: AnyAsyncMock = vi.fn(async () => ({ file_path: "media/file.jpg" }));

 type ApiStub = {
  config: { use: (arg: unknown) => void };
@@ -132,6 +133,7 @@ type ApiStub = {
  sendMessage: typeof sendMessageSpy;
  sendAnimation: typeof sendAnimationSpy;
  sendPhoto: typeof sendPhotoSpy;
+  getFile: typeof getFileSpy;
 };

 const apiStub: ApiStub = {
@@ -145,6 +147,7 @@ const apiStub: ApiStub = {
  sendMessage: sendMessageSpy,
  sendAnimation: sendAnimationSpy,
  sendPhoto: sendPhotoSpy,
+  getFile: getFileSpy,
 };

 vi.mock("grammy", () => ({
@@ -290,6 +293,8 @@ beforeEach(() => {
  sendPhotoSpy.mockResolvedValue({ message_id: 79 });
  sendMessageSpy.mockReset();
  sendMessageSpy.mockResolvedValue({ message_id: 77 });
+  getFileSpy.mockReset();
+  getFileSpy.mockResolvedValue({ file_path: "media/file.jpg" });

  setMessageReactionSpy.mockReset();
  setMessageReactionSpy.mockResolvedValue(undefined);
--- a/src/telegram/bot.test.ts
+++ b/src/telegram/bot.test.ts
@@ -11,6 +11,7 @@ import {
  commandSpy,
  editMessageTextSpy,
  enqueueSystemEventSpy,
+  getFileSpy,
  getLoadConfigMock,
  getReadChannelAllowFromStoreMock,
  getOnHandler,
@@ -404,6 +405,189 @@ describe("createTelegramBot", () => {
    expect(payload.ReplyToSender).toBe("Ada");
  });

+  it("includes replied image media in inbound context for text replies", async () => {
+    onSpy.mockClear();
+    replySpy.mockClear();
+    getFileSpy.mockClear();
+
+    const fetchSpy = vi.spyOn(globalThis, "fetch").mockImplementation(
+      async () =>
+        new Response(new Uint8Array([0x89, 0x50, 0x4e, 0x47]), {
+          status: 200,
+          headers: { "content-type": "image/png" },
+        }),
+    );
+    try {
+      createTelegramBot({ token: "tok" });
+      const handler = getOnHandler("message") as (ctx: Record<string, unknown>) => Promise<void>;
+
+      await handler({
+        message: {
+          chat: { id: 7, type: "private" },
+          text: "what is in this image?",
+          date: 1736380800,
+          reply_to_message: {
+            message_id: 9001,
+            photo: [{ file_id: "reply-photo-1" }],
+            from: { first_name: "Ada" },
+          },
+        },
+        me: { username: "openclaw_bot" },
+        getFile: async () => ({}),
+      });
+
+      expect(replySpy).toHaveBeenCalledTimes(1);
+      const payload = replySpy.mock.calls[0][0] as {
+        MediaPath?: string;
+        MediaPaths?: string[];
+        ReplyToBody?: string;
+      };
+      expect(payload.ReplyToBody).toBe("<media:image>");
+      expect(payload.MediaPaths).toHaveLength(1);
+      expect(payload.MediaPath).toBe(payload.MediaPaths?.[0]);
+      expect(getFileSpy).toHaveBeenCalledWith("reply-photo-1");
+    } finally {
+      fetchSpy.mockRestore();
+    }
+  });
+
+  it("does not fetch reply media for unauthorized DM replies", async () => {
+    onSpy.mockClear();
+    replySpy.mockClear();
+    getFileSpy.mockClear();
+    sendMessageSpy.mockClear();
+    readChannelAllowFromStore.mockResolvedValue([]);
+    loadConfig.mockReturnValue({
+      channels: {
+        telegram: {
+          dmPolicy: "pairing",
+          allowFrom: [],
+        },
+      },
+    });
+
+    createTelegramBot({ token: "tok" });
+    const handler = getOnHandler("message") as (ctx: Record<string, unknown>) => Promise<void>;
+
+    await handler({
+      message: {
+        chat: { id: 7, type: "private" },
+        text: "hey",
+        date: 1736380800,
+        from: { id: 999, first_name: "Eve" },
+        reply_to_message: {
+          message_id: 9001,
+          photo: [{ file_id: "reply-photo-1" }],
+          from: { first_name: "Ada" },
+        },
+      },
+      me: { username: "openclaw_bot" },
+      getFile: async () => ({}),
+    });
+
+    expect(getFileSpy).not.toHaveBeenCalled();
+    expect(replySpy).not.toHaveBeenCalled();
+    expect(sendMessageSpy).toHaveBeenCalledTimes(1);
+  });
+
+  it("defers reply media download until debounce flush", async () => {
+    const DEBOUNCE_MS = 4321;
+    onSpy.mockClear();
+    replySpy.mockClear();
+    getFileSpy.mockClear();
+    loadConfig.mockReturnValue({
+      agents: {
+        defaults: {
+          envelopeTimezone: "utc",
+        },
+      },
+      messages: {
+        inbound: {
+          debounceMs: DEBOUNCE_MS,
+        },
+      },
+      channels: {
+        telegram: {
+          dmPolicy: "open",
+          allowFrom: ["*"],
+        },
+      },
+    });
+
+    const fetchSpy = vi.spyOn(globalThis, "fetch").mockImplementation(
+      async () =>
+        new Response(new Uint8Array([0x89, 0x50, 0x4e, 0x47]), {
+          status: 200,
+          headers: { "content-type": "image/png" },
+        }),
+    );
+    const setTimeoutSpy = vi.spyOn(globalThis, "setTimeout");
+    try {
+      createTelegramBot({ token: "tok" });
+      const handler = getOnHandler("message") as (ctx: Record<string, unknown>) => Promise<void>;
+
+      await handler({
+        message: {
+          chat: { id: 7, type: "private" },
+          text: "first",
+          date: 1736380800,
+          message_id: 101,
+          from: { id: 42, first_name: "Ada" },
+          reply_to_message: {
+            message_id: 9001,
+            photo: [{ file_id: "reply-photo-1" }],
+            from: { first_name: "Ada" },
+          },
+        },
+        me: { username: "openclaw_bot" },
+        getFile: async () => ({}),
+      });
+      await handler({
+        message: {
+          chat: { id: 7, type: "private" },
+          text: "second",
+          date: 1736380801,
+          message_id: 102,
+          from: { id: 42, first_name: "Ada" },
+          reply_to_message: {
+            message_id: 9001,
+            photo: [{ file_id: "reply-photo-1" }],
+            from: { first_name: "Ada" },
+          },
+        },
+        me: { username: "openclaw_bot" },
+        getFile: async () => ({}),
+      });
+
+      expect(replySpy).not.toHaveBeenCalled();
+      expect(getFileSpy).not.toHaveBeenCalled();
+
+      const flushTimerCallIndex = setTimeoutSpy.mock.calls.findLastIndex(
+        (call) => call[1] === DEBOUNCE_MS,
+      );
+      const flushTimer =
+        flushTimerCallIndex >= 0
+          ? (setTimeoutSpy.mock.calls[flushTimerCallIndex]?.[0] as (() => unknown) | undefined)
+          : undefined;
+      if (flushTimerCallIndex >= 0) {
+        clearTimeout(
+          setTimeoutSpy.mock.results[flushTimerCallIndex]?.value as ReturnType<typeof setTimeout>,
+        );
+      }
+      expect(flushTimer).toBeTypeOf("function");
+      await flushTimer?.();
+      await vi.waitFor(() => {
+        expect(replySpy).toHaveBeenCalledTimes(1);
+      });
+
+      expect(getFileSpy).toHaveBeenCalledTimes(1);
+      expect(getFileSpy).toHaveBeenCalledWith("reply-photo-1");
+    } finally {
+      setTimeoutSpy.mockRestore();
+      fetchSpy.mockRestore();
+    }
+  });
+
  it("handles quote-only replies without reply metadata", async () => {
    onSpy.mockClear();
    sendMessageSpy.mockClear();