From 2fc479b4274a87a1698b053b18e31930134a2635 Mon Sep 17 00:00:00 2001
From: Ayaan Zaidi <hi@obviy.us>
Date: Sun, 15 Feb 2026 14:22:49 +0530
Subject: [PATCH] fix: apply telegram voice transcript body substitution
 (#16789) (thanks @Limitless2023) (#16970)

---
 CHANGELOG.md                                  |  1 +
 ...t-message-context.audio-transcript.test.ts | 61 +++++++++++++++++++
 src/telegram/bot-message-context.ts           |  7 ++-
 3 files changed, 68 insertions(+), 1 deletion(-)
 create mode 100644 src/telegram/bot-message-context.audio-transcript.test.ts

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9fc6ee1a920..aad7413b327 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -20,6 +20,7 @@ Docs: https://docs.openclaw.ai
 - Agents: return an explicit timeout error reply when an embedded run times out before producing any payloads, preventing silent dropped turns during slow cache-refresh transitions. (#16659) Thanks @liaosvcaf and @vignesh07.
 - Agents/OpenAI: force `store=true` for direct OpenAI Responses/Codex runs to preserve multi-turn server-side conversation state, while leaving proxy/non-OpenAI endpoints unchanged. (#16803) Thanks @mark9232 and @vignesh07.
 - CLI/Build: make legacy daemon CLI compatibility shim generation tolerant of minimal tsdown daemon export sets, while preserving restart/register compatibility aliases and surfacing explicit errors for unavailable legacy daemon commands. Thanks @vignesh07.
+- Telegram: replace the inbound `<media:audio>` placeholder with the preflight voice transcript (when transcription succeeds) in the message body context, so mention-gated voice messages no longer produce placeholder-only prompt bodies. (#16789) Thanks @Limitless2023.
 
 ## 2026.2.14
 
diff --git a/src/telegram/bot-message-context.audio-transcript.test.ts b/src/telegram/bot-message-context.audio-transcript.test.ts
new file mode 100644
index 00000000000..663260ca559
--- /dev/null
+++ b/src/telegram/bot-message-context.audio-transcript.test.ts
@@ -0,0 +1,61 @@
+import { describe, expect, it, vi } from "vitest";
+import { buildTelegramMessageContext } from "./bot-message-context.js";
+
+const transcribeFirstAudioMock = vi.fn();
+
+vi.mock("../media-understanding/audio-preflight.js", () => ({
+  transcribeFirstAudio: (...args: unknown[]) => transcribeFirstAudioMock(...args),
+}));
+
+describe("buildTelegramMessageContext audio transcript body", () => {
+  it("uses preflight transcript as BodyForAgent for mention-gated group voice messages", async () => {
+    transcribeFirstAudioMock.mockResolvedValueOnce("hey bot please help");
+
+    const ctx = await buildTelegramMessageContext({
+      primaryCtx: {
+        message: {
+          message_id: 1,
+          chat: { id: -1001234567890, type: "supergroup", title: "Test Group" },
+          date: 1700000000,
+          from: { id: 42, first_name: "Alice" },
+          voice: { file_id: "voice-1" },
+        },
+        me: { id: 7, username: "bot" },
+      } as never,
+      allMedia: [{ path: "/tmp/voice.ogg", contentType: "audio/ogg" }],
+      storeAllowFrom: [],
+      options: { forceWasMentioned: true },
+      bot: {
+        api: {
+          sendChatAction: vi.fn(),
+          setMessageReaction: vi.fn(),
+        },
+      } as never,
+      cfg: {
+        agents: { defaults: { model: "anthropic/claude-opus-4-5", workspace: "/tmp/openclaw" } },
+        channels: { telegram: {} },
+        messages: { groupChat: { mentionPatterns: ["\\bbot\\b"] } },
+      } as never,
+      account: { accountId: "default" } as never,
+      historyLimit: 0,
+      groupHistories: new Map(),
+      dmPolicy: "open",
+      allowFrom: [],
+      groupAllowFrom: [],
+      ackReactionScope: "off",
+      logger: { info: vi.fn() },
+      resolveGroupActivation: () => true,
+      resolveGroupRequireMention: () => true,
+      resolveTelegramGroupConfig: () => ({
+        groupConfig: { requireMention: true },
+        topicConfig: undefined,
+      }),
+    });
+
+    expect(ctx).not.toBeNull();
+    expect(transcribeFirstAudioMock).toHaveBeenCalledTimes(1);
+    expect(ctx?.ctxPayload?.BodyForAgent).toBe("hey bot please help");
+    expect(ctx?.ctxPayload?.Body).toContain("hey bot please help");
+    expect(ctx?.ctxPayload?.Body).not.toContain("<media:audio>");
+  });
+});
diff --git a/src/telegram/bot-message-context.ts b/src/telegram/bot-message-context.ts
index 62d6443c584..9cd8f91106f 100644
--- a/src/telegram/bot-message-context.ts
+++ b/src/telegram/bot-message-context.ts
@@ -425,7 +425,12 @@ export const buildTelegramMessageContext = async ({
     }
   }
 
-  // Build bodyText - if there's audio with transcript, use transcript; otherwise use placeholder
+  // Replace a placeholder-only audio body with the preflight transcript when one is available.
+  if (hasAudio && bodyText === "<media:audio>" && preflightTranscript) {
+    bodyText = preflightTranscript;
+  }
+
+  // Build bodyText fallback for messages that still have no text.
   if (!bodyText && allMedia.length > 0) {
     if (hasAudio) {
       bodyText = preflightTranscript || "<media:audio>";