From 2fc479b4274a87a1698b053b18e31930134a2635 Mon Sep 17 00:00:00 2001 From: Ayaan Zaidi Date: Sun, 15 Feb 2026 14:22:49 +0530 Subject: [PATCH] fix: apply telegram voice transcript body substitution (#16789) (thanks @Limitless2023) (#16970) --- CHANGELOG.md | 1 + ...t-message-context.audio-transcript.test.ts | 61 +++++++++++++++++++ src/telegram/bot-message-context.ts | 7 ++- 3 files changed, 68 insertions(+), 1 deletion(-) create mode 100644 src/telegram/bot-message-context.audio-transcript.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 9fc6ee1a920..aad7413b327 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ Docs: https://docs.openclaw.ai - Agents: return an explicit timeout error reply when an embedded run times out before producing any payloads, preventing silent dropped turns during slow cache-refresh transitions. (#16659) Thanks @liaosvcaf and @vignesh07. - Agents/OpenAI: force `store=true` for direct OpenAI Responses/Codex runs to preserve multi-turn server-side conversation state, while leaving proxy/non-OpenAI endpoints unchanged. (#16803) Thanks @mark9232 and @vignesh07. - CLI/Build: make legacy daemon CLI compatibility shim generation tolerant of minimal tsdown daemon export sets, while preserving restart/register compatibility aliases and surfacing explicit errors for unavailable legacy daemon commands. Thanks @vignesh07. +- Telegram: replace inbound `<media:audio>` placeholder with successful preflight voice transcript in message body context, preventing placeholder-only prompt bodies for mention-gated voice messages. (#16789) Thanks @Limitless2023. 
## 2026.2.14 diff --git a/src/telegram/bot-message-context.audio-transcript.test.ts b/src/telegram/bot-message-context.audio-transcript.test.ts new file mode 100644 index 00000000000..663260ca559 --- /dev/null +++ b/src/telegram/bot-message-context.audio-transcript.test.ts @@ -0,0 +1,61 @@ +import { describe, expect, it, vi } from "vitest"; +import { buildTelegramMessageContext } from "./bot-message-context.js"; + +const transcribeFirstAudioMock = vi.fn(); + +vi.mock("../media-understanding/audio-preflight.js", () => ({ + transcribeFirstAudio: (...args: unknown[]) => transcribeFirstAudioMock(...args), +})); + +describe("buildTelegramMessageContext audio transcript body", () => { + it("uses preflight transcript as BodyForAgent for mention-gated group voice messages", async () => { + transcribeFirstAudioMock.mockResolvedValueOnce("hey bot please help"); + + const ctx = await buildTelegramMessageContext({ + primaryCtx: { + message: { + message_id: 1, + chat: { id: -1001234567890, type: "supergroup", title: "Test Group" }, + date: 1700000000, + from: { id: 42, first_name: "Alice" }, + voice: { file_id: "voice-1" }, + }, + me: { id: 7, username: "bot" }, + } as never, + allMedia: [{ path: "/tmp/voice.ogg", contentType: "audio/ogg" }], + storeAllowFrom: [], + options: { forceWasMentioned: true }, + bot: { + api: { + sendChatAction: vi.fn(), + setMessageReaction: vi.fn(), + }, + } as never, + cfg: { + agents: { defaults: { model: "anthropic/claude-opus-4-5", workspace: "/tmp/openclaw" } }, + channels: { telegram: {} }, + messages: { groupChat: { mentionPatterns: ["\\bbot\\b"] } }, + } as never, + account: { accountId: "default" } as never, + historyLimit: 0, + groupHistories: new Map(), + dmPolicy: "open", + allowFrom: [], + groupAllowFrom: [], + ackReactionScope: "off", + logger: { info: vi.fn() }, + resolveGroupActivation: () => true, + resolveGroupRequireMention: () => true, + resolveTelegramGroupConfig: () => ({ + groupConfig: { requireMention: true }, + topicConfig: 
undefined, + }), + }); + + expect(ctx).not.toBeNull(); + expect(transcribeFirstAudioMock).toHaveBeenCalledTimes(1); + expect(ctx?.ctxPayload?.BodyForAgent).toBe("hey bot please help"); + expect(ctx?.ctxPayload?.Body).toContain("hey bot please help"); + expect(ctx?.ctxPayload?.Body).not.toContain("<media:audio>"); + }); +}); diff --git a/src/telegram/bot-message-context.ts b/src/telegram/bot-message-context.ts index 62d6443c584..9cd8f91106f 100644 --- a/src/telegram/bot-message-context.ts +++ b/src/telegram/bot-message-context.ts @@ -425,7 +425,12 @@ export const buildTelegramMessageContext = async ({ } } - // Build bodyText - if there's audio with transcript, use transcript; otherwise use placeholder + // Replace audio placeholder with transcript when preflight succeeds. + if (hasAudio && bodyText === "<media:audio>" && preflightTranscript) { + bodyText = preflightTranscript; + } + + // Build bodyText fallback for messages that still have no text. if (!bodyText && allMedia.length > 0) { if (hasAudio) { bodyText = preflightTranscript || "<media:audio>";