fix: apply telegram voice transcript body substitution (#16789) (thanks @Limitless2023) (#16970)

This commit is contained in:
Ayaan Zaidi
2026-02-15 14:22:49 +05:30
committed by GitHub
parent b65b3c6ff0
commit 2fc479b427
3 changed files with 68 additions and 1 deletions

View File

@@ -20,6 +20,7 @@ Docs: https://docs.openclaw.ai
- Agents: return an explicit timeout error reply when an embedded run times out before producing any payloads, preventing silent dropped turns during slow cache-refresh transitions. (#16659) Thanks @liaosvcaf and @vignesh07.
- Agents/OpenAI: force `store=true` for direct OpenAI Responses/Codex runs to preserve multi-turn server-side conversation state, while leaving proxy/non-OpenAI endpoints unchanged. (#16803) Thanks @mark9232 and @vignesh07.
- CLI/Build: make legacy daemon CLI compatibility shim generation tolerant of minimal tsdown daemon export sets, while preserving restart/register compatibility aliases and surfacing explicit errors for unavailable legacy daemon commands. Thanks @vignesh07.
- Telegram: replace inbound `<media:audio>` placeholder with successful preflight voice transcript in message body context, preventing placeholder-only prompt bodies for mention-gated voice messages. (#16789) Thanks @Limitless2023.
## 2026.2.14

View File

@@ -0,0 +1,61 @@
import { describe, expect, it, vi } from "vitest";
import { buildTelegramMessageContext } from "./bot-message-context.js";
// Shared stand-in for the transcription helper; each test programs its resolved value.
const transcribeFirstAudioMock = vi.fn();
// Swap the audio-preflight module for one whose `transcribeFirstAudio` forwards
// every call (and its arguments) to the mock above. The wrapper looks the mock up
// at call time instead of capturing it when the factory runs.
// NOTE(review): presumably written this way because vitest hoists `vi.mock`
// above the imports and top-level declarations — confirm against the vitest docs.
vi.mock("../media-understanding/audio-preflight.js", () => ({
transcribeFirstAudio: (...args: unknown[]) => transcribeFirstAudioMock(...args),
}));
describe("buildTelegramMessageContext audio transcript body", () => {
  it("uses preflight transcript as BodyForAgent for mention-gated group voice messages", async () => {
    // Text the mocked preflight transcription resolves with for this one call.
    const transcript = "hey bot please help";
    transcribeFirstAudioMock.mockResolvedValueOnce(transcript);

    // Supergroup message that carries only a voice attachment (no text field).
    const voiceMessage = {
      message_id: 1,
      chat: { id: -1001234567890, type: "supergroup", title: "Test Group" },
      date: 1700000000,
      from: { id: 42, first_name: "Alice" },
      voice: { file_id: "voice-1" },
    };
    const primaryCtx = {
      message: voiceMessage,
      me: { id: 7, username: "bot" },
    } as never;

    // Bot API surface stubbed with spies so no network calls are made.
    const bot = {
      api: {
        sendChatAction: vi.fn(),
        setMessageReaction: vi.fn(),
      },
    } as never;

    // Config fixture; the mention pattern matches the word "bot".
    const cfg = {
      agents: { defaults: { model: "anthropic/claude-opus-4-5", workspace: "/tmp/openclaw" } },
      channels: { telegram: {} },
      messages: { groupChat: { mentionPatterns: ["\\bbot\\b"] } },
    } as never;
    const account = { accountId: "default" } as never;

    const built = await buildTelegramMessageContext({
      primaryCtx,
      allMedia: [{ path: "/tmp/voice.ogg", contentType: "audio/ogg" }],
      storeAllowFrom: [],
      options: { forceWasMentioned: true },
      bot,
      cfg,
      account,
      historyLimit: 0,
      groupHistories: new Map(),
      dmPolicy: "open",
      allowFrom: [],
      groupAllowFrom: [],
      ackReactionScope: "off",
      logger: { info: vi.fn() },
      resolveGroupActivation: () => true,
      // Both the resolver and the group config below require a mention.
      resolveGroupRequireMention: () => true,
      resolveTelegramGroupConfig: () => ({
        groupConfig: { requireMention: true },
        topicConfig: undefined,
      }),
    });

    // A context must be produced, after exactly one transcription attempt.
    expect(built).not.toBeNull();
    expect(transcribeFirstAudioMock).toHaveBeenCalledTimes(1);

    // The transcript replaces the audio placeholder in both body fields.
    const payload = built?.ctxPayload;
    expect(payload?.BodyForAgent).toBe(transcript);
    expect(payload?.Body).toContain(transcript);
    expect(payload?.Body).not.toContain("<media:audio>");
  });
});

View File

@@ -425,7 +425,12 @@ export const buildTelegramMessageContext = async ({
} }
} }
// Build bodyText - if there's audio with transcript, use transcript; otherwise use placeholder // Replace audio placeholder with transcript when preflight succeeds.
if (hasAudio && bodyText === "<media:audio>" && preflightTranscript) {
bodyText = preflightTranscript;
}
// Build bodyText fallback for messages that still have no text.
if (!bodyText && allMedia.length > 0) { if (!bodyText && allMedia.length > 0) {
if (hasAudio) { if (hasAudio) {
bodyText = preflightTranscript || "<media:audio>"; bodyText = preflightTranscript || "<media:audio>";