fix: transcribe audio before mention check in groups with requireMention (openclaw#9973) thanks @mcinteerj

Verified:
- pnpm install --frozen-lockfile
- pnpm build
- pnpm check
- pnpm test

Co-authored-by: mcinteerj <3613653+mcinteerj@users.noreply.github.com>
This commit is contained in:
Jake
2026-02-13 04:58:01 +13:00
committed by GitHub
parent a5ab9fac0c
commit a2ddcdadeb
7 changed files with 245 additions and 38 deletions

View File

@@ -1,4 +1,5 @@
import type { Bot } from "grammy";
import type { MsgContext } from "../auto-reply/templating.js";
import type { OpenClawConfig } from "../config/config.js";
import type { DmPolicy, TelegramGroupConfig, TelegramTopicConfig } from "../config/types.js";
import type { StickerMetadata, TelegramContext } from "./bot/types.js";
@@ -203,6 +204,21 @@ export const buildTelegramMessageContext = async ({
return null;
}
// Compute requireMention early for preflight transcription gating
const activationOverride = resolveGroupActivation({
chatId,
messageThreadId: resolvedThreadId,
sessionKey: sessionKey,
agentId: route.agentId,
});
const baseRequireMention = resolveGroupRequireMention(chatId);
const requireMention = firstDefined(
activationOverride,
topicConfig?.requireMention,
groupConfig?.requireMention,
baseRequireMention,
);
const sendTyping = async () => {
await withTelegramApiErrorLogging({
operation: "sendChatAction",
@@ -370,6 +386,7 @@ export const buildTelegramMessageContext = async ({
const locationText = locationData ? formatLocationText(locationData) : undefined;
const rawTextSource = msg.text ?? msg.caption ?? "";
const rawText = expandTextLinks(rawTextSource, msg.entities ?? msg.caption_entities).trim();
const hasUserText = Boolean(rawText || locationText);
let rawBody = [rawText, locationText].filter(Boolean).join("\n").trim();
if (!rawBody) {
rawBody = placeholder;
@@ -386,6 +403,35 @@ export const buildTelegramMessageContext = async ({
(ent) => ent.type === "mention",
);
const explicitlyMentioned = botUsername ? hasBotMention(msg, botUsername) : false;
// Preflight audio transcription for mention detection in groups
// This allows voice notes to be checked for mentions before being dropped
let preflightTranscript: string | undefined;
const hasAudio = allMedia.some((media) => media.contentType?.startsWith("audio/"));
const needsPreflightTranscription =
isGroup && requireMention && hasAudio && !hasUserText && mentionRegexes.length > 0;
if (needsPreflightTranscription) {
try {
const { transcribeFirstAudio } = await import("../media-understanding/audio-preflight.js");
// Build a minimal context for transcription
const tempCtx: MsgContext = {
MediaPaths: allMedia.length > 0 ? allMedia.map((m) => m.path) : undefined,
MediaTypes:
allMedia.length > 0
? (allMedia.map((m) => m.contentType).filter(Boolean) as string[])
: undefined,
};
preflightTranscript = await transcribeFirstAudio({
ctx: tempCtx,
cfg,
agentDir: undefined,
});
} catch (err) {
logVerbose(`telegram: audio preflight transcription failed: ${String(err)}`);
}
}
const computedWasMentioned = matchesMentionWithExplicit({
text: msg.text ?? msg.caption ?? "",
mentionRegexes,
@@ -394,6 +440,7 @@ export const buildTelegramMessageContext = async ({
isExplicitlyMentioned: explicitlyMentioned,
canResolveExplicit: Boolean(botUsername),
},
transcript: preflightTranscript,
});
const wasMentioned = options?.forceWasMentioned === true ? true : computedWasMentioned;
if (isGroup && commandGate.shouldBlock) {
@@ -405,19 +452,6 @@ export const buildTelegramMessageContext = async ({
});
return null;
}
const activationOverride = resolveGroupActivation({
chatId,
messageThreadId: resolvedThreadId,
sessionKey: sessionKey,
agentId: route.agentId,
});
const baseRequireMention = resolveGroupRequireMention(chatId);
const requireMention = firstDefined(
activationOverride,
topicConfig?.requireMention,
groupConfig?.requireMention,
baseRequireMention,
);
// Reply-chain detection: replying to a bot message acts like an implicit mention.
const botId = primaryCtx.me?.id;
const replyFromId = msg.reply_to_message?.from?.id;