fix: skip audio files from text extraction to prevent binary processing (#7475)

* fix: skip audio files from text extraction early

Audio files should not be processed through extractFileBlocks for text
extraction - they are handled by the dedicated audio transcription
capability (STT).

Previously, audio files were only skipped if they didn't "look like text"
(looksLikeUtf8Text check). This caused issues where some audio binary
data (e.g., long Telegram voice messages) could accidentally pass the
heuristic check and get processed as text content.

This fix:
1. Adds audio to the early skip alongside image/video (more efficient)
2. Removes the redundant secondary check that had the flawed condition

Fixes audio binary being incorrectly processed as text in Telegram and
other platforms.

* Media: skip binary media in file extraction (#7475) (thanks @AlexZhangji)

---------

Co-authored-by: Shakker <shakkerdroid@gmail.com>
This commit is contained in:
Ji
2026-02-02 14:20:04 -08:00
committed by GitHub
parent 966228a6a9
commit f49297e2c1
3 changed files with 33 additions and 28 deletions

View File

@@ -317,6 +317,13 @@ function resolveTextMimeFromName(name?: string): string | undefined {
return TEXT_EXT_MIME.get(ext);
}
/**
 * Reports whether a MIME type names binary media (image, audio or video).
 *
 * Matching is done on the top-level type prefix (`image/`, `audio/`,
 * `video/`). Letter case and surrounding whitespace are ignored, so the
 * result does not depend on the caller having normalized the value first.
 *
 * @param mime - MIME type string to classify; may be omitted.
 * @returns `true` when the type starts with `image/`, `audio/` or `video/`;
 *   `false` otherwise, including when `mime` is missing or empty.
 */
function isBinaryMediaMime(mime?: string): boolean {
  if (!mime) {
    return false;
  }
  // MIME type and subtype names are case-insensitive, so compare on a
  // trimmed, lower-cased copy rather than the raw input.
  const normalized = mime.trim().toLowerCase();
  return (
    normalized.startsWith("image/") ||
    normalized.startsWith("audio/") ||
    normalized.startsWith("video/")
  );
}
async function extractFileBlocks(params: {
attachments: ReturnType<typeof normalizeMediaAttachments>;
cache: ReturnType<typeof createMediaAttachmentCache>;
@@ -337,7 +344,7 @@ async function extractFileBlocks(params: {
}
const forcedTextMime = resolveTextMimeFromName(attachment.path ?? attachment.url ?? "");
const kind = forcedTextMime ? "document" : resolveAttachmentKind(attachment);
if (!forcedTextMime && (kind === "image" || kind === "video")) {
if (!forcedTextMime && (kind === "image" || kind === "video" || kind === "audio")) {
continue;
}
if (!limits.allowUrl && attachment.url && !attachment.path) {
@@ -361,16 +368,17 @@ async function extractFileBlocks(params: {
}
const nameHint = bufferResult?.fileName ?? attachment.path ?? attachment.url;
const forcedTextMimeResolved = forcedTextMime ?? resolveTextMimeFromName(nameHint ?? "");
const rawMime = bufferResult?.mime ?? attachment.mime;
const normalizedRawMime = normalizeMimeType(rawMime);
if (!forcedTextMimeResolved && isBinaryMediaMime(normalizedRawMime)) {
continue;
}
const utf16Charset = resolveUtf16Charset(bufferResult?.buffer);
const textSample = decodeTextSample(bufferResult?.buffer);
const textLike = Boolean(utf16Charset) || looksLikeUtf8Text(bufferResult?.buffer);
if (!forcedTextMimeResolved && kind === "audio" && !textLike) {
continue;
}
const guessedDelimited = textLike ? guessDelimitedMime(textSample) : undefined;
const textHint =
forcedTextMimeResolved ?? guessedDelimited ?? (textLike ? "text/plain" : undefined);
const rawMime = bufferResult?.mime ?? attachment.mime;
const mimeType = sanitizeMimeType(textHint ?? normalizeMimeType(rawMime));
// Log when MIME type is overridden from non-text to text for auditability
if (textHint && rawMime && !rawMime.startsWith("text/")) {