fix(media): normalize MIME kind detection for audio transcription

This commit is contained in:
Lucenx9
2026-03-02 23:59:32 +01:00
committed by Peter Steinberger
parent 481da215b9
commit ec688d809f
2 changed files with 39 additions and 1 deletions

View File

@@ -361,6 +361,44 @@ describe("applyMediaUnderstanding", () => {
expect(ctx.Body).toBe("[Audio]\nTranscript:\nremote transcript");
});
// Regression guard: an audio attachment whose MIME type carries a parameter,
// mixed casing and surrounding whitespace must still be transcribed.
it("transcribes WhatsApp audio with parameterized MIME despite casing/whitespace", async () => {
  const audioCtx = await createAudioCtx({
    fileName: "voice-note",
    mediaType: " Audio/Ogg; codecs=opus ",
  });
  // Present the message as a direct WhatsApp chat so the allow rules below apply.
  audioCtx.ChatType = "direct";
  audioCtx.Surface = "whatsapp";

  // Audio understanding is denied by default and only allowed for DMs / WhatsApp.
  const config: OpenClawConfig = {
    tools: {
      media: {
        audio: {
          enabled: true,
          maxBytes: 1024 * 1024,
          scope: {
            default: "deny",
            rules: [
              { action: "allow", match: { chatType: "dm" } },
              { action: "allow", match: { channel: "whatsapp" } },
            ],
          },
          models: [{ provider: "groq" }],
        },
      },
    },
  };

  const groqProviders = createGroqProviders("whatsapp transcript");
  const { appliedAudio } = await applyMediaUnderstanding({
    ctx: audioCtx,
    cfg: config,
    providers: groqProviders,
  });

  // The transcript must be recorded on the context and folded into the body.
  expect(appliedAudio).toBe(true);
  expect(audioCtx.Transcript).toBe("whatsapp transcript");
  expect(audioCtx.Body).toBe("[Audio]\nTranscript:\nwhatsapp transcript");
});
it("skips URL-only audio when remote file is too small", async () => {
// Override the default mock to return a tiny buffer (below MIN_AUDIO_FILE_BYTES)
mockedFetchRemoteMedia.mockResolvedValueOnce({

View File

@@ -188,5 +188,5 @@ export function imageMimeFromFormat(format?: string | null): string | undefined
}
/**
 * Classifies a MIME type string into a {@link MediaKind}.
 *
 * The raw value is passed through `normalizeMimeType` before being handed to
 * `mediaKindFromMime`, so the classification does not depend on the exact
 * spelling supplied by the caller.
 * NOTE(review): presumably normalization lowercases, trims and drops
 * parameters such as `; codecs=opus` — confirm against `normalizeMimeType`.
 *
 * @param mime - MIME type as reported by the channel; may be `null` or omitted.
 * @returns The media kind that `mediaKindFromMime` yields for the normalized value.
 */
export function kindFromMime(mime?: string | null): MediaKind {
  // Only the normalized lookup is kept: a preceding un-normalized return
  // would make this line unreachable and skip normalization entirely.
  return mediaKindFromMime(normalizeMimeType(mime));
}