refactor: unify media understanding pipeline

This commit is contained in:
Peter Steinberger
2026-01-17 04:38:20 +00:00
parent 49ecbd8fea
commit fcb7c9ff65
24 changed files with 1250 additions and 643 deletions

View File

@@ -12,7 +12,7 @@ export function extractMediaUserText(body?: string): string | undefined {
}
function formatSection(
title: "Audio" | "Video" | "Image",
title: string,
kind: "Transcript" | "Description",
text: string,
userText?: string,
@@ -40,11 +40,21 @@ export function formatMediaUnderstandingBody(params: {
sections.push(`User text:\n${userText}`);
}
const counts = new Map<MediaUnderstandingOutput["kind"], number>();
for (const output of outputs) {
counts.set(output.kind, (counts.get(output.kind) ?? 0) + 1);
}
const seen = new Map<MediaUnderstandingOutput["kind"], number>();
for (const output of outputs) {
const count = counts.get(output.kind) ?? 1;
const next = (seen.get(output.kind) ?? 0) + 1;
seen.set(output.kind, next);
const suffix = count > 1 ? ` ${next}/${count}` : "";
if (output.kind === "audio.transcription") {
sections.push(
formatSection(
"Audio",
`Audio${suffix}`,
"Transcript",
output.text,
outputs.length === 1 ? userText : undefined,
@@ -55,7 +65,7 @@ export function formatMediaUnderstandingBody(params: {
if (output.kind === "image.description") {
sections.push(
formatSection(
"Image",
`Image${suffix}`,
"Description",
output.text,
outputs.length === 1 ? userText : undefined,
@@ -65,7 +75,7 @@ export function formatMediaUnderstandingBody(params: {
}
sections.push(
formatSection(
"Video",
`Video${suffix}`,
"Description",
output.text,
outputs.length === 1 ? userText : undefined,
@@ -75,3 +85,10 @@ export function formatMediaUnderstandingBody(params: {
return sections.join("\n\n").trim();
}
/**
 * Combines the text of several outputs into one string.
 *
 * - Exactly one output: its `text` is returned unchanged, with no label.
 * - Any other count: every entry is rendered as `Audio N:` (1-based, in
 *   input order) followed by its text on the next line, and the entries are
 *   separated by a blank line. An empty list therefore yields `""`.
 *
 * @param outputs Outputs whose `text` should be combined.
 * @returns The combined text.
 */
export function formatAudioTranscripts(outputs: MediaUnderstandingOutput[]): string {
  if (outputs.length !== 1) {
    const labelled: string[] = [];
    for (let position = 0; position < outputs.length; position += 1) {
      const entry = outputs[position];
      // Labels are 1-based even though the array index is 0-based.
      labelled.push(`Audio ${position + 1}:\n${entry.text}`);
    }
    return labelled.join("\n\n");
  }

  // A lone output needs no numbering, so hand back its text as-is.
  const [only] = outputs;
  return only.text;
}