mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-08 03:41:22 +00:00
refactor(media): split audio helpers and attachment cache
This commit is contained in:
@@ -4,6 +4,7 @@ import { afterAll, beforeAll, beforeEach, describe, expect, it, vi } from "vites
|
||||
import type { MsgContext } from "../auto-reply/templating.js";
|
||||
import type { OpenClawConfig } from "../config/config.js";
|
||||
import { resolvePreferredOpenClawTmpDir } from "../infra/tmp-openclaw-dir.js";
|
||||
import { createSafeAudioFixtureBuffer } from "./runner.test-utils.js";
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Module mocks
|
||||
@@ -30,14 +31,17 @@ vi.mock("../agents/model-auth.js", () => ({
|
||||
resolveAuthProfileOrder: vi.fn(() => []),
|
||||
}));
|
||||
|
||||
class MediaFetchErrorMock extends Error {
|
||||
code: string;
|
||||
constructor(message: string, code: string) {
|
||||
super(message);
|
||||
this.name = "MediaFetchError";
|
||||
this.code = code;
|
||||
const { MediaFetchErrorMock } = vi.hoisted(() => {
|
||||
class MediaFetchErrorMock extends Error {
|
||||
code: string;
|
||||
constructor(message: string, code: string) {
|
||||
super(message);
|
||||
this.name = "MediaFetchError";
|
||||
this.code = code;
|
||||
}
|
||||
}
|
||||
}
|
||||
return { MediaFetchErrorMock };
|
||||
});
|
||||
|
||||
vi.mock("../media/fetch.js", () => ({
|
||||
fetchRemoteMedia: vi.fn(),
|
||||
@@ -68,7 +72,7 @@ let suiteTempMediaRootDir = "";
|
||||
/**
 * Creates a fresh `case-*` directory under the suite temp media root and
 * writes an audio fixture named `note.ogg` into it.
 *
 * The fixture bytes come from `createSafeAudioFixtureBuffer(2048)`; the file is
 * written exactly once (a previous `Buffer.alloc(2048, 0xab)` write to the same
 * path was immediately overwritten and has been dropped).
 *
 * @returns The path of the written `note.ogg` file.
 */
async function createTempAudioFile(): Promise<string> {
  const dir = await fs.mkdtemp(path.join(suiteTempMediaRootDir, "case-"));
  const filePath = path.join(dir, "note.ogg");
  await fs.writeFile(filePath, createSafeAudioFixtureBuffer(2048));
  return filePath;
}
|
||||
|
||||
|
||||
@@ -10,6 +10,7 @@ import { fetchRemoteMedia } from "../media/fetch.js";
|
||||
import { runExec } from "../process/exec.js";
|
||||
import { withEnvAsync } from "../test-utils/env.js";
|
||||
import { clearMediaUnderstandingBinaryCacheForTests } from "./runner.js";
|
||||
import { createSafeAudioFixtureBuffer } from "./runner.test-utils.js";
|
||||
|
||||
vi.mock("../agents/model-auth.js", () => ({
|
||||
resolveApiKeyForProvider: vi.fn(async () => ({
|
||||
@@ -174,7 +175,7 @@ async function createAudioCtx(params?: {
|
||||
}): Promise<MsgContext> {
|
||||
const mediaPath = await createTempMediaFile({
|
||||
fileName: params?.fileName ?? "note.ogg",
|
||||
content: params?.content ?? Buffer.alloc(2048, 0xab),
|
||||
content: params?.content ?? createSafeAudioFixtureBuffer(2048),
|
||||
});
|
||||
return {
|
||||
Body: params?.body ?? "<media:audio>",
|
||||
@@ -190,7 +191,7 @@ async function setupAudioAutoDetectCase(stdout: string): Promise<{
|
||||
const ctx = await createAudioCtx({
|
||||
fileName: "sample.wav",
|
||||
mediaType: "audio/wav",
|
||||
content: Buffer.alloc(2048, 0xab),
|
||||
content: createSafeAudioFixtureBuffer(2048),
|
||||
});
|
||||
const cfg: OpenClawConfig = { tools: { media: { audio: {} } } };
|
||||
mockedRunExec.mockResolvedValueOnce({
|
||||
@@ -249,7 +250,7 @@ describe("applyMediaUnderstanding", () => {
|
||||
mockedFetchRemoteMedia.mockClear();
|
||||
mockedRunExec.mockReset();
|
||||
mockedFetchRemoteMedia.mockResolvedValue({
|
||||
buffer: Buffer.alloc(2048, 0xab),
|
||||
buffer: createSafeAudioFixtureBuffer(2048),
|
||||
contentType: "audio/ogg",
|
||||
fileName: "note.ogg",
|
||||
});
|
||||
@@ -540,7 +541,7 @@ describe("applyMediaUnderstanding", () => {
|
||||
const ctx = await createAudioCtx({
|
||||
fileName: "sample.wav",
|
||||
mediaType: "audio/wav",
|
||||
content: Buffer.alloc(2048, 0xab),
|
||||
content: createSafeAudioFixtureBuffer(2048),
|
||||
});
|
||||
const cfg: OpenClawConfig = { tools: { media: { audio: {} } } };
|
||||
mockedResolveApiKey.mockResolvedValue({
|
||||
@@ -654,7 +655,7 @@ describe("applyMediaUnderstanding", () => {
|
||||
it("uses active model when enabled and models are missing", async () => {
|
||||
const audioPath = await createTempMediaFile({
|
||||
fileName: "fallback.ogg",
|
||||
content: Buffer.alloc(2048, 0xab),
|
||||
content: createSafeAudioFixtureBuffer(2048),
|
||||
});
|
||||
|
||||
const ctx: MsgContext = {
|
||||
@@ -690,7 +691,7 @@ describe("applyMediaUnderstanding", () => {
|
||||
|
||||
it("handles multiple audio attachments when attachment mode is all", async () => {
|
||||
const dir = await createTempMediaDir();
|
||||
const audioBytes = Buffer.alloc(2048, 0xab);
|
||||
const audioBytes = createSafeAudioFixtureBuffer(2048);
|
||||
const audioPathA = path.join(dir, "note-a.ogg");
|
||||
const audioPathB = path.join(dir, "note-b.ogg");
|
||||
await fs.writeFile(audioPathA, audioBytes);
|
||||
@@ -737,7 +738,7 @@ describe("applyMediaUnderstanding", () => {
|
||||
const audioPath = path.join(dir, "note.ogg");
|
||||
const videoPath = path.join(dir, "clip.mp4");
|
||||
await fs.writeFile(imagePath, "image-bytes");
|
||||
await fs.writeFile(audioPath, Buffer.alloc(2048, 0xab));
|
||||
await fs.writeFile(audioPath, createSafeAudioFixtureBuffer(2048));
|
||||
await fs.writeFile(videoPath, "video-bytes");
|
||||
|
||||
const ctx: MsgContext = {
|
||||
|
||||
@@ -8,9 +8,9 @@ import {
|
||||
normalizeMimeType,
|
||||
resolveInputFileLimits,
|
||||
} from "../media/input-files.js";
|
||||
import { isDeliverableMessageChannel } from "../utils/message-channel.js";
|
||||
import { resolveAttachmentKind } from "./attachments.js";
|
||||
import { runWithConcurrency } from "./concurrency.js";
|
||||
import { DEFAULT_ECHO_TRANSCRIPT_FORMAT, sendTranscriptEcho } from "./echo-transcript.js";
|
||||
import {
|
||||
extractMediaUserText,
|
||||
formatAudioTranscripts,
|
||||
@@ -463,68 +463,6 @@ async function extractFileBlocks(params: {
|
||||
return blocks;
|
||||
}
|
||||
|
||||
const DEFAULT_ECHO_FORMAT = '📝 "{transcript}"';
|
||||
|
||||
/**
 * Formats a transcript echo message using the configured format string.
 *
 * The first `{transcript}` placeholder in `format` is replaced with the
 * transcript text; a format without the placeholder is returned unchanged.
 *
 * The transcript is inserted verbatim. A replacer function is used instead of
 * a replacement string so that sequences such as `$&`, `$$`, `` $` `` or `$'`
 * inside the transcript are not interpreted as replacement patterns.
 *
 * @param transcript - Transcript text to embed.
 * @param format - Format string containing a `{transcript}` placeholder.
 * @returns The formatted echo message.
 */
function formatEchoTranscript(transcript: string, format: string): string {
  return format.replace("{transcript}", () => transcript);
}
|
||||
|
||||
/**
 * Echoes a transcript back to the chat the message came from.
 *
 * The channel is taken from `ctx.Provider` (falling back to `ctx.Surface`) and
 * the recipient from `ctx.OriginatingTo` (falling back to `ctx.From`). The echo
 * is skipped when either is missing or when the lower-cased, trimmed channel is
 * not a deliverable message channel. Delivery errors are caught and logged, so
 * this function does not reject on delivery failure.
 */
async function sendTranscriptEcho(params: {
  ctx: MsgContext;
  cfg: OpenClawConfig;
  transcript: string;
  format: string;
}): Promise<void> {
  const { ctx, cfg, transcript, format } = params;
  const rawChannel = ctx.Provider ?? ctx.Surface ?? "";
  const recipient = ctx.OriginatingTo ?? ctx.From ?? "";

  // Both a channel and a recipient are required to deliver anything.
  if (!(rawChannel && recipient)) {
    if (shouldLogVerbose()) {
      logVerbose("media: echo-transcript skipped (no channel/to resolved from ctx)");
    }
    return;
  }

  const deliveryChannel = rawChannel.trim().toLowerCase();
  if (!isDeliverableMessageChannel(deliveryChannel)) {
    if (shouldLogVerbose()) {
      logVerbose(
        `media: echo-transcript skipped (channel "${String(deliveryChannel)}" is not deliverable)`,
      );
    }
    return;
  }

  const message = formatEchoTranscript(transcript, format);

  try {
    // Loaded lazily, only once an echo is actually going to be sent.
    const outbound = await import("../infra/outbound/deliver.js");
    await outbound.deliverOutboundPayloads({
      cfg,
      channel: deliveryChannel,
      to: recipient,
      accountId: ctx.AccountId ?? undefined,
      threadId: ctx.MessageThreadId ?? undefined,
      payloads: [{ text: message }],
      bestEffort: true,
    });
    if (shouldLogVerbose()) {
      logVerbose(`media: echo-transcript sent to ${deliveryChannel}/${recipient}`);
    }
  } catch (error) {
    logVerbose(`media: echo-transcript delivery failed: ${String(error)}`);
  }
}
|
||||
|
||||
export async function applyMediaUnderstanding(params: {
|
||||
ctx: MsgContext;
|
||||
cfg: OpenClawConfig;
|
||||
@@ -598,7 +536,7 @@ export async function applyMediaUnderstanding(params: {
|
||||
ctx,
|
||||
cfg,
|
||||
transcript,
|
||||
format: audioCfg.echoFormat ?? DEFAULT_ECHO_FORMAT,
|
||||
format: audioCfg.echoFormat ?? DEFAULT_ECHO_TRANSCRIPT_FORMAT,
|
||||
});
|
||||
}
|
||||
} else if (originalUserText) {
|
||||
|
||||
323
src/media-understanding/attachments.cache.ts
Normal file
323
src/media-understanding/attachments.cache.ts
Normal file
@@ -0,0 +1,323 @@
|
||||
import fs from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import { logVerbose, shouldLogVerbose } from "../globals.js";
|
||||
import { isAbortError } from "../infra/unhandled-rejections.js";
|
||||
import { fetchRemoteMedia, MediaFetchError } from "../media/fetch.js";
|
||||
import {
|
||||
DEFAULT_IMESSAGE_ATTACHMENT_ROOTS,
|
||||
isInboundPathAllowed,
|
||||
mergeInboundPathRoots,
|
||||
} from "../media/inbound-path-policy.js";
|
||||
import { getDefaultMediaLocalRoots } from "../media/local-roots.js";
|
||||
import { detectMime } from "../media/mime.js";
|
||||
import { buildRandomTempFilePath } from "../plugin-sdk/temp-path.js";
|
||||
import { normalizeAttachmentPath } from "./attachments.normalize.js";
|
||||
import { MediaUnderstandingSkipError } from "./errors.js";
|
||||
import { fetchWithTimeout } from "./providers/shared.js";
|
||||
import type { MediaAttachment } from "./types.js";
|
||||
|
||||
/** Result of resolving an attachment to in-memory bytes. */
type MediaBufferResult = {
  /** The attachment content. */
  buffer: Buffer;
  /** MIME type from the attachment hint, the fetch response, or detection (may be absent). */
  mime?: string;
  /** File name derived from the local path or the fetch result, else `media-<n>`. */
  fileName: string;
  /** Byte length of `buffer`. */
  size: number;
};

/** Result of resolving an attachment to a file on disk. */
type MediaPathResult = {
  /** Path of the local file or of a temp file holding the attachment content. */
  path: string;
  /** Present for temp files; removes the temp file when called. */
  cleanup?: () => Promise<void> | void;
};

/** Per-attachment state memoized by `MediaAttachmentCache`. */
type AttachmentCacheEntry = {
  /** The attachment this entry describes. */
  attachment: MediaAttachment;
  /** Local path for the attachment; cleared when the path is rejected or unreadable. */
  resolvedPath?: string;
  /** Cached file size from a successful stat of `resolvedPath`. */
  statSize?: number;
  /** Cached content once read from disk or fetched from the URL. */
  buffer?: Buffer;
  /** MIME type cached alongside `buffer`. */
  bufferMime?: string;
  /** File name cached alongside `buffer`. */
  bufferFileName?: string;
  /** Temp file written for callers that need a path but only had a buffer. */
  tempPath?: string;
  /** Removes `tempPath`. */
  tempCleanup?: () => Promise<void>;
};

/** Built-in allowed roots: default media local roots merged with the iMessage attachment roots. */
const DEFAULT_LOCAL_PATH_ROOTS = mergeInboundPathRoots(
  getDefaultMediaLocalRoots(),
  DEFAULT_IMESSAGE_ATTACHMENT_ROOTS,
);

/** Options accepted by the `MediaAttachmentCache` constructor. */
export type MediaAttachmentCacheOptions = {
  /** Extra allowed roots, merged with the built-in defaults. */
  localPathRoots?: readonly string[];
};
|
||||
|
||||
/**
 * Extracts the URL string from a fetch-style input.
 *
 * @param input - A URL string, a `URL` instance, or a request-like object with a `url` property.
 * @returns The URL as a string.
 */
function resolveRequestUrl(input: RequestInfo | URL): string {
  if (input instanceof URL) {
    return input.toString();
  }
  return typeof input === "string" ? input : input.url;
}
|
||||
|
||||
/**
 * Resolves media attachments to in-memory buffers or on-disk paths and
 * memoizes the result per attachment index.
 *
 * Local paths are only used when they pass the inbound path policy (both as
 * given and after `realpath` canonicalization) and point at a regular file;
 * otherwise the attachment URL is fetched. Temp files created by `getPath`
 * are removed by `cleanup()`.
 */
export class MediaAttachmentCache {
  private readonly entries = new Map<number, AttachmentCacheEntry>();
  private readonly attachments: MediaAttachment[];
  private readonly localPathRoots: readonly string[];
  // Memoized promise so root canonicalization runs at most once per cache.
  private canonicalLocalPathRoots?: Promise<readonly string[]>;

  /**
   * @param attachments - Attachments to serve; one entry is pre-registered per `attachment.index`.
   * @param options - Optional extra allowed local path roots (merged with the defaults).
   */
  constructor(attachments: MediaAttachment[], options?: MediaAttachmentCacheOptions) {
    this.attachments = attachments;
    this.localPathRoots = mergeInboundPathRoots(options?.localPathRoots, DEFAULT_LOCAL_PATH_ROOTS);
    for (const attachment of attachments) {
      this.entries.set(attachment.index, { attachment });
    }
  }

  /**
   * Returns the attachment content as a buffer, reading the local file when it
   * is allowed and readable, otherwise fetching the attachment URL.
   *
   * @param params.attachmentIndex - Index of the attachment to load.
   * @param params.maxBytes - Size limit applied to cached, local and fetched content.
   * @param params.timeoutMs - Timeout passed to the remote fetch.
   * @throws MediaUnderstandingSkipError with reason `"maxBytes"` when the content is too large,
   *   `"empty"` when there is neither a usable path nor a URL, and `"timeout"` when the fetch aborts.
   *   Other fetch errors are rethrown unchanged.
   */
  async getBuffer(params: {
    attachmentIndex: number;
    maxBytes: number;
    timeoutMs: number;
  }): Promise<MediaBufferResult> {
    const { attachmentIndex, maxBytes, timeoutMs } = params;
    const entry = await this.ensureEntry(attachmentIndex);

    if (entry.buffer) {
      if (entry.buffer.length > maxBytes) {
        throw this.maxBytesError(attachmentIndex, maxBytes);
      }
      return {
        buffer: entry.buffer,
        mime: entry.bufferMime,
        fileName: entry.bufferFileName ?? `media-${attachmentIndex + 1}`,
        size: entry.buffer.length,
      };
    }

    if (entry.resolvedPath) {
      const size = await this.ensureLocalStat(entry);
      // ensureLocalStat clears resolvedPath when the path is blocked or unreadable,
      // in which case we fall through to the URL below.
      const localPath = entry.resolvedPath;
      if (localPath) {
        if (size !== undefined && size > maxBytes) {
          throw this.maxBytesError(attachmentIndex, maxBytes);
        }
        const buffer = await fs.readFile(localPath);
        entry.buffer = buffer;
        entry.bufferMime =
          entry.bufferMime ??
          entry.attachment.mime ??
          (await detectMime({
            buffer,
            filePath: localPath,
          }));
        entry.bufferFileName = path.basename(localPath) || `media-${attachmentIndex + 1}`;
        return {
          buffer,
          mime: entry.bufferMime,
          fileName: entry.bufferFileName,
          size: buffer.length,
        };
      }
    }

    const url = entry.attachment.url?.trim();
    if (!url) {
      throw new MediaUnderstandingSkipError(
        "empty",
        `Attachment ${attachmentIndex + 1} has no path or URL.`,
      );
    }

    try {
      const fetchImpl = (input: RequestInfo | URL, init?: RequestInit) =>
        fetchWithTimeout(resolveRequestUrl(input), init ?? {}, timeoutMs, fetch);
      const fetched = await fetchRemoteMedia({ url, fetchImpl, maxBytes });
      entry.buffer = fetched.buffer;
      entry.bufferMime =
        entry.attachment.mime ??
        fetched.contentType ??
        (await detectMime({
          buffer: fetched.buffer,
          filePath: fetched.fileName ?? url,
        }));
      entry.bufferFileName = fetched.fileName ?? `media-${attachmentIndex + 1}`;
      return {
        buffer: fetched.buffer,
        mime: entry.bufferMime,
        fileName: entry.bufferFileName,
        size: fetched.buffer.length,
      };
    } catch (err) {
      if (err instanceof MediaFetchError && err.code === "max_bytes") {
        throw this.maxBytesError(attachmentIndex, maxBytes);
      }
      if (isAbortError(err)) {
        throw new MediaUnderstandingSkipError(
          "timeout",
          `Attachment ${attachmentIndex + 1} timed out while fetching.`,
        );
      }
      throw err;
    }
  }

  /**
   * Returns a filesystem path for the attachment: the validated local path when
   * one is available, otherwise a temp file holding the buffered content.
   *
   * The local path is always run through the allowed-roots and stat checks,
   * whether or not `maxBytes` is supplied, so a path outside the allowed roots
   * is never handed back to the caller.
   *
   * @param params.attachmentIndex - Index of the attachment to resolve.
   * @param params.maxBytes - Optional size limit; when omitted no size limit is applied.
   * @param params.timeoutMs - Timeout passed to the remote fetch if one is needed.
   * @returns The path, plus a `cleanup` callback when the path is a temp file.
   * @throws MediaUnderstandingSkipError as described for `getBuffer`.
   */
  async getPath(params: {
    attachmentIndex: number;
    maxBytes?: number;
    timeoutMs: number;
  }): Promise<MediaPathResult> {
    const { attachmentIndex, timeoutMs } = params;
    const entry = await this.ensureEntry(attachmentIndex);

    if (entry.resolvedPath) {
      const size = await this.ensureLocalStat(entry);
      const localPath = entry.resolvedPath;
      if (localPath) {
        if (params.maxBytes && size !== undefined && size > params.maxBytes) {
          throw this.maxBytesError(attachmentIndex, params.maxBytes);
        }
        return { path: localPath };
      }
    }

    if (entry.tempPath) {
      if (params.maxBytes && entry.buffer && entry.buffer.length > params.maxBytes) {
        throw this.maxBytesError(attachmentIndex, params.maxBytes);
      }
      return { path: entry.tempPath, cleanup: entry.tempCleanup };
    }

    const bufferResult = await this.getBuffer({
      attachmentIndex,
      maxBytes: params.maxBytes ?? Number.POSITIVE_INFINITY,
      timeoutMs,
    });
    const extension = path.extname(bufferResult.fileName || "") || "";
    const tmpPath = buildRandomTempFilePath({
      prefix: "openclaw-media",
      extension,
    });
    await fs.writeFile(tmpPath, bufferResult.buffer);

    const removeTempFile = async (): Promise<void> => {
      // Forget the cached temp path so a later getPath() does not return a deleted file.
      if (entry.tempPath === tmpPath) {
        entry.tempPath = undefined;
        entry.tempCleanup = undefined;
      }
      await fs.unlink(tmpPath).catch(() => {});
    };
    entry.tempPath = tmpPath;
    entry.tempCleanup = removeTempFile;
    return { path: tmpPath, cleanup: removeTempFile };
  }

  /** Removes every temp file created by `getPath` and forgets the cached temp paths. */
  async cleanup(): Promise<void> {
    const pending: Array<Promise<void>> = [];
    for (const entry of this.entries.values()) {
      const removeTempFile = entry.tempCleanup;
      if (!removeTempFile) {
        continue;
      }
      entry.tempCleanup = undefined;
      entry.tempPath = undefined;
      pending.push(removeTempFile());
    }
    await Promise.all(pending);
  }

  /** Builds the skip error used whenever an attachment exceeds the size limit. */
  private maxBytesError(attachmentIndex: number, maxBytes: number): MediaUnderstandingSkipError {
    return new MediaUnderstandingSkipError(
      "maxBytes",
      `Attachment ${attachmentIndex + 1} exceeds maxBytes ${maxBytes}`,
    );
  }

  /**
   * Returns the cache entry for an index, creating one if needed, and
   * (re)resolves its local path when none is currently set.
   */
  private async ensureEntry(attachmentIndex: number): Promise<AttachmentCacheEntry> {
    const existing = this.entries.get(attachmentIndex);
    if (existing) {
      if (!existing.resolvedPath) {
        existing.resolvedPath = this.resolveLocalPath(existing.attachment);
      }
      return existing;
    }
    // Unknown index: fall back to a bare attachment that has only the index.
    const attachment = this.attachments.find((item) => item.index === attachmentIndex) ?? {
      index: attachmentIndex,
    };
    const entry: AttachmentCacheEntry = {
      attachment,
      resolvedPath: this.resolveLocalPath(attachment),
    };
    this.entries.set(attachmentIndex, entry);
    return entry;
  }

  /** Normalizes the attachment path and makes it absolute; `undefined` when there is no usable path. */
  private resolveLocalPath(attachment: MediaAttachment): string | undefined {
    const rawPath = normalizeAttachmentPath(attachment.path);
    if (!rawPath) {
      return undefined;
    }
    return path.isAbsolute(rawPath) ? rawPath : path.resolve(rawPath);
  }

  /**
   * Validates the entry's local path and returns the file size.
   *
   * The path must be inside the allowed roots as given and again after
   * `realpath`, and must be a regular file. On any failure `entry.resolvedPath`
   * is cleared and `undefined` is returned; on success `resolvedPath` is
   * replaced by the canonical path and the size is cached in `statSize`.
   */
  private async ensureLocalStat(entry: AttachmentCacheEntry): Promise<number | undefined> {
    if (!entry.resolvedPath) {
      return undefined;
    }
    if (!isInboundPathAllowed({ filePath: entry.resolvedPath, roots: this.localPathRoots })) {
      entry.resolvedPath = undefined;
      if (shouldLogVerbose()) {
        logVerbose(
          `Blocked attachment path outside allowed roots: ${entry.attachment.path ?? entry.attachment.url ?? "(unknown)"}`,
        );
      }
      return undefined;
    }
    if (entry.statSize !== undefined) {
      return entry.statSize;
    }
    try {
      const currentPath = entry.resolvedPath;
      const stat = await fs.stat(currentPath);
      if (!stat.isFile()) {
        entry.resolvedPath = undefined;
        return undefined;
      }
      // Re-check the policy on the canonical path so a symlink cannot escape the roots.
      const canonicalPath = await fs.realpath(currentPath).catch(() => currentPath);
      const canonicalRoots = await this.getCanonicalLocalPathRoots();
      if (!isInboundPathAllowed({ filePath: canonicalPath, roots: canonicalRoots })) {
        entry.resolvedPath = undefined;
        if (shouldLogVerbose()) {
          logVerbose(
            `Blocked canonicalized attachment path outside allowed roots: ${canonicalPath}`,
          );
        }
        return undefined;
      }
      entry.resolvedPath = canonicalPath;
      entry.statSize = stat.size;
      return stat.size;
    } catch (err) {
      entry.resolvedPath = undefined;
      if (shouldLogVerbose()) {
        logVerbose(`Failed to read attachment ${entry.attachment.index + 1}: ${String(err)}`);
      }
      return undefined;
    }
  }

  /**
   * Returns the allowed roots merged with their `realpath` forms. Roots that
   * contain `*` or cannot be canonicalized are kept as-is.
   */
  private async getCanonicalLocalPathRoots(): Promise<readonly string[]> {
    if (this.canonicalLocalPathRoots) {
      return await this.canonicalLocalPathRoots;
    }
    this.canonicalLocalPathRoots = (async () =>
      mergeInboundPathRoots(
        this.localPathRoots,
        await Promise.all(
          this.localPathRoots.map(async (root) => {
            if (root.includes("*")) {
              return root;
            }
            return await fs.realpath(root).catch(() => root);
          }),
        ),
      ))();
    return await this.canonicalLocalPathRoots;
  }
}
|
||||
108
src/media-understanding/attachments.normalize.ts
Normal file
108
src/media-understanding/attachments.normalize.ts
Normal file
@@ -0,0 +1,108 @@
|
||||
import { fileURLToPath } from "node:url";
|
||||
import type { MsgContext } from "../auto-reply/templating.js";
|
||||
import { getFileExtension, isAudioFileName, kindFromMime } from "../media/mime.js";
|
||||
import type { MediaAttachment } from "./types.js";
|
||||
|
||||
/**
 * Turns a raw attachment path into a plain filesystem path.
 *
 * Surrounding whitespace is trimmed. Values starting with `file://` are
 * converted with `fileURLToPath`; everything else is returned as trimmed.
 *
 * @param raw - Raw path or `file://` URL; may be missing.
 * @returns The normalized path, or `undefined` for empty input or an invalid file URL.
 */
export function normalizeAttachmentPath(raw?: string | null): string | undefined {
  const trimmed = raw?.trim();
  if (!trimmed) {
    return undefined;
  }
  if (!trimmed.startsWith("file://")) {
    return trimmed;
  }
  try {
    return fileURLToPath(trimmed);
  } catch {
    // Malformed or unsupported file URL: treat as "no path".
    return undefined;
  }
}
|
||||
|
||||
/**
 * Builds the attachment list for a message context.
 *
 * Sources are tried in order: `MediaPaths` (paired by position with
 * `MediaUrls`, falling back to `MediaUrl`), then `MediaUrls` alone, then the
 * single `MediaPath` / `MediaUrl` pair. Each attachment keeps its position in
 * the source array as `index`, even when earlier items are dropped for having
 * neither a path nor a URL.
 *
 * The MIME type comes from the matching `MediaTypes` item when it is a
 * non-blank string; `MediaType` is used as a fallback only when the source
 * array has exactly one item (or for the single-attachment form).
 */
export function normalizeAttachments(ctx: MsgContext): MediaAttachment[] {
  const pathList = Array.isArray(ctx.MediaPaths) ? ctx.MediaPaths : undefined;
  const urlList = Array.isArray(ctx.MediaUrls) ? ctx.MediaUrls : undefined;
  const typeList = Array.isArray(ctx.MediaTypes) ? ctx.MediaTypes : undefined;

  const mimeFor = (total: number, position: number) => {
    const hint = typeList?.[position];
    const cleaned = typeof hint === "string" ? hint.trim() : "";
    if (cleaned) {
      return cleaned;
    }
    return total === 1 ? ctx.MediaType : undefined;
  };

  if (pathList && pathList.length > 0) {
    const total = pathList.length;
    const collected: MediaAttachment[] = [];
    pathList.forEach((rawPath, index) => {
      const candidate = {
        path: rawPath?.trim() || undefined,
        // An empty URL array yields undefined here, so MediaUrl is the fallback.
        url: urlList?.[index] ?? ctx.MediaUrl,
        mime: mimeFor(total, index),
        index,
      };
      if (candidate.path || candidate.url?.trim()) {
        collected.push(candidate);
      }
    });
    return collected;
  }

  if (urlList && urlList.length > 0) {
    const total = urlList.length;
    const collected: MediaAttachment[] = [];
    urlList.forEach((rawUrl, index) => {
      const candidate = {
        path: undefined,
        url: rawUrl?.trim() || undefined,
        mime: mimeFor(total, index),
        index,
      };
      if (candidate.url) {
        collected.push(candidate);
      }
    });
    return collected;
  }

  const singlePath = ctx.MediaPath?.trim();
  const singleUrl = ctx.MediaUrl?.trim();
  if (singlePath || singleUrl) {
    return [
      {
        path: singlePath || undefined,
        url: singleUrl || undefined,
        mime: ctx.MediaType,
        index: 0,
      },
    ];
  }
  return [];
}
|
||||
|
||||
/**
 * Classifies an attachment as image, audio or video.
 *
 * The MIME type wins when it maps to one of those kinds. Otherwise the file
 * extension of the path (or, without a path, the URL) decides: video
 * extensions are checked first, then audio file names, then image extensions.
 *
 * @returns The detected kind, or `"unknown"` when nothing matches.
 */
export function resolveAttachmentKind(
  attachment: MediaAttachment,
): "image" | "audio" | "video" | "document" | "unknown" {
  const mimeKind = kindFromMime(attachment.mime);
  switch (mimeKind) {
    case "image":
    case "audio":
    case "video":
      return mimeKind;
    default:
      break;
  }

  const location = attachment.path ?? attachment.url;
  const extension = getFileExtension(location);
  if (!extension) {
    return "unknown";
  }

  const videoExtensions = [".mp4", ".mov", ".mkv", ".webm", ".avi", ".m4v"];
  if (videoExtensions.includes(extension)) {
    return "video";
  }
  if (isAudioFileName(location)) {
    return "audio";
  }
  const imageExtensions = [".png", ".jpg", ".jpeg", ".webp", ".gif", ".bmp", ".tiff", ".tif"];
  return imageExtensions.includes(extension) ? "image" : "unknown";
}
|
||||
|
||||
/** Returns `true` when `resolveAttachmentKind` classifies the attachment as video. */
export function isVideoAttachment(attachment: MediaAttachment): boolean {
  const kind = resolveAttachmentKind(attachment);
  return kind === "video";
}
|
||||
|
||||
/** Returns `true` when `resolveAttachmentKind` classifies the attachment as audio. */
export function isAudioAttachment(attachment: MediaAttachment): boolean {
  const kind = resolveAttachmentKind(attachment);
  return kind === "audio";
}
|
||||
|
||||
/** Returns `true` when `resolveAttachmentKind` classifies the attachment as an image. */
export function isImageAttachment(attachment: MediaAttachment): boolean {
  const kind = resolveAttachmentKind(attachment);
  return kind === "image";
}
|
||||
89
src/media-understanding/attachments.select.ts
Normal file
89
src/media-understanding/attachments.select.ts
Normal file
@@ -0,0 +1,89 @@
|
||||
import type { MediaUnderstandingAttachmentsConfig } from "../config/types.tools.js";
|
||||
import {
|
||||
isAudioAttachment,
|
||||
isImageAttachment,
|
||||
isVideoAttachment,
|
||||
} from "./attachments.normalize.js";
|
||||
import type { MediaAttachment, MediaUnderstandingCapability } from "./types.js";
|
||||
|
||||
const DEFAULT_MAX_ATTACHMENTS = 1;
|
||||
|
||||
/**
 * Returns the valid attachment records ordered by preference.
 *
 * - `"last"`: reversed order.
 * - `"path"`: attachments with a path first, the rest after, each group in input order.
 * - `"url"`: attachments with a URL first, the rest after, each group in input order.
 * - anything else (including `"first"` or no preference): input order.
 *
 * Values that are not attachment records are dropped; a non-array input yields `[]`.
 */
function orderAttachments(
  attachments: MediaAttachment[],
  prefer?: MediaUnderstandingAttachmentsConfig["prefer"],
): MediaAttachment[] {
  const records = Array.isArray(attachments) ? attachments.filter(isAttachmentRecord) : [];

  // Stable two-way split: items passing the test come first.
  const frontLoad = (hasField: (item: MediaAttachment) => unknown): MediaAttachment[] => {
    const front: MediaAttachment[] = [];
    const back: MediaAttachment[] = [];
    for (const item of records) {
      (hasField(item) ? front : back).push(item);
    }
    return front.concat(back);
  };

  switch (prefer) {
    case "last":
      return records.toReversed();
    case "path":
      return frontLoad((item) => item.path);
    case "url":
      return frontLoad((item) => item.url);
    default:
      return records;
  }
}
|
||||
|
||||
/**
 * Type guard for attachment records.
 *
 * A value qualifies when it is a non-null object with a numeric `index` and,
 * where present, string `path`, `url` and `mime` fields and a boolean
 * `alreadyTranscribed` field.
 */
function isAttachmentRecord(value: unknown): value is MediaAttachment {
  if (typeof value !== "object" || value === null) {
    return false;
  }
  const record = value as Record<string, unknown>;
  const absentOr = (field: unknown, expected: "string" | "boolean"): boolean =>
    field === undefined || typeof field === expected;

  return (
    typeof record.index === "number" &&
    absentOr(record.path, "string") &&
    absentOr(record.url, "string") &&
    absentOr(record.mime, "string") &&
    absentOr(record.alreadyTranscribed, "boolean")
  );
}
|
||||
|
||||
/**
 * Picks the attachments to process for a capability.
 *
 * Attachments are filtered to the kind matching `capability` (any capability
 * other than `"image"` or `"audio"` selects video), and audio attachments
 * already marked `alreadyTranscribed` are skipped. The matches are ordered by
 * `policy.prefer`; mode `"all"` returns up to `policy.maxAttachments` of them
 * (at least one), any other mode returns only the first.
 *
 * @returns The selected attachments, or `[]` when nothing matches.
 */
export function selectAttachments(params: {
  capability: MediaUnderstandingCapability;
  attachments: MediaAttachment[];
  policy?: MediaUnderstandingAttachmentsConfig;
}): MediaAttachment[] {
  const { capability, attachments, policy } = params;
  const candidates = Array.isArray(attachments) ? attachments.filter(isAttachmentRecord) : [];

  const matchesCapability = (item: MediaAttachment): boolean => {
    switch (capability) {
      case "audio":
        // Skip already-transcribed audio attachments from preflight
        return item.alreadyTranscribed ? false : isAudioAttachment(item);
      case "image":
        return isImageAttachment(item);
      default:
        return isVideoAttachment(item);
    }
  };

  const matches = candidates.filter(matchesCapability);
  if (matches.length === 0) {
    return [];
  }

  const ordered = orderAttachments(matches, policy?.prefer);
  const selectAll = (policy?.mode ?? "first") === "all";
  const limit = selectAll
    ? Math.max(1, policy?.maxAttachments ?? DEFAULT_MAX_ATTACHMENTS)
    : 1;
  return ordered.slice(0, limit);
}
|
||||
@@ -1,510 +1,9 @@
|
||||
import fs from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import { fileURLToPath } from "node:url";
|
||||
import type { MsgContext } from "../auto-reply/templating.js";
|
||||
import type { MediaUnderstandingAttachmentsConfig } from "../config/types.tools.js";
|
||||
import { logVerbose, shouldLogVerbose } from "../globals.js";
|
||||
import { isAbortError } from "../infra/unhandled-rejections.js";
|
||||
import { fetchRemoteMedia, MediaFetchError } from "../media/fetch.js";
|
||||
import {
|
||||
DEFAULT_IMESSAGE_ATTACHMENT_ROOTS,
|
||||
isInboundPathAllowed,
|
||||
mergeInboundPathRoots,
|
||||
} from "../media/inbound-path-policy.js";
|
||||
import { getDefaultMediaLocalRoots } from "../media/local-roots.js";
|
||||
import { detectMime, getFileExtension, isAudioFileName, kindFromMime } from "../media/mime.js";
|
||||
import { buildRandomTempFilePath } from "../plugin-sdk/temp-path.js";
|
||||
import { MediaUnderstandingSkipError } from "./errors.js";
|
||||
import { fetchWithTimeout } from "./providers/shared.js";
|
||||
import type { MediaAttachment, MediaUnderstandingCapability } from "./types.js";
|
||||
|
||||
type MediaBufferResult = {
|
||||
buffer: Buffer;
|
||||
mime?: string;
|
||||
fileName: string;
|
||||
size: number;
|
||||
};
|
||||
|
||||
type MediaPathResult = {
|
||||
path: string;
|
||||
cleanup?: () => Promise<void> | void;
|
||||
};
|
||||
|
||||
type AttachmentCacheEntry = {
|
||||
attachment: MediaAttachment;
|
||||
resolvedPath?: string;
|
||||
statSize?: number;
|
||||
buffer?: Buffer;
|
||||
bufferMime?: string;
|
||||
bufferFileName?: string;
|
||||
tempPath?: string;
|
||||
tempCleanup?: () => Promise<void>;
|
||||
};
|
||||
|
||||
const DEFAULT_MAX_ATTACHMENTS = 1;
|
||||
const DEFAULT_LOCAL_PATH_ROOTS = mergeInboundPathRoots(
|
||||
getDefaultMediaLocalRoots(),
|
||||
DEFAULT_IMESSAGE_ATTACHMENT_ROOTS,
|
||||
);
|
||||
|
||||
export type MediaAttachmentCacheOptions = {
|
||||
localPathRoots?: readonly string[];
|
||||
};
|
||||
|
||||
function normalizeAttachmentPath(raw?: string | null): string | undefined {
|
||||
const value = raw?.trim();
|
||||
if (!value) {
|
||||
return undefined;
|
||||
}
|
||||
if (value.startsWith("file://")) {
|
||||
try {
|
||||
return fileURLToPath(value);
|
||||
} catch {
|
||||
return undefined;
|
||||
}
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
export function normalizeAttachments(ctx: MsgContext): MediaAttachment[] {
|
||||
const pathsFromArray = Array.isArray(ctx.MediaPaths) ? ctx.MediaPaths : undefined;
|
||||
const urlsFromArray = Array.isArray(ctx.MediaUrls) ? ctx.MediaUrls : undefined;
|
||||
const typesFromArray = Array.isArray(ctx.MediaTypes) ? ctx.MediaTypes : undefined;
|
||||
const resolveMime = (count: number, index: number) => {
|
||||
const typeHint = typesFromArray?.[index];
|
||||
const trimmed = typeof typeHint === "string" ? typeHint.trim() : "";
|
||||
if (trimmed) {
|
||||
return trimmed;
|
||||
}
|
||||
return count === 1 ? ctx.MediaType : undefined;
|
||||
};
|
||||
|
||||
if (pathsFromArray && pathsFromArray.length > 0) {
|
||||
const count = pathsFromArray.length;
|
||||
const urls = urlsFromArray && urlsFromArray.length > 0 ? urlsFromArray : undefined;
|
||||
return pathsFromArray
|
||||
.map((value, index) => ({
|
||||
path: value?.trim() || undefined,
|
||||
url: urls?.[index] ?? ctx.MediaUrl,
|
||||
mime: resolveMime(count, index),
|
||||
index,
|
||||
}))
|
||||
.filter((entry) => Boolean(entry.path?.trim() || entry.url?.trim()));
|
||||
}
|
||||
|
||||
if (urlsFromArray && urlsFromArray.length > 0) {
|
||||
const count = urlsFromArray.length;
|
||||
return urlsFromArray
|
||||
.map((value, index) => ({
|
||||
path: undefined,
|
||||
url: value?.trim() || undefined,
|
||||
mime: resolveMime(count, index),
|
||||
index,
|
||||
}))
|
||||
.filter((entry) => Boolean(entry.url?.trim()));
|
||||
}
|
||||
|
||||
const pathValue = ctx.MediaPath?.trim();
|
||||
const url = ctx.MediaUrl?.trim();
|
||||
if (!pathValue && !url) {
|
||||
return [];
|
||||
}
|
||||
return [
|
||||
{
|
||||
path: pathValue || undefined,
|
||||
url: url || undefined,
|
||||
mime: ctx.MediaType,
|
||||
index: 0,
|
||||
},
|
||||
];
|
||||
}
|
||||
|
||||
export function resolveAttachmentKind(
|
||||
attachment: MediaAttachment,
|
||||
): "image" | "audio" | "video" | "document" | "unknown" {
|
||||
const kind = kindFromMime(attachment.mime);
|
||||
if (kind === "image" || kind === "audio" || kind === "video") {
|
||||
return kind;
|
||||
}
|
||||
|
||||
const ext = getFileExtension(attachment.path ?? attachment.url);
|
||||
if (!ext) {
|
||||
return "unknown";
|
||||
}
|
||||
if ([".mp4", ".mov", ".mkv", ".webm", ".avi", ".m4v"].includes(ext)) {
|
||||
return "video";
|
||||
}
|
||||
if (isAudioFileName(attachment.path ?? attachment.url)) {
|
||||
return "audio";
|
||||
}
|
||||
if ([".png", ".jpg", ".jpeg", ".webp", ".gif", ".bmp", ".tiff", ".tif"].includes(ext)) {
|
||||
return "image";
|
||||
}
|
||||
return "unknown";
|
||||
}
|
||||
|
||||
/** Returns true when the attachment resolves to the "video" kind. */
export function isVideoAttachment(attachment: MediaAttachment): boolean {
  const kind = resolveAttachmentKind(attachment);
  return kind === "video";
}
|
||||
|
||||
/** Returns true when the attachment resolves to the "audio" kind. */
export function isAudioAttachment(attachment: MediaAttachment): boolean {
  const kind = resolveAttachmentKind(attachment);
  return kind === "audio";
}
|
||||
|
||||
/** Returns true when the attachment resolves to the "image" kind. */
export function isImageAttachment(attachment: MediaAttachment): boolean {
  const kind = resolveAttachmentKind(attachment);
  return kind === "image";
}
|
||||
|
||||
/**
 * Extracts the URL string from any of the input shapes `fetch` accepts.
 *
 * @param input - A URL string, a `URL` instance, or a request-like object
 *   exposing a `url` property.
 * @returns The URL as a string.
 */
function resolveRequestUrl(input: RequestInfo | URL): string {
  if (input instanceof URL) {
    return input.toString();
  }
  // Remaining shapes: a plain string, or a Request carrying its own `url`.
  return typeof input === "string" ? input : input.url;
}
|
||||
|
||||
/**
 * Orders attachments according to the configured preference.
 *
 * Entries that are not well-formed attachment records are dropped first.
 * - `undefined` / "first": keep the incoming order.
 * - "last": reverse the order.
 * - "path": attachments with a path first, the rest after (stable).
 * - "url": attachments with a URL first, the rest after (stable).
 * Any other value keeps the incoming order.
 *
 * @param attachments - Candidate attachments.
 * @param prefer - Ordering preference from the attachments policy.
 * @returns The filtered attachments in the preferred order.
 */
function orderAttachments(
  attachments: MediaAttachment[],
  prefer?: MediaUnderstandingAttachmentsConfig["prefer"],
): MediaAttachment[] {
  const list = Array.isArray(attachments) ? attachments.filter(isAttachmentRecord) : [];

  // Stable partition: items satisfying `hasField` keep their relative order
  // and are placed ahead of the remaining items.
  const preferring = (hasField: (item: MediaAttachment) => unknown): MediaAttachment[] => {
    const preferred: MediaAttachment[] = [];
    const rest: MediaAttachment[] = [];
    for (const item of list) {
      (hasField(item) ? preferred : rest).push(item);
    }
    return [...preferred, ...rest];
  };

  switch (prefer) {
    case "last":
      return list.toReversed();
    case "path":
      return preferring((item) => item.path);
    case "url":
      return preferring((item) => item.url);
    default:
      return list;
  }
}
|
||||
|
||||
/**
 * Runtime guard for attachment records.
 *
 * A value qualifies when it is a non-null object with a numeric `index`, and
 * each of `path`, `url`, `mime` is either absent or a string, and
 * `alreadyTranscribed` is either absent or a boolean.
 *
 * @param value - Arbitrary value to validate.
 * @returns True when `value` has the shape of a media attachment.
 */
function isAttachmentRecord(value: unknown): value is MediaAttachment {
  if (typeof value !== "object" || value === null) {
    return false;
  }
  const record = value as Record<string, unknown>;
  if (typeof record.index !== "number") {
    return false;
  }
  // Optional string fields: present values must be strings.
  for (const key of ["path", "url", "mime"] as const) {
    const field = record[key];
    if (field !== undefined && typeof field !== "string") {
      return false;
    }
  }
  const transcribedFlag = record.alreadyTranscribed;
  return transcribedFlag === undefined || typeof transcribedFlag === "boolean";
}
|
||||
|
||||
/**
 * Picks the attachments a given capability should process.
 *
 * Malformed records are discarded, the remainder is filtered by kind
 * (image / audio / everything else is treated as video), ordered by
 * `policy.prefer`, and finally truncated: one attachment by default, or up to
 * `policy.maxAttachments` (at least one) when `policy.mode` is "all".
 *
 * @param params.capability - Capability the attachments are selected for.
 * @param params.attachments - Candidate attachments.
 * @param params.policy - Optional selection policy (prefer / mode / maxAttachments).
 * @returns The selected attachments; empty when nothing matches.
 */
export function selectAttachments(params: {
  capability: MediaUnderstandingCapability;
  attachments: MediaAttachment[];
  policy?: MediaUnderstandingAttachmentsConfig;
}): MediaAttachment[] {
  const { capability, attachments, policy } = params;
  const candidates = Array.isArray(attachments) ? attachments.filter(isAttachmentRecord) : [];

  const matchesCapability = (item: MediaAttachment): boolean => {
    switch (capability) {
      case "image":
        return isImageAttachment(item);
      case "audio":
        // Skip already-transcribed audio attachments from preflight
        return item.alreadyTranscribed ? false : isAudioAttachment(item);
      default:
        return isVideoAttachment(item);
    }
  };

  const matches = candidates.filter(matchesCapability);
  if (matches.length === 0) {
    return [];
  }

  const ordered = orderAttachments(matches, policy?.prefer);
  const mode = policy?.mode ?? "first";
  if (mode !== "all") {
    return ordered.slice(0, 1);
  }
  const maxAttachments = policy?.maxAttachments ?? DEFAULT_MAX_ATTACHMENTS;
  return ordered.slice(0, Math.max(1, maxAttachments));
}
|
||||
|
||||
/**
 * Caches per-attachment state (resolved local path, stat size, loaded buffer,
 * temp file) so that repeated buffer/path requests for the same attachment do
 * not re-read or re-download the media.
 *
 * Local paths are only used after they pass the allowed-roots check in
 * `ensureLocalStat`; a path that fails the check is discarded and the
 * attachment URL (if any) is used instead.
 */
export class MediaAttachmentCache {
  private readonly entries = new Map<number, AttachmentCacheEntry>();
  private readonly attachments: MediaAttachment[];
  private readonly localPathRoots: readonly string[];
  // Memoized promise so canonical roots are resolved at most once per cache.
  private canonicalLocalPathRoots?: Promise<readonly string[]>;

  /**
   * @param attachments - Attachments to index by their `index` field.
   * @param options - Optional extra local path roots, merged with the defaults.
   */
  constructor(attachments: MediaAttachment[], options?: MediaAttachmentCacheOptions) {
    this.attachments = attachments;
    this.localPathRoots = mergeInboundPathRoots(options?.localPathRoots, DEFAULT_LOCAL_PATH_ROOTS);
    for (const attachment of attachments) {
      this.entries.set(attachment.index, { attachment });
    }
  }

  /**
   * Returns the attachment contents as a buffer, loading from the cached
   * buffer, the validated local path, or the remote URL (in that order).
   *
   * @param params.attachmentIndex - Index of the attachment to load.
   * @param params.maxBytes - Size limit; larger media is skipped.
   * @param params.timeoutMs - Timeout applied to the remote fetch.
   * @returns Buffer plus MIME type, file name, and size.
   * @throws MediaUnderstandingSkipError with reason "maxBytes", "empty"
   *   (no usable path or URL), or "timeout" (fetch aborted). Other fetch
   *   and file-read errors are rethrown unchanged.
   */
  async getBuffer(params: {
    attachmentIndex: number;
    maxBytes: number;
    timeoutMs: number;
  }): Promise<MediaBufferResult> {
    const entry = await this.ensureEntry(params.attachmentIndex);
    if (entry.buffer) {
      if (entry.buffer.length > params.maxBytes) {
        throw this.maxBytesExceeded(params.attachmentIndex, params.maxBytes);
      }
      return {
        buffer: entry.buffer,
        mime: entry.bufferMime,
        fileName: entry.bufferFileName ?? `media-${params.attachmentIndex + 1}`,
        size: entry.buffer.length,
      };
    }

    if (entry.resolvedPath) {
      const size = await this.ensureLocalStat(entry);
      // ensureLocalStat clears resolvedPath when the path is blocked, missing,
      // or not a regular file, so it has to be re-checked here.
      if (entry.resolvedPath) {
        if (size !== undefined && size > params.maxBytes) {
          throw this.maxBytesExceeded(params.attachmentIndex, params.maxBytes);
        }
        const buffer = await fs.readFile(entry.resolvedPath);
        entry.buffer = buffer;
        entry.bufferMime =
          entry.bufferMime ??
          entry.attachment.mime ??
          (await detectMime({
            buffer,
            filePath: entry.resolvedPath,
          }));
        entry.bufferFileName =
          path.basename(entry.resolvedPath) || `media-${params.attachmentIndex + 1}`;
        return {
          buffer,
          mime: entry.bufferMime,
          fileName: entry.bufferFileName,
          size: buffer.length,
        };
      }
    }

    const url = entry.attachment.url?.trim();
    if (!url) {
      throw new MediaUnderstandingSkipError(
        "empty",
        `Attachment ${params.attachmentIndex + 1} has no path or URL.`,
      );
    }

    try {
      // Wrap fetch so every request made by fetchRemoteMedia honors timeoutMs.
      const fetchImpl = (input: RequestInfo | URL, init?: RequestInit) =>
        fetchWithTimeout(resolveRequestUrl(input), init ?? {}, params.timeoutMs, fetch);
      const fetched = await fetchRemoteMedia({ url, fetchImpl, maxBytes: params.maxBytes });
      entry.buffer = fetched.buffer;
      entry.bufferMime =
        entry.attachment.mime ??
        fetched.contentType ??
        (await detectMime({
          buffer: fetched.buffer,
          filePath: fetched.fileName ?? url,
        }));
      entry.bufferFileName = fetched.fileName ?? `media-${params.attachmentIndex + 1}`;
      return {
        buffer: fetched.buffer,
        mime: entry.bufferMime,
        fileName: entry.bufferFileName,
        size: fetched.buffer.length,
      };
    } catch (err) {
      if (err instanceof MediaFetchError && err.code === "max_bytes") {
        throw this.maxBytesExceeded(params.attachmentIndex, params.maxBytes);
      }
      if (isAbortError(err)) {
        throw new MediaUnderstandingSkipError(
          "timeout",
          `Attachment ${params.attachmentIndex + 1} timed out while fetching.`,
        );
      }
      throw err;
    }
  }

  /**
   * Returns a filesystem path for the attachment: the validated local path
   * when available, otherwise a temp file written from the loaded buffer.
   *
   * @param params.attachmentIndex - Index of the attachment to resolve.
   * @param params.maxBytes - Optional size limit; a falsy value means no limit
   *   for local files.
   * @param params.timeoutMs - Timeout applied when the media must be fetched.
   * @returns The path, plus a `cleanup` callback when a temp file is returned.
   * @throws MediaUnderstandingSkipError with reason "maxBytes", or whatever
   *   `getBuffer` throws when the media has to be loaded.
   */
  async getPath(params: {
    attachmentIndex: number;
    maxBytes?: number;
    timeoutMs: number;
  }): Promise<MediaPathResult> {
    const entry = await this.ensureEntry(params.attachmentIndex);
    if (entry.resolvedPath) {
      // Always validate the local path, even without a size limit: the
      // allowed-roots and canonical-path checks live in ensureLocalStat, and
      // skipping it would hand back an unverified path.
      const size = await this.ensureLocalStat(entry);
      if (entry.resolvedPath) {
        if (params.maxBytes && size !== undefined && size > params.maxBytes) {
          throw this.maxBytesExceeded(params.attachmentIndex, params.maxBytes);
        }
        return { path: entry.resolvedPath };
      }
    }

    if (entry.tempPath) {
      if (params.maxBytes && entry.buffer && entry.buffer.length > params.maxBytes) {
        throw this.maxBytesExceeded(params.attachmentIndex, params.maxBytes);
      }
      return { path: entry.tempPath, cleanup: entry.tempCleanup };
    }

    const maxBytes = params.maxBytes ?? Number.POSITIVE_INFINITY;
    const bufferResult = await this.getBuffer({
      attachmentIndex: params.attachmentIndex,
      maxBytes,
      timeoutMs: params.timeoutMs,
    });
    const extension = path.extname(bufferResult.fileName || "") || "";
    const tmpPath = buildRandomTempFilePath({
      prefix: "openclaw-media",
      extension,
    });
    await fs.writeFile(tmpPath, bufferResult.buffer);
    entry.tempPath = tmpPath;
    entry.tempCleanup = async () => {
      // Best-effort removal: a missing temp file is not an error.
      await fs.unlink(tmpPath).catch(() => {});
    };
    return { path: tmpPath, cleanup: entry.tempCleanup };
  }

  /**
   * Runs every pending temp-file cleanup and forgets the temp files, so a
   * later `getPath` call re-creates the file instead of returning a path that
   * has already been deleted.
   */
  async cleanup(): Promise<void> {
    const cleanups: Array<Promise<void> | void> = [];
    for (const entry of this.entries.values()) {
      if (entry.tempCleanup) {
        cleanups.push(Promise.resolve(entry.tempCleanup()));
        entry.tempCleanup = undefined;
        entry.tempPath = undefined;
      }
    }
    await Promise.all(cleanups);
  }

  /** Builds the skip error used whenever an attachment exceeds `maxBytes`. */
  private maxBytesExceeded(attachmentIndex: number, maxBytes: number): MediaUnderstandingSkipError {
    return new MediaUnderstandingSkipError(
      "maxBytes",
      `Attachment ${attachmentIndex + 1} exceeds maxBytes ${maxBytes}`,
    );
  }

  /**
   * Returns the cache entry for an index, creating one (with a bare
   * `{ index }` attachment when the index is unknown) if necessary, and
   * filling in `resolvedPath` from the attachment path when it is unset.
   */
  private async ensureEntry(attachmentIndex: number): Promise<AttachmentCacheEntry> {
    const existing = this.entries.get(attachmentIndex);
    if (existing) {
      if (!existing.resolvedPath) {
        existing.resolvedPath = this.resolveLocalPath(existing.attachment);
      }
      return existing;
    }
    const attachment = this.attachments.find((item) => item.index === attachmentIndex) ?? {
      index: attachmentIndex,
    };
    const entry: AttachmentCacheEntry = {
      attachment,
      resolvedPath: this.resolveLocalPath(attachment),
    };
    this.entries.set(attachmentIndex, entry);
    return entry;
  }

  /** Normalizes the attachment path and makes it absolute; undefined when there is no path. */
  private resolveLocalPath(attachment: MediaAttachment): string | undefined {
    const rawPath = normalizeAttachmentPath(attachment.path);
    if (!rawPath) {
      return undefined;
    }
    return path.isAbsolute(rawPath) ? rawPath : path.resolve(rawPath);
  }

  /**
   * Validates the entry's local path and returns its size in bytes.
   *
   * The path must be inside the allowed roots both as given and after
   * `realpath` canonicalization, and must be a regular file. On any failure
   * `entry.resolvedPath` is cleared and `undefined` is returned. On success
   * `entry.resolvedPath` is replaced by the canonical path and the size is
   * cached in `entry.statSize`.
   */
  private async ensureLocalStat(entry: AttachmentCacheEntry): Promise<number | undefined> {
    if (!entry.resolvedPath) {
      return undefined;
    }
    if (!isInboundPathAllowed({ filePath: entry.resolvedPath, roots: this.localPathRoots })) {
      entry.resolvedPath = undefined;
      if (shouldLogVerbose()) {
        logVerbose(
          `Blocked attachment path outside allowed roots: ${entry.attachment.path ?? entry.attachment.url ?? "(unknown)"}`,
        );
      }
      return undefined;
    }
    if (entry.statSize !== undefined) {
      return entry.statSize;
    }
    try {
      const currentPath = entry.resolvedPath;
      const stat = await fs.stat(currentPath);
      if (!stat.isFile()) {
        entry.resolvedPath = undefined;
        return undefined;
      }
      // Re-check against canonical paths so a symlink cannot point outside the roots.
      const canonicalPath = await fs.realpath(currentPath).catch(() => currentPath);
      const canonicalRoots = await this.getCanonicalLocalPathRoots();
      if (!isInboundPathAllowed({ filePath: canonicalPath, roots: canonicalRoots })) {
        entry.resolvedPath = undefined;
        if (shouldLogVerbose()) {
          logVerbose(
            `Blocked canonicalized attachment path outside allowed roots: ${canonicalPath}`,
          );
        }
        return undefined;
      }
      entry.resolvedPath = canonicalPath;
      entry.statSize = stat.size;
      return stat.size;
    } catch (err) {
      entry.resolvedPath = undefined;
      if (shouldLogVerbose()) {
        logVerbose(`Failed to read attachment ${entry.attachment.index + 1}: ${String(err)}`);
      }
      return undefined;
    }
  }

  /**
   * Resolves the allowed roots to their canonical (`realpath`) form, merged
   * with the original roots. Roots containing "*" are kept verbatim, and a
   * root that cannot be canonicalized falls back to its original value. The
   * result is memoized.
   */
  private async getCanonicalLocalPathRoots(): Promise<readonly string[]> {
    if (this.canonicalLocalPathRoots) {
      return await this.canonicalLocalPathRoots;
    }
    this.canonicalLocalPathRoots = (async () =>
      mergeInboundPathRoots(
        this.localPathRoots,
        await Promise.all(
          this.localPathRoots.map(async (root) => {
            if (root.includes("*")) {
              return root;
            }
            return await fs.realpath(root).catch(() => root);
          }),
        ),
      ))();
    return await this.canonicalLocalPathRoots;
  }
}
|
||||
export {
|
||||
isAudioAttachment,
|
||||
isImageAttachment,
|
||||
isVideoAttachment,
|
||||
normalizeAttachments,
|
||||
resolveAttachmentKind,
|
||||
} from "./attachments.normalize.js";
|
||||
export { selectAttachments } from "./attachments.select.js";
|
||||
export { MediaAttachmentCache, type MediaAttachmentCacheOptions } from "./attachments.cache.js";
|
||||
|
||||
@@ -2,13 +2,11 @@ import type { MsgContext } from "../auto-reply/templating.js";
|
||||
import type { OpenClawConfig } from "../config/config.js";
|
||||
import { logVerbose, shouldLogVerbose } from "../globals.js";
|
||||
import { isAudioAttachment } from "./attachments.js";
|
||||
import { runAudioTranscription } from "./audio-transcription-runner.js";
|
||||
import {
|
||||
type ActiveMediaModel,
|
||||
buildProviderRegistry,
|
||||
createMediaAttachmentCache,
|
||||
normalizeMediaAttachments,
|
||||
resolveMediaAttachmentLocalRoots,
|
||||
runCapability,
|
||||
} from "./runner.js";
|
||||
import type { MediaUnderstandingProvider } from "./types.js";
|
||||
|
||||
@@ -50,31 +48,17 @@ export async function transcribeFirstAudio(params: {
|
||||
logVerbose(`audio-preflight: transcribing attachment ${firstAudio.index} for mention check`);
|
||||
}
|
||||
|
||||
const providerRegistry = buildProviderRegistry(params.providers);
|
||||
const cache = createMediaAttachmentCache(attachments, {
|
||||
localPathRoots: resolveMediaAttachmentLocalRoots({ cfg, ctx }),
|
||||
});
|
||||
|
||||
try {
|
||||
const result = await runCapability({
|
||||
capability: "audio",
|
||||
cfg,
|
||||
const { transcript } = await runAudioTranscription({
|
||||
ctx,
|
||||
attachments: cache,
|
||||
media: attachments,
|
||||
cfg,
|
||||
attachments,
|
||||
agentDir: params.agentDir,
|
||||
providerRegistry,
|
||||
config: audioConfig,
|
||||
providers: params.providers,
|
||||
activeModel: params.activeModel,
|
||||
localPathRoots: resolveMediaAttachmentLocalRoots({ cfg, ctx }),
|
||||
});
|
||||
|
||||
if (!result || result.outputs.length === 0) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
// Extract transcript from first audio output
|
||||
const audioOutput = result.outputs.find((output) => output.kind === "audio.transcription");
|
||||
if (!audioOutput || !audioOutput.text) {
|
||||
if (!transcript) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
@@ -83,18 +67,16 @@ export async function transcribeFirstAudio(params: {
|
||||
|
||||
if (shouldLogVerbose()) {
|
||||
logVerbose(
|
||||
`audio-preflight: transcribed ${audioOutput.text.length} chars from attachment ${firstAudio.index}`,
|
||||
`audio-preflight: transcribed ${transcript.length} chars from attachment ${firstAudio.index}`,
|
||||
);
|
||||
}
|
||||
|
||||
return audioOutput.text;
|
||||
return transcript;
|
||||
} catch (err) {
|
||||
// Log but don't throw - let the message proceed with text-only mention check
|
||||
if (shouldLogVerbose()) {
|
||||
logVerbose(`audio-preflight: transcription failed: ${String(err)}`);
|
||||
}
|
||||
return undefined;
|
||||
} finally {
|
||||
await cache.cleanup();
|
||||
}
|
||||
}
|
||||
|
||||
50
src/media-understanding/audio-transcription-runner.ts
Normal file
50
src/media-understanding/audio-transcription-runner.ts
Normal file
@@ -0,0 +1,50 @@
|
||||
import type { MsgContext } from "../auto-reply/templating.js";
|
||||
import type { OpenClawConfig } from "../config/config.js";
|
||||
import {
|
||||
type ActiveMediaModel,
|
||||
buildProviderRegistry,
|
||||
createMediaAttachmentCache,
|
||||
normalizeMediaAttachments,
|
||||
runCapability,
|
||||
} from "./runner.js";
|
||||
import type { MediaAttachment, MediaUnderstandingProvider } from "./types.js";
|
||||
|
||||
/**
 * Runs the "audio" capability over the given (or context-derived) attachments
 * and returns the first transcription text.
 *
 * The attachment cache created for the run is always cleaned up, including
 * when the capability run throws.
 *
 * @param params.ctx - Message context; used to derive attachments when none are passed.
 * @param params.cfg - Config; `cfg.tools.media.audio` is passed as the capability config.
 * @param params.attachments - Optional pre-normalized attachments.
 * @param params.agentDir - Optional agent directory forwarded to the capability run.
 * @param params.providers - Optional providers used to build the provider registry.
 * @param params.activeModel - Optional active model forwarded to the capability run.
 * @param params.localPathRoots - Optional local path roots for the attachment cache.
 * @returns The trimmed transcript (undefined when empty or absent) together
 *   with the attachments that were considered.
 */
export async function runAudioTranscription(params: {
  ctx: MsgContext;
  cfg: OpenClawConfig;
  attachments?: MediaAttachment[];
  agentDir?: string;
  providers?: Record<string, MediaUnderstandingProvider>;
  activeModel?: ActiveMediaModel;
  localPathRoots?: readonly string[];
}): Promise<{ transcript: string | undefined; attachments: MediaAttachment[] }> {
  const { ctx, cfg, agentDir, activeModel, localPathRoots } = params;

  const attachments = params.attachments ?? normalizeMediaAttachments(ctx);
  if (attachments.length === 0) {
    return { transcript: undefined, attachments };
  }

  const providerRegistry = buildProviderRegistry(params.providers);
  // Only pass cache options when the caller supplied explicit roots.
  const cacheOptions = localPathRoots ? { localPathRoots } : undefined;
  const cache = createMediaAttachmentCache(attachments, cacheOptions);

  try {
    const result = await runCapability({
      capability: "audio",
      cfg,
      ctx,
      attachments: cache,
      media: attachments,
      agentDir,
      providerRegistry,
      config: cfg.tools?.media?.audio,
      activeModel,
    });
    const transcription = result.outputs.find((entry) => entry.kind === "audio.transcription");
    const trimmed = transcription?.text?.trim();
    // Normalize an empty transcript to undefined.
    return { transcript: trimmed ? trimmed : undefined, attachments };
  } finally {
    await cache.cleanup();
  }
}
|
||||
62
src/media-understanding/echo-transcript.ts
Normal file
62
src/media-understanding/echo-transcript.ts
Normal file
@@ -0,0 +1,62 @@
|
||||
import type { MsgContext } from "../auto-reply/templating.js";
|
||||
import type { OpenClawConfig } from "../config/config.js";
|
||||
import { logVerbose, shouldLogVerbose } from "../globals.js";
|
||||
import { isDeliverableMessageChannel } from "../utils/message-channel.js";
|
||||
|
||||
export const DEFAULT_ECHO_TRANSCRIPT_FORMAT = '📝 "{transcript}"';
|
||||
|
||||
/**
 * Substitutes the transcript into the echo format string.
 *
 * Only the first `{transcript}` placeholder is replaced. The transcript is
 * inserted verbatim: a replacer function is used because a plain replacement
 * string would have `$$`, `$&`, `` $` `` and `$'` interpreted as special
 * replacement patterns, corrupting transcripts that contain `$`.
 *
 * @param transcript - Transcribed text to echo.
 * @param format - Format string containing a `{transcript}` placeholder.
 * @returns The formatted echo message.
 */
function formatEchoTranscript(transcript: string, format: string): string {
  return format.replace("{transcript}", () => transcript);
}
|
||||
|
||||
/**
 * Echoes a transcript back to the chat the message came from.
 *
 * The channel is taken from `ctx.Provider` (falling back to `ctx.Surface`) and
 * the recipient from `ctx.OriginatingTo` (falling back to `ctx.From`). The echo
 * is skipped when either is missing or the channel is not deliverable.
 * Best-effort: delivery failures are logged, never thrown.
 *
 * @param params.ctx - Message context used to resolve channel, recipient, account, and thread.
 * @param params.cfg - Config forwarded to the outbound delivery.
 * @param params.transcript - Transcript text to echo.
 * @param params.format - Optional format string; defaults to DEFAULT_ECHO_TRANSCRIPT_FORMAT.
 */
export async function sendTranscriptEcho(params: {
  ctx: MsgContext;
  cfg: OpenClawConfig;
  transcript: string;
  format?: string;
}): Promise<void> {
  const { ctx, cfg, transcript } = params;
  const verbose = (message: string): void => {
    if (shouldLogVerbose()) {
      logVerbose(message);
    }
  };

  const channel = ctx.Provider ?? ctx.Surface ?? "";
  const to = ctx.OriginatingTo ?? ctx.From ?? "";
  const hasRoute = Boolean(channel) && Boolean(to);
  if (!hasRoute) {
    verbose("media: echo-transcript skipped (no channel/to resolved from ctx)");
    return;
  }

  const normalizedChannel = channel.trim().toLowerCase();
  if (!isDeliverableMessageChannel(normalizedChannel)) {
    verbose(
      `media: echo-transcript skipped (channel "${String(normalizedChannel)}" is not deliverable)`,
    );
    return;
  }

  const echoFormat = params.format ?? DEFAULT_ECHO_TRANSCRIPT_FORMAT;
  const text = formatEchoTranscript(transcript, echoFormat);

  try {
    // Loaded on demand via dynamic import rather than at module load.
    const outbound = await import("../infra/outbound/deliver.js");
    await outbound.deliverOutboundPayloads({
      cfg,
      channel: normalizedChannel,
      to,
      accountId: ctx.AccountId ?? undefined,
      threadId: ctx.MessageThreadId ?? undefined,
      payloads: [{ text }],
      bestEffort: true,
    });
    verbose(`media: echo-transcript sent to ${normalizedChannel}/${to}`);
  } catch (err) {
    logVerbose(`media: echo-transcript delivery failed: ${String(err)}`);
  }
}
|
||||
@@ -368,6 +368,16 @@ export function formatDecisionSummary(decision: MediaUnderstandingDecision): str
|
||||
return `${decision.capability}: ${decision.outcome}${countLabel}${viaLabel}${reasonLabel}`;
|
||||
}
|
||||
|
||||
/**
 * Rejects audio attachments smaller than MIN_AUDIO_FILE_BYTES.
 *
 * @param params.size - Size of the audio media in bytes.
 * @param params.attachmentIndex - Zero-based attachment index (reported one-based).
 * @throws MediaUnderstandingSkipError with reason "tooSmall" when the size is
 *   not at least the minimum.
 */
function assertMinAudioSize(params: { size: number; attachmentIndex: number }): void {
  const { size, attachmentIndex } = params;
  const meetsMinimum = size >= MIN_AUDIO_FILE_BYTES;
  if (!meetsMinimum) {
    throw new MediaUnderstandingSkipError(
      "tooSmall",
      `Audio attachment ${attachmentIndex + 1} is too small (${size} bytes, minimum ${MIN_AUDIO_FILE_BYTES})`,
    );
  }
}
|
||||
|
||||
export async function runProviderEntry(params: {
|
||||
capability: MediaUnderstandingCapability;
|
||||
entry: MediaUnderstandingModelConfig;
|
||||
@@ -449,12 +459,7 @@ export async function runProviderEntry(params: {
|
||||
maxBytes,
|
||||
timeoutMs,
|
||||
});
|
||||
if (media.size < MIN_AUDIO_FILE_BYTES) {
|
||||
throw new MediaUnderstandingSkipError(
|
||||
"tooSmall",
|
||||
`Audio attachment ${params.attachmentIndex + 1} is too small (${media.size} bytes, minimum ${MIN_AUDIO_FILE_BYTES})`,
|
||||
);
|
||||
}
|
||||
assertMinAudioSize({ size: media.size, attachmentIndex: params.attachmentIndex });
|
||||
const { apiKeys, baseUrl, headers } = await resolveProviderExecutionContext({
|
||||
providerId,
|
||||
cfg,
|
||||
@@ -574,12 +579,7 @@ export async function runCliEntry(params: {
|
||||
});
|
||||
if (capability === "audio") {
|
||||
const stat = await fs.stat(pathResult.path);
|
||||
if (stat.size < MIN_AUDIO_FILE_BYTES) {
|
||||
throw new MediaUnderstandingSkipError(
|
||||
"tooSmall",
|
||||
`Audio attachment ${params.attachmentIndex + 1} is too small (${stat.size} bytes, minimum ${MIN_AUDIO_FILE_BYTES})`,
|
||||
);
|
||||
}
|
||||
assertMinAudioSize({ size: stat.size, attachmentIndex: params.attachmentIndex });
|
||||
}
|
||||
const outputDir = await fs.mkdtemp(
|
||||
path.join(resolvePreferredOpenClawTmpDir(), "openclaw-media-cli-"),
|
||||
|
||||
@@ -2,6 +2,7 @@ import fs from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { withEnvAsync } from "../test-utils/env.js";
|
||||
import { MIN_AUDIO_FILE_BYTES } from "./defaults.js";
|
||||
import { createMediaAttachmentCache, normalizeMediaAttachments } from "./runner.js";
|
||||
|
||||
type MediaFixtureParams = {
|
||||
@@ -49,12 +50,18 @@ export async function withAudioFixture(
|
||||
filePrefix,
|
||||
extension: "wav",
|
||||
mediaType: "audio/wav",
|
||||
fileContents: Buffer.alloc(2048, 0x52),
|
||||
fileContents: createSafeAudioFixtureBuffer(2048, 0x52),
|
||||
},
|
||||
run,
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Creates a filled buffer that is always larger than MIN_AUDIO_FILE_BYTES, so
 * audio fixtures are never rejected by the minimum-size check.
 *
 * @param size - Requested size in bytes; raised to the safe minimum when smaller or omitted.
 * @param fill - Byte value used to fill the buffer (default 0xab).
 * @returns A buffer of at least MIN_AUDIO_FILE_BYTES + 1 bytes.
 */
export function createSafeAudioFixtureBuffer(size?: number, fill = 0xab): Buffer {
  const smallestSafeLength = MIN_AUDIO_FILE_BYTES + 1;
  const requestedLength = size ?? smallestSafeLength;
  const length = Math.max(requestedLength, smallestSafeLength);
  return Buffer.alloc(length, fill);
}
|
||||
|
||||
export async function withVideoFixture(
|
||||
filePrefix: string,
|
||||
run: (params: MediaFixtureParams) => Promise<void>,
|
||||
|
||||
@@ -1,32 +1,13 @@
|
||||
import { beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import type { OpenClawConfig } from "../config/config.js";
|
||||
|
||||
const {
|
||||
normalizeMediaAttachments,
|
||||
createMediaAttachmentCache,
|
||||
buildProviderRegistry,
|
||||
runCapability,
|
||||
cacheCleanup,
|
||||
} = vi.hoisted(() => {
|
||||
const normalizeMediaAttachments = vi.fn();
|
||||
const cacheCleanup = vi.fn(async () => {});
|
||||
const createMediaAttachmentCache = vi.fn(() => ({ cleanup: cacheCleanup }));
|
||||
const buildProviderRegistry = vi.fn(() => new Map());
|
||||
const runCapability = vi.fn();
|
||||
return {
|
||||
normalizeMediaAttachments,
|
||||
createMediaAttachmentCache,
|
||||
buildProviderRegistry,
|
||||
runCapability,
|
||||
cacheCleanup,
|
||||
};
|
||||
const { runAudioTranscription } = vi.hoisted(() => {
|
||||
const runAudioTranscription = vi.fn();
|
||||
return { runAudioTranscription };
|
||||
});
|
||||
|
||||
vi.mock("./runner.js", () => ({
|
||||
normalizeMediaAttachments,
|
||||
createMediaAttachmentCache,
|
||||
buildProviderRegistry,
|
||||
runCapability,
|
||||
vi.mock("./audio-transcription-runner.js", () => ({
|
||||
runAudioTranscription,
|
||||
}));
|
||||
|
||||
import { transcribeAudioFile } from "./transcribe-audio.js";
|
||||
@@ -34,30 +15,29 @@ import { transcribeAudioFile } from "./transcribe-audio.js";
|
||||
describe("transcribeAudioFile", () => {
|
||||
beforeEach(() => {
|
||||
vi.clearAllMocks();
|
||||
cacheCleanup.mockResolvedValue(undefined);
|
||||
});
|
||||
|
||||
it("does not force audio/wav when mime is omitted", async () => {
|
||||
normalizeMediaAttachments.mockReturnValue([{ index: 0, path: "/tmp/note.mp3" }]);
|
||||
runCapability.mockResolvedValue({
|
||||
outputs: [{ kind: "audio.transcription", text: " hello " }],
|
||||
});
|
||||
runAudioTranscription.mockResolvedValue({ transcript: "hello", attachments: [] });
|
||||
|
||||
const result = await transcribeAudioFile({
|
||||
filePath: "/tmp/note.mp3",
|
||||
cfg: {} as OpenClawConfig,
|
||||
});
|
||||
|
||||
expect(normalizeMediaAttachments).toHaveBeenCalledWith({
|
||||
MediaPath: "/tmp/note.mp3",
|
||||
MediaType: undefined,
|
||||
expect(runAudioTranscription).toHaveBeenCalledWith({
|
||||
ctx: {
|
||||
MediaPath: "/tmp/note.mp3",
|
||||
MediaType: undefined,
|
||||
},
|
||||
cfg: {} as OpenClawConfig,
|
||||
agentDir: undefined,
|
||||
});
|
||||
expect(result).toEqual({ text: "hello" });
|
||||
expect(cacheCleanup).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it("returns undefined and skips cache when there are no attachments", async () => {
|
||||
normalizeMediaAttachments.mockReturnValue([]);
|
||||
it("returns undefined when helper returns no transcript", async () => {
|
||||
runAudioTranscription.mockResolvedValue({ transcript: undefined, attachments: [] });
|
||||
|
||||
const result = await transcribeAudioFile({
|
||||
filePath: "/tmp/missing.wav",
|
||||
@@ -65,16 +45,13 @@ describe("transcribeAudioFile", () => {
|
||||
});
|
||||
|
||||
expect(result).toEqual({ text: undefined });
|
||||
expect(createMediaAttachmentCache).not.toHaveBeenCalled();
|
||||
expect(runCapability).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("always cleans up cache on errors", async () => {
|
||||
it("propagates helper errors", async () => {
|
||||
const cfg = {
|
||||
tools: { media: { audio: { timeoutSeconds: 10 } } },
|
||||
} as unknown as OpenClawConfig;
|
||||
normalizeMediaAttachments.mockReturnValue([{ index: 0, path: "/tmp/note.wav" }]);
|
||||
runCapability.mockRejectedValue(new Error("boom"));
|
||||
runAudioTranscription.mockRejectedValue(new Error("boom"));
|
||||
|
||||
await expect(
|
||||
transcribeAudioFile({
|
||||
@@ -82,14 +59,5 @@ describe("transcribeAudioFile", () => {
|
||||
cfg,
|
||||
}),
|
||||
).rejects.toThrow("boom");
|
||||
|
||||
expect(runCapability).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
capability: "audio",
|
||||
cfg,
|
||||
config: cfg.tools?.media?.audio,
|
||||
}),
|
||||
);
|
||||
expect(cacheCleanup).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,10 +1,5 @@
|
||||
import type { OpenClawConfig } from "../config/config.js";
|
||||
import {
|
||||
buildProviderRegistry,
|
||||
createMediaAttachmentCache,
|
||||
normalizeMediaAttachments,
|
||||
runCapability,
|
||||
} from "./runner.js";
|
||||
import { runAudioTranscription } from "./audio-transcription-runner.js";
|
||||
|
||||
/**
|
||||
* Transcribe an audio file using the configured media-understanding provider.
|
||||
@@ -25,27 +20,10 @@ export async function transcribeAudioFile(params: {
|
||||
MediaPath: params.filePath,
|
||||
MediaType: params.mime,
|
||||
};
|
||||
const attachments = normalizeMediaAttachments(ctx);
|
||||
if (attachments.length === 0) {
|
||||
return { text: undefined };
|
||||
}
|
||||
const cache = createMediaAttachmentCache(attachments);
|
||||
const providerRegistry = buildProviderRegistry();
|
||||
try {
|
||||
const result = await runCapability({
|
||||
capability: "audio",
|
||||
cfg: params.cfg,
|
||||
ctx,
|
||||
attachments: cache,
|
||||
media: attachments,
|
||||
agentDir: params.agentDir,
|
||||
providerRegistry,
|
||||
config: params.cfg.tools?.media?.audio,
|
||||
});
|
||||
const output = result.outputs.find((entry) => entry.kind === "audio.transcription");
|
||||
const text = output?.text?.trim();
|
||||
return { text: text || undefined };
|
||||
} finally {
|
||||
await cache.cleanup();
|
||||
}
|
||||
const { transcript } = await runAudioTranscription({
|
||||
ctx,
|
||||
cfg: params.cfg,
|
||||
agentDir: params.agentDir,
|
||||
});
|
||||
return { text: transcript };
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user