mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-19 09:28:37 +00:00
test: consolidate media auto-detect coverage
This commit is contained in:
@@ -43,7 +43,6 @@ const unitIsolatedFilesRaw = [
|
|||||||
"src/agents/subagent-announce.format.test.ts",
|
"src/agents/subagent-announce.format.test.ts",
|
||||||
"src/infra/archive.test.ts",
|
"src/infra/archive.test.ts",
|
||||||
"src/cli/daemon-cli.coverage.test.ts",
|
"src/cli/daemon-cli.coverage.test.ts",
|
||||||
"test/media-understanding.auto.test.ts",
|
|
||||||
// Model normalization test imports config/model discovery stack; keep off unit-fast critical path.
|
// Model normalization test imports config/model discovery stack; keep off unit-fast critical path.
|
||||||
"src/agents/models-config.normalizes-gemini-3-ids-preview-google-providers.test.ts",
|
"src/agents/models-config.normalizes-gemini-3-ids-preview-google-providers.test.ts",
|
||||||
// Auth profile rotation suite is retry-heavy and high-variance under vmForks contention.
|
// Auth profile rotation suite is retry-heavy and high-variance under vmForks contention.
|
||||||
|
|||||||
@@ -6,6 +6,8 @@ import type { MsgContext } from "../auto-reply/templating.js";
|
|||||||
import type { OpenClawConfig } from "../config/config.js";
|
import type { OpenClawConfig } from "../config/config.js";
|
||||||
import { resolvePreferredOpenClawTmpDir } from "../infra/tmp-openclaw-dir.js";
|
import { resolvePreferredOpenClawTmpDir } from "../infra/tmp-openclaw-dir.js";
|
||||||
import { fetchRemoteMedia } from "../media/fetch.js";
|
import { fetchRemoteMedia } from "../media/fetch.js";
|
||||||
|
import { withEnvAsync } from "../test-utils/env.js";
|
||||||
|
import { clearMediaUnderstandingBinaryCacheForTests } from "./runner.js";
|
||||||
|
|
||||||
vi.mock("../agents/model-auth.js", () => ({
|
vi.mock("../agents/model-auth.js", () => ({
|
||||||
resolveApiKeyForProvider: vi.fn(async () => ({
|
resolveApiKeyForProvider: vi.fn(async () => ({
|
||||||
@@ -115,12 +117,38 @@ async function createTempMediaFile(params: { fileName: string; content: Buffer |
|
|||||||
return mediaPath;
|
return mediaPath;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Writes a small placeholder script named `name` into `dir` and returns its path.
 *
 * The file is created with mode 0o755 so it carries the executable bit. Its
 * content is a fixed `echo mocked` line.
 *
 * @param dir - Directory that will contain the file.
 * @param name - File name of the placeholder executable.
 * @returns The joined path of the file that was written.
 */
async function createMockExecutable(dir: string, name: string) {
  const target = path.join(dir, name);
  const scriptBody = "echo mocked\n";
  await fs.writeFile(target, scriptBody, { mode: 0o755 });
  return target;
}
|
||||||
|
|
||||||
|
/**
 * Runs `run` through `withEnvAsync` with a fixed set of media auto-detect
 * variables set to `undefined`, overlaid by the caller's `env`.
 *
 * Entries in `env` win over the cleared defaults, so a test only lists the
 * variables it wants present.
 *
 * @param env - Per-test overrides merged on top of the cleared defaults.
 * @param run - Callback executed by `withEnvAsync`.
 * @returns Whatever `run` resolves to.
 */
async function withMediaAutoDetectEnv<T>(
  env: Record<string, string | undefined>,
  run: () => Promise<T>,
): Promise<T> {
  // Model locations, provider keys and agent dirs are blanked unless `env` sets them.
  const clearedDefaults = {
    SHERPA_ONNX_MODEL_DIR: undefined,
    WHISPER_CPP_MODEL: undefined,
    OPENAI_API_KEY: undefined,
    GROQ_API_KEY: undefined,
    DEEPGRAM_API_KEY: undefined,
    GEMINI_API_KEY: undefined,
    OPENCLAW_AGENT_DIR: undefined,
    PI_CODING_AGENT_DIR: undefined,
  };
  const mergedEnv = { ...clearedDefaults, ...env };
  return await withEnvAsync(mergedEnv, run);
}
|
||||||
|
|
||||||
async function createAudioCtx(params?: {
|
async function createAudioCtx(params?: {
|
||||||
body?: string;
|
body?: string;
|
||||||
fileName?: string;
|
fileName?: string;
|
||||||
mediaType?: string;
|
mediaType?: string;
|
||||||
content?: Buffer | string;
|
content?: Buffer | string;
|
||||||
}) {
|
}): Promise<MsgContext> {
|
||||||
const mediaPath = await createTempMediaFile({
|
const mediaPath = await createTempMediaFile({
|
||||||
fileName: params?.fileName ?? "note.ogg",
|
fileName: params?.fileName ?? "note.ogg",
|
||||||
content: params?.content ?? Buffer.from([0, 255, 0, 1, 2, 3, 4, 5, 6, 7, 8]),
|
content: params?.content ?? Buffer.from([0, 255, 0, 1, 2, 3, 4, 5, 6, 7, 8]),
|
||||||
@@ -179,6 +207,7 @@ describe("applyMediaUnderstanding", () => {
|
|||||||
contentType: "audio/ogg",
|
contentType: "audio/ogg",
|
||||||
fileName: "note.ogg",
|
fileName: "note.ogg",
|
||||||
});
|
});
|
||||||
|
clearMediaUnderstandingBinaryCacheForTests();
|
||||||
});
|
});
|
||||||
|
|
||||||
afterAll(async () => {
|
afterAll(async () => {
|
||||||
@@ -357,6 +386,119 @@ describe("applyMediaUnderstanding", () => {
|
|||||||
expect(ctx.Body).toBe("[Audio]\nTranscript:\ncli transcript");
|
expect(ctx.Body).toBe("[Audio]\nTranscript:\ncli transcript");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Stages a fake sherpa binary plus four model files, points PATH and
// SHERPA_ONNX_MODEL_DIR at them, and expects the mocked exec output to become
// the transcript.
it("auto-detects sherpa for audio when binary and model files are available", async () => {
  const binDir = await createTempMediaDir();
  const modelDir = await createTempMediaDir();
  await createMockExecutable(binDir, "sherpa-onnx-offline");
  for (const modelFile of ["tokens.txt", "encoder.onnx", "decoder.onnx", "joiner.onnx"]) {
    await fs.writeFile(path.join(modelDir, modelFile), "a");
  }

  const ctx = await createAudioCtx({
    fileName: "sample.wav",
    mediaType: "audio/wav",
    content: "audio",
  });
  const cfg: OpenClawConfig = { tools: { media: { audio: {} } } };

  // The exec module is mocked; queue the JSON payload returned for the next call.
  const { runExec } = await import("../process/exec.js");
  const mockedRunExec = vi.mocked(runExec);
  mockedRunExec.mockResolvedValueOnce({
    stdout: '{"text":"sherpa ok"}',
    stderr: "",
  });

  const sherpaEnv = { PATH: binDir, SHERPA_ONNX_MODEL_DIR: modelDir };
  const result = await withMediaAutoDetectEnv(sherpaEnv, () =>
    applyMediaUnderstanding({ ctx, cfg }),
  );

  expect(result.appliedAudio).toBe(true);
  expect(ctx.Transcript).toBe("sherpa ok");
  expect(mockedRunExec).toHaveBeenCalledWith(
    "sherpa-onnx-offline",
    expect.any(Array),
    expect.any(Object),
  );
});
|
||||||
|
|
||||||
|
// Only a fake whisper-cli binary and a model file are staged here (no sherpa
// binary or model dir), so the run is expected to invoke whisper-cli.
it("auto-detects whisper-cli when sherpa is unavailable", async () => {
  const binDir = await createTempMediaDir();
  const modelDir = await createTempMediaDir();
  await createMockExecutable(binDir, "whisper-cli");
  const modelPath = path.join(modelDir, "tiny.bin");
  await fs.writeFile(modelPath, "model");

  const ctx = await createAudioCtx({
    fileName: "sample.wav",
    mediaType: "audio/wav",
    content: "audio",
  });
  const cfg: OpenClawConfig = { tools: { media: { audio: {} } } };

  // The queued stdout carries a trailing newline; the assertion below expects it trimmed.
  const { runExec } = await import("../process/exec.js");
  const mockedRunExec = vi.mocked(runExec);
  mockedRunExec.mockResolvedValueOnce({
    stdout: "whisper cpp ok\n",
    stderr: "",
  });

  const whisperEnv = { PATH: binDir, WHISPER_CPP_MODEL: modelPath };
  const result = await withMediaAutoDetectEnv(whisperEnv, () =>
    applyMediaUnderstanding({ ctx, cfg }),
  );

  expect(result.appliedAudio).toBe(true);
  expect(ctx.Transcript).toBe("whisper cpp ok");
  expect(mockedRunExec).toHaveBeenCalledWith(
    "whisper-cli",
    expect.any(Array),
    expect.any(Object),
  );
});
|
||||||
|
|
||||||
|
// With an empty PATH directory, isolated agent dirs and no provider keys, the
// audio message must be left untouched and the exec mock must never be called.
it("skips audio auto-detect when no supported binaries or provider keys are available", async () => {
  const emptyBinDir = await createTempMediaDir();
  const isolatedAgentDir = await createTempMediaDir();
  const ctx = await createAudioCtx({
    fileName: "sample.wav",
    mediaType: "audio/wav",
    content: "audio",
  });
  const cfg: OpenClawConfig = { tools: { media: { audio: {} } } };

  // Reset the mock so the not-called assertion starts from a clean history.
  const { runExec } = await import("../process/exec.js");
  const mockedRunExec = vi.mocked(runExec);
  mockedRunExec.mockReset();

  const isolatedEnv = {
    PATH: emptyBinDir,
    OPENCLAW_AGENT_DIR: isolatedAgentDir,
    PI_CODING_AGENT_DIR: isolatedAgentDir,
  };
  const result = await withMediaAutoDetectEnv(isolatedEnv, () =>
    applyMediaUnderstanding({ ctx, cfg }),
  );

  expect(result.appliedAudio).toBe(false);
  expect(ctx.Transcript).toBeUndefined();
  expect(ctx.Body).toBe("<media:audio>");
  expect(mockedRunExec).not.toHaveBeenCalled();
});
|
||||||
|
|
||||||
it("uses CLI image understanding and preserves caption for commands", async () => {
|
it("uses CLI image understanding and preserves caption for commands", async () => {
|
||||||
const imagePath = await createTempMediaFile({
|
const imagePath = await createTempMediaFile({
|
||||||
fileName: "photo.jpg",
|
fileName: "photo.jpg",
|
||||||
|
|||||||
@@ -1,223 +0,0 @@
|
|||||||
import fs from "node:fs/promises";
|
|
||||||
import path from "node:path";
|
|
||||||
import { afterEach, beforeEach, describe, expect, it } from "vitest";
|
|
||||||
import type { MsgContext } from "../src/auto-reply/templating.js";
|
|
||||||
import type { OpenClawConfig } from "../src/config/config.js";
|
|
||||||
import { resolvePreferredOpenClawTmpDir } from "../src/infra/tmp-openclaw-dir.js";
|
|
||||||
import { applyMediaUnderstanding } from "../src/media-understanding/apply.js";
|
|
||||||
import { clearMediaUnderstandingBinaryCacheForTests } from "../src/media-understanding/runner.js";
|
|
||||||
|
|
||||||
/**
 * Creates a fresh temporary directory whose name starts with `prefix`, under
 * the directory returned by `resolvePreferredOpenClawTmpDir()`.
 *
 * The parent directory is created first if it does not exist yet.
 *
 * @param prefix - Leading part of the new directory's name.
 * @returns The path of the newly created directory.
 */
const makeTempDir = async (prefix: string) => {
  const tmpRoot = resolvePreferredOpenClawTmpDir();
  await fs.mkdir(tmpRoot, { recursive: true });
  const template = path.join(tmpRoot, prefix);
  return await fs.mkdtemp(template);
};
|
|
||||||
|
|
||||||
/**
 * Writes `content` to `dir/name` with mode 0o755 and returns the file path.
 *
 * @param dir - Directory that will contain the script.
 * @param name - File name of the script.
 * @param content - Full script text to write.
 * @returns The joined path of the file that was written.
 */
const writeExecutable = async (dir: string, name: string, content: string) => {
  const scriptPath = path.join(dir, name);
  const writeOptions = { mode: 0o755 };
  await fs.writeFile(scriptPath, content, writeOptions);
  return scriptPath;
};
|
|
||||||
|
|
||||||
/**
 * Creates a temp directory containing one placeholder media file named
 * `sample<ext>` whose content is the literal text "audio".
 *
 * @param ext - File extension including the leading dot (callers pass e.g. ".wav").
 * @returns The created directory and the path of the file inside it.
 */
const makeTempMedia = async (ext: string) => {
  const dir = await makeTempDir("openclaw-media-e2e-");
  const fileName = `sample${ext}`;
  const filePath = path.join(dir, fileName);
  await fs.writeFile(filePath, "audio");
  return { dir, filePath };
};
|
|
||||||
|
|
||||||
/**
 * Captures the current values of the environment variables these tests change.
 *
 * Variables that are not set are recorded as `undefined`.
 *
 * @returns An object with one entry per tracked variable.
 */
const envSnapshot = () => {
  const {
    PATH,
    SHERPA_ONNX_MODEL_DIR,
    WHISPER_CPP_MODEL,
    OPENAI_API_KEY,
    GROQ_API_KEY,
    DEEPGRAM_API_KEY,
    GEMINI_API_KEY,
    OPENCLAW_AGENT_DIR,
    PI_CODING_AGENT_DIR,
  } = process.env;
  return {
    PATH,
    SHERPA_ONNX_MODEL_DIR,
    WHISPER_CPP_MODEL,
    OPENAI_API_KEY,
    GROQ_API_KEY,
    DEEPGRAM_API_KEY,
    GEMINI_API_KEY,
    OPENCLAW_AGENT_DIR,
    PI_CODING_AGENT_DIR,
  };
};
|
|
||||||
|
|
||||||
/**
 * Puts the tracked environment variables back to the values held in `snapshot`.
 *
 * A value of `undefined` means the variable was unset when the snapshot was
 * taken, so it is deleted rather than assigned.
 *
 * @param snapshot - Values previously captured by `envSnapshot()`.
 */
const restoreEnv = (snapshot: ReturnType<typeof envSnapshot>) => {
  const trackedKeys = [
    "PATH",
    "SHERPA_ONNX_MODEL_DIR",
    "WHISPER_CPP_MODEL",
    "OPENAI_API_KEY",
    "GROQ_API_KEY",
    "DEEPGRAM_API_KEY",
    "GEMINI_API_KEY",
    "OPENCLAW_AGENT_DIR",
    "PI_CODING_AGENT_DIR",
  ] as const;
  for (const key of trackedKeys) {
    const saved = snapshot[key];
    if (saved !== undefined) {
      process.env[key] = saved;
      continue;
    }
    delete process.env[key];
  }
};
|
|
||||||
|
|
||||||
/**
 * Runs `run` and afterwards restores the tracked environment variables to the
 * values they had before the call, whether `run` resolves or throws.
 *
 * @param run - Callback that may mutate `process.env`.
 * @returns Whatever `run` resolves to.
 */
const withEnvSnapshot = async <T>(run: () => Promise<T>): Promise<T> => {
  const before = envSnapshot();
  try {
    const outcome = await run();
    return outcome;
  } finally {
    restoreEnv(before);
  }
};
|
|
||||||
|
|
||||||
/**
 * Creates a temp directory via `makeTempDir` and appends its path to `tempPaths`.
 *
 * @param tempPaths - List the new directory path is pushed onto.
 * @param prefix - Name prefix forwarded to `makeTempDir`.
 * @returns The path of the created directory.
 */
const createTrackedTempDir = async (tempPaths: string[], prefix: string) => {
  const created = await makeTempDir(prefix);
  tempPaths.push(created);
  return created;
};
|
|
||||||
|
|
||||||
/**
 * Creates a placeholder media file via `makeTempMedia` and appends its
 * containing directory to `tempPaths`.
 *
 * @param tempPaths - List the media file's directory is pushed onto.
 * @param ext - File extension forwarded to `makeTempMedia`.
 * @returns The path of the media file (not of its directory).
 */
const createTrackedTempMedia = async (tempPaths: string[], ext: string) => {
  const { dir, filePath } = await makeTempMedia(ext);
  tempPaths.push(dir);
  return filePath;
};
|
|
||||||
|
|
||||||
/**
 * Auto-detect suite that places shell-script stand-ins for sherpa-onnx-offline,
 * whisper-cli and gemini on PATH and checks what `applyMediaUnderstanding`
 * writes back onto the message context. The script-based cases are skipped on
 * win32.
 */
describe("media understanding auto-detect (e2e)", () => {
  // Directories created by the running test; emptied again in afterEach.
  let tempPaths: string[] = [];

  // Alias for the cases that rely on bash scripts as fake binaries.
  const posixOnlyIt = it.skipIf(process.platform === "win32");

  // Fresh audio message context pointing at `filePath`.
  const audioCtxFor = (filePath: string): MsgContext => ({
    Body: "<media:audio>",
    MediaPath: filePath,
    MediaType: "audio/wav",
  });

  // Fresh config with an empty audio tool section.
  const audioCfg = (): OpenClawConfig => ({ tools: { media: { audio: {} } } });

  beforeEach(() => {
    clearMediaUnderstandingBinaryCacheForTests();
  });

  afterEach(async () => {
    // Best-effort cleanup: removal failures are deliberately ignored.
    for (const tempPath of tempPaths) {
      await fs.rm(tempPath, { recursive: true, force: true }).catch(() => {});
    }
    tempPaths = [];
  });

  posixOnlyIt("uses sherpa-onnx-offline when available", async () => {
    await withEnvSnapshot(async () => {
      const binDir = await createTrackedTempDir(tempPaths, "openclaw-bin-sherpa-");
      const modelDir = await createTrackedTempDir(tempPaths, "openclaw-sherpa-model-");

      for (const modelFile of ["tokens.txt", "encoder.onnx", "decoder.onnx", "joiner.onnx"]) {
        await fs.writeFile(path.join(modelDir, modelFile), "a");
      }

      // Fake binary prints a JSON object with a "text" field.
      await writeExecutable(
        binDir,
        "sherpa-onnx-offline",
        `#!/usr/bin/env bash\necho "{\\"text\\":\\"sherpa ok\\"}"\n`,
      );

      process.env.PATH = `${binDir}:/usr/bin:/bin`;
      process.env.SHERPA_ONNX_MODEL_DIR = modelDir;

      const filePath = await createTrackedTempMedia(tempPaths, ".wav");
      const ctx = audioCtxFor(filePath);
      const cfg = audioCfg();

      await applyMediaUnderstanding({ ctx, cfg });

      expect(ctx.Transcript).toBe("sherpa ok");
    });
  });

  posixOnlyIt("uses whisper-cli when sherpa is missing", async () => {
    await withEnvSnapshot(async () => {
      const binDir = await createTrackedTempDir(tempPaths, "openclaw-bin-whispercpp-");
      const modelDir = await createTrackedTempDir(tempPaths, "openclaw-whispercpp-model-");

      const modelPath = path.join(modelDir, "tiny.bin");
      await fs.writeFile(modelPath, "model");

      // Fake binary finds the value after "-of" and writes the transcript to "<value>.txt".
      const whisperScript =
        [
          "#!/usr/bin/env bash",
          'out=""',
          'prev=""',
          'for arg in "$@"; do',
          ' if [ "$prev" = "-of" ]; then out="$arg"; break; fi',
          ' prev="$arg"',
          "done",
          'if [ -n "$out" ]; then echo \'whisper cpp ok\' > "${out}.txt"; fi',
        ].join("\n") + "\n";
      await writeExecutable(binDir, "whisper-cli", whisperScript);

      process.env.PATH = `${binDir}:/usr/bin:/bin`;
      process.env.WHISPER_CPP_MODEL = modelPath;

      const filePath = await createTrackedTempMedia(tempPaths, ".wav");
      const ctx = audioCtxFor(filePath);
      const cfg = audioCfg();

      await applyMediaUnderstanding({ ctx, cfg });

      expect(ctx.Transcript).toBe("whisper cpp ok");
    });
  });

  posixOnlyIt("uses gemini CLI for images when available", async () => {
    await withEnvSnapshot(async () => {
      const binDir = await createTrackedTempDir(tempPaths, "openclaw-bin-gemini-");

      // Fake binary prints a JSON object with a "response" field.
      await writeExecutable(
        binDir,
        "gemini",
        `#!/usr/bin/env bash\necho '{"response":"gemini ok"}'\n`,
      );

      process.env.PATH = `${binDir}:/usr/bin:/bin`;

      const filePath = await createTrackedTempMedia(tempPaths, ".png");
      const ctx: MsgContext = {
        Body: "<media:image>",
        MediaPath: filePath,
        MediaType: "image/png",
      };
      const cfg: OpenClawConfig = { tools: { media: { image: {} } } };

      await applyMediaUnderstanding({ ctx, cfg });

      expect(ctx.Body).toContain("gemini ok");
    });
  });

  it("skips auto-detect when no supported binaries are available", async () => {
    await withEnvSnapshot(async () => {
      const emptyBinDir = await createTrackedTempDir(tempPaths, "openclaw-bin-empty-");
      const isolatedAgentDir = await createTrackedTempDir(tempPaths, "openclaw-agent-empty-");

      // PATH holds only an empty directory; model locations and provider keys are unset.
      process.env.PATH = emptyBinDir;
      delete process.env.SHERPA_ONNX_MODEL_DIR;
      delete process.env.WHISPER_CPP_MODEL;
      delete process.env.OPENAI_API_KEY;
      delete process.env.GROQ_API_KEY;
      delete process.env.DEEPGRAM_API_KEY;
      delete process.env.GEMINI_API_KEY;
      process.env.OPENCLAW_AGENT_DIR = isolatedAgentDir;
      process.env.PI_CODING_AGENT_DIR = isolatedAgentDir;

      const filePath = await createTrackedTempMedia(tempPaths, ".wav");
      const ctx = audioCtxFor(filePath);
      const cfg = audioCfg();

      await applyMediaUnderstanding({ ctx, cfg });

      expect(ctx.Transcript).toBeUndefined();
      expect(ctx.Body).toBe("<media:audio>");
    });
  });
});
|
|
||||||
Reference in New Issue
Block a user