mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-08 15:08:25 +00:00
fix(image): route MiniMax vision to VLM
This commit is contained in:
@@ -21,6 +21,8 @@ async function writeAuthProfiles(agentDir: string, profiles: unknown) {
|
||||
}
|
||||
|
||||
describe("image tool implicit imageModel config", () => {
|
||||
const priorFetch = global.fetch;
|
||||
|
||||
beforeEach(() => {
|
||||
vi.stubEnv("OPENAI_API_KEY", "");
|
||||
vi.stubEnv("ANTHROPIC_API_KEY", "");
|
||||
@@ -30,6 +32,8 @@ describe("image tool implicit imageModel config", () => {
|
||||
|
||||
afterEach(() => {
|
||||
vi.unstubAllEnvs();
|
||||
// @ts-expect-error global fetch cleanup
|
||||
global.fetch = priorFetch;
|
||||
});
|
||||
|
||||
it("stays disabled without auth when no pairing is possible", async () => {
|
||||
@@ -132,6 +136,60 @@ describe("image tool implicit imageModel config", () => {
|
||||
tool.execute("t2", { image: "../escape.png" }),
|
||||
).rejects.toThrow(/escapes sandbox root/i);
|
||||
});
|
||||
|
||||
it("rewrites inbound absolute paths into sandbox media/inbound", async () => {
  // Each run gets its own state directory; it is removed in the `finally`
  // below so repeated runs do not accumulate temp directories.
  const stateDir = await fs.mkdtemp(
    path.join(os.tmpdir(), "clawdbot-image-sandbox-"),
  );
  try {
    const agentDir = path.join(stateDir, "agent");
    const sandboxRoot = path.join(stateDir, "sandbox");
    const inboundDir = path.join(sandboxRoot, "media", "inbound");
    await fs.mkdir(agentDir, { recursive: true });
    await fs.mkdir(inboundDir, { recursive: true });

    // Small PNG fixture placed where the sandbox fallback looks for it.
    const pngB64 =
      "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/woAAn8B9FD5fHAAAAAASUVORK5CYII=";
    await fs.writeFile(
      path.join(inboundDir, "photo.png"),
      Buffer.from(pngB64, "base64"),
    );

    // Stub the network: a 200 response whose JSON body reports success
    // (`base_resp.status_code` 0) with the text "ok".
    const fetch = vi.fn().mockResolvedValue({
      ok: true,
      status: 200,
      statusText: "OK",
      headers: new Headers(),
      json: async () => ({
        content: "ok",
        base_resp: { status_code: 0, status_msg: "" },
      }),
    });
    // @ts-expect-error partial global
    global.fetch = fetch;
    vi.stubEnv("MINIMAX_API_KEY", "minimax-test");

    const cfg: ClawdbotConfig = {
      agents: {
        defaults: {
          model: { primary: "minimax/MiniMax-M2.1" },
          imageModel: { primary: "minimax/MiniMax-VL-01" },
        },
      },
    };
    const tool = createImageTool({ config: cfg, agentDir, sandboxRoot });
    expect(tool).not.toBeNull();
    if (!tool) throw new Error("expected image tool");

    // The absolute path is not under `sandboxRoot`; only its basename matches
    // the fixture written into `media/inbound` above.
    const res = await tool.execute("t1", {
      prompt: "Describe the image.",
      image: "/Users/steipete/.clawdbot/media/inbound/photo.png",
    });

    expect(fetch).toHaveBeenCalledTimes(1);
    expect((res.details as { rewrittenFrom?: string }).rewrittenFrom).toContain(
      "photo.png",
    );
  } finally {
    await fs.rm(stateDir, { recursive: true, force: true });
  }
});
|
||||
});
|
||||
|
||||
describe("image tool data URL support", () => {
|
||||
@@ -151,6 +209,99 @@ describe("image tool data URL support", () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe("image tool MiniMax VLM routing", () => {
  // Small PNG fixture, sent to the tool as a base64 data URL.
  const pngB64 =
    "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/woAAn8B9FD5fHAAAAAASUVORK5CYII=";
  const priorFetch = global.fetch;
  // Temp agent dirs created by the tests; removed again in `afterEach`.
  const tempDirs: string[] = [];

  /**
   * Replaces `global.fetch` with a mock that resolves to a 200 response whose
   * `json()` yields `payload`, and returns the mock for call inspection.
   */
  const installFetchStub = (payload: unknown) => {
    const fetch = vi.fn().mockResolvedValue({
      ok: true,
      status: 200,
      statusText: "OK",
      headers: new Headers(),
      json: async () => payload,
    });
    // @ts-expect-error partial global
    global.fetch = fetch;
    return fetch;
  };

  /**
   * Creates an image tool for a config whose primary model is
   * `minimax/MiniMax-M2.1`, with `MINIMAX_API_KEY` stubbed and a fresh temp
   * agent dir. Fails the test if no tool is produced.
   */
  const createMinimaxTool = async () => {
    const agentDir = await fs.mkdtemp(
      path.join(os.tmpdir(), "clawdbot-minimax-vlm-"),
    );
    tempDirs.push(agentDir);
    vi.stubEnv("MINIMAX_API_KEY", "minimax-test");
    const cfg: ClawdbotConfig = {
      agents: { defaults: { model: { primary: "minimax/MiniMax-M2.1" } } },
    };
    const tool = createImageTool({ config: cfg, agentDir });
    expect(tool).not.toBeNull();
    if (!tool) throw new Error("expected image tool");
    return tool;
  };

  beforeEach(() => {
    vi.stubEnv("MINIMAX_API_KEY", "");
  });

  afterEach(async () => {
    vi.unstubAllEnvs();
    // @ts-expect-error global fetch cleanup
    global.fetch = priorFetch;
    // `splice(0)` empties the list so a later test never re-deletes old dirs.
    await Promise.all(
      tempDirs
        .splice(0)
        .map((dir) => fs.rm(dir, { recursive: true, force: true })),
    );
  });

  it("calls /v1/coding_plan/vlm for minimax image models", async () => {
    const fetch = installFetchStub({
      content: "ok",
      base_resp: { status_code: 0, status_msg: "" },
    });
    const tool = await createMinimaxTool();

    const res = await tool.execute("t1", {
      prompt: "Describe the image.",
      image: `data:image/png;base64,${pngB64}`,
    });

    // Exactly one request, to the VLM endpoint, authenticated with the env key.
    expect(fetch).toHaveBeenCalledTimes(1);
    const [url, init] = fetch.mock.calls[0];
    expect(String(url)).toBe("https://api.minimax.io/v1/coding_plan/vlm");
    expect(init?.method).toBe("POST");
    expect(
      String((init?.headers as Record<string, string>)?.Authorization),
    ).toBe("Bearer minimax-test");
    // The request body carries the prompt and the image as a data URL.
    expect(String(init?.body)).toContain('"prompt":"Describe the image."');
    expect(String(init?.body)).toContain('"image_url":"data:image/png;base64,');

    const text = res.content?.find((b) => b.type === "text")?.text ?? "";
    expect(text).toBe("ok");
  });

  it("surfaces MiniMax API errors from /v1/coding_plan/vlm", async () => {
    // HTTP status is 200, but the body reports a non-zero `status_code`.
    installFetchStub({
      content: "",
      base_resp: { status_code: 1004, status_msg: "bad key" },
    });
    const tool = await createMinimaxTool();

    await expect(
      tool.execute("t1", {
        prompt: "Describe the image.",
        image: `data:image/png;base64,${pngB64}`,
      }),
    ).rejects.toThrow(/MiniMax VLM API error/i);
  });
});
|
||||
|
||||
describe("image tool response validation", () => {
|
||||
it("rejects image-model responses with no final text", () => {
|
||||
expect(() =>
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
import fs from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
|
||||
import {
|
||||
type Api,
|
||||
type AssistantMessage,
|
||||
@@ -19,6 +22,7 @@ import {
|
||||
listProfilesForProvider,
|
||||
} from "../auth-profiles.js";
|
||||
import { DEFAULT_MODEL, DEFAULT_PROVIDER } from "../defaults.js";
|
||||
import { minimaxUnderstandImage } from "../minimax-vlm.js";
|
||||
import { getApiKeyForModel, resolveEnvApiKey } from "../model-auth.js";
|
||||
import { runWithImageModelFallback } from "../model-fallback.js";
|
||||
import { parseModelRef } from "../model-selection.js";
|
||||
@@ -278,6 +282,38 @@ function buildImageContext(
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Resolves the image path for a sandboxed image-tool call.
 *
 * A leading `file://` prefix is stripped, then the path is validated with
 * `assertSandboxPath` against `sandboxRoot`. If that validation throws, the
 * function falls back to `<sandboxRoot>/media/inbound/<basename>`: when a
 * regular file exists there, that file is used instead and the path that was
 * replaced is reported as `rewrittenFrom`.
 *
 * @param params.sandboxRoot - Directory used as both `cwd` and `root` for validation.
 * @param params.imagePath - Image path as given, optionally prefixed with `file://`.
 * @returns The validated path, plus `rewrittenFrom` only when the fallback was used.
 * @throws The original validation error when no regular file exists at the
 *   fallback location.
 */
async function resolveSandboxedImagePath(params: {
  sandboxRoot: string;
  imagePath: string;
}): Promise<{ resolved: string; rewrittenFrom?: string }> {
  const filePrefix = "file://";
  const filePath = params.imagePath.startsWith(filePrefix)
    ? params.imagePath.slice(filePrefix.length)
    : params.imagePath;
  try {
    const out = await assertSandboxPath({
      filePath,
      cwd: params.sandboxRoot,
      root: params.sandboxRoot,
    });
    return { resolved: out.resolved };
  } catch (err) {
    const name = path.basename(filePath);
    const candidateRel = path.join("media", "inbound", name);
    const candidateAbs = path.join(params.sandboxRoot, candidateRel);
    // Require a regular file, not mere existence: a basename of "", "." or
    // ".." (from inputs such as "/" or "../..") makes the candidate collapse
    // to an existing directory, which must not be accepted as an image.
    let candidateIsFile = false;
    try {
      candidateIsFile = (await fs.stat(candidateAbs)).isFile();
    } catch {
      // Candidate missing or unreadable: rethrow the original error below.
    }
    if (!candidateIsFile) throw err;
    const out = await assertSandboxPath({
      filePath: candidateRel,
      cwd: params.sandboxRoot,
      root: params.sandboxRoot,
    });
    return { resolved: out.resolved, rewrittenFrom: filePath };
  }
}
|
||||
|
||||
async function runImagePrompt(params: {
|
||||
cfg?: ClawdbotConfig;
|
||||
agentDir: string;
|
||||
@@ -328,6 +364,18 @@ async function runImagePrompt(params: {
|
||||
agentDir: params.agentDir,
|
||||
});
|
||||
authStorage.setRuntimeApiKey(model.provider, apiKeyInfo.apiKey);
|
||||
const imageDataUrl = `data:${params.mimeType};base64,${params.base64}`;
|
||||
|
||||
if (model.provider === "minimax") {
|
||||
const text = await minimaxUnderstandImage({
|
||||
apiKey: apiKeyInfo.apiKey,
|
||||
prompt: params.prompt,
|
||||
imageDataUrl,
|
||||
modelBaseUrl: model.baseUrl,
|
||||
});
|
||||
return { text, provider: model.provider, model: model.id };
|
||||
}
|
||||
|
||||
const context = buildImageContext(
|
||||
params.prompt,
|
||||
params.base64,
|
||||
@@ -337,23 +385,19 @@ async function runImagePrompt(params: {
|
||||
apiKey: apiKeyInfo.apiKey,
|
||||
maxTokens: 512,
|
||||
})) as AssistantMessage;
|
||||
return {
|
||||
const text = coerceImageAssistantText({
|
||||
message,
|
||||
provider: model.provider,
|
||||
model: model.id,
|
||||
};
|
||||
});
|
||||
return { text, provider: model.provider, model: model.id };
|
||||
},
|
||||
});
|
||||
|
||||
const text = coerceImageAssistantText({
|
||||
message: result.result.message,
|
||||
return {
|
||||
text: result.result.text,
|
||||
provider: result.result.provider,
|
||||
model: result.result.model,
|
||||
});
|
||||
return {
|
||||
text,
|
||||
provider: result.provider,
|
||||
model: result.model,
|
||||
attempts: result.attempts.map((attempt) => ({
|
||||
provider: attempt.provider,
|
||||
model: attempt.model,
|
||||
@@ -423,21 +467,20 @@ export function createImageTool(options?: {
|
||||
if (imageRaw.startsWith("~")) return resolveUserPath(imageRaw);
|
||||
return imageRaw;
|
||||
})();
|
||||
const resolvedPath = isDataUrl
|
||||
? null
|
||||
: sandboxRoot
|
||||
? (
|
||||
await assertSandboxPath({
|
||||
filePath: resolvedImage.startsWith("file://")
|
||||
const resolvedPathInfo: { resolved: string; rewrittenFrom?: string } =
|
||||
isDataUrl
|
||||
? { resolved: "" }
|
||||
: sandboxRoot
|
||||
? await resolveSandboxedImagePath({
|
||||
sandboxRoot,
|
||||
imagePath: resolvedImage,
|
||||
})
|
||||
: {
|
||||
resolved: resolvedImage.startsWith("file://")
|
||||
? resolvedImage.slice("file://".length)
|
||||
: resolvedImage,
|
||||
cwd: sandboxRoot,
|
||||
root: sandboxRoot,
|
||||
})
|
||||
).resolved
|
||||
: resolvedImage.startsWith("file://")
|
||||
? resolvedImage.slice("file://".length)
|
||||
: resolvedImage;
|
||||
};
|
||||
const resolvedPath = isDataUrl ? null : resolvedPathInfo.resolved;
|
||||
|
||||
const media = isDataUrl
|
||||
? decodeDataUrl(resolvedImage)
|
||||
@@ -465,6 +508,9 @@ export function createImageTool(options?: {
|
||||
details: {
|
||||
model: `${result.provider}/${result.model}`,
|
||||
image: resolvedImage,
|
||||
...(resolvedPathInfo.rewrittenFrom
|
||||
? { rewrittenFrom: resolvedPathInfo.rewrittenFrom }
|
||||
: {}),
|
||||
attempts: result.attempts,
|
||||
},
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user