fix(agents): restore multi-image image tool schema contract

This commit is contained in:
Sebastian
2026-02-16 21:34:27 -05:00
parent 966e5560f8
commit 391796a3fb
3 changed files with 167 additions and 17 deletions

View File

@@ -18,6 +18,7 @@ async function writeAuthProfiles(agentDir: string, profiles: unknown) {
// Base64 text of a minimal one-pixel PNG image, kept inline as a test fixture.
const ONE_PIXEL_PNG_B64 =
"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/woAAn8B9FD5fHAAAAAASUVORK5CYII=";
// Base64 text of a minimal one-pixel GIF image; tests embed it in
// `data:image/gif;base64,...` URLs to supply a second, distinct image.
const ONE_PIXEL_GIF_B64 = "R0lGODlhAQABAIABAP///wAAACwAAAAAAQABAAACAkQBADs=";
async function withTempWorkspacePng(
cb: (args: { workspaceDir: string; imagePath: string }) => Promise<void>,
@@ -78,6 +79,25 @@ async function expectImageToolExecOk(
});
}
/**
 * Walks an arbitrary JSON-like value and reports where union keywords occur.
 *
 * Every object key named `anyOf`, `oneOf` or `allOf` is reported as a path of
 * the form `<path>.<key>`; array elements extend the path with `[index]`.
 * Paths are returned in depth-first, pre-order sequence (a keyword's own path
 * precedes anything found beneath it).
 *
 * @param schema - Value to inspect; anything that is not an object or array yields no paths.
 * @param path - Label used as the prefix of every reported path.
 * @returns The paths of all union keywords found, or an empty array when there are none.
 */
function findSchemaUnionKeywords(schema: unknown, path = "root"): string[] {
  const unionKeywords = new Set(["anyOf", "oneOf", "allOf"]);
  const found: string[] = [];

  // Single shared accumulator; the visitor appends in traversal order.
  const visit = (node: unknown, where: string): void => {
    if (typeof node !== "object" || node === null) {
      return;
    }
    if (Array.isArray(node)) {
      node.forEach((entry, position) => visit(entry, `${where}[${position}]`));
      return;
    }
    Object.entries(node as Record<string, unknown>).forEach(([name, child]) => {
      const childPath = `${where}.${name}`;
      if (unionKeywords.has(name)) {
        found.push(childPath);
      }
      // Keep descending even below a keyword so nested unions are reported too.
      visit(child, childPath);
    });
  };

  visit(schema, path);
  return found;
}
describe("image tool implicit imageModel config", () => {
const priorFetch = global.fetch;
@@ -211,6 +231,66 @@ describe("image tool implicit imageModel config", () => {
expect(tool?.description).toContain("Only use this tool when images were NOT already provided");
});
// Checks that the image tool's parameter schema contains no anyOf/oneOf/allOf
// keys anywhere, and that `image` is a plain string while `images` is an array
// of strings.
it("exposes an Anthropic-safe image schema without union keywords", async () => {
  type SchemaNode = { type?: unknown; items?: unknown };

  const tempAgentDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-image-"));
  try {
    const imageTool = createImageTool({
      config: createMinimaxImageConfig(),
      agentDir: tempAgentDir,
    });
    expect(imageTool).not.toBeNull();
    if (!imageTool) {
      throw new Error("expected image tool");
    }

    expect(findSchemaUnionKeywords(imageTool.parameters, "image.parameters")).toEqual([]);

    const props = (imageTool.parameters as { properties?: Record<string, unknown> }).properties;
    const singleImage = props?.image as SchemaNode | undefined;
    const imageList = props?.images as SchemaNode | undefined;
    const listElement = imageList?.items as SchemaNode | undefined;

    expect(singleImage?.type).toBe("string");
    expect(imageList?.type).toBe("array");
    expect(listElement?.type).toBe("string");
  } finally {
    // Always remove the temporary agent directory, even when an assertion fails.
    await fs.rm(tempAgentDir, { recursive: true, force: true });
  }
});
// Pins the exact JSON shape of the image tool's parameter schema so any
// change to it has to be made deliberately.
it("keeps an Anthropic-safe image schema snapshot", async () => {
  const expectedParameters = {
    type: "object",
    properties: {
      prompt: { type: "string" },
      image: { description: "Single image path or URL.", type: "string" },
      images: {
        description: "Multiple image paths or URLs (up to maxImages, default 20).",
        type: "array",
        items: { type: "string" },
      },
      model: { type: "string" },
      maxBytesMb: { type: "number" },
      maxImages: { type: "number" },
    },
  };

  const tempAgentDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-image-"));
  try {
    const imageTool = createImageTool({
      config: createMinimaxImageConfig(),
      agentDir: tempAgentDir,
    });
    expect(imageTool).not.toBeNull();
    if (!imageTool) {
      throw new Error("expected image tool");
    }

    // Round-trip through JSON so only plain, serializable data is compared.
    const serialized = JSON.stringify(imageTool.parameters);
    expect(JSON.parse(serialized)).toEqual(expectedParameters);
  } finally {
    await fs.rm(tempAgentDir, { recursive: true, force: true });
  }
});
it("allows workspace images outside default local media roots", async () => {
await withTempWorkspacePng(async ({ workspaceDir, imagePath }) => {
const fetch = stubMinimaxOkFetch();
@@ -412,7 +492,7 @@ describe("image tool MiniMax VLM routing", () => {
return { fetch, tool };
}
it("calls /v1/coding_plan/vlm for minimax image models", async () => {
it("accepts image for single-image requests and calls /v1/coding_plan/vlm", async () => {
const { fetch, tool } = await createMinimaxVlmFixture({ status_code: 0, status_msg: "" });
const res = await tool.execute("t1", {
@@ -434,6 +514,59 @@ describe("image tool MiniMax VLM routing", () => {
expect(text).toBe("ok");
});
// Two distinct data URLs passed via `images` should produce one upstream
// request and two entries in the result details.
it("accepts images[] for multi-image requests", async () => {
  type ImageDetails = { images?: Array<{ image: string }> } | undefined;

  const { fetch, tool } = await createMinimaxVlmFixture({ status_code: 0, status_msg: "" });
  const pngUrl = `data:image/png;base64,${pngB64}`;
  const gifUrl = `data:image/gif;base64,${ONE_PIXEL_GIF_B64}`;

  const result = await tool.execute("t1", {
    prompt: "Compare these images.",
    images: [pngUrl, gifUrl],
  });

  expect(fetch).toHaveBeenCalledTimes(1);
  const reported = (result.details as ImageDetails)?.images;
  expect(reported).toHaveLength(2);
});
// Covers two behaviours of mixing `image` with `images`: duplicate entries are
// expected to collapse, and exceeding `maxImages` is expected to yield a
// `too_many_images` error.
it("combines image + images with dedupe and enforces maxImages", async () => {
  type ImageDetails = { images?: Array<{ image: string }> } | undefined;

  const { fetch, tool } = await createMinimaxVlmFixture({ status_code: 0, status_msg: "" });
  const pngUrl = `data:image/png;base64,${pngB64}`;
  const gifUrl = `data:image/gif;base64,${ONE_PIXEL_GIF_B64}`;

  // The PNG is supplied via both `image` and `images`, and the GIF twice:
  // four inputs, of which only two are distinct.
  const dedupeResult = await tool.execute("t1", {
    prompt: "Compare these images.",
    image: pngUrl,
    images: [pngUrl, gifUrl, gifUrl],
  });
  expect(fetch).toHaveBeenCalledTimes(1);
  expect((dedupeResult.details as ImageDetails)?.images).toHaveLength(2);

  // Two distinct images against a limit of one.
  const overLimitResult = await tool.execute("t2", {
    prompt: "Compare these images.",
    image: pngUrl,
    images: [gifUrl],
    maxImages: 1,
  });
  // The call count must still be 1: no further request after the first execute.
  expect(fetch).toHaveBeenCalledTimes(1);
  expect(overLimitResult.details).toMatchObject({
    error: "too_many_images",
    count: 2,
    max: 1,
  });
});
it("surfaces MiniMax API errors from /v1/coding_plan/vlm", async () => {
const { tool } = await createMinimaxVlmFixture({ status_code: 1004, status_msg: "bad key" });