feat: native image injection for vision-capable models

- Auto-detect and load images referenced in user prompts
- Inject history images at their original message positions
- Fix EXIF orientation - rotate before resizing in resizeToJpeg
- Sandbox security: validate paths and block remote URLs when the sandbox is enabled
- Prevent duplicate history image injection across turns
- Handle string-based user message content (convert to array)
- Add bounds check for message index in history processing
- Fix regex to properly match relative paths (./ and ../)
- Add multi-image support for iMessage attachments
- Pass MAX_IMAGE_BYTES limit to image loading

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Tyler Yust
2026-01-17 03:10:10 -08:00
committed by Peter Steinberger
parent f7123ec30a
commit 8d74578ceb
9 changed files with 892 additions and 16 deletions

View File

@@ -102,7 +102,11 @@ describe("image tool implicit imageModel config", () => {
});
});
it("disables image tool when primary model already supports images", async () => {
it("keeps image tool available when primary model supports images (for explicit requests)", async () => {
// When the primary model supports images, we still keep the tool available
// for explicit image analysis requests. Because images are auto-injected into
// prompts, the tool description is adjusted via modelHasVision to discourage
// redundant usage.
vi.stubEnv("OPENAI_API_KEY", "test-key");
const agentDir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-image-"));
const cfg: ClawdbotConfig = {
agents: {
@@ -119,8 +123,13 @@ describe("image tool implicit imageModel config", () => {
},
},
};
expect(resolveImageModelConfigForTool({ cfg, agentDir })).toBeNull();
expect(createImageTool({ config: cfg, agentDir })).toBeNull();
// Tool should still be available for explicit image analysis requests
expect(resolveImageModelConfigForTool({ cfg, agentDir })).toEqual({
primary: "openai/gpt-5-mini",
});
const tool = createImageTool({ config: cfg, agentDir, modelHasVision: true });
expect(tool).not.toBeNull();
expect(tool?.description).toContain("Only use this tool when the image was NOT already provided");
});
it("sandboxes image paths like the read tool", async () => {