feat(gateway): add agent image attachments + live probe

This commit is contained in:
Peter Steinberger
2026-01-10 20:34:34 +00:00
parent b9b1bc2726
commit 9790b39d80
7 changed files with 439 additions and 2 deletions

View File

@@ -1,4 +1,4 @@
import { randomUUID } from "node:crypto";
import { randomBytes, randomUUID } from "node:crypto";
import fs from "node:fs/promises";
import { createServer } from "node:net";
import os from "node:os";
@@ -16,6 +16,7 @@ import { ensureClawdbotModelsJson } from "../agents/models-config.js";
import { loadConfig } from "../config/config.js";
import { resolveUserPath } from "../utils.js";
import { GatewayClient } from "./client.js";
import { renderCatNoncePngBase64 } from "./live-image-probe.js";
import { startGatewayServer } from "./server.js";
const LIVE = process.env.LIVE === "1" || process.env.CLAWDBOT_LIVE_TEST === "1";
@@ -24,6 +25,8 @@ const ALL_MODELS =
process.env.CLAWDBOT_LIVE_GATEWAY_ALL_MODELS === "1" ||
process.env.CLAWDBOT_LIVE_GATEWAY_MODELS === "all";
const EXTRA_TOOL_PROBES = process.env.CLAWDBOT_LIVE_GATEWAY_TOOL_PROBE === "1";
const EXTRA_IMAGE_PROBES =
process.env.CLAWDBOT_LIVE_GATEWAY_IMAGE_PROBE === "1";
const describeLive = LIVE && GATEWAY_LIVE ? describe : describe.skip;
@@ -60,6 +63,43 @@ function isMeaningful(text: string): boolean {
return true;
}
function randomImageProbeCode(len = 10): string {
const alphabet = "2345689ABCEF";
const bytes = randomBytes(len);
let out = "";
for (let i = 0; i < len; i += 1) {
out += alphabet[bytes[i] % alphabet.length];
}
return out;
}
function editDistance(a: string, b: string): number {
if (a === b) return 0;
const aLen = a.length;
const bLen = b.length;
if (aLen === 0) return bLen;
if (bLen === 0) return aLen;
let prev = Array.from({ length: bLen + 1 }, (_v, idx) => idx);
let curr = Array.from({ length: bLen + 1 }, () => 0);
for (let i = 1; i <= aLen; i += 1) {
curr[0] = i;
const aCh = a.charCodeAt(i - 1);
for (let j = 1; j <= bLen; j += 1) {
const cost = aCh === b.charCodeAt(j - 1) ? 0 : 1;
curr[j] = Math.min(
prev[j] + 1, // delete
curr[j - 1] + 1, // insert
prev[j - 1] + cost, // substitute
);
}
[prev, curr] = [curr, prev];
}
return prev[bLen] ?? Number.POSITIVE_INFINITY;
}
async function getFreePort(): Promise<number> {
return await new Promise((resolve, reject) => {
const srv = createServer();
@@ -204,6 +244,14 @@ describeLive("gateway live (dev agent, profile keys)", () => {
}
expect(candidates.length).toBeGreaterThan(0);
const imageCandidates = EXTRA_IMAGE_PROBES
? candidates.filter((m) => m.input?.includes("image"))
: [];
if (EXTRA_IMAGE_PROBES && imageCandidates.length === 0) {
throw new Error(
"image probe enabled but no selected models advertise image support; set CLAWDBOT_LIVE_GATEWAY_MODELS to include an image-capable model",
);
}
// Build a temp config that allows all selected models, so session overrides stick.
const lmstudioProvider = cfg.models?.providers?.lmstudio;
@@ -365,6 +413,53 @@ describeLive("gateway live (dev agent, profile keys)", () => {
await fs.rm(toolWritePath, { force: true });
}
if (EXTRA_IMAGE_PROBES && model.input?.includes("image")) {
const imageCode = randomImageProbeCode(10);
const imageBase64 = renderCatNoncePngBase64(imageCode);
const runIdImage = randomUUID();
const imageProbe = await client.request<AgentFinalPayload>(
"agent",
{
sessionKey,
idempotencyKey: `idem-${runIdImage}-image`,
message:
"Look at the attached image. Reply with exactly two tokens separated by a single space: " +
"(1) the animal shown or written in the image, lowercase; " +
"(2) the code printed in the image, uppercase. No extra text.",
attachments: [
{
mimeType: "image/png",
fileName: `probe-${runIdImage}.png`,
content: imageBase64,
},
],
deliver: false,
},
{ expectFinal: true },
);
if (imageProbe?.status !== "ok") {
throw new Error(
`image probe failed: status=${String(imageProbe?.status)}`,
);
}
const imageText = extractPayloadText(imageProbe?.result);
if (!/\bcat\b/i.test(imageText)) {
throw new Error(`image probe missing 'cat': ${imageText}`);
}
const candidates =
imageText.toUpperCase().match(/[A-Z0-9]{6,20}/g) ?? [];
const bestDistance = candidates.reduce((best, cand) => {
if (Math.abs(cand.length - imageCode.length) > 2) return best;
return Math.min(best, editDistance(cand, imageCode));
}, Number.POSITIVE_INFINITY);
if (!(bestDistance <= 1)) {
throw new Error(
`image probe missing code (${imageCode}): ${imageText}`,
);
}
}
// Regression: tool-call-only turn followed by a user message (OpenAI responses bug class).
if (
(model.provider === "openai" &&