mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-10 06:22:42 +00:00
feat(gateway): add agent image attachments + live probe
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
import { randomUUID } from "node:crypto";
|
||||
import { randomBytes, randomUUID } from "node:crypto";
|
||||
import fs from "node:fs/promises";
|
||||
import { createServer } from "node:net";
|
||||
import os from "node:os";
|
||||
@@ -16,6 +16,7 @@ import { ensureClawdbotModelsJson } from "../agents/models-config.js";
|
||||
import { loadConfig } from "../config/config.js";
|
||||
import { resolveUserPath } from "../utils.js";
|
||||
import { GatewayClient } from "./client.js";
|
||||
import { renderCatNoncePngBase64 } from "./live-image-probe.js";
|
||||
import { startGatewayServer } from "./server.js";
|
||||
|
||||
const LIVE = process.env.LIVE === "1" || process.env.CLAWDBOT_LIVE_TEST === "1";
|
||||
@@ -24,6 +25,8 @@ const ALL_MODELS =
|
||||
process.env.CLAWDBOT_LIVE_GATEWAY_ALL_MODELS === "1" ||
|
||||
process.env.CLAWDBOT_LIVE_GATEWAY_MODELS === "all";
|
||||
const EXTRA_TOOL_PROBES = process.env.CLAWDBOT_LIVE_GATEWAY_TOOL_PROBE === "1";
|
||||
const EXTRA_IMAGE_PROBES =
|
||||
process.env.CLAWDBOT_LIVE_GATEWAY_IMAGE_PROBE === "1";
|
||||
|
||||
const describeLive = LIVE && GATEWAY_LIVE ? describe : describe.skip;
|
||||
|
||||
@@ -60,6 +63,43 @@ function isMeaningful(text: string): boolean {
|
||||
return true;
|
||||
}
|
||||
|
||||
function randomImageProbeCode(len = 10): string {
|
||||
const alphabet = "2345689ABCEF";
|
||||
const bytes = randomBytes(len);
|
||||
let out = "";
|
||||
for (let i = 0; i < len; i += 1) {
|
||||
out += alphabet[bytes[i] % alphabet.length];
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
function editDistance(a: string, b: string): number {
|
||||
if (a === b) return 0;
|
||||
const aLen = a.length;
|
||||
const bLen = b.length;
|
||||
if (aLen === 0) return bLen;
|
||||
if (bLen === 0) return aLen;
|
||||
|
||||
let prev = Array.from({ length: bLen + 1 }, (_v, idx) => idx);
|
||||
let curr = Array.from({ length: bLen + 1 }, () => 0);
|
||||
|
||||
for (let i = 1; i <= aLen; i += 1) {
|
||||
curr[0] = i;
|
||||
const aCh = a.charCodeAt(i - 1);
|
||||
for (let j = 1; j <= bLen; j += 1) {
|
||||
const cost = aCh === b.charCodeAt(j - 1) ? 0 : 1;
|
||||
curr[j] = Math.min(
|
||||
prev[j] + 1, // delete
|
||||
curr[j - 1] + 1, // insert
|
||||
prev[j - 1] + cost, // substitute
|
||||
);
|
||||
}
|
||||
[prev, curr] = [curr, prev];
|
||||
}
|
||||
|
||||
return prev[bLen] ?? Number.POSITIVE_INFINITY;
|
||||
}
|
||||
|
||||
async function getFreePort(): Promise<number> {
|
||||
return await new Promise((resolve, reject) => {
|
||||
const srv = createServer();
|
||||
@@ -204,6 +244,14 @@ describeLive("gateway live (dev agent, profile keys)", () => {
|
||||
}
|
||||
|
||||
expect(candidates.length).toBeGreaterThan(0);
|
||||
const imageCandidates = EXTRA_IMAGE_PROBES
|
||||
? candidates.filter((m) => m.input?.includes("image"))
|
||||
: [];
|
||||
if (EXTRA_IMAGE_PROBES && imageCandidates.length === 0) {
|
||||
throw new Error(
|
||||
"image probe enabled but no selected models advertise image support; set CLAWDBOT_LIVE_GATEWAY_MODELS to include an image-capable model",
|
||||
);
|
||||
}
|
||||
|
||||
// Build a temp config that allows all selected models, so session overrides stick.
|
||||
const lmstudioProvider = cfg.models?.providers?.lmstudio;
|
||||
@@ -365,6 +413,53 @@ describeLive("gateway live (dev agent, profile keys)", () => {
|
||||
await fs.rm(toolWritePath, { force: true });
|
||||
}
|
||||
|
||||
if (EXTRA_IMAGE_PROBES && model.input?.includes("image")) {
|
||||
const imageCode = randomImageProbeCode(10);
|
||||
const imageBase64 = renderCatNoncePngBase64(imageCode);
|
||||
const runIdImage = randomUUID();
|
||||
|
||||
const imageProbe = await client.request<AgentFinalPayload>(
|
||||
"agent",
|
||||
{
|
||||
sessionKey,
|
||||
idempotencyKey: `idem-${runIdImage}-image`,
|
||||
message:
|
||||
"Look at the attached image. Reply with exactly two tokens separated by a single space: " +
|
||||
"(1) the animal shown or written in the image, lowercase; " +
|
||||
"(2) the code printed in the image, uppercase. No extra text.",
|
||||
attachments: [
|
||||
{
|
||||
mimeType: "image/png",
|
||||
fileName: `probe-${runIdImage}.png`,
|
||||
content: imageBase64,
|
||||
},
|
||||
],
|
||||
deliver: false,
|
||||
},
|
||||
{ expectFinal: true },
|
||||
);
|
||||
if (imageProbe?.status !== "ok") {
|
||||
throw new Error(
|
||||
`image probe failed: status=${String(imageProbe?.status)}`,
|
||||
);
|
||||
}
|
||||
const imageText = extractPayloadText(imageProbe?.result);
|
||||
if (!/\bcat\b/i.test(imageText)) {
|
||||
throw new Error(`image probe missing 'cat': ${imageText}`);
|
||||
}
|
||||
const candidates =
|
||||
imageText.toUpperCase().match(/[A-Z0-9]{6,20}/g) ?? [];
|
||||
const bestDistance = candidates.reduce((best, cand) => {
|
||||
if (Math.abs(cand.length - imageCode.length) > 2) return best;
|
||||
return Math.min(best, editDistance(cand, imageCode));
|
||||
}, Number.POSITIVE_INFINITY);
|
||||
if (!(bestDistance <= 1)) {
|
||||
throw new Error(
|
||||
`image probe missing code (${imageCode}): ${imageText}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Regression: tool-call-only turn followed by a user message (OpenAI responses bug class).
|
||||
if (
|
||||
(model.provider === "openai" &&
|
||||
|
||||
Reference in New Issue
Block a user