mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-19 09:58:38 +00:00
fix(gateway): retry exec-read live tool probe
This commit is contained in:
@@ -28,7 +28,12 @@ import { DEFAULT_AGENT_ID } from "../routing/session-key.js";
|
|||||||
import { GATEWAY_CLIENT_MODES, GATEWAY_CLIENT_NAMES } from "../utils/message-channel.js";
|
import { GATEWAY_CLIENT_MODES, GATEWAY_CLIENT_NAMES } from "../utils/message-channel.js";
|
||||||
import { GatewayClient } from "./client.js";
|
import { GatewayClient } from "./client.js";
|
||||||
import { renderCatNoncePngBase64 } from "./live-image-probe.js";
|
import { renderCatNoncePngBase64 } from "./live-image-probe.js";
|
||||||
import { hasExpectedToolNonce, shouldRetryToolReadProbe } from "./live-tool-probe-utils.js";
|
import {
|
||||||
|
hasExpectedSingleNonce,
|
||||||
|
hasExpectedToolNonce,
|
||||||
|
shouldRetryExecReadProbe,
|
||||||
|
shouldRetryToolReadProbe,
|
||||||
|
} from "./live-tool-probe-utils.js";
|
||||||
import { startGatewayServer } from "./server.js";
|
import { startGatewayServer } from "./server.js";
|
||||||
import { extractPayloadText } from "./test-helpers.agent-results.js";
|
import { extractPayloadText } from "./test-helpers.agent-results.js";
|
||||||
|
|
||||||
@@ -862,41 +867,77 @@ async function runGatewayModelSuite(params: GatewayModelSuiteParams) {
|
|||||||
logProgress(`${progressLabel}: tool-exec`);
|
logProgress(`${progressLabel}: tool-exec`);
|
||||||
const nonceC = randomUUID();
|
const nonceC = randomUUID();
|
||||||
const toolWritePath = path.join(tempDir, `write-${runIdTool}.txt`);
|
const toolWritePath = path.join(tempDir, `write-${runIdTool}.txt`);
|
||||||
|
const maxExecReadAttempts = 3;
|
||||||
const execReadProbe = await client.request<AgentFinalPayload>(
|
let execReadText = "";
|
||||||
"agent",
|
for (
|
||||||
{
|
let execReadAttempt = 0;
|
||||||
sessionKey,
|
execReadAttempt < maxExecReadAttempts;
|
||||||
idempotencyKey: `idem-${runIdTool}-exec-read`,
|
execReadAttempt += 1
|
||||||
message:
|
|
||||||
"OpenClaw live tool probe (local, safe): " +
|
|
||||||
"use the tool named `exec` (or `Exec`) to run this command: " +
|
|
||||||
`mkdir -p "${tempDir}" && printf '%s' '${nonceC}' > "${toolWritePath}". ` +
|
|
||||||
`Then use the tool named \`read\` (or \`Read\`) with JSON arguments {"path":"${toolWritePath}"}. ` +
|
|
||||||
"Finally reply including the nonce text you read back.",
|
|
||||||
thinking: params.thinkingLevel,
|
|
||||||
deliver: false,
|
|
||||||
},
|
|
||||||
{ expectFinal: true },
|
|
||||||
);
|
|
||||||
if (execReadProbe?.status !== "ok") {
|
|
||||||
throw new Error(`exec+read probe failed: status=${String(execReadProbe?.status)}`);
|
|
||||||
}
|
|
||||||
const execReadText = extractPayloadText(execReadProbe?.result);
|
|
||||||
if (
|
|
||||||
isEmptyStreamText(execReadText) &&
|
|
||||||
(model.provider === "minimax" || model.provider === "openai-codex")
|
|
||||||
) {
|
) {
|
||||||
logProgress(`${progressLabel}: skip (${model.provider} empty response)`);
|
const strictReply = execReadAttempt > 0;
|
||||||
break;
|
const execReadProbe = await client.request<AgentFinalPayload>(
|
||||||
|
"agent",
|
||||||
|
{
|
||||||
|
sessionKey,
|
||||||
|
idempotencyKey: `idem-${runIdTool}-exec-read-${execReadAttempt + 1}`,
|
||||||
|
message: strictReply
|
||||||
|
? "OpenClaw live tool probe (local, safe): " +
|
||||||
|
"use the tool named `exec` (or `Exec`) to run this command: " +
|
||||||
|
`mkdir -p "${tempDir}" && printf '%s' '${nonceC}' > "${toolWritePath}". ` +
|
||||||
|
`Then use the tool named \`read\` (or \`Read\`) with JSON arguments {"path":"${toolWritePath}"}. ` +
|
||||||
|
`Then reply with exactly: ${nonceC}. No extra text.`
|
||||||
|
: "OpenClaw live tool probe (local, safe): " +
|
||||||
|
"use the tool named `exec` (or `Exec`) to run this command: " +
|
||||||
|
`mkdir -p "${tempDir}" && printf '%s' '${nonceC}' > "${toolWritePath}". ` +
|
||||||
|
`Then use the tool named \`read\` (or \`Read\`) with JSON arguments {"path":"${toolWritePath}"}. ` +
|
||||||
|
"Finally reply including the nonce text you read back.",
|
||||||
|
thinking: params.thinkingLevel,
|
||||||
|
deliver: false,
|
||||||
|
},
|
||||||
|
{ expectFinal: true },
|
||||||
|
);
|
||||||
|
if (execReadProbe?.status !== "ok") {
|
||||||
|
if (execReadAttempt + 1 < maxExecReadAttempts) {
|
||||||
|
logProgress(
|
||||||
|
`${progressLabel}: tool-exec retry (${execReadAttempt + 2}/${maxExecReadAttempts}) status=${String(execReadProbe?.status)}`,
|
||||||
|
);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
throw new Error(`exec+read probe failed: status=${String(execReadProbe?.status)}`);
|
||||||
|
}
|
||||||
|
execReadText = extractPayloadText(execReadProbe?.result);
|
||||||
|
if (
|
||||||
|
isEmptyStreamText(execReadText) &&
|
||||||
|
(model.provider === "minimax" || model.provider === "openai-codex")
|
||||||
|
) {
|
||||||
|
logProgress(`${progressLabel}: skip (${model.provider} empty response)`);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
assertNoReasoningTags({
|
||||||
|
text: execReadText,
|
||||||
|
model: modelKey,
|
||||||
|
phase: "tool-exec",
|
||||||
|
label: params.label,
|
||||||
|
});
|
||||||
|
if (hasExpectedSingleNonce(execReadText, nonceC)) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (
|
||||||
|
shouldRetryExecReadProbe({
|
||||||
|
text: execReadText,
|
||||||
|
nonce: nonceC,
|
||||||
|
attempt: execReadAttempt,
|
||||||
|
maxAttempts: maxExecReadAttempts,
|
||||||
|
})
|
||||||
|
) {
|
||||||
|
logProgress(
|
||||||
|
`${progressLabel}: tool-exec retry (${execReadAttempt + 2}/${maxExecReadAttempts}) malformed tool output`,
|
||||||
|
);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
throw new Error(`exec+read probe missing nonce: ${execReadText}`);
|
||||||
}
|
}
|
||||||
assertNoReasoningTags({
|
if (!hasExpectedSingleNonce(execReadText, nonceC)) {
|
||||||
text: execReadText,
|
|
||||||
model: modelKey,
|
|
||||||
phase: "tool-exec",
|
|
||||||
label: params.label,
|
|
||||||
});
|
|
||||||
if (!execReadText.includes(nonceC)) {
|
|
||||||
throw new Error(`exec+read probe missing nonce: ${execReadText}`);
|
throw new Error(`exec+read probe missing nonce: ${execReadText}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,10 @@
|
|||||||
import { describe, expect, it } from "vitest";
|
import { describe, expect, it } from "vitest";
|
||||||
import { hasExpectedToolNonce, shouldRetryToolReadProbe } from "./live-tool-probe-utils.js";
|
import {
|
||||||
|
hasExpectedSingleNonce,
|
||||||
|
hasExpectedToolNonce,
|
||||||
|
shouldRetryExecReadProbe,
|
||||||
|
shouldRetryToolReadProbe,
|
||||||
|
} from "./live-tool-probe-utils.js";
|
||||||
|
|
||||||
describe("live tool probe utils", () => {
|
describe("live tool probe utils", () => {
|
||||||
it("matches nonce pair when both are present", () => {
|
it("matches nonce pair when both are present", () => {
|
||||||
@@ -7,6 +12,11 @@ describe("live tool probe utils", () => {
|
|||||||
expect(hasExpectedToolNonce("value a-1 only", "a-1", "b-2")).toBe(false);
|
expect(hasExpectedToolNonce("value a-1 only", "a-1", "b-2")).toBe(false);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("matches single nonce when present", () => {
|
||||||
|
expect(hasExpectedSingleNonce("value nonce-1", "nonce-1")).toBe(true);
|
||||||
|
expect(hasExpectedSingleNonce("value nonce-2", "nonce-1")).toBe(false);
|
||||||
|
});
|
||||||
|
|
||||||
it("retries malformed tool output when attempts remain", () => {
|
it("retries malformed tool output when attempts remain", () => {
|
||||||
expect(
|
expect(
|
||||||
shouldRetryToolReadProbe({
|
shouldRetryToolReadProbe({
|
||||||
@@ -97,4 +107,37 @@ describe("live tool probe utils", () => {
|
|||||||
}),
|
}),
|
||||||
).toBe(false);
|
).toBe(false);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("retries malformed exec+read output when attempts remain", () => {
|
||||||
|
expect(
|
||||||
|
shouldRetryExecReadProbe({
|
||||||
|
text: "read[object Object]",
|
||||||
|
nonce: "nonce-c",
|
||||||
|
attempt: 0,
|
||||||
|
maxAttempts: 3,
|
||||||
|
}),
|
||||||
|
).toBe(true);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("does not retry exec+read once max attempts are exhausted", () => {
|
||||||
|
expect(
|
||||||
|
shouldRetryExecReadProbe({
|
||||||
|
text: "read[object Object]",
|
||||||
|
nonce: "nonce-c",
|
||||||
|
attempt: 2,
|
||||||
|
maxAttempts: 3,
|
||||||
|
}),
|
||||||
|
).toBe(false);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("does not retry exec+read when nonce is present", () => {
|
||||||
|
expect(
|
||||||
|
shouldRetryExecReadProbe({
|
||||||
|
text: "nonce-c",
|
||||||
|
nonce: "nonce-c",
|
||||||
|
attempt: 0,
|
||||||
|
maxAttempts: 3,
|
||||||
|
}),
|
||||||
|
).toBe(false);
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -2,6 +2,25 @@ export function hasExpectedToolNonce(text: string, nonceA: string, nonceB: strin
|
|||||||
return text.includes(nonceA) && text.includes(nonceB);
|
return text.includes(nonceA) && text.includes(nonceB);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Reports whether a probe reply contains the expected nonce.
 *
 * This is a plain substring check: the nonce may appear anywhere in the text
 * and may be surrounded by other content.
 *
 * @param text - Reply text to inspect.
 * @param nonce - Nonce expected to appear somewhere in `text`.
 * @returns `true` when `text` includes `nonce` as a substring.
 */
export function hasExpectedSingleNonce(text: string, nonce: string): boolean {
  return text.includes(nonce);
}
|
||||||
|
|
||||||
|
/**
 * Heuristically decides whether a reply looks like malformed tool output
 * rather than a usable answer.
 *
 * A reply is treated as malformed when, after trimming, it:
 * - is empty;
 * - contains the literal `[object Object]` (case-sensitive);
 * - contains, case-insensitively, `read` at a word start followed by optional
 *   whitespace and `[`, or the standalone word `tool` or `function`.
 *
 * @param text - Raw reply text.
 * @returns `true` when any of the malformed-output markers above is present.
 */
function hasMalformedToolOutput(text: string): boolean {
  const trimmed = text.trim();
  if (!trimmed) {
    return true;
  }
  // Case-sensitive on purpose: this is the exact default object stringification.
  if (trimmed.includes("[object Object]")) {
    return true;
  }
  // The remaining markers are matched case-insensitively via a lowered copy.
  const lower = trimmed.toLowerCase();
  return /\bread\s*\[/.test(lower) || /\btool\b/.test(lower) || /\bfunction\b/.test(lower);
}
|
||||||
|
|
||||||
export function shouldRetryToolReadProbe(params: {
|
export function shouldRetryToolReadProbe(params: {
|
||||||
text: string;
|
text: string;
|
||||||
nonceA: string;
|
nonceA: string;
|
||||||
@@ -16,19 +35,27 @@ export function shouldRetryToolReadProbe(params: {
|
|||||||
if (hasExpectedToolNonce(params.text, params.nonceA, params.nonceB)) {
|
if (hasExpectedToolNonce(params.text, params.nonceA, params.nonceB)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
const trimmed = params.text.trim();
|
if (hasMalformedToolOutput(params.text)) {
|
||||||
if (!trimmed) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
const lower = trimmed.toLowerCase();
|
|
||||||
if (trimmed.includes("[object Object]")) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
if (/\bread\s*\[/.test(lower) || /\btool\b/.test(lower) || /\bfunction\b/.test(lower)) {
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
const lower = params.text.trim().toLowerCase();
|
||||||
if (params.provider === "mistral" && (lower.includes("noncea=") || lower.includes("nonceb="))) {
|
if (params.provider === "mistral" && (lower.includes("noncea=") || lower.includes("nonceb="))) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Decides whether the exec+read probe should be attempted again.
 *
 * A retry is requested only when all of the following hold:
 * - at least one more attempt remains (`attempt + 1 < maxAttempts`);
 * - the reply does not already contain the expected nonce;
 * - the reply looks like malformed tool output.
 *
 * @param params.text - Reply text from the current attempt.
 * @param params.nonce - Nonce the reply is expected to contain.
 * @param params.attempt - Zero-based index of the attempt that just finished.
 * @param params.maxAttempts - Total number of attempts allowed.
 * @returns `true` when the caller should retry the probe.
 */
export function shouldRetryExecReadProbe(params: {
  text: string;
  nonce: string;
  attempt: number;
  maxAttempts: number;
}): boolean {
  // `attempt` is zero-based, so the final permitted attempt is maxAttempts - 1.
  if (params.attempt + 1 >= params.maxAttempts) {
    return false;
  }
  // A reply that already carries the nonce is a success, not a retry candidate.
  if (hasExpectedSingleNonce(params.text, params.nonce)) {
    return false;
  }
  return hasMalformedToolOutput(params.text);
}
|
||||||
|
|||||||
Reference in New Issue
Block a user