mirror of
https://github.com/openclaw/openclaw.git
synced 2026-06-06 16:03:30 +00:00
fix: harden loop detector and runnable coverage
This commit is contained in:
@@ -20,7 +20,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Gateway/Config: prevent `config.patch` object-array merges from falling back to full-array replacement when some patch entries lack `id`, so partial `agents.list` updates no longer drop unrelated agents. (#17989) Thanks @stakeswky.
|
||||
- Config/Discord: require string IDs in Discord allowlists, keep onboarding inputs string-only, and add doctor repair for numeric entries. (#18220) Thanks @thewilloftheshadow.
|
||||
- Agents/Models: probe the primary model when its auth-profile cooldown is near expiry (with per-provider throttling), so runs recover from temporary rate limits without staying on fallback models until restart. (#17478) Thanks @PlayerGhost.
|
||||
- Agents/Tools: make loop detection progress-aware and phased by hard-blocking known `process(action=poll|log)` no-progress loops, keeping generic identical-call detection warn-only, and adding a global circuit breaker at 30 no-progress repeats. (#16808)
|
||||
- Agents/Tools: make loop detection progress-aware and phased by hard-blocking known `process(action=poll|log)` no-progress loops, keeping generic identical-call detection warn-only, and adding a global circuit breaker at 30 no-progress repeats. (#16808) Thanks @akramcodez and @beca-oc.
|
||||
- Agents/Tools: scope the `message` tool schema to the active channel so Telegram uses `buttons` and Discord uses `components`. (#18215) Thanks @obviyus.
|
||||
- Discord: optimize reaction notification handling to skip unnecessary message fetches in `off`/`all`/`allowlist` modes, streamline reaction routing, and improve reaction emoji formatting. (#18248) Thanks @thewilloftheshadow and @victorGPT.
|
||||
- Telegram: keep draft-stream preview replies attached to the user message for `replyToMode: "all"` in groups and DMs, preserving threaded reply context from preview through finalization. (#17880) Thanks @yinghaosang.
|
||||
|
||||
@@ -3,7 +3,6 @@ import { resetDiagnosticSessionStateForTest } from "../logging/diagnostic-sessio
|
||||
import { getGlobalHookRunner } from "../plugins/hook-runner-global.js";
|
||||
import { toClientToolDefinitions, toToolDefinitions } from "./pi-tool-definition-adapter.js";
|
||||
import { wrapToolWithBeforeToolCallHook } from "./pi-tools.before-tool-call.js";
|
||||
import { CRITICAL_THRESHOLD, GLOBAL_CIRCUIT_BREAKER_THRESHOLD } from "./tool-loop-detection.js";
|
||||
|
||||
vi.mock("../plugins/hook-runner-global.js");
|
||||
|
||||
@@ -111,104 +110,6 @@ describe("before_tool_call hook integration", () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe("before_tool_call loop detection behavior", () => {
|
||||
let hookRunner: {
|
||||
hasHooks: ReturnType<typeof vi.fn>;
|
||||
runBeforeToolCall: ReturnType<typeof vi.fn>;
|
||||
};
|
||||
|
||||
beforeEach(() => {
|
||||
resetDiagnosticSessionStateForTest();
|
||||
hookRunner = {
|
||||
hasHooks: vi.fn(),
|
||||
runBeforeToolCall: vi.fn(),
|
||||
};
|
||||
// oxlint-disable-next-line typescript/no-explicit-any
|
||||
mockGetGlobalHookRunner.mockReturnValue(hookRunner as any);
|
||||
hookRunner.hasHooks.mockReturnValue(false);
|
||||
});
|
||||
|
||||
it("blocks known poll loops when no progress repeats", async () => {
|
||||
const execute = vi.fn().mockResolvedValue({
|
||||
content: [{ type: "text", text: "(no new output)\n\nProcess still running." }],
|
||||
details: { status: "running", aggregated: "steady" },
|
||||
});
|
||||
// oxlint-disable-next-line typescript/no-explicit-any
|
||||
const tool = wrapToolWithBeforeToolCallHook({ name: "process", execute } as any, {
|
||||
agentId: "main",
|
||||
sessionKey: "main",
|
||||
});
|
||||
const params = { action: "poll", sessionId: "sess-1" };
|
||||
|
||||
for (let i = 0; i < CRITICAL_THRESHOLD; i += 1) {
|
||||
await expect(tool.execute(`poll-${i}`, params, undefined, undefined)).resolves.toBeDefined();
|
||||
}
|
||||
|
||||
await expect(
|
||||
tool.execute(`poll-${CRITICAL_THRESHOLD}`, params, undefined, undefined),
|
||||
).rejects.toThrow("CRITICAL");
|
||||
});
|
||||
|
||||
it("does not block known poll loops when output progresses", async () => {
|
||||
const execute = vi.fn().mockImplementation(async (toolCallId: string) => {
|
||||
return {
|
||||
content: [{ type: "text", text: `output ${toolCallId}` }],
|
||||
details: { status: "running", aggregated: `output ${toolCallId}` },
|
||||
};
|
||||
});
|
||||
// oxlint-disable-next-line typescript/no-explicit-any
|
||||
const tool = wrapToolWithBeforeToolCallHook({ name: "process", execute } as any, {
|
||||
agentId: "main",
|
||||
sessionKey: "main",
|
||||
});
|
||||
const params = { action: "poll", sessionId: "sess-2" };
|
||||
|
||||
for (let i = 0; i < CRITICAL_THRESHOLD + 5; i += 1) {
|
||||
await expect(
|
||||
tool.execute(`poll-progress-${i}`, params, undefined, undefined),
|
||||
).resolves.toBeDefined();
|
||||
}
|
||||
});
|
||||
|
||||
it("keeps generic repeated calls warn-only below global breaker", async () => {
|
||||
const execute = vi.fn().mockResolvedValue({
|
||||
content: [{ type: "text", text: "same output" }],
|
||||
details: { ok: true },
|
||||
});
|
||||
// oxlint-disable-next-line typescript/no-explicit-any
|
||||
const tool = wrapToolWithBeforeToolCallHook({ name: "read", execute } as any, {
|
||||
agentId: "main",
|
||||
sessionKey: "main",
|
||||
});
|
||||
const params = { path: "/tmp/file" };
|
||||
|
||||
for (let i = 0; i < CRITICAL_THRESHOLD + 5; i += 1) {
|
||||
await expect(tool.execute(`read-${i}`, params, undefined, undefined)).resolves.toBeDefined();
|
||||
}
|
||||
});
|
||||
|
||||
it("blocks generic repeated no-progress calls at global breaker threshold", async () => {
|
||||
const execute = vi.fn().mockResolvedValue({
|
||||
content: [{ type: "text", text: "same output" }],
|
||||
details: { ok: true },
|
||||
});
|
||||
// oxlint-disable-next-line typescript/no-explicit-any
|
||||
const tool = wrapToolWithBeforeToolCallHook({ name: "read", execute } as any, {
|
||||
agentId: "main",
|
||||
sessionKey: "main",
|
||||
});
|
||||
const params = { path: "/tmp/file" };
|
||||
|
||||
for (let i = 0; i < GLOBAL_CIRCUIT_BREAKER_THRESHOLD; i += 1) {
|
||||
await expect(tool.execute(`read-${i}`, params, undefined, undefined)).resolves.toBeDefined();
|
||||
}
|
||||
|
||||
await expect(
|
||||
tool.execute(`read-${GLOBAL_CIRCUIT_BREAKER_THRESHOLD}`, params, undefined, undefined),
|
||||
).rejects.toThrow("global circuit breaker");
|
||||
});
|
||||
});
|
||||
|
||||
describe("before_tool_call hook deduplication (#15502)", () => {
|
||||
let hookRunner: {
|
||||
hasHooks: ReturnType<typeof vi.fn>;
|
||||
|
||||
107
src/agents/pi-tools.before-tool-call.test.ts
Normal file
107
src/agents/pi-tools.before-tool-call.test.ts
Normal file
@@ -0,0 +1,107 @@
|
||||
import { beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import { resetDiagnosticSessionStateForTest } from "../logging/diagnostic-session-state.js";
|
||||
import { getGlobalHookRunner } from "../plugins/hook-runner-global.js";
|
||||
import { wrapToolWithBeforeToolCallHook } from "./pi-tools.before-tool-call.js";
|
||||
import { CRITICAL_THRESHOLD, GLOBAL_CIRCUIT_BREAKER_THRESHOLD } from "./tool-loop-detection.js";
|
||||
|
||||
vi.mock("../plugins/hook-runner-global.js");
|
||||
|
||||
const mockGetGlobalHookRunner = vi.mocked(getGlobalHookRunner);
|
||||
|
||||
describe("before_tool_call loop detection behavior", () => {
|
||||
let hookRunner: {
|
||||
hasHooks: ReturnType<typeof vi.fn>;
|
||||
runBeforeToolCall: ReturnType<typeof vi.fn>;
|
||||
};
|
||||
|
||||
beforeEach(() => {
|
||||
resetDiagnosticSessionStateForTest();
|
||||
hookRunner = {
|
||||
hasHooks: vi.fn(),
|
||||
runBeforeToolCall: vi.fn(),
|
||||
};
|
||||
// oxlint-disable-next-line typescript/no-explicit-any
|
||||
mockGetGlobalHookRunner.mockReturnValue(hookRunner as any);
|
||||
hookRunner.hasHooks.mockReturnValue(false);
|
||||
});
|
||||
|
||||
it("blocks known poll loops when no progress repeats", async () => {
|
||||
const execute = vi.fn().mockResolvedValue({
|
||||
content: [{ type: "text", text: "(no new output)\n\nProcess still running." }],
|
||||
details: { status: "running", aggregated: "steady" },
|
||||
});
|
||||
// oxlint-disable-next-line typescript/no-explicit-any
|
||||
const tool = wrapToolWithBeforeToolCallHook({ name: "process", execute } as any, {
|
||||
agentId: "main",
|
||||
sessionKey: "main",
|
||||
});
|
||||
const params = { action: "poll", sessionId: "sess-1" };
|
||||
|
||||
for (let i = 0; i < CRITICAL_THRESHOLD; i += 1) {
|
||||
await expect(tool.execute(`poll-${i}`, params, undefined, undefined)).resolves.toBeDefined();
|
||||
}
|
||||
|
||||
await expect(
|
||||
tool.execute(`poll-${CRITICAL_THRESHOLD}`, params, undefined, undefined),
|
||||
).rejects.toThrow("CRITICAL");
|
||||
});
|
||||
|
||||
it("does not block known poll loops when output progresses", async () => {
|
||||
const execute = vi.fn().mockImplementation(async (toolCallId: string) => {
|
||||
return {
|
||||
content: [{ type: "text", text: `output ${toolCallId}` }],
|
||||
details: { status: "running", aggregated: `output ${toolCallId}` },
|
||||
};
|
||||
});
|
||||
// oxlint-disable-next-line typescript/no-explicit-any
|
||||
const tool = wrapToolWithBeforeToolCallHook({ name: "process", execute } as any, {
|
||||
agentId: "main",
|
||||
sessionKey: "main",
|
||||
});
|
||||
const params = { action: "poll", sessionId: "sess-2" };
|
||||
|
||||
for (let i = 0; i < CRITICAL_THRESHOLD + 5; i += 1) {
|
||||
await expect(
|
||||
tool.execute(`poll-progress-${i}`, params, undefined, undefined),
|
||||
).resolves.toBeDefined();
|
||||
}
|
||||
});
|
||||
|
||||
it("keeps generic repeated calls warn-only below global breaker", async () => {
|
||||
const execute = vi.fn().mockResolvedValue({
|
||||
content: [{ type: "text", text: "same output" }],
|
||||
details: { ok: true },
|
||||
});
|
||||
// oxlint-disable-next-line typescript/no-explicit-any
|
||||
const tool = wrapToolWithBeforeToolCallHook({ name: "read", execute } as any, {
|
||||
agentId: "main",
|
||||
sessionKey: "main",
|
||||
});
|
||||
const params = { path: "/tmp/file" };
|
||||
|
||||
for (let i = 0; i < CRITICAL_THRESHOLD + 5; i += 1) {
|
||||
await expect(tool.execute(`read-${i}`, params, undefined, undefined)).resolves.toBeDefined();
|
||||
}
|
||||
});
|
||||
|
||||
it("blocks generic repeated no-progress calls at global breaker threshold", async () => {
|
||||
const execute = vi.fn().mockResolvedValue({
|
||||
content: [{ type: "text", text: "same output" }],
|
||||
details: { ok: true },
|
||||
});
|
||||
// oxlint-disable-next-line typescript/no-explicit-any
|
||||
const tool = wrapToolWithBeforeToolCallHook({ name: "read", execute } as any, {
|
||||
agentId: "main",
|
||||
sessionKey: "main",
|
||||
});
|
||||
const params = { path: "/tmp/file" };
|
||||
|
||||
for (let i = 0; i < GLOBAL_CIRCUIT_BREAKER_THRESHOLD; i += 1) {
|
||||
await expect(tool.execute(`read-${i}`, params, undefined, undefined)).resolves.toBeDefined();
|
||||
}
|
||||
|
||||
await expect(
|
||||
tool.execute(`read-${GLOBAL_CIRCUIT_BREAKER_THRESHOLD}`, params, undefined, undefined),
|
||||
).rejects.toThrow("global circuit breaker");
|
||||
});
|
||||
});
|
||||
@@ -16,6 +16,36 @@ const BEFORE_TOOL_CALL_WRAPPED = Symbol("beforeToolCallWrapped");
|
||||
const adjustedParamsByToolCallId = new Map<string, unknown>();
|
||||
const MAX_TRACKED_ADJUSTED_PARAMS = 1024;
|
||||
|
||||
async function recordLoopOutcome(args: {
|
||||
ctx?: HookContext;
|
||||
toolName: string;
|
||||
toolParams: unknown;
|
||||
toolCallId?: string;
|
||||
result?: unknown;
|
||||
error?: unknown;
|
||||
}): Promise<void> {
|
||||
if (!args.ctx?.sessionKey) {
|
||||
return;
|
||||
}
|
||||
try {
|
||||
const { getDiagnosticSessionState } = await import("../logging/diagnostic-session-state.js");
|
||||
const { recordToolCallOutcome } = await import("./tool-loop-detection.js");
|
||||
const sessionState = getDiagnosticSessionState({
|
||||
sessionKey: args.ctx.sessionKey,
|
||||
sessionId: args.ctx?.agentId,
|
||||
});
|
||||
recordToolCallOutcome(sessionState, {
|
||||
toolName: args.toolName,
|
||||
toolParams: args.toolParams,
|
||||
toolCallId: args.toolCallId,
|
||||
result: args.result,
|
||||
error: args.error,
|
||||
});
|
||||
} catch (err) {
|
||||
log.warn(`tool loop outcome tracking failed: tool=${args.toolName} error=${String(err)}`);
|
||||
}
|
||||
}
|
||||
|
||||
export async function runBeforeToolCallHook(args: {
|
||||
toolName: string;
|
||||
params: unknown;
|
||||
@@ -124,50 +154,22 @@ export function wrapToolWithBeforeToolCallHook(
|
||||
const normalizedToolName = normalizeToolName(toolName || "tool");
|
||||
try {
|
||||
const result = await execute(toolCallId, outcome.params, signal, onUpdate);
|
||||
if (ctx?.sessionKey) {
|
||||
try {
|
||||
const { getDiagnosticSessionState } =
|
||||
await import("../logging/diagnostic-session-state.js");
|
||||
const { recordToolCallOutcome } = await import("./tool-loop-detection.js");
|
||||
const sessionState = getDiagnosticSessionState({
|
||||
sessionKey: ctx.sessionKey,
|
||||
sessionId: ctx?.agentId,
|
||||
});
|
||||
recordToolCallOutcome(sessionState, {
|
||||
toolName: normalizedToolName,
|
||||
toolParams: outcome.params,
|
||||
toolCallId,
|
||||
result,
|
||||
});
|
||||
} catch (err) {
|
||||
log.warn(
|
||||
`tool loop outcome tracking failed: tool=${normalizedToolName} error=${String(err)}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
await recordLoopOutcome({
|
||||
ctx,
|
||||
toolName: normalizedToolName,
|
||||
toolParams: outcome.params,
|
||||
toolCallId,
|
||||
result,
|
||||
});
|
||||
return result;
|
||||
} catch (err) {
|
||||
if (ctx?.sessionKey) {
|
||||
try {
|
||||
const { getDiagnosticSessionState } =
|
||||
await import("../logging/diagnostic-session-state.js");
|
||||
const { recordToolCallOutcome } = await import("./tool-loop-detection.js");
|
||||
const sessionState = getDiagnosticSessionState({
|
||||
sessionKey: ctx.sessionKey,
|
||||
sessionId: ctx?.agentId,
|
||||
});
|
||||
recordToolCallOutcome(sessionState, {
|
||||
toolName: normalizedToolName,
|
||||
toolParams: outcome.params,
|
||||
toolCallId,
|
||||
error: err,
|
||||
});
|
||||
} catch (recordErr) {
|
||||
log.warn(
|
||||
`tool loop outcome tracking failed: tool=${normalizedToolName} error=${String(recordErr)}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
await recordLoopOutcome({
|
||||
ctx,
|
||||
toolName: normalizedToolName,
|
||||
toolParams: outcome.params,
|
||||
toolCallId,
|
||||
error: err,
|
||||
});
|
||||
throw err;
|
||||
}
|
||||
},
|
||||
|
||||
@@ -68,6 +68,13 @@ describe("tool-loop-detection", () => {
|
||||
const hash2 = hashToolCall("tool", { b: 2, a: 1 });
|
||||
expect(hash1).toBe(hash2);
|
||||
});
|
||||
|
||||
it("keeps hashes fixed-size even for large params", () => {
|
||||
const payload = { data: "x".repeat(20_000) };
|
||||
const hash = hashToolCall("read", payload);
|
||||
expect(hash.startsWith("read:")).toBe(true);
|
||||
expect(hash.length).toBe("read:".length + 64);
|
||||
});
|
||||
});
|
||||
|
||||
describe("recordToolCall", () => {
|
||||
@@ -91,8 +98,7 @@ describe("tool-loop-detection", () => {
|
||||
expect(state.toolCallHistory).toHaveLength(TOOL_CALL_HISTORY_SIZE);
|
||||
|
||||
const oldestCall = state.toolCallHistory?.[0];
|
||||
expect(oldestCall?.argsHash).toContain("iteration");
|
||||
expect(oldestCall?.argsHash).not.toContain('"iteration":0');
|
||||
expect(oldestCall?.argsHash).toBe(hashToolCall("tool", { iteration: 10 }));
|
||||
});
|
||||
|
||||
it("records timestamp for each call", () => {
|
||||
@@ -230,6 +236,26 @@ describe("tool-loop-detection", () => {
|
||||
}
|
||||
});
|
||||
|
||||
it("records fixed-size result hashes for large tool outputs", () => {
|
||||
const state = createState();
|
||||
const params = { action: "log", sessionId: "sess-big" };
|
||||
const toolCallId = "log-big";
|
||||
recordToolCall(state, "process", params, toolCallId);
|
||||
recordToolCallOutcome(state, {
|
||||
toolName: "process",
|
||||
toolParams: params,
|
||||
toolCallId,
|
||||
result: {
|
||||
content: [{ type: "text", text: "y".repeat(40_000) }],
|
||||
details: { status: "running", totalLines: 1, totalChars: 40_000 },
|
||||
},
|
||||
});
|
||||
|
||||
const entry = state.toolCallHistory?.find((call) => call.toolCallId === toolCallId);
|
||||
expect(typeof entry?.resultHash).toBe("string");
|
||||
expect(entry?.resultHash?.length).toBe(64);
|
||||
});
|
||||
|
||||
it("handles empty history", () => {
|
||||
const state = createState();
|
||||
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
import { createHash } from "node:crypto";
|
||||
import type { SessionState } from "../logging/diagnostic-session-state.js";
|
||||
import { createSubsystemLogger } from "../logging/subsystem.js";
|
||||
import { isPlainObject } from "../utils.js";
|
||||
|
||||
const log = createSubsystemLogger("agents/loop-detection");
|
||||
|
||||
@@ -14,15 +16,10 @@ export const GLOBAL_CIRCUIT_BREAKER_THRESHOLD = 30;
|
||||
|
||||
/**
|
||||
* Hash a tool call for pattern matching.
|
||||
* Uses tool name + deterministic JSON stringification of params.
|
||||
* Uses tool name + deterministic JSON serialization digest of params.
|
||||
*/
|
||||
export function hashToolCall(toolName: string, params: unknown): string {
|
||||
try {
|
||||
const paramsStr = stableStringify(params);
|
||||
return `${toolName}:${paramsStr}`;
|
||||
} catch {
|
||||
return `${toolName}:${String(params)}`;
|
||||
}
|
||||
return `${toolName}:${digestStable(params)}`;
|
||||
}
|
||||
|
||||
function stableStringify(value: unknown): string {
|
||||
@@ -37,8 +34,29 @@ function stableStringify(value: unknown): string {
|
||||
return `{${keys.map((k) => `${JSON.stringify(k)}:${stableStringify(obj[k])}`).join(",")}}`;
|
||||
}
|
||||
|
||||
function isPlainObject(value: unknown): value is Record<string, unknown> {
|
||||
return typeof value === "object" && value !== null && !Array.isArray(value);
|
||||
function digestStable(value: unknown): string {
|
||||
const serialized = stableStringifyFallback(value);
|
||||
return createHash("sha256").update(serialized).digest("hex");
|
||||
}
|
||||
|
||||
function stableStringifyFallback(value: unknown): string {
|
||||
try {
|
||||
return stableStringify(value);
|
||||
} catch {
|
||||
if (value === null || value === undefined) {
|
||||
return `${value}`;
|
||||
}
|
||||
if (typeof value === "string") {
|
||||
return value;
|
||||
}
|
||||
if (typeof value === "number" || typeof value === "boolean" || typeof value === "bigint") {
|
||||
return `${value}`;
|
||||
}
|
||||
if (value instanceof Error) {
|
||||
return `${value.name}:${value.message}`;
|
||||
}
|
||||
return Object.prototype.toString.call(value);
|
||||
}
|
||||
}
|
||||
|
||||
function isKnownPollToolCall(toolName: string, params: unknown): boolean {
|
||||
@@ -86,10 +104,10 @@ function hashToolOutcome(
|
||||
error: unknown,
|
||||
): string | undefined {
|
||||
if (error !== undefined) {
|
||||
return `error:${formatErrorForHash(error)}`;
|
||||
return `error:${digestStable(formatErrorForHash(error))}`;
|
||||
}
|
||||
if (!isPlainObject(result)) {
|
||||
return result === undefined ? undefined : stableStringify(result);
|
||||
return result === undefined ? undefined : digestStable(result);
|
||||
}
|
||||
|
||||
const details = isPlainObject(result.details) ? result.details : {};
|
||||
@@ -97,7 +115,7 @@ function hashToolOutcome(
|
||||
if (isKnownPollToolCall(toolName, params) && toolName === "process" && isPlainObject(params)) {
|
||||
const action = params.action;
|
||||
if (action === "poll") {
|
||||
return stableStringify({
|
||||
return digestStable({
|
||||
action,
|
||||
status: details.status,
|
||||
exitCode: details.exitCode ?? null,
|
||||
@@ -107,7 +125,7 @@ function hashToolOutcome(
|
||||
});
|
||||
}
|
||||
if (action === "log") {
|
||||
return stableStringify({
|
||||
return digestStable({
|
||||
action,
|
||||
status: details.status,
|
||||
totalLines: details.totalLines ?? null,
|
||||
@@ -120,7 +138,7 @@ function hashToolOutcome(
|
||||
}
|
||||
}
|
||||
|
||||
return stableStringify({
|
||||
return digestStable({
|
||||
details,
|
||||
text,
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user