From 5ee87092fd8aa7cdf31c4efae83d860ce3810f90 Mon Sep 17 00:00:00 2001 From: Gustavo Madeira Santana Date: Mon, 16 Feb 2026 14:07:59 -0500 Subject: [PATCH] fix: harden loop detector and runnable coverage --- CHANGELOG.md | 2 +- .../pi-tools.before-tool-call.e2e.test.ts | 99 ---------------- src/agents/pi-tools.before-tool-call.test.ts | 107 ++++++++++++++++++ src/agents/pi-tools.before-tool-call.ts | 86 +++++++------- src/agents/tool-loop-detection.test.ts | 30 ++++- src/agents/tool-loop-detection.ts | 46 +++++--- 6 files changed, 212 insertions(+), 158 deletions(-) create mode 100644 src/agents/pi-tools.before-tool-call.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 0400cd83e24..43053df7636 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,7 +20,7 @@ Docs: https://docs.openclaw.ai - Gateway/Config: prevent `config.patch` object-array merges from falling back to full-array replacement when some patch entries lack `id`, so partial `agents.list` updates no longer drop unrelated agents. (#17989) Thanks @stakeswky. - Config/Discord: require string IDs in Discord allowlists, keep onboarding inputs string-only, and add doctor repair for numeric entries. (#18220) Thanks @thewilloftheshadow. - Agents/Models: probe the primary model when its auth-profile cooldown is near expiry (with per-provider throttling), so runs recover from temporary rate limits without staying on fallback models until restart. (#17478) Thanks @PlayerGhost. -- Agents/Tools: make loop detection progress-aware and phased by hard-blocking known `process(action=poll|log)` no-progress loops, keeping generic identical-call detection warn-only, and adding a global circuit breaker at 30 no-progress repeats. (#16808) +- Agents/Tools: make loop detection progress-aware and phased by hard-blocking known `process(action=poll|log)` no-progress loops, keeping generic identical-call detection warn-only, and adding a global circuit breaker at 30 no-progress repeats. (#16808) Thanks @akramcodez and @beca-oc. - Agents/Tools: scope the `message` tool schema to the active channel so Telegram uses `buttons` and Discord uses `components`. (#18215) Thanks @obviyus. - Discord: optimize reaction notification handling to skip unnecessary message fetches in `off`/`all`/`allowlist` modes, streamline reaction routing, and improve reaction emoji formatting. (#18248) Thanks @thewilloftheshadow and @victorGPT. - Telegram: keep draft-stream preview replies attached to the user message for `replyToMode: "all"` in groups and DMs, preserving threaded reply context from preview through finalization. (#17880) Thanks @yinghaosang. diff --git a/src/agents/pi-tools.before-tool-call.e2e.test.ts b/src/agents/pi-tools.before-tool-call.e2e.test.ts index 53c8f18c2ff..20145cb2af5 100644 --- a/src/agents/pi-tools.before-tool-call.e2e.test.ts +++ b/src/agents/pi-tools.before-tool-call.e2e.test.ts @@ -3,7 +3,6 @@ import { resetDiagnosticSessionStateForTest } from "../logging/diagnostic-sessio import { getGlobalHookRunner } from "../plugins/hook-runner-global.js"; import { toClientToolDefinitions, toToolDefinitions } from "./pi-tool-definition-adapter.js"; import { wrapToolWithBeforeToolCallHook } from "./pi-tools.before-tool-call.js"; -import { CRITICAL_THRESHOLD, GLOBAL_CIRCUIT_BREAKER_THRESHOLD } from "./tool-loop-detection.js"; vi.mock("../plugins/hook-runner-global.js"); @@ -111,104 +110,6 @@ describe("before_tool_call hook integration", () => { }); }); -describe("before_tool_call loop detection behavior", () => { - let hookRunner: { - hasHooks: ReturnType; - runBeforeToolCall: ReturnType; - }; - - beforeEach(() => { - resetDiagnosticSessionStateForTest(); - hookRunner = { - hasHooks: vi.fn(), - runBeforeToolCall: vi.fn(), - }; - // oxlint-disable-next-line typescript/no-explicit-any - mockGetGlobalHookRunner.mockReturnValue(hookRunner as any); - hookRunner.hasHooks.mockReturnValue(false); - }); - - it("blocks known poll loops when no progress repeats", async () => { - const execute = vi.fn().mockResolvedValue({ - content: [{ type: "text", text: "(no new output)\n\nProcess still running." }], - details: { status: "running", aggregated: "steady" }, - }); - // oxlint-disable-next-line typescript/no-explicit-any - const tool = wrapToolWithBeforeToolCallHook({ name: "process", execute } as any, { - agentId: "main", - sessionKey: "main", - }); - const params = { action: "poll", sessionId: "sess-1" }; - - for (let i = 0; i < CRITICAL_THRESHOLD; i += 1) { - await expect(tool.execute(`poll-${i}`, params, undefined, undefined)).resolves.toBeDefined(); - } - - await expect( - tool.execute(`poll-${CRITICAL_THRESHOLD}`, params, undefined, undefined), - ).rejects.toThrow("CRITICAL"); - }); - - it("does not block known poll loops when output progresses", async () => { - const execute = vi.fn().mockImplementation(async (toolCallId: string) => { - return { - content: [{ type: "text", text: `output ${toolCallId}` }], - details: { status: "running", aggregated: `output ${toolCallId}` }, - }; - }); - // oxlint-disable-next-line typescript/no-explicit-any - const tool = wrapToolWithBeforeToolCallHook({ name: "process", execute } as any, { - agentId: "main", - sessionKey: "main", - }); - const params = { action: "poll", sessionId: "sess-2" }; - - for (let i = 0; i < CRITICAL_THRESHOLD + 5; i += 1) { - await expect( - tool.execute(`poll-progress-${i}`, params, undefined, undefined), - ).resolves.toBeDefined(); - } - }); - - it("keeps generic repeated calls warn-only below global breaker", async () => { - const execute = vi.fn().mockResolvedValue({ - content: [{ type: "text", text: "same output" }], - details: { ok: true }, - }); - // oxlint-disable-next-line typescript/no-explicit-any - const tool = wrapToolWithBeforeToolCallHook({ name: "read", execute } as any, { - agentId: "main", - sessionKey: "main", - }); - const params = { path: "/tmp/file" }; - - for (let i = 0; i < CRITICAL_THRESHOLD + 5; i += 1) { - await expect(tool.execute(`read-${i}`, params, undefined, undefined)).resolves.toBeDefined(); - } - }); - - it("blocks generic repeated no-progress calls at global breaker threshold", async () => { - const execute = vi.fn().mockResolvedValue({ - content: [{ type: "text", text: "same output" }], - details: { ok: true }, - }); - // oxlint-disable-next-line typescript/no-explicit-any - const tool = wrapToolWithBeforeToolCallHook({ name: "read", execute } as any, { - agentId: "main", - sessionKey: "main", - }); - const params = { path: "/tmp/file" }; - - for (let i = 0; i < GLOBAL_CIRCUIT_BREAKER_THRESHOLD; i += 1) { - await expect(tool.execute(`read-${i}`, params, undefined, undefined)).resolves.toBeDefined(); - } - - await expect( - tool.execute(`read-${GLOBAL_CIRCUIT_BREAKER_THRESHOLD}`, params, undefined, undefined), - ).rejects.toThrow("global circuit breaker"); - }); -}); - describe("before_tool_call hook deduplication (#15502)", () => { let hookRunner: { hasHooks: ReturnType; diff --git a/src/agents/pi-tools.before-tool-call.test.ts b/src/agents/pi-tools.before-tool-call.test.ts new file mode 100644 index 00000000000..560baf89d75 --- /dev/null +++ b/src/agents/pi-tools.before-tool-call.test.ts @@ -0,0 +1,107 @@ +import { beforeEach, describe, expect, it, vi } from "vitest"; +import { resetDiagnosticSessionStateForTest } from "../logging/diagnostic-session-state.js"; +import { getGlobalHookRunner } from "../plugins/hook-runner-global.js"; +import { wrapToolWithBeforeToolCallHook } from "./pi-tools.before-tool-call.js"; +import { CRITICAL_THRESHOLD, GLOBAL_CIRCUIT_BREAKER_THRESHOLD } from "./tool-loop-detection.js"; + +vi.mock("../plugins/hook-runner-global.js"); + +const mockGetGlobalHookRunner = vi.mocked(getGlobalHookRunner); + +describe("before_tool_call loop detection behavior", () => { + let hookRunner: { + hasHooks: ReturnType; + runBeforeToolCall: ReturnType; + }; + + beforeEach(() => { + resetDiagnosticSessionStateForTest(); + hookRunner = { + hasHooks: vi.fn(), + runBeforeToolCall: vi.fn(), + }; + // oxlint-disable-next-line typescript/no-explicit-any + mockGetGlobalHookRunner.mockReturnValue(hookRunner as any); + hookRunner.hasHooks.mockReturnValue(false); + }); + + it("blocks known poll loops when no progress repeats", async () => { + const execute = vi.fn().mockResolvedValue({ + content: [{ type: "text", text: "(no new output)\n\nProcess still running." }], + details: { status: "running", aggregated: "steady" }, + }); + // oxlint-disable-next-line typescript/no-explicit-any + const tool = wrapToolWithBeforeToolCallHook({ name: "process", execute } as any, { + agentId: "main", + sessionKey: "main", + }); + const params = { action: "poll", sessionId: "sess-1" }; + + for (let i = 0; i < CRITICAL_THRESHOLD; i += 1) { + await expect(tool.execute(`poll-${i}`, params, undefined, undefined)).resolves.toBeDefined(); + } + + await expect( + tool.execute(`poll-${CRITICAL_THRESHOLD}`, params, undefined, undefined), + ).rejects.toThrow("CRITICAL"); + }); + + it("does not block known poll loops when output progresses", async () => { + const execute = vi.fn().mockImplementation(async (toolCallId: string) => { + return { + content: [{ type: "text", text: `output ${toolCallId}` }], + details: { status: "running", aggregated: `output ${toolCallId}` }, + }; + }); + // oxlint-disable-next-line typescript/no-explicit-any + const tool = wrapToolWithBeforeToolCallHook({ name: "process", execute } as any, { + agentId: "main", + sessionKey: "main", + }); + const params = { action: "poll", sessionId: "sess-2" }; + + for (let i = 0; i < CRITICAL_THRESHOLD + 5; i += 1) { + await expect( + tool.execute(`poll-progress-${i}`, params, undefined, undefined), + ).resolves.toBeDefined(); + } + }); + + it("keeps generic repeated calls warn-only below global breaker", async () => { + const execute = vi.fn().mockResolvedValue({ + content: [{ type: "text", text: "same output" }], + details: { ok: true }, + }); + // oxlint-disable-next-line typescript/no-explicit-any + const tool = wrapToolWithBeforeToolCallHook({ name: "read", execute } as any, { + agentId: "main", + sessionKey: "main", + }); + const params = { path: "/tmp/file" }; + + for (let i = 0; i < CRITICAL_THRESHOLD + 5; i += 1) { + await expect(tool.execute(`read-${i}`, params, undefined, undefined)).resolves.toBeDefined(); + } + }); + + it("blocks generic repeated no-progress calls at global breaker threshold", async () => { + const execute = vi.fn().mockResolvedValue({ + content: [{ type: "text", text: "same output" }], + details: { ok: true }, + }); + // oxlint-disable-next-line typescript/no-explicit-any + const tool = wrapToolWithBeforeToolCallHook({ name: "read", execute } as any, { + agentId: "main", + sessionKey: "main", + }); + const params = { path: "/tmp/file" }; + + for (let i = 0; i < GLOBAL_CIRCUIT_BREAKER_THRESHOLD; i += 1) { + await expect(tool.execute(`read-${i}`, params, undefined, undefined)).resolves.toBeDefined(); + } + + await expect( + tool.execute(`read-${GLOBAL_CIRCUIT_BREAKER_THRESHOLD}`, params, undefined, undefined), + ).rejects.toThrow("global circuit breaker"); + }); +}); diff --git a/src/agents/pi-tools.before-tool-call.ts b/src/agents/pi-tools.before-tool-call.ts index 03a48525d9f..5f7dec5c382 100644 --- a/src/agents/pi-tools.before-tool-call.ts +++ b/src/agents/pi-tools.before-tool-call.ts @@ -16,6 +16,36 @@ const BEFORE_TOOL_CALL_WRAPPED = Symbol("beforeToolCallWrapped"); const adjustedParamsByToolCallId = new Map(); const MAX_TRACKED_ADJUSTED_PARAMS = 1024; +async function recordLoopOutcome(args: { + ctx?: HookContext; + toolName: string; + toolParams: unknown; + toolCallId?: string; + result?: unknown; + error?: unknown; +}): Promise { + if (!args.ctx?.sessionKey) { + return; + } + try { + const { getDiagnosticSessionState } = await import("../logging/diagnostic-session-state.js"); + const { recordToolCallOutcome } = await import("./tool-loop-detection.js"); + const sessionState = getDiagnosticSessionState({ + sessionKey: args.ctx.sessionKey, + sessionId: args.ctx?.agentId, + }); + recordToolCallOutcome(sessionState, { + toolName: args.toolName, + toolParams: args.toolParams, + toolCallId: args.toolCallId, + result: args.result, + error: args.error, + }); + } catch (err) { + log.warn(`tool loop outcome tracking failed: tool=${args.toolName} error=${String(err)}`); + } +} + export async function runBeforeToolCallHook(args: { toolName: string; params: unknown; @@ -124,50 +154,22 @@ export function wrapToolWithBeforeToolCallHook( const normalizedToolName = normalizeToolName(toolName || "tool"); try { const result = await execute(toolCallId, outcome.params, signal, onUpdate); - if (ctx?.sessionKey) { - try { - const { getDiagnosticSessionState } = - await import("../logging/diagnostic-session-state.js"); - const { recordToolCallOutcome } = await import("./tool-loop-detection.js"); - const sessionState = getDiagnosticSessionState({ - sessionKey: ctx.sessionKey, - sessionId: ctx?.agentId, - }); - recordToolCallOutcome(sessionState, { - toolName: normalizedToolName, - toolParams: outcome.params, - toolCallId, - result, - }); - } catch (err) { - log.warn( - `tool loop outcome tracking failed: tool=${normalizedToolName} error=${String(err)}`, - ); - } - } + await recordLoopOutcome({ + ctx, + toolName: normalizedToolName, + toolParams: outcome.params, + toolCallId, + result, + }); return result; } catch (err) { - if (ctx?.sessionKey) { - try { - const { getDiagnosticSessionState } = - await import("../logging/diagnostic-session-state.js"); - const { recordToolCallOutcome } = await import("./tool-loop-detection.js"); - const sessionState = getDiagnosticSessionState({ - sessionKey: ctx.sessionKey, - sessionId: ctx?.agentId, - }); - recordToolCallOutcome(sessionState, { - toolName: normalizedToolName, - toolParams: outcome.params, - toolCallId, - error: err, - }); - } catch (recordErr) { - log.warn( - `tool loop outcome tracking failed: tool=${normalizedToolName} error=${String(recordErr)}`, - ); - } - } + await recordLoopOutcome({ + ctx, + toolName: normalizedToolName, + toolParams: outcome.params, + toolCallId, + error: err, + }); throw err; } }, diff --git a/src/agents/tool-loop-detection.test.ts b/src/agents/tool-loop-detection.test.ts index e17aef97e57..365d8761b8a 100644 --- a/src/agents/tool-loop-detection.test.ts +++ b/src/agents/tool-loop-detection.test.ts @@ -68,6 +68,13 @@ describe("tool-loop-detection", () => { const hash2 = hashToolCall("tool", { b: 2, a: 1 }); expect(hash1).toBe(hash2); }); + + it("keeps hashes fixed-size even for large params", () => { + const payload = { data: "x".repeat(20_000) }; + const hash = hashToolCall("read", payload); + expect(hash.startsWith("read:")).toBe(true); + expect(hash.length).toBe("read:".length + 64); + }); }); describe("recordToolCall", () => { @@ -91,8 +98,7 @@ describe("tool-loop-detection", () => { expect(state.toolCallHistory).toHaveLength(TOOL_CALL_HISTORY_SIZE); const oldestCall = state.toolCallHistory?.[0]; - expect(oldestCall?.argsHash).toContain("iteration"); - expect(oldestCall?.argsHash).not.toContain('"iteration":0'); + expect(oldestCall?.argsHash).toBe(hashToolCall("tool", { iteration: 10 })); }); it("records timestamp for each call", () => { @@ -230,6 +236,26 @@ describe("tool-loop-detection", () => { } }); + it("records fixed-size result hashes for large tool outputs", () => { + const state = createState(); + const params = { action: "log", sessionId: "sess-big" }; + const toolCallId = "log-big"; + recordToolCall(state, "process", params, toolCallId); + recordToolCallOutcome(state, { + toolName: "process", + toolParams: params, + toolCallId, + result: { + content: [{ type: "text", text: "y".repeat(40_000) }], + details: { status: "running", totalLines: 1, totalChars: 40_000 }, + }, + }); + + const entry = state.toolCallHistory?.find((call) => call.toolCallId === toolCallId); + expect(typeof entry?.resultHash).toBe("string"); + expect(entry?.resultHash?.length).toBe(64); + }); + it("handles empty history", () => { const state = createState(); diff --git a/src/agents/tool-loop-detection.ts b/src/agents/tool-loop-detection.ts index 2d0efbc847e..280b93eb3aa 100644 --- a/src/agents/tool-loop-detection.ts +++ b/src/agents/tool-loop-detection.ts @@ -1,5 +1,7 @@ +import { createHash } from "node:crypto"; import type { SessionState } from "../logging/diagnostic-session-state.js"; import { createSubsystemLogger } from "../logging/subsystem.js"; +import { isPlainObject } from "../utils.js"; const log = createSubsystemLogger("agents/loop-detection"); @@ -14,15 +16,10 @@ export const GLOBAL_CIRCUIT_BREAKER_THRESHOLD = 30; /** * Hash a tool call for pattern matching. - * Uses tool name + deterministic JSON stringification of params. + * Uses tool name + deterministic JSON serialization digest of params. */ export function hashToolCall(toolName: string, params: unknown): string { - try { - const paramsStr = stableStringify(params); - return `${toolName}:${paramsStr}`; - } catch { - return `${toolName}:${String(params)}`; - } + return `${toolName}:${digestStable(params)}`; } function stableStringify(value: unknown): string { @@ -37,8 +34,29 @@ function stableStringify(value: unknown): string { return `{${keys.map((k) => `${JSON.stringify(k)}:${stableStringify(obj[k])}`).join(",")}}`; } -function isPlainObject(value: unknown): value is Record { - return typeof value === "object" && value !== null && !Array.isArray(value); +function digestStable(value: unknown): string { + const serialized = stableStringifyFallback(value); + return createHash("sha256").update(serialized).digest("hex"); +} + +function stableStringifyFallback(value: unknown): string { + try { + return stableStringify(value); + } catch { + if (value === null || value === undefined) { + return `${value}`; + } + if (typeof value === "string") { + return value; + } + if (typeof value === "number" || typeof value === "boolean" || typeof value === "bigint") { + return `${value}`; + } + if (value instanceof Error) { + return `${value.name}:${value.message}`; + } + return Object.prototype.toString.call(value); + } } function isKnownPollToolCall(toolName: string, params: unknown): boolean { @@ -86,10 +104,10 @@ function hashToolOutcome( error: unknown, ): string | undefined { if (error !== undefined) { - return `error:${formatErrorForHash(error)}`; + return `error:${digestStable(formatErrorForHash(error))}`; } if (!isPlainObject(result)) { - return result === undefined ? undefined : stableStringify(result); + return result === undefined ? undefined : digestStable(result); } const details = isPlainObject(result.details) ? result.details : {}; @@ -97,7 +115,7 @@ function hashToolOutcome( if (isKnownPollToolCall(toolName, params) && toolName === "process" && isPlainObject(params)) { const action = params.action; if (action === "poll") { - return stableStringify({ + return digestStable({ action, status: details.status, exitCode: details.exitCode ?? null, @@ -107,7 +125,7 @@ function hashToolOutcome( }); } if (action === "log") { - return stableStringify({ + return digestStable({ action, status: details.status, totalLines: details.totalLines ?? null, @@ -120,7 +138,7 @@ function hashToolOutcome( } } - return stableStringify({ + return digestStable({ details, text, });