fix: harden loop detector and runnable coverage

2026-06-06 16:03:30 +00:00 · 2026-02-16 14:07:59 -05:00
parent 54999b7518
commit 5ee87092fd
6 changed files with 212 additions and 158 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -20,7 +20,7 @@ Docs: https://docs.openclaw.ai
 - Gateway/Config: prevent `config.patch` object-array merges from falling back to full-array replacement when some patch entries lack `id`, so partial `agents.list` updates no longer drop unrelated agents. (#17989) Thanks @stakeswky.
 - Config/Discord: require string IDs in Discord allowlists, keep onboarding inputs string-only, and add doctor repair for numeric entries. (#18220) Thanks @thewilloftheshadow.
 - Agents/Models: probe the primary model when its auth-profile cooldown is near expiry (with per-provider throttling), so runs recover from temporary rate limits without staying on fallback models until restart. (#17478) Thanks @PlayerGhost.
- Agents/Tools: make loop detection progress-aware and phased by hard-blocking known `process(action=poll|log)` no-progress loops, keeping generic identical-call detection warn-only, and adding a global circuit breaker at 30 no-progress repeats. (#16808)
+- Agents/Tools: make loop detection progress-aware and phased by hard-blocking known `process(action=poll|log)` no-progress loops, keeping generic identical-call detection warn-only, and adding a global circuit breaker at 30 no-progress repeats. (#16808) Thanks @akramcodez and @beca-oc.
 - Agents/Tools: scope the `message` tool schema to the active channel so Telegram uses `buttons` and Discord uses `components`. (#18215) Thanks @obviyus.
 - Discord: optimize reaction notification handling to skip unnecessary message fetches in `off`/`all`/`allowlist` modes, streamline reaction routing, and improve reaction emoji formatting. (#18248) Thanks @thewilloftheshadow and @victorGPT.
 - Telegram: keep draft-stream preview replies attached to the user message for `replyToMode: "all"` in groups and DMs, preserving threaded reply context from preview through finalization. (#17880) Thanks @yinghaosang.
--- a/src/agents/pi-tools.before-tool-call.e2e.test.ts
+++ b/src/agents/pi-tools.before-tool-call.e2e.test.ts
@@ -3,7 +3,6 @@ import { resetDiagnosticSessionStateForTest } from "../logging/diagnostic-sessio
 import { getGlobalHookRunner } from "../plugins/hook-runner-global.js";
 import { toClientToolDefinitions, toToolDefinitions } from "./pi-tool-definition-adapter.js";
 import { wrapToolWithBeforeToolCallHook } from "./pi-tools.before-tool-call.js";
-import { CRITICAL_THRESHOLD, GLOBAL_CIRCUIT_BREAKER_THRESHOLD } from "./tool-loop-detection.js";

 vi.mock("../plugins/hook-runner-global.js");

@@ -111,104 +110,6 @@ describe("before_tool_call hook integration", () => {
  });
 });

-describe("before_tool_call loop detection behavior", () => {
-  let hookRunner: {
-    hasHooks: ReturnType<typeof vi.fn>;
-    runBeforeToolCall: ReturnType<typeof vi.fn>;
-  };
-
-  beforeEach(() => {
-    resetDiagnosticSessionStateForTest();
-    hookRunner = {
-      hasHooks: vi.fn(),
-      runBeforeToolCall: vi.fn(),
-    };
-    // oxlint-disable-next-line typescript/no-explicit-any
-    mockGetGlobalHookRunner.mockReturnValue(hookRunner as any);
-    hookRunner.hasHooks.mockReturnValue(false);
-  });
-
-  it("blocks known poll loops when no progress repeats", async () => {
-    const execute = vi.fn().mockResolvedValue({
-      content: [{ type: "text", text: "(no new output)\n\nProcess still running." }],
-      details: { status: "running", aggregated: "steady" },
-    });
-    // oxlint-disable-next-line typescript/no-explicit-any
-    const tool = wrapToolWithBeforeToolCallHook({ name: "process", execute } as any, {
-      agentId: "main",
-      sessionKey: "main",
-    });
-    const params = { action: "poll", sessionId: "sess-1" };
-
-    for (let i = 0; i < CRITICAL_THRESHOLD; i += 1) {
-      await expect(tool.execute(`poll-${i}`, params, undefined, undefined)).resolves.toBeDefined();
-    }
-
-    await expect(
-      tool.execute(`poll-${CRITICAL_THRESHOLD}`, params, undefined, undefined),
-    ).rejects.toThrow("CRITICAL");
-  });
-
-  it("does not block known poll loops when output progresses", async () => {
-    const execute = vi.fn().mockImplementation(async (toolCallId: string) => {
-      return {
-        content: [{ type: "text", text: `output ${toolCallId}` }],
-        details: { status: "running", aggregated: `output ${toolCallId}` },
-      };
-    });
-    // oxlint-disable-next-line typescript/no-explicit-any
-    const tool = wrapToolWithBeforeToolCallHook({ name: "process", execute } as any, {
-      agentId: "main",
-      sessionKey: "main",
-    });
-    const params = { action: "poll", sessionId: "sess-2" };
-
-    for (let i = 0; i < CRITICAL_THRESHOLD + 5; i += 1) {
-      await expect(
-        tool.execute(`poll-progress-${i}`, params, undefined, undefined),
-      ).resolves.toBeDefined();
-    }
-  });
-
-  it("keeps generic repeated calls warn-only below global breaker", async () => {
-    const execute = vi.fn().mockResolvedValue({
-      content: [{ type: "text", text: "same output" }],
-      details: { ok: true },
-    });
-    // oxlint-disable-next-line typescript/no-explicit-any
-    const tool = wrapToolWithBeforeToolCallHook({ name: "read", execute } as any, {
-      agentId: "main",
-      sessionKey: "main",
-    });
-    const params = { path: "/tmp/file" };
-
-    for (let i = 0; i < CRITICAL_THRESHOLD + 5; i += 1) {
-      await expect(tool.execute(`read-${i}`, params, undefined, undefined)).resolves.toBeDefined();
-    }
-  });
-
-  it("blocks generic repeated no-progress calls at global breaker threshold", async () => {
-    const execute = vi.fn().mockResolvedValue({
-      content: [{ type: "text", text: "same output" }],
-      details: { ok: true },
-    });
-    // oxlint-disable-next-line typescript/no-explicit-any
-    const tool = wrapToolWithBeforeToolCallHook({ name: "read", execute } as any, {
-      agentId: "main",
-      sessionKey: "main",
-    });
-    const params = { path: "/tmp/file" };
-
-    for (let i = 0; i < GLOBAL_CIRCUIT_BREAKER_THRESHOLD; i += 1) {
-      await expect(tool.execute(`read-${i}`, params, undefined, undefined)).resolves.toBeDefined();
-    }
-
-    await expect(
-      tool.execute(`read-${GLOBAL_CIRCUIT_BREAKER_THRESHOLD}`, params, undefined, undefined),
-    ).rejects.toThrow("global circuit breaker");
-  });
-});
-
 describe("before_tool_call hook deduplication (#15502)", () => {
  let hookRunner: {
    hasHooks: ReturnType<typeof vi.fn>;
--- a/src/agents/pi-tools.before-tool-call.test.ts
+++ b/src/agents/pi-tools.before-tool-call.test.ts
@@ -0,0 +1,107 @@
+import { beforeEach, describe, expect, it, vi } from "vitest";
+import { resetDiagnosticSessionStateForTest } from "../logging/diagnostic-session-state.js";
+import { getGlobalHookRunner } from "../plugins/hook-runner-global.js";
+import { wrapToolWithBeforeToolCallHook } from "./pi-tools.before-tool-call.js";
+import { CRITICAL_THRESHOLD, GLOBAL_CIRCUIT_BREAKER_THRESHOLD } from "./tool-loop-detection.js";
+
+vi.mock("../plugins/hook-runner-global.js");
+
+const mockGetGlobalHookRunner = vi.mocked(getGlobalHookRunner);
+
+describe("before_tool_call loop detection behavior", () => {
+  let hookRunner: {
+    hasHooks: ReturnType<typeof vi.fn>;
+    runBeforeToolCall: ReturnType<typeof vi.fn>;
+  };
+
+  beforeEach(() => {
+    resetDiagnosticSessionStateForTest();
+    hookRunner = {
+      hasHooks: vi.fn(),
+      runBeforeToolCall: vi.fn(),
+    };
+    // oxlint-disable-next-line typescript/no-explicit-any
+    mockGetGlobalHookRunner.mockReturnValue(hookRunner as any);
+    hookRunner.hasHooks.mockReturnValue(false);
+  });
+
+  it("blocks known poll loops when no progress repeats", async () => {
+    const execute = vi.fn().mockResolvedValue({
+      content: [{ type: "text", text: "(no new output)\n\nProcess still running." }],
+      details: { status: "running", aggregated: "steady" },
+    });
+    // oxlint-disable-next-line typescript/no-explicit-any
+    const tool = wrapToolWithBeforeToolCallHook({ name: "process", execute } as any, {
+      agentId: "main",
+      sessionKey: "main",
+    });
+    const params = { action: "poll", sessionId: "sess-1" };
+
+    for (let i = 0; i < CRITICAL_THRESHOLD; i += 1) {
+      await expect(tool.execute(`poll-${i}`, params, undefined, undefined)).resolves.toBeDefined();
+    }
+
+    await expect(
+      tool.execute(`poll-${CRITICAL_THRESHOLD}`, params, undefined, undefined),
+    ).rejects.toThrow("CRITICAL");
+  });
+
+  it("does not block known poll loops when output progresses", async () => {
+    const execute = vi.fn().mockImplementation(async (toolCallId: string) => {
+      return {
+        content: [{ type: "text", text: `output ${toolCallId}` }],
+        details: { status: "running", aggregated: `output ${toolCallId}` },
+      };
+    });
+    // oxlint-disable-next-line typescript/no-explicit-any
+    const tool = wrapToolWithBeforeToolCallHook({ name: "process", execute } as any, {
+      agentId: "main",
+      sessionKey: "main",
+    });
+    const params = { action: "poll", sessionId: "sess-2" };
+
+    for (let i = 0; i < CRITICAL_THRESHOLD + 5; i += 1) {
+      await expect(
+        tool.execute(`poll-progress-${i}`, params, undefined, undefined),
+      ).resolves.toBeDefined();
+    }
+  });
+
+  it("keeps generic repeated calls warn-only below global breaker", async () => {
+    const execute = vi.fn().mockResolvedValue({
+      content: [{ type: "text", text: "same output" }],
+      details: { ok: true },
+    });
+    // oxlint-disable-next-line typescript/no-explicit-any
+    const tool = wrapToolWithBeforeToolCallHook({ name: "read", execute } as any, {
+      agentId: "main",
+      sessionKey: "main",
+    });
+    const params = { path: "/tmp/file" };
+
+    for (let i = 0; i < CRITICAL_THRESHOLD + 5; i += 1) {
+      await expect(tool.execute(`read-${i}`, params, undefined, undefined)).resolves.toBeDefined();
+    }
+  });
+
+  it("blocks generic repeated no-progress calls at global breaker threshold", async () => {
+    const execute = vi.fn().mockResolvedValue({
+      content: [{ type: "text", text: "same output" }],
+      details: { ok: true },
+    });
+    // oxlint-disable-next-line typescript/no-explicit-any
+    const tool = wrapToolWithBeforeToolCallHook({ name: "read", execute } as any, {
+      agentId: "main",
+      sessionKey: "main",
+    });
+    const params = { path: "/tmp/file" };
+
+    for (let i = 0; i < GLOBAL_CIRCUIT_BREAKER_THRESHOLD; i += 1) {
+      await expect(tool.execute(`read-${i}`, params, undefined, undefined)).resolves.toBeDefined();
+    }
+
+    await expect(
+      tool.execute(`read-${GLOBAL_CIRCUIT_BREAKER_THRESHOLD}`, params, undefined, undefined),
+    ).rejects.toThrow("global circuit breaker");
+  });
+});
--- a/src/agents/pi-tools.before-tool-call.ts
+++ b/src/agents/pi-tools.before-tool-call.ts
@@ -16,6 +16,36 @@ const BEFORE_TOOL_CALL_WRAPPED = Symbol("beforeToolCallWrapped");
 const adjustedParamsByToolCallId = new Map<string, unknown>();
 const MAX_TRACKED_ADJUSTED_PARAMS = 1024;

+async function recordLoopOutcome(args: {
+  ctx?: HookContext;
+  toolName: string;
+  toolParams: unknown;
+  toolCallId?: string;
+  result?: unknown;
+  error?: unknown;
+}): Promise<void> {
+  if (!args.ctx?.sessionKey) {
+    return;
+  }
+  try {
+    const { getDiagnosticSessionState } = await import("../logging/diagnostic-session-state.js");
+    const { recordToolCallOutcome } = await import("./tool-loop-detection.js");
+    const sessionState = getDiagnosticSessionState({
+      sessionKey: args.ctx.sessionKey,
+      sessionId: args.ctx?.agentId,
+    });
+    recordToolCallOutcome(sessionState, {
+      toolName: args.toolName,
+      toolParams: args.toolParams,
+      toolCallId: args.toolCallId,
+      result: args.result,
+      error: args.error,
+    });
+  } catch (err) {
+    log.warn(`tool loop outcome tracking failed: tool=${args.toolName} error=${String(err)}`);
+  }
+}
+
 export async function runBeforeToolCallHook(args: {
  toolName: string;
  params: unknown;
@@ -124,50 +154,22 @@ export function wrapToolWithBeforeToolCallHook(
      const normalizedToolName = normalizeToolName(toolName || "tool");
      try {
        const result = await execute(toolCallId, outcome.params, signal, onUpdate);
-        if (ctx?.sessionKey) {
-          try {
-            const { getDiagnosticSessionState } =
-              await import("../logging/diagnostic-session-state.js");
-            const { recordToolCallOutcome } = await import("./tool-loop-detection.js");
-            const sessionState = getDiagnosticSessionState({
-              sessionKey: ctx.sessionKey,
-              sessionId: ctx?.agentId,
-            });
-            recordToolCallOutcome(sessionState, {
-              toolName: normalizedToolName,
-              toolParams: outcome.params,
-              toolCallId,
-              result,
-            });
-          } catch (err) {
-            log.warn(
-              `tool loop outcome tracking failed: tool=${normalizedToolName} error=${String(err)}`,
-            );
-          }
-        }
+        await recordLoopOutcome({
+          ctx,
+          toolName: normalizedToolName,
+          toolParams: outcome.params,
+          toolCallId,
+          result,
+        });
        return result;
      } catch (err) {
-        if (ctx?.sessionKey) {
-          try {
-            const { getDiagnosticSessionState } =
-              await import("../logging/diagnostic-session-state.js");
-            const { recordToolCallOutcome } = await import("./tool-loop-detection.js");
-            const sessionState = getDiagnosticSessionState({
-              sessionKey: ctx.sessionKey,
-              sessionId: ctx?.agentId,
-            });
-            recordToolCallOutcome(sessionState, {
-              toolName: normalizedToolName,
-              toolParams: outcome.params,
-              toolCallId,
-              error: err,
-            });
-          } catch (recordErr) {
-            log.warn(
-              `tool loop outcome tracking failed: tool=${normalizedToolName} error=${String(recordErr)}`,
-            );
-          }
-        }
+        await recordLoopOutcome({
+          ctx,
+          toolName: normalizedToolName,
+          toolParams: outcome.params,
+          toolCallId,
+          error: err,
+        });
        throw err;
      }
    },
--- a/src/agents/tool-loop-detection.test.ts
+++ b/src/agents/tool-loop-detection.test.ts
@@ -68,6 +68,13 @@ describe("tool-loop-detection", () => {
      const hash2 = hashToolCall("tool", { b: 2, a: 1 });
      expect(hash1).toBe(hash2);
    });
+
+    it("keeps hashes fixed-size even for large params", () => {
+      const payload = { data: "x".repeat(20_000) };
+      const hash = hashToolCall("read", payload);
+      expect(hash.startsWith("read:")).toBe(true);
+      expect(hash.length).toBe("read:".length + 64);
+    });
  });

  describe("recordToolCall", () => {
@@ -91,8 +98,7 @@ describe("tool-loop-detection", () => {
      expect(state.toolCallHistory).toHaveLength(TOOL_CALL_HISTORY_SIZE);

      const oldestCall = state.toolCallHistory?.[0];
-      expect(oldestCall?.argsHash).toContain("iteration");
-      expect(oldestCall?.argsHash).not.toContain('"iteration":0');
+      expect(oldestCall?.argsHash).toBe(hashToolCall("tool", { iteration: 10 }));
    });

    it("records timestamp for each call", () => {
@@ -230,6 +236,26 @@ describe("tool-loop-detection", () => {
      }
    });

+    it("records fixed-size result hashes for large tool outputs", () => {
+      const state = createState();
+      const params = { action: "log", sessionId: "sess-big" };
+      const toolCallId = "log-big";
+      recordToolCall(state, "process", params, toolCallId);
+      recordToolCallOutcome(state, {
+        toolName: "process",
+        toolParams: params,
+        toolCallId,
+        result: {
+          content: [{ type: "text", text: "y".repeat(40_000) }],
+          details: { status: "running", totalLines: 1, totalChars: 40_000 },
+        },
+      });
+
+      const entry = state.toolCallHistory?.find((call) => call.toolCallId === toolCallId);
+      expect(typeof entry?.resultHash).toBe("string");
+      expect(entry?.resultHash?.length).toBe(64);
+    });
+
    it("handles empty history", () => {
      const state = createState();

--- a/src/agents/tool-loop-detection.ts
+++ b/src/agents/tool-loop-detection.ts
@@ -1,5 +1,7 @@
+import { createHash } from "node:crypto";
 import type { SessionState } from "../logging/diagnostic-session-state.js";
 import { createSubsystemLogger } from "../logging/subsystem.js";
+import { isPlainObject } from "../utils.js";

 const log = createSubsystemLogger("agents/loop-detection");

@@ -14,15 +16,10 @@ export const GLOBAL_CIRCUIT_BREAKER_THRESHOLD = 30;

 /**
 * Hash a tool call for pattern matching.
- * Uses tool name + deterministic JSON stringification of params.
+ * Uses tool name + deterministic JSON serialization digest of params.
 */
 export function hashToolCall(toolName: string, params: unknown): string {
-  try {
-    const paramsStr = stableStringify(params);
-    return `${toolName}:${paramsStr}`;
-  } catch {
-    return `${toolName}:${String(params)}`;
-  }
+  return `${toolName}:${digestStable(params)}`;
 }

 function stableStringify(value: unknown): string {
@@ -37,8 +34,29 @@ function stableStringify(value: unknown): string {
  return `{${keys.map((k) => `${JSON.stringify(k)}:${stableStringify(obj[k])}`).join(",")}}`;
 }

-function isPlainObject(value: unknown): value is Record<string, unknown> {
-  return typeof value === "object" && value !== null && !Array.isArray(value);
+function digestStable(value: unknown): string {
+  const serialized = stableStringifyFallback(value);
+  return createHash("sha256").update(serialized).digest("hex");
+}
+
+function stableStringifyFallback(value: unknown): string {
+  try {
+    return stableStringify(value);
+  } catch {
+    if (value === null || value === undefined) {
+      return `${value}`;
+    }
+    if (typeof value === "string") {
+      return value;
+    }
+    if (typeof value === "number" || typeof value === "boolean" || typeof value === "bigint") {
+      return `${value}`;
+    }
+    if (value instanceof Error) {
+      return `${value.name}:${value.message}`;
+    }
+    return Object.prototype.toString.call(value);
+  }
 }

 function isKnownPollToolCall(toolName: string, params: unknown): boolean {
@@ -86,10 +104,10 @@ function hashToolOutcome(
  error: unknown,
 ): string | undefined {
  if (error !== undefined) {
-    return `error:${formatErrorForHash(error)}`;
+    return `error:${digestStable(formatErrorForHash(error))}`;
  }
  if (!isPlainObject(result)) {
-    return result === undefined ? undefined : stableStringify(result);
+    return result === undefined ? undefined : digestStable(result);
  }

  const details = isPlainObject(result.details) ? result.details : {};
@@ -97,7 +115,7 @@ function hashToolOutcome(
  if (isKnownPollToolCall(toolName, params) && toolName === "process" && isPlainObject(params)) {
    const action = params.action;
    if (action === "poll") {
-      return stableStringify({
+      return digestStable({
        action,
        status: details.status,
        exitCode: details.exitCode ?? null,
@@ -107,7 +125,7 @@ function hashToolOutcome(
      });
    }
    if (action === "log") {
-      return stableStringify({
+      return digestStable({
        action,
        status: details.status,
        totalLines: details.totalLines ?? null,
@@ -120,7 +138,7 @@ function hashToolOutcome(
    }
  }

-  return stableStringify({
+  return digestStable({
    details,
    text,
  });