test: move integration-heavy suites to e2e lane

2026-05-10 16:14:58 +00:00 · 2026-03-02 05:31:26 +00:00
parent 656121a12b
commit a13586619b
34 changed files with 162 additions and 208 deletions
--- a/src/agents/pi-tools.before-tool-call.e2e.test.ts
+++ b/src/agents/pi-tools.before-tool-call.e2e.test.ts
@@ -0,0 +1,321 @@
+import { beforeEach, describe, expect, it, vi } from "vitest";
+import {
+  onDiagnosticEvent,
+  resetDiagnosticEventsForTest,
+  type DiagnosticToolLoopEvent,
+} from "../infra/diagnostic-events.js";
+import { resetDiagnosticSessionStateForTest } from "../logging/diagnostic-session-state.js";
+import { getGlobalHookRunner } from "../plugins/hook-runner-global.js";
+import { wrapToolWithBeforeToolCallHook } from "./pi-tools.before-tool-call.js";
+import { CRITICAL_THRESHOLD, GLOBAL_CIRCUIT_BREAKER_THRESHOLD } from "./tool-loop-detection.js";
+import type { AnyAgentTool } from "./tools/common.js";
+
+// Mock the global hook-runner module so each test decides what the runner returns.
+vi.mock("../plugins/hook-runner-global.js");
+
+// Typed view of the mocked getGlobalHookRunner; beforeEach points it at a stub runner.
+const mockGetGlobalHookRunner = vi.mocked(getGlobalHookRunner);
+
+describe("before_tool_call loop detection behavior", () => {
+  // Stub hook runner; reassigned in beforeEach so every test gets fresh mock functions.
+  let hookRunner: Record<"hasHooks" | "runBeforeToolCall", ReturnType<typeof vi.fn>>;
+
+  // Hook context with loop detection switched on (the default used by createWrappedTool).
+  const enabledLoopDetectionContext = {
+    agentId: "main",
+    sessionKey: "main",
+    loopDetection: { enabled: true },
+  };
+
+  // Same agent and session as above, but with loop detection switched off.
+  const disabledLoopDetectionContext = {
+    ...enabledLoopDetectionContext,
+    loopDetection: { enabled: false },
+  };
+
+  beforeEach(() => {
+    // Reset the diagnostic session-state and diagnostic-event test fixtures before each case.
+    resetDiagnosticSessionStateForTest();
+    resetDiagnosticEventsForTest();
+
+    // Fresh stub runner that reports no registered hooks.
+    hookRunner = {
+      hasHooks: vi.fn().mockReturnValue(false),
+      runBeforeToolCall: vi.fn(),
+    };
+    // oxlint-disable-next-line typescript/no-explicit-any
+    mockGetGlobalHookRunner.mockReturnValue(hookRunner as any);
+  });
+
+  /**
+   * Wraps a minimal `{ name, execute }` stub with the before-tool-call hook under test.
+   *
+   * @param name - Tool name given to the stub.
+   * @param execute - Mock used as the stub tool's `execute`.
+   * @param loopDetectionContext - Hook context; defaults to the loop-detection-enabled one.
+   * @returns The wrapped tool.
+   */
+  function createWrappedTool(
+    name: string,
+    execute: ReturnType<typeof vi.fn>,
+    loopDetectionContext = enabledLoopDetectionContext,
+  ) {
+    // Only the two stubbed fields are provided, hence the double cast.
+    const stubTool = { name, execute } as unknown as AnyAgentTool;
+    return wrapToolWithBeforeToolCallHook(stubTool, loopDetectionContext);
+  }
+
+  /**
+   * Runs `run` while collecting `tool.loop` diagnostic events, then always unsubscribes.
+   *
+   * @param run - Test body; receives the live array that captured events are pushed into.
+   * @param filter - Optional predicate; only events it accepts are captured (default: all).
+   */
+  async function withToolLoopEvents(
+    run: (emitted: DiagnosticToolLoopEvent[]) => Promise<void>,
+    filter: (evt: DiagnosticToolLoopEvent) => boolean = () => true,
+  ) {
+    const captured: DiagnosticToolLoopEvent[] = [];
+    const unsubscribe = onDiagnosticEvent((event) => {
+      if (event.type !== "tool.loop") {
+        return;
+      }
+      if (filter(event)) {
+        captured.push(event);
+      }
+    });
+    try {
+      await run(captured);
+    } finally {
+      // Detach the listener even when the test body throws.
+      unsubscribe();
+    }
+  }
+
+  /**
+   * Builds wrapped `read` and `list` tools for ping-pong scenarios.
+   *
+   * With `withProgress`, each result text embeds the tool call id, so results differ from call
+   * to call; otherwise every call resolves to the same fixed result.
+   */
+  function createPingPongTools(options?: { withProgress?: boolean }) {
+    const progressing = options?.withProgress;
+    // Shared result shape; only the text varies.
+    const okResult = (text: string) => ({
+      content: [{ type: "text", text }],
+      details: { ok: true },
+    });
+
+    const readExecute = progressing
+      ? vi.fn().mockImplementation(async (toolCallId: string) => okResult(`read ${toolCallId}`))
+      : vi.fn().mockResolvedValue(okResult("read ok"));
+    const listExecute = progressing
+      ? vi.fn().mockImplementation(async (toolCallId: string) => okResult(`list ${toolCallId}`))
+      : vi.fn().mockResolvedValue(okResult("list ok"));
+
+    const readTool = createWrappedTool("read", readExecute);
+    const listTool = createWrappedTool("list", listExecute);
+    return { readTool, listTool };
+  }
+
+  /**
+   * Executes `count` sequential calls alternating between the two tools, starting with read.
+   * Even steps call `readTool` with id `read-<step>`; odd steps call `listTool` with id
+   * `list-<step>`.
+   */
+  async function runPingPongSequence(
+    readTool: ReturnType<typeof createWrappedTool>,
+    listTool: ReturnType<typeof createWrappedTool>,
+    count: number,
+  ) {
+    for (let step = 0; step < count; step += 1) {
+      const isReadTurn = step % 2 === 0;
+      if (isReadTurn) {
+        await readTool.execute(`read-${step}`, { path: "/a.txt" }, undefined, undefined);
+        continue;
+      }
+      await listTool.execute(`list-${step}`, { dir: "/workspace" }, undefined, undefined);
+    }
+  }
+
+  /**
+   * Fixture for generic-repeat scenarios: a wrapped `read` tool that always resolves to the
+   * same result, plus the params the tests call it with.
+   */
+  function createGenericReadRepeatFixture() {
+    const constantResult = {
+      content: [{ type: "text", text: "same output" }],
+      details: { ok: true },
+    };
+    const execute = vi.fn().mockResolvedValue(constantResult);
+    const tool = createWrappedTool("read", execute);
+    return { tool, params: { path: "/tmp/file" } };
+  }
+
+  /**
+   * Fixture for poll scenarios: a wrapped `process` tool whose every call resolves to the same
+   * "still running" result, plus poll params for the given session.
+   *
+   * @param sessionId - Session id placed in the returned poll params.
+   */
+  function createNoProgressProcessFixture(sessionId: string) {
+    const stalledResult = {
+      content: [{ type: "text", text: "(no new output)\n\nProcess still running." }],
+      details: { status: "running", aggregated: "steady" },
+    };
+    const execute = vi.fn().mockResolvedValue(stalledResult);
+    const tool = createWrappedTool("process", execute);
+    return { tool, params: { action: "poll", sessionId } };
+  }
+
+  /**
+   * Asserts that `loopEvent` is a critical, blocking `tool.loop` event.
+   *
+   * @param loopEvent - Event to check; `undefined` fails on the first assertion.
+   * @param params - Expected detector and tool name; `count` defaults to CRITICAL_THRESHOLD.
+   */
+  function expectCriticalLoopEvent(
+    loopEvent: DiagnosticToolLoopEvent | undefined,
+    params: {
+      detector: "ping_pong" | "known_poll_no_progress";
+      toolName: string;
+      count?: number;
+    },
+  ) {
+    const { detector, toolName, count } = params;
+    const expectedCount = count ?? CRITICAL_THRESHOLD;
+
+    expect(loopEvent?.type).toBe("tool.loop");
+    expect(loopEvent?.level).toBe("critical");
+    expect(loopEvent?.action).toBe("block");
+    expect(loopEvent?.detector).toBe(detector);
+    expect(loopEvent?.count).toBe(expectedCount);
+    expect(loopEvent?.toolName).toBe(toolName);
+  }
+
+  it("blocks known poll loops when no progress repeats", async () => {
+    const fixture = createNoProgressProcessFixture("sess-1");
+    const poll = (index: number) =>
+      fixture.tool.execute(`poll-${index}`, fixture.params, undefined, undefined);
+
+    // The first CRITICAL_THRESHOLD identical polls must all go through...
+    for (let attempt = 0; attempt < CRITICAL_THRESHOLD; attempt += 1) {
+      await expect(poll(attempt)).resolves.toBeDefined();
+    }
+
+    // ...and the very next one must be rejected.
+    await expect(poll(CRITICAL_THRESHOLD)).rejects.toThrow("CRITICAL");
+  });
+
+  it("does nothing when loopDetection.enabled is false", async () => {
+    // Same stalled-poll result that gets blocked when loop detection is enabled.
+    const execute = vi.fn().mockResolvedValue({
+      content: [{ type: "text", text: "(no new output)\n\nProcess still running." }],
+      details: { status: "running", aggregated: "steady" },
+    });
+    // Use the shared helper with the disabled context instead of an ad-hoc `as any` wrap.
+    const tool = createWrappedTool("process", execute, disabledLoopDetectionContext);
+    const params = { action: "poll", sessionId: "sess-off" };
+
+    // Include call index CRITICAL_THRESHOLD: that is the call the enabled detector rejects, so
+    // stopping one short would pass even if the `enabled: false` flag were ignored.
+    for (let i = 0; i <= CRITICAL_THRESHOLD; i += 1) {
+      await expect(tool.execute(`poll-${i}`, params, undefined, undefined)).resolves.toBeDefined();
+    }
+  });
+
+  it("does not block known poll loops when output progresses", async () => {
+    // Each result embeds its own call id, so no two polls return the same output.
+    const execute = vi.fn().mockImplementation(async (toolCallId: string) => ({
+      content: [{ type: "text", text: `output ${toolCallId}` }],
+      details: { status: "running", aggregated: `output ${toolCallId}` },
+    }));
+    const tool = createWrappedTool("process", execute);
+    const params = { action: "poll", sessionId: "sess-2" };
+
+    // Go past the critical threshold; every call should still resolve.
+    const pollCount = CRITICAL_THRESHOLD + 5;
+    for (let attempt = 0; attempt < pollCount; attempt += 1) {
+      const pending = tool.execute(`poll-progress-${attempt}`, params, undefined, undefined);
+      await expect(pending).resolves.toBeDefined();
+    }
+  });
+
+  it("keeps generic repeated calls warn-only below global breaker", async () => {
+    const fixture = createGenericReadRepeatFixture();
+
+    // Identical reads past the critical threshold must still resolve.
+    const callCount = CRITICAL_THRESHOLD + 5;
+    for (let call = 0; call < callCount; call += 1) {
+      const pending = fixture.tool.execute(`read-${call}`, fixture.params, undefined, undefined);
+      await expect(pending).resolves.toBeDefined();
+    }
+  });
+
+  it("blocks generic repeated no-progress calls at global breaker threshold", async () => {
+    const fixture = createGenericReadRepeatFixture();
+    const read = (index: number) =>
+      fixture.tool.execute(`read-${index}`, fixture.params, undefined, undefined);
+
+    // Every call below the global breaker threshold is allowed.
+    for (let call = 0; call < GLOBAL_CIRCUIT_BREAKER_THRESHOLD; call += 1) {
+      await expect(read(call)).resolves.toBeDefined();
+    }
+
+    // The next identical call trips the breaker.
+    await expect(read(GLOBAL_CIRCUIT_BREAKER_THRESHOLD)).rejects.toThrow("global circuit breaker");
+  });
+
+  it("coalesces repeated generic warning events into threshold buckets", async () => {
+    // Capture warning-level loop events only.
+    const warningsOnly = (evt: DiagnosticToolLoopEvent) => evt.level === "warning";
+
+    await withToolLoopEvents(async (emitted) => {
+      const { tool, params } = createGenericReadRepeatFixture();
+
+      for (let call = 0; call < 21; call += 1) {
+        await tool.execute(`read-bucket-${call}`, params, undefined, undefined);
+      }
+
+      // 21 identical calls should yield exactly two generic warnings, at counts 10 and 20.
+      const genericCounts = emitted
+        .filter((evt) => evt.detector === "generic_repeat")
+        .map((evt) => evt.count);
+      expect(genericCounts).toEqual([10, 20]);
+    }, warningsOnly);
+  });
+
+  it("emits structured warning diagnostic events for ping-pong loops", async () => {
+    await withToolLoopEvents(async (emitted) => {
+      const { readTool, listTool } = createPingPongTools();
+      // Twelve alternating calls: read-0, list-1, ... read-10, list-11.
+      await runPingPongSequence(readTool, listTool, 12);
+
+      const pingPongWarns = emitted.filter(
+        (evt) => evt.detector === "ping_pong" && evt.level === "warning",
+      );
+      // Exactly one ping-pong warning is expected over the whole sequence.
+      expect(pingPongWarns).toHaveLength(1);
+
+      const [loopEvent] = pingPongWarns;
+      expect(loopEvent?.type).toBe("tool.loop");
+      expect(loopEvent?.level).toBe("warning");
+      expect(loopEvent?.action).toBe("warn");
+      expect(loopEvent?.detector).toBe("ping_pong");
+      expect(loopEvent?.count).toBe(10);
+      expect(loopEvent?.toolName).toBe("list");
+    });
+  });
+
+  it("blocks ping-pong loops at critical threshold and emits critical diagnostic events", async () => {
+    await withToolLoopEvents(async (emitted) => {
+      const { readTool, listTool } = createPingPongTools();
+      // Run the alternation up to, but not including, the call expected to be blocked.
+      const blockedIndex = CRITICAL_THRESHOLD - 1;
+      await runPingPongSequence(readTool, listTool, blockedIndex);
+
+      const blockedCall = listTool.execute(
+        `list-${blockedIndex}`,
+        { dir: "/workspace" },
+        undefined,
+        undefined,
+      );
+      await expect(blockedCall).rejects.toThrow("CRITICAL");
+
+      // The most recent loop event must describe the block.
+      expectCriticalLoopEvent(emitted.at(-1), { detector: "ping_pong", toolName: "list" });
+    });
+  });
+
+  it("does not block ping-pong at critical threshold when outcomes are progressing", async () => {
+    await withToolLoopEvents(async (emitted) => {
+      // Results embed the call id, so each call's outcome differs from the previous ones.
+      const { readTool, listTool } = createPingPongTools({ withProgress: true });
+      const finalIndex = CRITICAL_THRESHOLD - 1;
+      await runPingPongSequence(readTool, listTool, finalIndex);
+
+      const finalCall = listTool.execute(
+        `list-${finalIndex}`,
+        { dir: "/workspace" },
+        undefined,
+        undefined,
+      );
+      await expect(finalCall).resolves.toBeDefined();
+
+      const findPingPong = (level: DiagnosticToolLoopEvent["level"]) =>
+        emitted.find((evt) => evt.level === level && evt.detector === "ping_pong");
+
+      // No critical ping-pong event, but a warning is still expected.
+      expect(findPingPong("critical")).toBeUndefined();
+      expect(findPingPong("warning")).toBeTruthy();
+    });
+  });
+
+  it("emits structured critical diagnostic events when blocking loops", async () => {
+    await withToolLoopEvents(async (emitted) => {
+      const fixture = createNoProgressProcessFixture("sess-crit");
+      const poll = (index: number) =>
+        fixture.tool.execute(`poll-${index}`, fixture.params, undefined, undefined);
+
+      // Use up the allowed number of identical polls.
+      for (let attempt = 0; attempt < CRITICAL_THRESHOLD; attempt += 1) {
+        await poll(attempt);
+      }
+
+      await expect(poll(CRITICAL_THRESHOLD)).rejects.toThrow("CRITICAL");
+
+      // The most recent loop event must describe the block.
+      expectCriticalLoopEvent(emitted.at(-1), {
+        detector: "known_poll_no_progress",
+        toolName: "process",
+      });
+    });
+  });
+});