mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-10 16:14:58 +00:00
test: move integration-heavy suites to e2e lane
This commit is contained in:
321
src/agents/pi-tools.before-tool-call.e2e.test.ts
Normal file
321
src/agents/pi-tools.before-tool-call.e2e.test.ts
Normal file
@@ -0,0 +1,321 @@
|
||||
import { beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import {
|
||||
onDiagnosticEvent,
|
||||
resetDiagnosticEventsForTest,
|
||||
type DiagnosticToolLoopEvent,
|
||||
} from "../infra/diagnostic-events.js";
|
||||
import { resetDiagnosticSessionStateForTest } from "../logging/diagnostic-session-state.js";
|
||||
import { getGlobalHookRunner } from "../plugins/hook-runner-global.js";
|
||||
import { wrapToolWithBeforeToolCallHook } from "./pi-tools.before-tool-call.js";
|
||||
import { CRITICAL_THRESHOLD, GLOBAL_CIRCUIT_BREAKER_THRESHOLD } from "./tool-loop-detection.js";
|
||||
import type { AnyAgentTool } from "./tools/common.js";
|
||||
|
||||
vi.mock("../plugins/hook-runner-global.js");
|
||||
|
||||
const mockGetGlobalHookRunner = vi.mocked(getGlobalHookRunner);
|
||||
|
||||
describe("before_tool_call loop detection behavior", () => {
|
||||
// Stand-in for the global hook runner; assigned afresh in beforeEach.
let hookRunner: Record<"hasHooks" | "runBeforeToolCall", ReturnType<typeof vi.fn>>;
|
||||
// Wrapper context with loop detection switched on (the default used by createWrappedTool).
const enabledLoopDetectionContext = {
  agentId: "main",
  sessionKey: "main",
  loopDetection: { enabled: true },
};

// Same agent and session, but with loop detection switched off.
const disabledLoopDetectionContext = {
  ...enabledLoopDetectionContext,
  loopDetection: { enabled: false },
};
|
||||
|
||||
// Start every test from reset diagnostic state and a fresh hook runner that reports no hooks.
beforeEach(() => {
  resetDiagnosticSessionStateForTest();
  resetDiagnosticEventsForTest();

  const hasHooks = vi.fn();
  hasHooks.mockReturnValue(false);
  hookRunner = { hasHooks, runBeforeToolCall: vi.fn() };

  // oxlint-disable-next-line typescript/no-explicit-any
  mockGetGlobalHookRunner.mockReturnValue(hookRunner as any);
});
|
||||
|
||||
/**
 * Wraps a bare `{ name, execute }` stub with the before-tool-call hook.
 *
 * @param name - Tool name given to the stub.
 * @param execute - Mock used as the stub's `execute`.
 * @param loopDetectionContext - Context handed to the wrapper; defaults to the enabled context.
 * @returns Whatever `wrapToolWithBeforeToolCallHook` returns for the stub.
 */
function createWrappedTool(
  name: string,
  execute: ReturnType<typeof vi.fn>,
  loopDetectionContext = enabledLoopDetectionContext,
) {
  // Only two fields are supplied, so the stub is force-cast to the tool type.
  const stubTool = { name, execute } as unknown as AnyAgentTool;
  return wrapToolWithBeforeToolCallHook(stubTool, loopDetectionContext);
}
|
||||
|
||||
/**
 * Runs `run` while collecting `tool.loop` diagnostic events.
 *
 * @param run - Test body; receives the live array that matching events are appended to.
 * @param filter - Extra predicate an event must satisfy to be collected; accepts all by default.
 */
async function withToolLoopEvents(
  run: (emitted: DiagnosticToolLoopEvent[]) => Promise<void>,
  filter: (evt: DiagnosticToolLoopEvent) => boolean = () => true,
) {
  const collected: DiagnosticToolLoopEvent[] = [];
  const stopListening = onDiagnosticEvent((evt) => {
    if (evt.type !== "tool.loop" || !filter(evt)) {
      return;
    }
    collected.push(evt);
  });

  try {
    await run(collected);
  } finally {
    // Invoke the callback returned by onDiagnosticEvent even when `run` throws.
    stopListening();
  }
}
|
||||
|
||||
/**
 * Builds wrapped "read" and "list" tools for ping-pong scenarios.
 *
 * @param options - With `withProgress`, each result's text embeds the tool call id;
 *   otherwise every call resolves to the same fixed result.
 * @returns The wrapped tools as `{ readTool, listTool }`.
 */
function createPingPongTools(options?: { withProgress?: boolean }) {
  // One factory for both tools; only the label in the result text differs.
  const buildExecute = (label: "read" | "list") => {
    if (options?.withProgress) {
      return vi.fn().mockImplementation(async (toolCallId: string) => ({
        content: [{ type: "text", text: `${label} ${toolCallId}` }],
        details: { ok: true },
      }));
    }
    return vi.fn().mockResolvedValue({
      content: [{ type: "text", text: `${label} ok` }],
      details: { ok: true },
    });
  };

  const readExecute = buildExecute("read");
  const listExecute = buildExecute("list");

  return {
    readTool: createWrappedTool("read", readExecute),
    listTool: createWrappedTool("list", listExecute),
  };
}
|
||||
|
||||
/**
 * Executes `count` sequential calls that alternate between the two tools, starting with read.
 *
 * @param readTool - Called on even steps with `{ path: "/a.txt" }`.
 * @param listTool - Called on odd steps with `{ dir: "/workspace" }`.
 * @param count - Total number of calls to make.
 */
async function runPingPongSequence(
  readTool: ReturnType<typeof createWrappedTool>,
  listTool: ReturnType<typeof createWrappedTool>,
  count: number,
) {
  for (let step = 0; step < count; step += 1) {
    const isListTurn = step % 2 !== 0;
    if (isListTurn) {
      await listTool.execute(`list-${step}`, { dir: "/workspace" }, undefined, undefined);
      continue;
    }
    await readTool.execute(`read-${step}`, { path: "/a.txt" }, undefined, undefined);
  }
}
|
||||
|
||||
/**
 * Fixture for generic repeat scenarios: a wrapped "read" tool whose mock always
 * resolves to the same result, plus the params to call it with.
 */
function createGenericReadRepeatFixture() {
  const identicalResult = {
    content: [{ type: "text", text: "same output" }],
    details: { ok: true },
  };
  const tool = createWrappedTool("read", vi.fn().mockResolvedValue(identicalResult));
  const params = { path: "/tmp/file" };
  return { tool, params };
}
|
||||
|
||||
/**
 * Fixture for poll scenarios without progress: a wrapped "process" tool whose mock
 * always resolves to the same "still running" result, plus poll params.
 *
 * @param sessionId - Session id placed in the returned poll params.
 */
function createNoProgressProcessFixture(sessionId: string) {
  const unchangedResult = {
    content: [{ type: "text", text: "(no new output)\n\nProcess still running." }],
    details: { status: "running", aggregated: "steady" },
  };
  const tool = createWrappedTool("process", vi.fn().mockResolvedValue(unchangedResult));
  const params = { action: "poll", sessionId };
  return { tool, params };
}
|
||||
|
||||
/**
 * Asserts that `loopEvent` is a critical, blocking `tool.loop` event.
 *
 * @param loopEvent - Event under test; an undefined event fails the first assertion.
 * @param params - Expected detector and tool name; `count` falls back to CRITICAL_THRESHOLD.
 */
function expectCriticalLoopEvent(
  loopEvent: DiagnosticToolLoopEvent | undefined,
  params: {
    detector: "ping_pong" | "known_poll_no_progress";
    toolName: string;
    count?: number;
  },
) {
  const expectedCount = params.count ?? CRITICAL_THRESHOLD;

  expect(loopEvent?.type).toBe("tool.loop");
  expect(loopEvent?.level).toBe("critical");
  expect(loopEvent?.action).toBe("block");
  expect(loopEvent?.detector).toBe(params.detector);
  expect(loopEvent?.count).toBe(expectedCount);
  expect(loopEvent?.toolName).toBe(params.toolName);
}
|
||||
|
||||
// Identical "no new output" polls are expected to resolve CRITICAL_THRESHOLD times;
// the next identical poll is expected to be rejected.
it("blocks known poll loops when no progress repeats", async () => {
  const { tool, params } = createNoProgressProcessFixture("sess-1");
  const poll = (attempt: number) => tool.execute(`poll-${attempt}`, params, undefined, undefined);

  for (let attempt = 0; attempt < CRITICAL_THRESHOLD; attempt += 1) {
    await expect(poll(attempt)).resolves.toBeDefined();
  }

  await expect(poll(CRITICAL_THRESHOLD)).rejects.toThrow("CRITICAL");
});
|
||||
|
||||
// With loop detection disabled, identical no-progress polls must never be blocked.
it("does nothing when loopDetection.enabled is false", async () => {
  const execute = vi.fn().mockResolvedValue({
    content: [{ type: "text", text: "(no new output)\n\nProcess still running." }],
    details: { status: "running", aggregated: "steady" },
  });
  // Reuse the shared helper so this test builds its tool the same way as the others.
  const tool = createWrappedTool("process", execute, disabledLoopDetectionContext);
  const params = { action: "poll", sessionId: "sess-off" };

  // With detection enabled, the first CRITICAL_THRESHOLD polls succeed and only the
  // following one is rejected. Polling one past the threshold is therefore the minimum
  // needed to show that disabling detection actually changes the outcome.
  const totalPolls = CRITICAL_THRESHOLD + 1;
  for (let i = 0; i < totalPolls; i += 1) {
    await expect(tool.execute(`poll-${i}`, params, undefined, undefined)).resolves.toBeDefined();
  }

  // Every poll should have reached the underlying tool.
  expect(execute).toHaveBeenCalledTimes(totalPolls);
});
|
||||
|
||||
// Each poll result embeds its tool call id, so no two outcomes are the same;
// polling past CRITICAL_THRESHOLD is expected to keep resolving.
it("does not block known poll loops when output progresses", async () => {
  const params = { action: "poll", sessionId: "sess-2" };
  const execute = vi.fn().mockImplementation(async (toolCallId: string) => {
    const text = `output ${toolCallId}`;
    return {
      content: [{ type: "text", text }],
      details: { status: "running", aggregated: text },
    };
  });
  const tool = createWrappedTool("process", execute);

  const totalPolls = CRITICAL_THRESHOLD + 5;
  for (let attempt = 0; attempt < totalPolls; attempt += 1) {
    const pending = tool.execute(`poll-progress-${attempt}`, params, undefined, undefined);
    await expect(pending).resolves.toBeDefined();
  }
});
|
||||
|
||||
// Identical generic calls a few past CRITICAL_THRESHOLD are all expected to resolve.
it("keeps generic repeated calls warn-only below global breaker", async () => {
  const { tool, params } = createGenericReadRepeatFixture();
  const totalCalls = CRITICAL_THRESHOLD + 5;

  for (let call = 0; call < totalCalls; call += 1) {
    const pending = tool.execute(`read-${call}`, params, undefined, undefined);
    await expect(pending).resolves.toBeDefined();
  }
});
|
||||
|
||||
// Identical generic calls are expected to resolve GLOBAL_CIRCUIT_BREAKER_THRESHOLD times;
// the next one is expected to be rejected with the global circuit breaker message.
it("blocks generic repeated no-progress calls at global breaker threshold", async () => {
  const { tool, params } = createGenericReadRepeatFixture();
  const read = (attempt: number) => tool.execute(`read-${attempt}`, params, undefined, undefined);

  for (let attempt = 0; attempt < GLOBAL_CIRCUIT_BREAKER_THRESHOLD; attempt += 1) {
    await expect(read(attempt)).resolves.toBeDefined();
  }

  await expect(read(GLOBAL_CIRCUIT_BREAKER_THRESHOLD)).rejects.toThrow("global circuit breaker");
});
|
||||
|
||||
// 21 identical calls are expected to yield exactly two generic_repeat warnings,
// with counts 10 and 20.
it("coalesces repeated generic warning events into threshold buckets", async () => {
  const warningsOnly = (evt: DiagnosticToolLoopEvent) => evt.level === "warning";

  await withToolLoopEvents(async (emitted) => {
    const { tool, params } = createGenericReadRepeatFixture();

    for (let call = 0; call < 21; call += 1) {
      await tool.execute(`read-bucket-${call}`, params, undefined, undefined);
    }

    const genericWarnCounts = emitted
      .filter((evt) => evt.detector === "generic_repeat")
      .map((evt) => evt.count);
    expect(genericWarnCounts).toEqual([10, 20]);
  }, warningsOnly);
});
|
||||
|
||||
// Twelve alternating read/list calls are expected to produce exactly one
// ping_pong warning, reported for the "list" tool with count 10.
it("emits structured warning diagnostic events for ping-pong loops", async () => {
  await withToolLoopEvents(async (emitted) => {
    const { readTool, listTool } = createPingPongTools();
    const listArgs = { dir: "/workspace" };
    const readArgs = { path: "/a.txt" };

    await runPingPongSequence(readTool, listTool, 9);

    await listTool.execute("list-9", listArgs, undefined, undefined);
    await readTool.execute("read-10", readArgs, undefined, undefined);
    await listTool.execute("list-11", listArgs, undefined, undefined);

    const pingPongWarns = emitted.filter(
      (evt) => evt.level === "warning" && evt.detector === "ping_pong",
    );
    expect(pingPongWarns).toHaveLength(1);

    const [loopEvent] = pingPongWarns;
    expect(loopEvent?.type).toBe("tool.loop");
    expect(loopEvent?.level).toBe("warning");
    expect(loopEvent?.action).toBe("warn");
    expect(loopEvent?.detector).toBe("ping_pong");
    expect(loopEvent?.count).toBe(10);
    expect(loopEvent?.toolName).toBe("list");
  });
});
|
||||
|
||||
// After CRITICAL_THRESHOLD - 1 alternating no-progress calls, the next "list" call is
// expected to be rejected, and the last captured event to be a critical ping_pong block.
it("blocks ping-pong loops at critical threshold and emits critical diagnostic events", async () => {
  await withToolLoopEvents(async (emitted) => {
    const { readTool, listTool } = createPingPongTools();
    const priorCalls = CRITICAL_THRESHOLD - 1;
    await runPingPongSequence(readTool, listTool, priorCalls);

    const blockedCall = listTool.execute(
      `list-${priorCalls}`,
      { dir: "/workspace" },
      undefined,
      undefined,
    );
    await expect(blockedCall).rejects.toThrow("CRITICAL");

    const lastEvent = emitted[emitted.length - 1];
    expectCriticalLoopEvent(lastEvent, { detector: "ping_pong", toolName: "list" });
  });
});
|
||||
|
||||
// When every result embeds its call id, the call at the critical threshold is expected
// to resolve: a ping_pong warning is expected, but no critical ping_pong event.
it("does not block ping-pong at critical threshold when outcomes are progressing", async () => {
  await withToolLoopEvents(async (emitted) => {
    const { readTool, listTool } = createPingPongTools({ withProgress: true });
    const priorCalls = CRITICAL_THRESHOLD - 1;
    await runPingPongSequence(readTool, listTool, priorCalls);

    const thresholdCall = listTool.execute(
      `list-${priorCalls}`,
      { dir: "/workspace" },
      undefined,
      undefined,
    );
    await expect(thresholdCall).resolves.toBeDefined();

    const findPingPong = (level: "critical" | "warning") =>
      emitted.find((evt) => evt.level === level && evt.detector === "ping_pong");

    expect(findPingPong("critical")).toBeUndefined();
    expect(findPingPong("warning")).toBeTruthy();
  });
});
|
||||
|
||||
// Once the poll after CRITICAL_THRESHOLD identical results is rejected, the last captured
// event is expected to be a critical known_poll_no_progress block for the "process" tool.
it("emits structured critical diagnostic events when blocking loops", async () => {
  await withToolLoopEvents(async (emitted) => {
    const { tool, params } = createNoProgressProcessFixture("sess-crit");
    const poll = (attempt: number) => tool.execute(`poll-${attempt}`, params, undefined, undefined);

    for (let attempt = 0; attempt < CRITICAL_THRESHOLD; attempt += 1) {
      await poll(attempt);
    }

    await expect(poll(CRITICAL_THRESHOLD)).rejects.toThrow("CRITICAL");

    const lastEvent = emitted[emitted.length - 1];
    expectCriticalLoopEvent(lastEvent, {
      detector: "known_poll_no_progress",
      toolName: "process",
    });
  });
});
|
||||
});
|
||||
Reference in New Issue
Block a user