feat: add configurable tool loop detection

This commit is contained in:
Peter Steinberger
2026-02-17 00:17:01 +01:00
parent dacffd7ac8
commit 076df941a3
14 changed files with 557 additions and 30 deletions

View File

@@ -1,4 +1,5 @@
import { describe, expect, it } from "vitest";
import type { ToolLoopDetectionConfig } from "../config/types.tools.js";
import type { SessionState } from "../logging/diagnostic-session-state.js";
import {
CRITICAL_THRESHOLD,
@@ -20,6 +21,13 @@ function createState(): SessionState {
};
}
const enabledLoopDetectionConfig: ToolLoopDetectionConfig = { enabled: true };
const shortHistoryLoopConfig: ToolLoopDetectionConfig = {
enabled: true,
historySize: 4,
};
function recordSuccessfulCall(
state: SessionState,
toolName: string,
@@ -111,9 +119,31 @@ describe("tool-loop-detection", () => {
expect(timestamp).toBeGreaterThanOrEqual(before);
expect(timestamp).toBeLessThanOrEqual(after);
});
it("respects configured historySize", () => {
const state = createState();
for (let i = 0; i < 10; i += 1) {
recordToolCall(state, "tool", { iteration: i }, `call-${i}`, shortHistoryLoopConfig);
}
expect(state.toolCallHistory).toHaveLength(4);
expect(state.toolCallHistory?.[0]?.argsHash).toBe(hashToolCall("tool", { iteration: 6 }));
});
});
describe("detectToolCallLoop", () => {
it("is disabled by default", () => {
const state = createState();
for (let i = 0; i < 20; i += 1) {
recordToolCall(state, "read", { path: "/same.txt" }, `default-${i}`);
}
const loopResult = detectToolCallLoop(state, "read", { path: "/same.txt" });
expect(loopResult.stuck).toBe(false);
});
it("does not flag unique tool calls", () => {
const state = createState();
@@ -121,7 +151,12 @@ describe("tool-loop-detection", () => {
recordToolCall(state, "read", { path: `/file${i}.txt` }, `call-${i}`);
}
const result = detectToolCallLoop(state, "read", { path: "/new-file.txt" });
const result = detectToolCallLoop(
state,
"read",
{ path: "/new-file.txt" },
enabledLoopDetectionConfig,
);
expect(result.stuck).toBe(false);
});
@@ -131,7 +166,12 @@ describe("tool-loop-detection", () => {
recordToolCall(state, "read", { path: "/same.txt" }, `warn-${i}`);
}
const result = detectToolCallLoop(state, "read", { path: "/same.txt" });
const result = detectToolCallLoop(
state,
"read",
{ path: "/same.txt" },
enabledLoopDetectionConfig,
);
expect(result.stuck).toBe(true);
if (result.stuck) {
@@ -155,13 +195,74 @@ describe("tool-loop-detection", () => {
recordSuccessfulCall(state, "read", params, result, i);
}
const loopResult = detectToolCallLoop(state, "read", params);
const loopResult = detectToolCallLoop(state, "read", params, enabledLoopDetectionConfig);
expect(loopResult.stuck).toBe(true);
if (loopResult.stuck) {
expect(loopResult.level).toBe("warning");
}
});
it("applies custom thresholds when detection is enabled", () => {
const state = createState();
const params = { action: "poll", sessionId: "sess-custom" };
const result = {
content: [{ type: "text", text: "(no new output)\n\nProcess still running." }],
details: { status: "running", aggregated: "steady" },
};
const config: ToolLoopDetectionConfig = {
enabled: true,
warningThreshold: 2,
criticalThreshold: 4,
detectors: {
genericRepeat: false,
knownPollNoProgress: true,
pingPong: false,
},
};
for (let i = 0; i < 2; i += 1) {
recordSuccessfulCall(state, "process", params, result, i);
}
const warningResult = detectToolCallLoop(state, "process", params, config);
expect(warningResult.stuck).toBe(true);
if (warningResult.stuck) {
expect(warningResult.level).toBe("warning");
}
recordSuccessfulCall(state, "process", params, result, 2);
recordSuccessfulCall(state, "process", params, result, 3);
const criticalResult = detectToolCallLoop(state, "process", params, config);
expect(criticalResult.stuck).toBe(true);
if (criticalResult.stuck) {
expect(criticalResult.level).toBe("critical");
}
expect(criticalResult.detector).toBe("known_poll_no_progress");
});
it("can disable specific detectors", () => {
const state = createState();
const params = { action: "poll", sessionId: "sess-no-detectors" };
const result = {
content: [{ type: "text", text: "(no new output)\n\nProcess still running." }],
details: { status: "running", aggregated: "steady" },
};
const config: ToolLoopDetectionConfig = {
enabled: true,
detectors: {
genericRepeat: false,
knownPollNoProgress: false,
pingPong: false,
},
};
for (let i = 0; i < CRITICAL_THRESHOLD; i += 1) {
recordSuccessfulCall(state, "process", params, result, i);
}
const loopResult = detectToolCallLoop(state, "process", params, config);
expect(loopResult.stuck).toBe(false);
});
it("warns for known polling no-progress loops", () => {
const state = createState();
const params = { action: "poll", sessionId: "sess-1" };
@@ -174,7 +275,7 @@ describe("tool-loop-detection", () => {
recordSuccessfulCall(state, "process", params, result, i);
}
const loopResult = detectToolCallLoop(state, "process", params);
const loopResult = detectToolCallLoop(state, "process", params, enabledLoopDetectionConfig);
expect(loopResult.stuck).toBe(true);
if (loopResult.stuck) {
expect(loopResult.level).toBe("warning");
@@ -195,7 +296,7 @@ describe("tool-loop-detection", () => {
recordSuccessfulCall(state, "process", params, result, i);
}
const loopResult = detectToolCallLoop(state, "process", params);
const loopResult = detectToolCallLoop(state, "process", params, enabledLoopDetectionConfig);
expect(loopResult.stuck).toBe(true);
if (loopResult.stuck) {
expect(loopResult.level).toBe("critical");
@@ -216,7 +317,7 @@ describe("tool-loop-detection", () => {
recordSuccessfulCall(state, "process", params, result, i);
}
const loopResult = detectToolCallLoop(state, "process", params);
const loopResult = detectToolCallLoop(state, "process", params, enabledLoopDetectionConfig);
expect(loopResult.stuck).toBe(false);
});
@@ -232,7 +333,7 @@ describe("tool-loop-detection", () => {
recordSuccessfulCall(state, "read", params, result, i);
}
const loopResult = detectToolCallLoop(state, "read", params);
const loopResult = detectToolCallLoop(state, "read", params, enabledLoopDetectionConfig);
expect(loopResult.stuck).toBe(true);
if (loopResult.stuck) {
expect(loopResult.level).toBe("critical");
@@ -254,7 +355,7 @@ describe("tool-loop-detection", () => {
}
}
const loopResult = detectToolCallLoop(state, "list", listParams);
const loopResult = detectToolCallLoop(state, "list", listParams, enabledLoopDetectionConfig);
expect(loopResult.stuck).toBe(true);
if (loopResult.stuck) {
expect(loopResult.level).toBe("warning");
@@ -289,7 +390,7 @@ describe("tool-loop-detection", () => {
}
}
const loopResult = detectToolCallLoop(state, "list", listParams);
const loopResult = detectToolCallLoop(state, "list", listParams, enabledLoopDetectionConfig);
expect(loopResult.stuck).toBe(true);
if (loopResult.stuck) {
expect(loopResult.level).toBe("critical");
@@ -325,7 +426,7 @@ describe("tool-loop-detection", () => {
}
}
const loopResult = detectToolCallLoop(state, "list", listParams);
const loopResult = detectToolCallLoop(state, "list", listParams, enabledLoopDetectionConfig);
expect(loopResult.stuck).toBe(true);
if (loopResult.stuck) {
expect(loopResult.level).toBe("warning");
@@ -341,7 +442,12 @@ describe("tool-loop-detection", () => {
recordToolCall(state, "read", { path: "/a.txt" }, "a2");
recordToolCall(state, "write", { path: "/tmp/out.txt" }, "c1"); // breaks alternation
const loopResult = detectToolCallLoop(state, "list", { dir: "/workspace" });
const loopResult = detectToolCallLoop(
state,
"list",
{ dir: "/workspace" },
enabledLoopDetectionConfig,
);
expect(loopResult.stuck).toBe(false);
});
@@ -368,7 +474,7 @@ describe("tool-loop-detection", () => {
it("handles empty history", () => {
const state = createState();
const result = detectToolCallLoop(state, "tool", { arg: 1 });
const result = detectToolCallLoop(state, "tool", { arg: 1 }, enabledLoopDetectionConfig);
expect(result.stuck).toBe(false);
});
});