mirror of
https://github.com/openclaw/openclaw.git
synced 2026-06-07 01:53:32 +00:00
feat: add ping-pong loop diagnostics
This commit is contained in:
@@ -20,7 +20,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Gateway/Config: prevent `config.patch` object-array merges from falling back to full-array replacement when some patch entries lack `id`, so partial `agents.list` updates no longer drop unrelated agents. (#17989) Thanks @stakeswky.
|
||||
- Config/Discord: require string IDs in Discord allowlists, keep onboarding inputs string-only, and add doctor repair for numeric entries. (#18220) Thanks @thewilloftheshadow.
|
||||
- Agents/Models: probe the primary model when its auth-profile cooldown is near expiry (with per-provider throttling), so runs recover from temporary rate limits without staying on fallback models until restart. (#17478) Thanks @PlayerGhost.
|
||||
- Agents/Tools: make loop detection progress-aware and phased by hard-blocking known `process(action=poll|log)` no-progress loops, keeping generic identical-call detection warn-only, and adding a global circuit breaker at 30 no-progress repeats. (#16808) Thanks @akramcodez and @beca-oc.
|
||||
- Agents/Tools: make loop detection progress-aware and phased by hard-blocking known `process(action=poll|log)` no-progress loops, keeping generic identical-call detection warn-only (including ping-pong alternation warnings), adding a global circuit breaker at 30 no-progress repeats, and emitting structured diagnostic `tool.loop` warning/error events for loop actions. (#16808) Thanks @akramcodez and @beca-oc.
|
||||
- Agents/Tools: scope the `message` tool schema to the active channel so Telegram uses `buttons` and Discord uses `components`. (#18215) Thanks @obviyus.
|
||||
- Discord: optimize reaction notification handling to skip unnecessary message fetches in `off`/`all`/`allowlist` modes, streamline reaction routing, and improve reaction emoji formatting. (#18248) Thanks @thewilloftheshadow and @victorGPT.
|
||||
- Telegram: keep draft-stream preview replies attached to the user message for `replyToMode: "all"` in groups and DMs, preserving threaded reply context from preview through finalization. (#17880) Thanks @yinghaosang.
|
||||
|
||||
@@ -1,4 +1,10 @@
|
||||
import { beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import type { AnyAgentTool } from "./tools/common.js";
|
||||
import {
|
||||
onDiagnosticEvent,
|
||||
resetDiagnosticEventsForTest,
|
||||
type DiagnosticToolLoopEvent,
|
||||
} from "../infra/diagnostic-events.js";
|
||||
import { resetDiagnosticSessionStateForTest } from "../logging/diagnostic-session-state.js";
|
||||
import { getGlobalHookRunner } from "../plugins/hook-runner-global.js";
|
||||
import { wrapToolWithBeforeToolCallHook } from "./pi-tools.before-tool-call.js";
|
||||
@@ -16,6 +22,7 @@ describe("before_tool_call loop detection behavior", () => {
|
||||
|
||||
beforeEach(() => {
|
||||
resetDiagnosticSessionStateForTest();
|
||||
resetDiagnosticEventsForTest();
|
||||
hookRunner = {
|
||||
hasHooks: vi.fn(),
|
||||
runBeforeToolCall: vi.fn(),
|
||||
@@ -104,4 +111,96 @@ describe("before_tool_call loop detection behavior", () => {
|
||||
tool.execute(`read-${GLOBAL_CIRCUIT_BREAKER_THRESHOLD}`, params, undefined, undefined),
|
||||
).rejects.toThrow("global circuit breaker");
|
||||
});
|
||||
|
||||
it("emits structured warning diagnostic events for ping-pong loops", async () => {
|
||||
const emitted: DiagnosticToolLoopEvent[] = [];
|
||||
const stop = onDiagnosticEvent((evt) => {
|
||||
if (evt.type === "tool.loop") {
|
||||
emitted.push(evt);
|
||||
}
|
||||
});
|
||||
try {
|
||||
const readExecute = vi.fn().mockResolvedValue({
|
||||
content: [{ type: "text", text: "read ok" }],
|
||||
details: { ok: true },
|
||||
});
|
||||
const listExecute = vi.fn().mockResolvedValue({
|
||||
content: [{ type: "text", text: "list ok" }],
|
||||
details: { ok: true },
|
||||
});
|
||||
const readTool = wrapToolWithBeforeToolCallHook(
|
||||
{ name: "read", execute: readExecute } as unknown as AnyAgentTool,
|
||||
{
|
||||
agentId: "main",
|
||||
sessionKey: "main",
|
||||
},
|
||||
);
|
||||
const listTool = wrapToolWithBeforeToolCallHook(
|
||||
{ name: "list", execute: listExecute } as unknown as AnyAgentTool,
|
||||
{
|
||||
agentId: "main",
|
||||
sessionKey: "main",
|
||||
},
|
||||
);
|
||||
|
||||
for (let i = 0; i < 9; i += 1) {
|
||||
if (i % 2 === 0) {
|
||||
await readTool.execute(`read-${i}`, { path: "/a.txt" }, undefined, undefined);
|
||||
} else {
|
||||
await listTool.execute(`list-${i}`, { dir: "/workspace" }, undefined, undefined);
|
||||
}
|
||||
}
|
||||
|
||||
await listTool.execute("list-9", { dir: "/workspace" }, undefined, undefined);
|
||||
|
||||
const loopEvent = emitted.at(-1);
|
||||
expect(loopEvent?.type).toBe("tool.loop");
|
||||
expect(loopEvent?.level).toBe("warning");
|
||||
expect(loopEvent?.action).toBe("warn");
|
||||
expect(loopEvent?.detector).toBe("ping_pong");
|
||||
expect(loopEvent?.count).toBe(10);
|
||||
expect(loopEvent?.toolName).toBe("list");
|
||||
} finally {
|
||||
stop();
|
||||
}
|
||||
});
|
||||
|
||||
it("emits structured critical diagnostic events when blocking loops", async () => {
|
||||
const emitted: DiagnosticToolLoopEvent[] = [];
|
||||
const stop = onDiagnosticEvent((evt) => {
|
||||
if (evt.type === "tool.loop") {
|
||||
emitted.push(evt);
|
||||
}
|
||||
});
|
||||
try {
|
||||
const execute = vi.fn().mockResolvedValue({
|
||||
content: [{ type: "text", text: "(no new output)\n\nProcess still running." }],
|
||||
details: { status: "running", aggregated: "steady" },
|
||||
});
|
||||
// oxlint-disable-next-line typescript/no-explicit-any
|
||||
const tool = wrapToolWithBeforeToolCallHook({ name: "process", execute } as any, {
|
||||
agentId: "main",
|
||||
sessionKey: "main",
|
||||
});
|
||||
const params = { action: "poll", sessionId: "sess-crit" };
|
||||
|
||||
for (let i = 0; i < CRITICAL_THRESHOLD; i += 1) {
|
||||
await tool.execute(`poll-${i}`, params, undefined, undefined);
|
||||
}
|
||||
|
||||
await expect(
|
||||
tool.execute(`poll-${CRITICAL_THRESHOLD}`, params, undefined, undefined),
|
||||
).rejects.toThrow("CRITICAL");
|
||||
|
||||
const loopEvent = emitted.at(-1);
|
||||
expect(loopEvent?.type).toBe("tool.loop");
|
||||
expect(loopEvent?.level).toBe("critical");
|
||||
expect(loopEvent?.action).toBe("block");
|
||||
expect(loopEvent?.detector).toBe("known_poll_no_progress");
|
||||
expect(loopEvent?.count).toBe(CRITICAL_THRESHOLD);
|
||||
expect(loopEvent?.toolName).toBe("process");
|
||||
} finally {
|
||||
stop();
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
@@ -57,6 +57,7 @@ export async function runBeforeToolCallHook(args: {
|
||||
|
||||
if (args.ctx?.sessionKey) {
|
||||
const { getDiagnosticSessionState } = await import("../logging/diagnostic-session-state.js");
|
||||
const { logToolLoopAction } = await import("../logging/diagnostic.js");
|
||||
const { detectToolCallLoop, recordToolCall } = await import("./tool-loop-detection.js");
|
||||
|
||||
const sessionState = getDiagnosticSessionState({
|
||||
@@ -69,12 +70,34 @@ export async function runBeforeToolCallHook(args: {
|
||||
if (loopResult.stuck) {
|
||||
if (loopResult.level === "critical") {
|
||||
log.error(`Blocking ${toolName} due to critical loop: ${loopResult.message}`);
|
||||
logToolLoopAction({
|
||||
sessionKey: args.ctx.sessionKey,
|
||||
sessionId: args.ctx?.agentId,
|
||||
toolName,
|
||||
level: "critical",
|
||||
action: "block",
|
||||
detector: loopResult.detector,
|
||||
count: loopResult.count,
|
||||
message: loopResult.message,
|
||||
pairedToolName: loopResult.pairedToolName,
|
||||
});
|
||||
return {
|
||||
blocked: true,
|
||||
reason: loopResult.message,
|
||||
};
|
||||
} else {
|
||||
log.warn(`Loop warning for ${toolName}: ${loopResult.message}`);
|
||||
logToolLoopAction({
|
||||
sessionKey: args.ctx.sessionKey,
|
||||
sessionId: args.ctx?.agentId,
|
||||
toolName,
|
||||
level: "warning",
|
||||
action: "warn",
|
||||
detector: loopResult.detector,
|
||||
count: loopResult.count,
|
||||
message: loopResult.message,
|
||||
pairedToolName: loopResult.pairedToolName,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -136,6 +136,8 @@ describe("tool-loop-detection", () => {
|
||||
expect(result.stuck).toBe(true);
|
||||
if (result.stuck) {
|
||||
expect(result.level).toBe("warning");
|
||||
expect(result.detector).toBe("generic_repeat");
|
||||
expect(result.count).toBe(WARNING_THRESHOLD);
|
||||
expect(result.message).toContain("WARNING");
|
||||
expect(result.message).toContain(`${WARNING_THRESHOLD} times`);
|
||||
}
|
||||
@@ -176,6 +178,7 @@ describe("tool-loop-detection", () => {
|
||||
expect(loopResult.stuck).toBe(true);
|
||||
if (loopResult.stuck) {
|
||||
expect(loopResult.level).toBe("warning");
|
||||
expect(loopResult.detector).toBe("known_poll_no_progress");
|
||||
expect(loopResult.message).toContain("no progress");
|
||||
}
|
||||
});
|
||||
@@ -196,6 +199,7 @@ describe("tool-loop-detection", () => {
|
||||
expect(loopResult.stuck).toBe(true);
|
||||
if (loopResult.stuck) {
|
||||
expect(loopResult.level).toBe("critical");
|
||||
expect(loopResult.detector).toBe("known_poll_no_progress");
|
||||
expect(loopResult.message).toContain("CRITICAL");
|
||||
}
|
||||
});
|
||||
@@ -232,10 +236,45 @@ describe("tool-loop-detection", () => {
|
||||
expect(loopResult.stuck).toBe(true);
|
||||
if (loopResult.stuck) {
|
||||
expect(loopResult.level).toBe("critical");
|
||||
expect(loopResult.detector).toBe("global_circuit_breaker");
|
||||
expect(loopResult.message).toContain("global circuit breaker");
|
||||
}
|
||||
});
|
||||
|
||||
it("warns on ping-pong alternating patterns", () => {
|
||||
const state = createState();
|
||||
const readParams = { path: "/a.txt" };
|
||||
const listParams = { dir: "/workspace" };
|
||||
|
||||
for (let i = 0; i < WARNING_THRESHOLD - 1; i += 1) {
|
||||
if (i % 2 === 0) {
|
||||
recordToolCall(state, "read", readParams, `read-${i}`);
|
||||
} else {
|
||||
recordToolCall(state, "list", listParams, `list-${i}`);
|
||||
}
|
||||
}
|
||||
|
||||
const loopResult = detectToolCallLoop(state, "list", listParams);
|
||||
expect(loopResult.stuck).toBe(true);
|
||||
if (loopResult.stuck) {
|
||||
expect(loopResult.level).toBe("warning");
|
||||
expect(loopResult.detector).toBe("ping_pong");
|
||||
expect(loopResult.count).toBe(WARNING_THRESHOLD);
|
||||
expect(loopResult.message).toContain("ping-pong loop");
|
||||
}
|
||||
});
|
||||
|
||||
it("does not flag ping-pong when alternation is broken", () => {
|
||||
const state = createState();
|
||||
recordToolCall(state, "read", { path: "/a.txt" }, "a1");
|
||||
recordToolCall(state, "list", { dir: "/workspace" }, "b1");
|
||||
recordToolCall(state, "read", { path: "/a.txt" }, "a2");
|
||||
recordToolCall(state, "write", { path: "/tmp/out.txt" }, "c1"); // breaks alternation
|
||||
|
||||
const loopResult = detectToolCallLoop(state, "list", { dir: "/workspace" });
|
||||
expect(loopResult.stuck).toBe(false);
|
||||
});
|
||||
|
||||
it("records fixed-size result hashes for large tool outputs", () => {
|
||||
const state = createState();
|
||||
const params = { action: "log", sessionId: "sess-big" };
|
||||
|
||||
@@ -5,9 +5,22 @@ import { isPlainObject } from "../utils.js";
|
||||
|
||||
const log = createSubsystemLogger("agents/loop-detection");
|
||||
|
||||
export type LoopDetectorKind =
|
||||
| "generic_repeat"
|
||||
| "known_poll_no_progress"
|
||||
| "global_circuit_breaker"
|
||||
| "ping_pong";
|
||||
|
||||
export type LoopDetectionResult =
|
||||
| { stuck: false }
|
||||
| { stuck: true; level: "warning" | "critical"; message: string };
|
||||
| {
|
||||
stuck: true;
|
||||
level: "warning" | "critical";
|
||||
detector: LoopDetectorKind;
|
||||
count: number;
|
||||
message: string;
|
||||
pairedToolName?: string;
|
||||
};
|
||||
|
||||
export const TOOL_CALL_HISTORY_SIZE = 30;
|
||||
export const WARNING_THRESHOLD = 10;
|
||||
@@ -174,6 +187,61 @@ function getNoProgressStreak(
|
||||
return streak;
|
||||
}
|
||||
|
||||
function getPingPongStreak(
|
||||
history: Array<{ toolName: string; argsHash: string }>,
|
||||
currentSignature: string,
|
||||
): { count: number; pairedToolName?: string } {
|
||||
const last = history.at(-1);
|
||||
if (!last) {
|
||||
return { count: 0 };
|
||||
}
|
||||
|
||||
let otherSignature: string | undefined;
|
||||
let otherToolName: string | undefined;
|
||||
for (let i = history.length - 2; i >= 0; i -= 1) {
|
||||
const call = history[i];
|
||||
if (!call) {
|
||||
continue;
|
||||
}
|
||||
if (call.argsHash !== last.argsHash) {
|
||||
otherSignature = call.argsHash;
|
||||
otherToolName = call.toolName;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!otherSignature || !otherToolName) {
|
||||
return { count: 0 };
|
||||
}
|
||||
|
||||
let alternatingTailCount = 0;
|
||||
for (let i = history.length - 1; i >= 0; i -= 1) {
|
||||
const call = history[i];
|
||||
if (!call) {
|
||||
continue;
|
||||
}
|
||||
const expected = alternatingTailCount % 2 === 0 ? last.argsHash : otherSignature;
|
||||
if (call.argsHash !== expected) {
|
||||
break;
|
||||
}
|
||||
alternatingTailCount += 1;
|
||||
}
|
||||
|
||||
if (alternatingTailCount < 2) {
|
||||
return { count: 0 };
|
||||
}
|
||||
|
||||
const expectedCurrentSignature = otherSignature;
|
||||
if (currentSignature !== expectedCurrentSignature) {
|
||||
return { count: 0 };
|
||||
}
|
||||
|
||||
return {
|
||||
count: alternatingTailCount + 1,
|
||||
pairedToolName: last.toolName,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Detect if an agent is stuck in a repetitive tool call loop.
|
||||
* Checks if the same tool+params combination has been called excessively.
|
||||
@@ -187,6 +255,7 @@ export function detectToolCallLoop(
|
||||
const currentHash = hashToolCall(toolName, params);
|
||||
const noProgressStreak = getNoProgressStreak(history, toolName, currentHash);
|
||||
const knownPollTool = isKnownPollToolCall(toolName, params);
|
||||
const pingPong = getPingPongStreak(history, currentHash);
|
||||
|
||||
if (noProgressStreak >= GLOBAL_CIRCUIT_BREAKER_THRESHOLD) {
|
||||
log.error(
|
||||
@@ -195,6 +264,8 @@ export function detectToolCallLoop(
|
||||
return {
|
||||
stuck: true,
|
||||
level: "critical",
|
||||
detector: "global_circuit_breaker",
|
||||
count: noProgressStreak,
|
||||
message: `CRITICAL: ${toolName} has repeated identical no-progress outcomes ${noProgressStreak} times. Session execution blocked by global circuit breaker to prevent runaway loops.`,
|
||||
};
|
||||
}
|
||||
@@ -204,6 +275,8 @@ export function detectToolCallLoop(
|
||||
return {
|
||||
stuck: true,
|
||||
level: "critical",
|
||||
detector: "known_poll_no_progress",
|
||||
count: noProgressStreak,
|
||||
message: `CRITICAL: Called ${toolName} with identical arguments and no progress ${noProgressStreak} times. This appears to be a stuck polling loop. Session execution blocked to prevent resource waste.`,
|
||||
};
|
||||
}
|
||||
@@ -213,10 +286,26 @@ export function detectToolCallLoop(
|
||||
return {
|
||||
stuck: true,
|
||||
level: "warning",
|
||||
detector: "known_poll_no_progress",
|
||||
count: noProgressStreak,
|
||||
message: `WARNING: You have called ${toolName} ${noProgressStreak} times with identical arguments and no progress. Stop polling and either (1) increase wait time between checks, or (2) report the task as failed if the process is stuck.`,
|
||||
};
|
||||
}
|
||||
|
||||
if (pingPong.count >= WARNING_THRESHOLD) {
|
||||
log.warn(
|
||||
`Ping-pong loop warning: alternating calls count=${pingPong.count} currentTool=${toolName}`,
|
||||
);
|
||||
return {
|
||||
stuck: true,
|
||||
level: "warning",
|
||||
detector: "ping_pong",
|
||||
count: pingPong.count,
|
||||
message: `WARNING: You are alternating between repeated tool-call patterns (${pingPong.count} consecutive calls). This looks like a ping-pong loop; stop retrying and report the task as failed.`,
|
||||
pairedToolName: pingPong.pairedToolName,
|
||||
};
|
||||
}
|
||||
|
||||
// Generic detector: warn-only for repeated identical calls.
|
||||
const recentCount = history.filter(
|
||||
(h) => h.toolName === toolName && h.argsHash === currentHash,
|
||||
@@ -227,6 +316,8 @@ export function detectToolCallLoop(
|
||||
return {
|
||||
stuck: true,
|
||||
level: "warning",
|
||||
detector: "generic_repeat",
|
||||
count: recentCount,
|
||||
message: `WARNING: You have called ${toolName} ${recentCount} times with identical arguments. If this is not making progress, stop retrying and report the task as failed.`,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -134,6 +134,19 @@ export type DiagnosticHeartbeatEvent = DiagnosticBaseEvent & {
|
||||
queued: number;
|
||||
};
|
||||
|
||||
export type DiagnosticToolLoopEvent = DiagnosticBaseEvent & {
|
||||
type: "tool.loop";
|
||||
sessionKey?: string;
|
||||
sessionId?: string;
|
||||
toolName: string;
|
||||
level: "warning" | "critical";
|
||||
action: "warn" | "block";
|
||||
detector: "generic_repeat" | "known_poll_no_progress" | "global_circuit_breaker" | "ping_pong";
|
||||
count: number;
|
||||
message: string;
|
||||
pairedToolName?: string;
|
||||
};
|
||||
|
||||
export type DiagnosticEventPayload =
|
||||
| DiagnosticUsageEvent
|
||||
| DiagnosticWebhookReceivedEvent
|
||||
@@ -146,7 +159,8 @@ export type DiagnosticEventPayload =
|
||||
| DiagnosticLaneEnqueueEvent
|
||||
| DiagnosticLaneDequeueEvent
|
||||
| DiagnosticRunAttemptEvent
|
||||
| DiagnosticHeartbeatEvent;
|
||||
| DiagnosticHeartbeatEvent
|
||||
| DiagnosticToolLoopEvent;
|
||||
|
||||
export type DiagnosticEventInput = DiagnosticEventPayload extends infer Event
|
||||
? Event extends DiagnosticEventPayload
|
||||
|
||||
@@ -256,6 +256,42 @@ export function logRunAttempt(params: SessionRef & { runId: string; attempt: num
|
||||
markActivity();
|
||||
}
|
||||
|
||||
export function logToolLoopAction(
|
||||
params: SessionRef & {
|
||||
toolName: string;
|
||||
level: "warning" | "critical";
|
||||
action: "warn" | "block";
|
||||
detector: "generic_repeat" | "known_poll_no_progress" | "global_circuit_breaker" | "ping_pong";
|
||||
count: number;
|
||||
message: string;
|
||||
pairedToolName?: string;
|
||||
},
|
||||
) {
|
||||
const payload = `tool loop: sessionId=${params.sessionId ?? "unknown"} sessionKey=${
|
||||
params.sessionKey ?? "unknown"
|
||||
} tool=${params.toolName} level=${params.level} action=${params.action} detector=${
|
||||
params.detector
|
||||
} count=${params.count}${params.pairedToolName ? ` pairedTool=${params.pairedToolName}` : ""} message="${params.message}"`;
|
||||
if (params.level === "critical") {
|
||||
diag.error(payload);
|
||||
} else {
|
||||
diag.warn(payload);
|
||||
}
|
||||
emitDiagnosticEvent({
|
||||
type: "tool.loop",
|
||||
sessionId: params.sessionId,
|
||||
sessionKey: params.sessionKey,
|
||||
toolName: params.toolName,
|
||||
level: params.level,
|
||||
action: params.action,
|
||||
detector: params.detector,
|
||||
count: params.count,
|
||||
message: params.message,
|
||||
pairedToolName: params.pairedToolName,
|
||||
});
|
||||
markActivity();
|
||||
}
|
||||
|
||||
export function logActiveRuns() {
|
||||
const activeSessions = Array.from(diagnosticSessionStates.entries())
|
||||
.filter(([, s]) => s.state === "processing")
|
||||
|
||||
Reference in New Issue
Block a user