diff --git a/CHANGELOG.md b/CHANGELOG.md index 43053df7636..8b084859a2f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,7 +20,7 @@ Docs: https://docs.openclaw.ai - Gateway/Config: prevent `config.patch` object-array merges from falling back to full-array replacement when some patch entries lack `id`, so partial `agents.list` updates no longer drop unrelated agents. (#17989) Thanks @stakeswky. - Config/Discord: require string IDs in Discord allowlists, keep onboarding inputs string-only, and add doctor repair for numeric entries. (#18220) Thanks @thewilloftheshadow. - Agents/Models: probe the primary model when its auth-profile cooldown is near expiry (with per-provider throttling), so runs recover from temporary rate limits without staying on fallback models until restart. (#17478) Thanks @PlayerGhost. -- Agents/Tools: make loop detection progress-aware and phased by hard-blocking known `process(action=poll|log)` no-progress loops, keeping generic identical-call detection warn-only, and adding a global circuit breaker at 30 no-progress repeats. (#16808) Thanks @akramcodez and @beca-oc. +- Agents/Tools: make loop detection progress-aware and phased by hard-blocking known `process(action=poll|log)` no-progress loops, keeping generic identical-call detection warn-only (including ping-pong alternation warnings), adding a global circuit breaker at 30 no-progress repeats, and emitting structured diagnostic `tool.loop` warning/error events for loop actions. (#16808) Thanks @akramcodez and @beca-oc. - Agents/Tools: scope the `message` tool schema to the active channel so Telegram uses `buttons` and Discord uses `components`. (#18215) Thanks @obviyus. - Discord: optimize reaction notification handling to skip unnecessary message fetches in `off`/`all`/`allowlist` modes, streamline reaction routing, and improve reaction emoji formatting. (#18248) Thanks @thewilloftheshadow and @victorGPT. - Telegram: keep draft-stream preview replies attached to the user message for `replyToMode: "all"` in groups and DMs, preserving threaded reply context from preview through finalization. (#17880) Thanks @yinghaosang. diff --git a/src/agents/pi-tools.before-tool-call.test.ts b/src/agents/pi-tools.before-tool-call.test.ts index 560baf89d75..5c369413767 100644 --- a/src/agents/pi-tools.before-tool-call.test.ts +++ b/src/agents/pi-tools.before-tool-call.test.ts @@ -1,4 +1,10 @@ import { beforeEach, describe, expect, it, vi } from "vitest"; +import type { AnyAgentTool } from "./tools/common.js"; +import { + onDiagnosticEvent, + resetDiagnosticEventsForTest, + type DiagnosticToolLoopEvent, +} from "../infra/diagnostic-events.js"; import { resetDiagnosticSessionStateForTest } from "../logging/diagnostic-session-state.js"; import { getGlobalHookRunner } from "../plugins/hook-runner-global.js"; import { wrapToolWithBeforeToolCallHook } from "./pi-tools.before-tool-call.js"; @@ -16,6 +22,7 @@ describe("before_tool_call loop detection behavior", () => { beforeEach(() => { resetDiagnosticSessionStateForTest(); + resetDiagnosticEventsForTest(); hookRunner = { hasHooks: vi.fn(), runBeforeToolCall: vi.fn(), @@ -104,4 +111,96 @@ describe("before_tool_call loop detection behavior", () => { tool.execute(`read-${GLOBAL_CIRCUIT_BREAKER_THRESHOLD}`, params, undefined, undefined), ).rejects.toThrow("global circuit breaker"); }); + + it("emits structured warning diagnostic events for ping-pong loops", async () => { + const emitted: DiagnosticToolLoopEvent[] = []; + const stop = onDiagnosticEvent((evt) => { + if (evt.type === "tool.loop") { + emitted.push(evt); + } + }); + try { + const readExecute = vi.fn().mockResolvedValue({ + content: [{ type: "text", text: "read ok" }], + details: { ok: true }, + }); + const listExecute = vi.fn().mockResolvedValue({ + content: [{ type: "text", text: "list ok" }], + details: { ok: true }, + }); + const readTool = wrapToolWithBeforeToolCallHook( + { name: "read", execute: readExecute } as unknown as AnyAgentTool, + { + agentId: "main", + sessionKey: "main", + }, + ); + const listTool = wrapToolWithBeforeToolCallHook( + { name: "list", execute: listExecute } as unknown as AnyAgentTool, + { + agentId: "main", + sessionKey: "main", + }, + ); + + for (let i = 0; i < 9; i += 1) { + if (i % 2 === 0) { + await readTool.execute(`read-${i}`, { path: "/a.txt" }, undefined, undefined); + } else { + await listTool.execute(`list-${i}`, { dir: "/workspace" }, undefined, undefined); + } + } + + await listTool.execute("list-9", { dir: "/workspace" }, undefined, undefined); + + const loopEvent = emitted.at(-1); + expect(loopEvent?.type).toBe("tool.loop"); + expect(loopEvent?.level).toBe("warning"); + expect(loopEvent?.action).toBe("warn"); + expect(loopEvent?.detector).toBe("ping_pong"); + expect(loopEvent?.count).toBe(10); + expect(loopEvent?.toolName).toBe("list"); + } finally { + stop(); + } + }); + + it("emits structured critical diagnostic events when blocking loops", async () => { + const emitted: DiagnosticToolLoopEvent[] = []; + const stop = onDiagnosticEvent((evt) => { + if (evt.type === "tool.loop") { + emitted.push(evt); + } + }); + try { + const execute = vi.fn().mockResolvedValue({ + content: [{ type: "text", text: "(no new output)\n\nProcess still running." }], + details: { status: "running", aggregated: "steady" }, + }); + // oxlint-disable-next-line typescript/no-explicit-any + const tool = wrapToolWithBeforeToolCallHook({ name: "process", execute } as any, { + agentId: "main", + sessionKey: "main", + }); + const params = { action: "poll", sessionId: "sess-crit" }; + + for (let i = 0; i < CRITICAL_THRESHOLD; i += 1) { + await tool.execute(`poll-${i}`, params, undefined, undefined); + } + + await expect( + tool.execute(`poll-${CRITICAL_THRESHOLD}`, params, undefined, undefined), + ).rejects.toThrow("CRITICAL"); + + const loopEvent = emitted.at(-1); + expect(loopEvent?.type).toBe("tool.loop"); + expect(loopEvent?.level).toBe("critical"); + expect(loopEvent?.action).toBe("block"); + expect(loopEvent?.detector).toBe("known_poll_no_progress"); + expect(loopEvent?.count).toBe(CRITICAL_THRESHOLD); + expect(loopEvent?.toolName).toBe("process"); + } finally { + stop(); + } + }); }); diff --git a/src/agents/pi-tools.before-tool-call.ts b/src/agents/pi-tools.before-tool-call.ts index 5f7dec5c382..0cc7cfcba5f 100644 --- a/src/agents/pi-tools.before-tool-call.ts +++ b/src/agents/pi-tools.before-tool-call.ts @@ -57,6 +57,7 @@ export async function runBeforeToolCallHook(args: { if (args.ctx?.sessionKey) { const { getDiagnosticSessionState } = await import("../logging/diagnostic-session-state.js"); + const { logToolLoopAction } = await import("../logging/diagnostic.js"); const { detectToolCallLoop, recordToolCall } = await import("./tool-loop-detection.js"); const sessionState = getDiagnosticSessionState({ @@ -69,12 +70,34 @@ export async function runBeforeToolCallHook(args: { if (loopResult.stuck) { if (loopResult.level === "critical") { log.error(`Blocking ${toolName} due to critical loop: ${loopResult.message}`); + logToolLoopAction({ + sessionKey: args.ctx.sessionKey, + sessionId: args.ctx?.agentId, + toolName, + level: "critical", + action: "block", + detector: loopResult.detector, + count: loopResult.count, + message: loopResult.message, + pairedToolName: loopResult.pairedToolName, + }); return { blocked: true, reason: loopResult.message, }; } else { log.warn(`Loop warning for ${toolName}: ${loopResult.message}`); + logToolLoopAction({ + sessionKey: args.ctx.sessionKey, + sessionId: args.ctx?.agentId, + toolName, + level: "warning", + action: "warn", + detector: loopResult.detector, + count: loopResult.count, + message: loopResult.message, + pairedToolName: loopResult.pairedToolName, + }); } } diff --git a/src/agents/tool-loop-detection.test.ts b/src/agents/tool-loop-detection.test.ts index 365d8761b8a..37b1e58e178 100644 --- a/src/agents/tool-loop-detection.test.ts +++ b/src/agents/tool-loop-detection.test.ts @@ -136,6 +136,8 @@ describe("tool-loop-detection", () => { expect(result.stuck).toBe(true); if (result.stuck) { expect(result.level).toBe("warning"); + expect(result.detector).toBe("generic_repeat"); + expect(result.count).toBe(WARNING_THRESHOLD); expect(result.message).toContain("WARNING"); expect(result.message).toContain(`${WARNING_THRESHOLD} times`); } @@ -176,6 +178,7 @@ describe("tool-loop-detection", () => { expect(loopResult.stuck).toBe(true); if (loopResult.stuck) { expect(loopResult.level).toBe("warning"); + expect(loopResult.detector).toBe("known_poll_no_progress"); expect(loopResult.message).toContain("no progress"); } }); @@ -196,6 +199,7 @@ describe("tool-loop-detection", () => { expect(loopResult.stuck).toBe(true); if (loopResult.stuck) { expect(loopResult.level).toBe("critical"); + expect(loopResult.detector).toBe("known_poll_no_progress"); expect(loopResult.message).toContain("CRITICAL"); } }); @@ -232,10 +236,45 @@ describe("tool-loop-detection", () => { expect(loopResult.stuck).toBe(true); if (loopResult.stuck) { expect(loopResult.level).toBe("critical"); + expect(loopResult.detector).toBe("global_circuit_breaker"); expect(loopResult.message).toContain("global circuit breaker"); } }); + it("warns on ping-pong alternating patterns", () => { + const state = createState(); + const readParams = { path: "/a.txt" }; + const listParams = { dir: "/workspace" }; + + for (let i = 0; i < WARNING_THRESHOLD - 1; i += 1) { + if (i % 2 === 0) { + recordToolCall(state, "read", readParams, `read-${i}`); + } else { + recordToolCall(state, "list", listParams, `list-${i}`); + } + } + + const loopResult = detectToolCallLoop(state, "list", listParams); + expect(loopResult.stuck).toBe(true); + if (loopResult.stuck) { + expect(loopResult.level).toBe("warning"); + expect(loopResult.detector).toBe("ping_pong"); + expect(loopResult.count).toBe(WARNING_THRESHOLD); + expect(loopResult.message).toContain("ping-pong loop"); + } + }); + + it("does not flag ping-pong when alternation is broken", () => { + const state = createState(); + recordToolCall(state, "read", { path: "/a.txt" }, "a1"); + recordToolCall(state, "list", { dir: "/workspace" }, "b1"); + recordToolCall(state, "read", { path: "/a.txt" }, "a2"); + recordToolCall(state, "write", { path: "/tmp/out.txt" }, "c1"); // breaks alternation + + const loopResult = detectToolCallLoop(state, "list", { dir: "/workspace" }); + expect(loopResult.stuck).toBe(false); + }); + it("records fixed-size result hashes for large tool outputs", () => { const state = createState(); const params = { action: "log", sessionId: "sess-big" }; diff --git a/src/agents/tool-loop-detection.ts b/src/agents/tool-loop-detection.ts index 280b93eb3aa..903f9aacd92 100644 --- a/src/agents/tool-loop-detection.ts +++ b/src/agents/tool-loop-detection.ts @@ -5,9 +5,22 @@ import { isPlainObject } from "../utils.js"; const log = createSubsystemLogger("agents/loop-detection"); +export type LoopDetectorKind = + | "generic_repeat" + | "known_poll_no_progress" + | "global_circuit_breaker" + | "ping_pong"; + export type LoopDetectionResult = | { stuck: false } - | { stuck: true; level: "warning" | "critical"; message: string }; + | { + stuck: true; + level: "warning" | "critical"; + detector: LoopDetectorKind; + count: number; + message: string; + pairedToolName?: string; + }; export const TOOL_CALL_HISTORY_SIZE = 30; export const WARNING_THRESHOLD = 10; @@ -174,6 +187,61 @@ function getNoProgressStreak( return streak; } +function getPingPongStreak( + history: Array<{ toolName: string; argsHash: string }>, + currentSignature: string, +): { count: number; pairedToolName?: string } { + const last = history.at(-1); + if (!last) { + return { count: 0 }; + } + + let otherSignature: string | undefined; + let otherToolName: string | undefined; + for (let i = history.length - 2; i >= 0; i -= 1) { + const call = history[i]; + if (!call) { + continue; + } + if (call.argsHash !== last.argsHash) { + otherSignature = call.argsHash; + otherToolName = call.toolName; + break; + } + } + + if (!otherSignature || !otherToolName) { + return { count: 0 }; + } + + let alternatingTailCount = 0; + for (let i = history.length - 1; i >= 0; i -= 1) { + const call = history[i]; + if (!call) { + continue; + } + const expected = alternatingTailCount % 2 === 0 ? last.argsHash : otherSignature; + if (call.argsHash !== expected) { + break; + } + alternatingTailCount += 1; + } + + if (alternatingTailCount < 2) { + return { count: 0 }; + } + + const expectedCurrentSignature = otherSignature; + if (currentSignature !== expectedCurrentSignature) { + return { count: 0 }; + } + + return { + count: alternatingTailCount + 1, + pairedToolName: last.toolName, + }; +} + /** * Detect if an agent is stuck in a repetitive tool call loop. * Checks if the same tool+params combination has been called excessively. @@ -187,6 +255,7 @@ export function detectToolCallLoop( const currentHash = hashToolCall(toolName, params); const noProgressStreak = getNoProgressStreak(history, toolName, currentHash); const knownPollTool = isKnownPollToolCall(toolName, params); + const pingPong = getPingPongStreak(history, currentHash); if (noProgressStreak >= GLOBAL_CIRCUIT_BREAKER_THRESHOLD) { log.error( @@ -195,6 +264,8 @@ export function detectToolCallLoop( return { stuck: true, level: "critical", + detector: "global_circuit_breaker", + count: noProgressStreak, message: `CRITICAL: ${toolName} has repeated identical no-progress outcomes ${noProgressStreak} times. Session execution blocked by global circuit breaker to prevent runaway loops.`, }; } @@ -204,6 +275,8 @@ export function detectToolCallLoop( return { stuck: true, level: "critical", + detector: "known_poll_no_progress", + count: noProgressStreak, message: `CRITICAL: Called ${toolName} with identical arguments and no progress ${noProgressStreak} times. This appears to be a stuck polling loop. Session execution blocked to prevent resource waste.`, }; } @@ -213,10 +286,26 @@ export function detectToolCallLoop( return { stuck: true, level: "warning", + detector: "known_poll_no_progress", + count: noProgressStreak, message: `WARNING: You have called ${toolName} ${noProgressStreak} times with identical arguments and no progress. Stop polling and either (1) increase wait time between checks, or (2) report the task as failed if the process is stuck.`, }; } + if (pingPong.count >= WARNING_THRESHOLD) { + log.warn( + `Ping-pong loop warning: alternating calls count=${pingPong.count} currentTool=${toolName}`, + ); + return { + stuck: true, + level: "warning", + detector: "ping_pong", + count: pingPong.count, + message: `WARNING: You are alternating between repeated tool-call patterns (${pingPong.count} consecutive calls). This looks like a ping-pong loop; stop retrying and report the task as failed.`, + pairedToolName: pingPong.pairedToolName, + }; + } + // Generic detector: warn-only for repeated identical calls. const recentCount = history.filter( (h) => h.toolName === toolName && h.argsHash === currentHash, @@ -227,6 +316,8 @@ export function detectToolCallLoop( return { stuck: true, level: "warning", + detector: "generic_repeat", + count: recentCount, message: `WARNING: You have called ${toolName} ${recentCount} times with identical arguments. If this is not making progress, stop retrying and report the task as failed.`, }; } diff --git a/src/infra/diagnostic-events.ts b/src/infra/diagnostic-events.ts index 6b9f9f2d1bd..cf8958dd718 100644 --- a/src/infra/diagnostic-events.ts +++ b/src/infra/diagnostic-events.ts @@ -134,6 +134,19 @@ export type DiagnosticHeartbeatEvent = DiagnosticBaseEvent & { queued: number; }; +export type DiagnosticToolLoopEvent = DiagnosticBaseEvent & { + type: "tool.loop"; + sessionKey?: string; + sessionId?: string; + toolName: string; + level: "warning" | "critical"; + action: "warn" | "block"; + detector: "generic_repeat" | "known_poll_no_progress" | "global_circuit_breaker" | "ping_pong"; + count: number; + message: string; + pairedToolName?: string; +}; + export type DiagnosticEventPayload = | DiagnosticUsageEvent | DiagnosticWebhookReceivedEvent @@ -146,7 +159,8 @@ export type DiagnosticEventPayload = | DiagnosticLaneEnqueueEvent | DiagnosticLaneDequeueEvent | DiagnosticRunAttemptEvent - | DiagnosticHeartbeatEvent; + | DiagnosticHeartbeatEvent + | DiagnosticToolLoopEvent; export type DiagnosticEventInput = DiagnosticEventPayload extends infer Event ? Event extends DiagnosticEventPayload diff --git a/src/logging/diagnostic.ts b/src/logging/diagnostic.ts index 8ecdc4abd66..3751416c13a 100644 --- a/src/logging/diagnostic.ts +++ b/src/logging/diagnostic.ts @@ -256,6 +256,42 @@ export function logRunAttempt(params: SessionRef & { runId: string; attempt: num markActivity(); } +export function logToolLoopAction( + params: SessionRef & { + toolName: string; + level: "warning" | "critical"; + action: "warn" | "block"; + detector: "generic_repeat" | "known_poll_no_progress" | "global_circuit_breaker" | "ping_pong"; + count: number; + message: string; + pairedToolName?: string; + }, +) { + const payload = `tool loop: sessionId=${params.sessionId ?? "unknown"} sessionKey=${ + params.sessionKey ?? "unknown" + } tool=${params.toolName} level=${params.level} action=${params.action} detector=${ + params.detector + } count=${params.count}${params.pairedToolName ? ` pairedTool=${params.pairedToolName}` : ""} message="${params.message}"`; + if (params.level === "critical") { + diag.error(payload); + } else { + diag.warn(payload); + } + emitDiagnosticEvent({ + type: "tool.loop", + sessionId: params.sessionId, + sessionKey: params.sessionKey, + toolName: params.toolName, + level: params.level, + action: params.action, + detector: params.detector, + count: params.count, + message: params.message, + pairedToolName: params.pairedToolName, + }); + markActivity(); +} + export function logActiveRuns() { const activeSessions = Array.from(diagnosticSessionStates.entries()) .filter(([, s]) => s.state === "processing")