From b25d3652e78d92f12f8b70286d154606f9a83253 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sat, 21 Feb 2026 15:35:45 +0100 Subject: [PATCH] fix(agents): cap embedded runner retry loop --- CHANGELOG.md | 1 + .../run.overflow-compaction.test.ts | 27 ++++++++++++++++ src/agents/pi-embedded-runner/run.ts | 32 +++++++++++++++++++ src/agents/pi-embedded-runner/types.ts | 7 +++- 4 files changed, 66 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ef1155c3cc5..01542c3bc2e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,6 +31,7 @@ Docs: https://docs.openclaw.ai ### Fixes +- Security/Agents: cap embedded Pi runner outer retry loop to 24 attempts and return an explicit `retry_limit` error payload when retries never converge, preventing unbounded internal retry cycles (`GHSA-76m6-pj3w-v7mf`). - Agents/Tool images: include source filenames in `agents/tool-images` resize logs so compression events can be traced back to specific files. - Providers/OAuth: harden Qwen and Chutes refresh handling by validating refresh response expiry values and preserving prior refresh tokens when providers return empty refresh token fields, with regression coverage for empty-token responses. - Models/Kimi-Coding: add missing implicit provider template for `kimi-coding` with correct `anthropic-messages` API type and base URL, fixing 403 errors when using Kimi for Coding. (#22409) diff --git a/src/agents/pi-embedded-runner/run.overflow-compaction.test.ts b/src/agents/pi-embedded-runner/run.overflow-compaction.test.ts index 1dc794baa81..29531fb07a3 100644 --- a/src/agents/pi-embedded-runner/run.overflow-compaction.test.ts +++ b/src/agents/pi-embedded-runner/run.overflow-compaction.test.ts @@ -1,5 +1,6 @@ import "./run.overflow-compaction.mocks.shared.js"; import { beforeEach, describe, expect, it, vi } from "vitest"; +import { pickFallbackThinkingLevel } from "../pi-embedded-helpers.js"; import { compactEmbeddedPiSessionDirect } from "./compact.js"; import { runEmbeddedPiAgent } from "./run.js"; import { makeAttemptResult, mockOverflowRetrySuccess } from "./run.overflow-compaction.fixture.js"; @@ -16,6 +17,7 @@ const mockedSessionLikelyHasOversizedToolResults = vi.mocked(sessionLikelyHasOve const mockedTruncateOversizedToolResultsInSession = vi.mocked( truncateOversizedToolResultsInSession, ); +const mockedPickFallbackThinkingLevel = vi.mocked(pickFallbackThinkingLevel); describe("runEmbeddedPiAgent overflow compaction trigger routing", () => { beforeEach(() => { @@ -106,4 +108,29 @@ describe("runEmbeddedPiAgent overflow compaction trigger routing", () => { expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(4); expect(result.meta.error?.kind).toBe("context_overflow"); }); + + it("returns retry_limit when repeated retries never converge", async () => { + mockedRunEmbeddedAttempt.mockReset(); + mockedCompactDirect.mockReset(); + mockedPickFallbackThinkingLevel.mockReset(); + mockedRunEmbeddedAttempt.mockResolvedValue( + makeAttemptResult({ promptError: new Error("unsupported reasoning mode") }), + ); + mockedPickFallbackThinkingLevel.mockReturnValue("low"); + + const result = await runEmbeddedPiAgent({ + sessionId: "test-session", + sessionKey: "test-key", + sessionFile: "/tmp/session.json", + workspaceDir: "/tmp/workspace", + prompt: "hello", + timeoutMs: 30000, + runId: "run-1", + }); + + expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(24); + expect(mockedCompactDirect).not.toHaveBeenCalled(); + expect(result.meta.error?.kind).toBe("retry_limit"); + expect(result.payloads?.[0]?.isError).toBe(true); + }); }); diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index 81f26a47902..be61bb60156 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -102,6 +102,9 @@ function createCompactionDiagId(): string { return `ovf-${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 8)}`; } +// Defensive guard for the outer run loop across all retry branches. +const MAX_RUN_RETRY_ITERATIONS = 24; + const hasUsageValues = ( usage: ReturnType, ): usage is NonNullable> => @@ -475,13 +478,42 @@ export async function runEmbeddedPiAgent( } const MAX_OVERFLOW_COMPACTION_ATTEMPTS = 3; + const MAX_RUN_LOOP_ITERATIONS = MAX_RUN_RETRY_ITERATIONS; let overflowCompactionAttempts = 0; let toolResultTruncationAttempted = false; const usageAccumulator = createUsageAccumulator(); let lastRunPromptUsage: ReturnType | undefined; let autoCompactionCount = 0; + let runLoopIterations = 0; try { while (true) { + if (runLoopIterations >= MAX_RUN_LOOP_ITERATIONS) { + const message = `Exceeded retry limit after ${runLoopIterations} attempts.`; + log.error( + `[run-retry-limit] sessionKey=${params.sessionKey ?? params.sessionId} ` + + `provider=${provider}/${modelId} attempts=${runLoopIterations}`, + ); + return { + payloads: [ + { + text: + "Request failed after repeated internal retries. " + + "Please try again, or use /new to start a fresh session.", + isError: true, + }, + ], + meta: { + durationMs: Date.now() - started, + agentMeta: { + sessionId: params.sessionId, + provider, + model: model.id, + }, + error: { kind: "retry_limit", message }, + }, + }; + } + runLoopIterations += 1; attemptedThinking.add(thinkLevel); await fs.mkdir(resolvedWorkspace, { recursive: true }); diff --git a/src/agents/pi-embedded-runner/types.ts b/src/agents/pi-embedded-runner/types.ts index ac7c723d24b..722abbf2a9a 100644 --- a/src/agents/pi-embedded-runner/types.ts +++ b/src/agents/pi-embedded-runner/types.ts @@ -36,7 +36,12 @@ export type EmbeddedPiRunMeta = { aborted?: boolean; systemPromptReport?: SessionSystemPromptReport; error?: { - kind: "context_overflow" | "compaction_failure" | "role_ordering" | "image_size"; + kind: + | "context_overflow" + | "compaction_failure" + | "role_ordering" + | "image_size" + | "retry_limit"; message: string; }; /** Stop reason for the agent run (e.g., "completed", "tool_calls"). */