fix: recover from context overflow caused by oversized tool results (#11579)

* fix: gracefully handle oversized tool results causing context overflow When a subagent reads a very large file or gets a huge tool result (e.g., gh pr diff on a massive PR), it can exceed the model's context window in a single prompt. Auto-compaction can't help because there's no older history to compact — just one giant tool result. This adds two layers of defense: 1. Pre-emptive: Hard cap on tool result size (400K chars ≈ 100K tokens) applied in the session tool result guard before persistence. This prevents extremely large tool results from being stored in full, regardless of model context window size. 2. Recovery: When context overflow is detected and compaction fails, scan session messages for oversized tool results relative to the model's actual context window (30% max share). If found, truncate them in the session via branching (creating a new branch with truncated content) and retry the prompt. The truncation preserves the beginning of the content (most useful for understanding what was read) and appends a notice explaining the truncation and suggesting offset/limit parameters for targeted reads. Includes comprehensive tests for: - Text truncation with newline-boundary awareness - Context-window-proportional size calculation - In-memory message truncation - Oversized detection heuristics - Guard-level size capping during persistence * fix: prep fixes for tool result truncation PR (#11579) (thanks @tyler6204)
2026-05-09 17:04:32 +00:00 · 2026-02-07 17:40:51 -08:00
parent b8c8130efe
commit 0deb8b0da1
6 changed files with 725 additions and 1 deletions
--- a/src/agents/session-tool-result-guard.ts
+++ b/src/agents/session-tool-result-guard.ts
@@ -1,8 +1,76 @@
 import type { AgentMessage } from "@mariozechner/pi-agent-core";
+import type { TextContent } from "@mariozechner/pi-ai";
 import type { SessionManager } from "@mariozechner/pi-coding-agent";
 import { emitSessionTranscriptUpdate } from "../sessions/transcript-events.js";
+import { HARD_MAX_TOOL_RESULT_CHARS } from "./pi-embedded-runner/tool-result-truncation.js";
 import { makeMissingToolResult, sanitizeToolCallInputs } from "./session-transcript-repair.js";

+const GUARD_TRUNCATION_SUFFIX =
+  "\n\n⚠️ [Content truncated during persistence — original exceeded size limit. " +
+  "Use offset/limit parameters or request specific sections for large content.]";
+
+/**
+ * Truncate oversized text content blocks in a tool result message.
+ * Returns the original message if under the limit, or a new message with
+ * truncated text blocks otherwise.
+ */
+function capToolResultSize(msg: AgentMessage): AgentMessage {
+  const role = (msg as { role?: string }).role;
+  if (role !== "toolResult") {
+    return msg;
+  }
+  const content = (msg as { content?: unknown }).content;
+  if (!Array.isArray(content)) {
+    return msg;
+  }
+
+  // Calculate total text size
+  let totalTextChars = 0;
+  for (const block of content) {
+    if (block && typeof block === "object" && (block as { type?: string }).type === "text") {
+      const text = (block as TextContent).text;
+      if (typeof text === "string") {
+        totalTextChars += text.length;
+      }
+    }
+  }
+
+  if (totalTextChars <= HARD_MAX_TOOL_RESULT_CHARS) {
+    return msg;
+  }
+
+  // Truncate proportionally
+  const newContent = content.map((block: unknown) => {
+    if (!block || typeof block !== "object" || (block as { type?: string }).type !== "text") {
+      return block;
+    }
+    const textBlock = block as TextContent;
+    if (typeof textBlock.text !== "string") {
+      return block;
+    }
+    const blockShare = textBlock.text.length / totalTextChars;
+    const blockBudget = Math.max(
+      2_000,
+      Math.floor(HARD_MAX_TOOL_RESULT_CHARS * blockShare) - GUARD_TRUNCATION_SUFFIX.length,
+    );
+    if (textBlock.text.length <= blockBudget) {
+      return block;
+    }
+    // Try to cut at a newline boundary
+    let cutPoint = blockBudget;
+    const lastNewline = textBlock.text.lastIndexOf("\n", blockBudget);
+    if (lastNewline > blockBudget * 0.8) {
+      cutPoint = lastNewline;
+    }
+    return {
+      ...textBlock,
+      text: textBlock.text.slice(0, cutPoint) + GUARD_TRUNCATION_SUFFIX,
+    };
+  });
+
+  return { ...msg, content: newContent } as AgentMessage;
+}
+
 type ToolCall = { id: string; name?: string };

 function extractAssistantToolCalls(msg: Extract<AgentMessage, { role: "assistant" }>): ToolCall[] {
@@ -116,8 +184,11 @@ export function installSessionToolResultGuard(
      if (id) {
        pending.delete(id);
      }
+      // Apply hard size cap before persistence to prevent oversized tool results
+      // from consuming the entire context window on subsequent LLM calls.
+      const capped = capToolResultSize(nextMessage);
      return originalAppend(
-        persistToolResult(nextMessage, {
+        persistToolResult(capped, {
          toolCallId: id ?? undefined,
          toolName,
          isSynthetic: false,