fix: recover from context overflow caused by oversized tool results (#11579)

* fix: gracefully handle oversized tool results causing context overflow When a subagent reads a very large file or gets a huge tool result (e.g., gh pr diff on a massive PR), it can exceed the model's context window in a single prompt. Auto-compaction can't help because there's no older history to compact — just one giant tool result. This adds two layers of defense: 1. Pre-emptive: Hard cap on tool result size (400K chars ≈ 100K tokens) applied in the session tool result guard before persistence. This prevents extremely large tool results from being stored in full, regardless of model context window size. 2. Recovery: When context overflow is detected and compaction fails, scan session messages for oversized tool results relative to the model's actual context window (30% max share). If found, truncate them in the session via branching (creating a new branch with truncated content) and retry the prompt. The truncation preserves the beginning of the content (most useful for understanding what was read) and appends a notice explaining the truncation and suggesting offset/limit parameters for targeted reads. Includes comprehensive tests for: - Text truncation with newline-boundary awareness - Context-window-proportional size calculation - In-memory message truncation - Oversized detection heuristics - Guard-level size capping during persistence * fix: prep fixes for tool result truncation PR (#11579) (thanks @tyler6204)
2026-05-08 15:48:28 +00:00 · 2026-02-07 17:40:51 -08:00
parent b8c8130efe
commit 0deb8b0da1
6 changed files with 725 additions and 1 deletions
--- a/src/agents/pi-embedded-runner/run.ts
+++ b/src/agents/pi-embedded-runner/run.ts
@@ -52,6 +52,10 @@ import { log } from "./logger.js";
 import { resolveModel } from "./model.js";
 import { runEmbeddedAttempt } from "./run/attempt.js";
 import { buildEmbeddedRunPayloads } from "./run/payloads.js";
+import {
+  truncateOversizedToolResultsInSession,
+  sessionLikelyHasOversizedToolResults,
+} from "./tool-result-truncation.js";
 import { describeUnknownError } from "./utils.js";

 type ApiKeyInfo = ResolvedProviderAuth;
@@ -321,6 +325,7 @@ export async function runEmbeddedPiAgent(

      const MAX_OVERFLOW_COMPACTION_ATTEMPTS = 3;
      let overflowCompactionAttempts = 0;
+      let toolResultTruncationAttempted = false;
      try {
        while (true) {
          attemptedThinking.add(thinkLevel);
@@ -437,6 +442,47 @@ export async function runEmbeddedPiAgent(
                  `auto-compaction failed for ${provider}/${modelId}: ${compactResult.reason ?? "nothing to compact"}`,
                );
              }
+
+              // Fallback: try truncating oversized tool results in the session.
+              // This handles the case where a single tool result (e.g., reading a
+              // huge file or getting a massive PR diff) exceeds the context window,
+              // and compaction can't help because there's no older history to compact.
+              if (!toolResultTruncationAttempted) {
+                const contextWindowTokens = ctxInfo.tokens;
+                const hasOversized = attempt.messagesSnapshot
+                  ? sessionLikelyHasOversizedToolResults({
+                      messages: attempt.messagesSnapshot,
+                      contextWindowTokens,
+                    })
+                  : false;
+
+                if (hasOversized) {
+                  toolResultTruncationAttempted = true;
+                  log.warn(
+                    `[context-overflow-recovery] Attempting tool result truncation for ${provider}/${modelId} ` +
+                      `(contextWindow=${contextWindowTokens} tokens)`,
+                  );
+                  const truncResult = await truncateOversizedToolResultsInSession({
+                    sessionFile: params.sessionFile,
+                    contextWindowTokens,
+                    sessionId: params.sessionId,
+                    sessionKey: params.sessionKey,
+                  });
+                  if (truncResult.truncated) {
+                    log.info(
+                      `[context-overflow-recovery] Truncated ${truncResult.truncatedCount} tool result(s); retrying prompt`,
+                    );
+                    // Reset compaction attempts so compaction can be tried again
+                    // after truncation (the session is now smaller)
+                    overflowCompactionAttempts = 0;
+                    continue;
+                  }
+                  log.warn(
+                    `[context-overflow-recovery] Tool result truncation did not help: ${truncResult.reason ?? "unknown"}`,
+                  );
+                }
+              }
+
              const kind = isCompactionFailure ? "compaction_failure" : "context_overflow";
              return {
                payloads: [