fix: recover from context overflow caused by oversized tool results (#11579)

* fix: gracefully handle oversized tool results causing context overflow

When a subagent reads a very large file or gets a huge tool result (e.g.,
gh pr diff on a massive PR), that single result can exceed the model's
context window in one prompt. Auto-compaction can't help because there's
no older history to compact — just one giant tool result.

This adds two layers of defense:

1. Pre-emptive: Hard cap on tool result size (400K chars ≈ 100K tokens)
   applied in the session tool result guard before persistence. This
   prevents extremely large tool results from being stored in full,
   regardless of model context window size.

2. Recovery: When context overflow is detected and compaction fails,
   scan the session messages for tool results that are oversized relative
   to the model's actual context window (30% max share). If any are found,
   truncate them in the session via branching (creating a new branch with
   truncated content) and retry the prompt.

The truncation preserves the beginning of the content (most useful for
understanding what was read) and appends a notice explaining the
truncation and suggesting offset/limit parameters for targeted reads.

Includes comprehensive tests for:
- Text truncation with newline-boundary awareness
- Context-window-proportional size calculation
- In-memory message truncation
- Oversized detection heuristics
- Guard-level size capping during persistence

* fix: prep fixes for tool result truncation PR (#11579) (thanks @tyler6204)
This commit is contained in:
Tyler Yust
2026-02-07 17:40:51 -08:00
committed by GitHub
parent b8c8130efe
commit 0deb8b0da1
6 changed files with 725 additions and 1 deletions

View File

@@ -52,6 +52,10 @@ import { log } from "./logger.js";
import { resolveModel } from "./model.js";
import { runEmbeddedAttempt } from "./run/attempt.js";
import { buildEmbeddedRunPayloads } from "./run/payloads.js";
import {
truncateOversizedToolResultsInSession,
sessionLikelyHasOversizedToolResults,
} from "./tool-result-truncation.js";
import { describeUnknownError } from "./utils.js";
type ApiKeyInfo = ResolvedProviderAuth;
@@ -321,6 +325,7 @@ export async function runEmbeddedPiAgent(
const MAX_OVERFLOW_COMPACTION_ATTEMPTS = 3;
let overflowCompactionAttempts = 0;
let toolResultTruncationAttempted = false;
try {
while (true) {
attemptedThinking.add(thinkLevel);
@@ -437,6 +442,47 @@ export async function runEmbeddedPiAgent(
`auto-compaction failed for ${provider}/${modelId}: ${compactResult.reason ?? "nothing to compact"}`,
);
}
// Fallback: try truncating oversized tool results in the session.
// This handles the case where a single tool result (e.g., reading a
// huge file or getting a massive PR diff) exceeds the context window,
// and compaction can't help because there's no older history to compact.
if (!toolResultTruncationAttempted) {
const contextWindowTokens = ctxInfo.tokens;
const hasOversized = attempt.messagesSnapshot
? sessionLikelyHasOversizedToolResults({
messages: attempt.messagesSnapshot,
contextWindowTokens,
})
: false;
if (hasOversized) {
toolResultTruncationAttempted = true;
log.warn(
`[context-overflow-recovery] Attempting tool result truncation for ${provider}/${modelId} ` +
`(contextWindow=${contextWindowTokens} tokens)`,
);
const truncResult = await truncateOversizedToolResultsInSession({
sessionFile: params.sessionFile,
contextWindowTokens,
sessionId: params.sessionId,
sessionKey: params.sessionKey,
});
if (truncResult.truncated) {
log.info(
`[context-overflow-recovery] Truncated ${truncResult.truncatedCount} tool result(s); retrying prompt`,
);
// Reset compaction attempts so compaction can be tried again
// after truncation (the session is now smaller)
overflowCompactionAttempts = 0;
continue;
}
log.warn(
`[context-overflow-recovery] Tool result truncation did not help: ${truncResult.reason ?? "unknown"}`,
);
}
}
const kind = isCompactionFailure ? "compaction_failure" : "context_overflow";
return {
payloads: [