fix: recover from context overflow caused by oversized tool results (#11579)

* fix: gracefully handle oversized tool results causing context overflow

When a subagent reads a very large file or gets a huge tool result (e.g.,
gh pr diff on a massive PR), it can exceed the model's context window in
a single prompt. Auto-compaction can't help because there's no older
history to compact — just one giant tool result.

This adds two layers of defense:

1. Pre-emptive: Hard cap on tool result size (400K chars ≈ 100K tokens)
   applied in the session tool result guard before persistence. This
   prevents extremely large tool results from being stored in full,
   regardless of model context window size.

2. Recovery: When context overflow is detected and compaction fails,
   scan session messages for oversized tool results relative to the
   model's actual context window (30% max share). If found, truncate
   them in the session via branching (creating a new branch with
   truncated content) and retry the prompt.

The truncation preserves the beginning of the content (most useful for
understanding what was read) and appends a notice explaining the
truncation and suggesting offset/limit parameters for targeted reads.

Includes comprehensive tests for:
- Text truncation with newline-boundary awareness
- Context-window-proportional size calculation
- In-memory message truncation
- Oversized detection heuristics
- Guard-level size capping during persistence

* fix: prep fixes for tool result truncation PR (#11579) (thanks @tyler6204)
This commit is contained in:
Tyler Yust
2026-02-07 17:40:51 -08:00
committed by GitHub
parent b8c8130efe
commit 0deb8b0da1
6 changed files with 725 additions and 1 deletions

View File

@@ -1,8 +1,76 @@
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import type { TextContent } from "@mariozechner/pi-ai";
import type { SessionManager } from "@mariozechner/pi-coding-agent";
import { emitSessionTranscriptUpdate } from "../sessions/transcript-events.js";
import { HARD_MAX_TOOL_RESULT_CHARS } from "./pi-embedded-runner/tool-result-truncation.js";
import { makeMissingToolResult, sanitizeToolCallInputs } from "./session-transcript-repair.js";
/** Notice appended to every text block that gets truncated by the guard. */
const GUARD_TRUNCATION_SUFFIX =
  "\n\n⚠ [Content truncated during persistence — original exceeded size limit. " +
  "Use offset/limit parameters or request specific sections for large content.]";

/**
 * Truncate oversized text content blocks in a tool result message.
 *
 * Only messages whose `role` is `"toolResult"` and whose `content` is an array
 * are considered; anything else is returned unchanged. Non-text blocks and
 * text blocks whose `text` is not a string are passed through untouched and do
 * not count toward the size total.
 *
 * When the combined text length exceeds `maxChars`, each text block receives a
 * share of the budget proportional to its share of the total text. Every block
 * keeps at least 2,000 characters, so a message with many text blocks can
 * still end up larger than `maxChars` after capping.
 *
 * The input message is never mutated.
 *
 * @param msg - Message about to be persisted.
 * @param maxChars - Maximum combined text length; defaults to
 *   `HARD_MAX_TOOL_RESULT_CHARS`.
 * @returns `msg` itself when no capping is needed, otherwise a shallow copy
 *   whose oversized text blocks are cut and suffixed with
 *   `GUARD_TRUNCATION_SUFFIX`.
 */
function capToolResultSize(
  msg: AgentMessage,
  maxChars: number = HARD_MAX_TOOL_RESULT_CHARS,
): AgentMessage {
  const role = (msg as { role?: string }).role;
  if (role !== "toolResult") {
    return msg;
  }
  const content = (msg as { content?: unknown }).content;
  if (!Array.isArray(content)) {
    return msg;
  }

  // A block participates in capping only if it is a text block with string text.
  const isTextBlock = (block: unknown): block is TextContent =>
    typeof block === "object" &&
    block !== null &&
    (block as { type?: unknown }).type === "text" &&
    typeof (block as { text?: unknown }).text === "string";

  // Calculate total text size across all text blocks.
  let totalTextChars = 0;
  for (const block of content as unknown[]) {
    if (isTextBlock(block)) {
      totalTextChars += block.text.length;
    }
  }
  if (totalTextChars <= maxChars) {
    return msg;
  }

  // Truncate proportionally: larger blocks give up more characters.
  const newContent = (content as unknown[]).map((block: unknown) => {
    if (!isTextBlock(block)) {
      return block;
    }
    const text: string = block.text;
    const blockShare = text.length / totalTextChars;
    // Reserve room for the suffix, but never shrink a block below 2,000 chars.
    const blockBudget = Math.max(
      2_000,
      Math.floor(maxChars * blockShare) - GUARD_TRUNCATION_SUFFIX.length,
    );
    if (text.length <= blockBudget) {
      return block;
    }

    // Prefer cutting at a newline, as long as that keeps more than 80% of the budget.
    let cutPoint = blockBudget;
    const lastNewline = text.lastIndexOf("\n", blockBudget);
    if (lastNewline > blockBudget * 0.8) {
      cutPoint = lastNewline;
    }

    // Never split a UTF-16 surrogate pair: slicing between the high and low
    // halves would leave a lone surrogate (malformed text) before the suffix.
    const unitBefore = text.charCodeAt(cutPoint - 1);
    const unitAfter = text.charCodeAt(cutPoint);
    const splitsSurrogatePair =
      unitBefore >= 0xd800 && unitBefore <= 0xdbff && unitAfter >= 0xdc00 && unitAfter <= 0xdfff;
    if (splitsSurrogatePair) {
      cutPoint -= 1;
    }

    return {
      ...block,
      text: text.slice(0, cutPoint) + GUARD_TRUNCATION_SUFFIX,
    };
  });

  return { ...msg, content: newContent } as AgentMessage;
}
type ToolCall = { id: string; name?: string };
function extractAssistantToolCalls(msg: Extract<AgentMessage, { role: "assistant" }>): ToolCall[] {
@@ -116,8 +184,11 @@ export function installSessionToolResultGuard(
if (id) {
pending.delete(id);
}
// Apply hard size cap before persistence to prevent oversized tool results
// from consuming the entire context window on subsequent LLM calls.
const capped = capToolResultSize(nextMessage);
return originalAppend(
persistToolResult(nextMessage, {
persistToolResult(capped, {
toolCallId: id ?? undefined,
toolName,
isSynthetic: false,