fix: recover from context overflow caused by oversized tool results (#11579)

* fix: gracefully handle oversized tool results causing context overflow

When a subagent reads a very large file or gets a huge tool result (e.g.,
gh pr diff on a massive PR), it can exceed the model's context window in
a single prompt. Auto-compaction can't help because there's no older
history to compact — just one giant tool result.

This adds two layers of defense:

1. Pre-emptive: Hard cap on tool result size (400K chars ≈ 100K tokens)
   applied in the session tool result guard before persistence. This
   prevents extremely large tool results from being stored in full,
   regardless of model context window size.

2. Recovery: When context overflow is detected and compaction fails,
   scan session messages for oversized tool results relative to the
   model's actual context window (30% max share). If found, truncate
   them in the session via branching (creating a new branch with
   truncated content) and retry the prompt.

The truncation preserves the beginning of the content (most useful for
understanding what was read) and appends a notice explaining the
truncation and suggesting offset/limit parameters for targeted reads.

Includes comprehensive tests for:
- Text truncation with newline-boundary awareness
- Context-window-proportional size calculation
- In-memory message truncation
- Oversized detection heuristics
- Guard-level size capping during persistence

* fix: prep fixes for tool result truncation PR (#11579) (thanks @tyler6204)
This commit is contained in:
Tyler Yust
2026-02-07 17:40:51 -08:00
committed by GitHub
parent b8c8130efe
commit 0deb8b0da1
6 changed files with 725 additions and 1 deletions

View File

@@ -206,4 +206,67 @@ describe("installSessionToolResultGuard", () => {
expect(messages.map((m) => m.role)).toEqual(["assistant", "toolResult"]);
});
it("caps oversized tool result text during persistence", () => {
  const sm = SessionManager.inMemory();
  installSessionToolResultGuard(sm);
  sm.appendMessage(toolCallMessage);
  sm.appendMessage(
    asAppendMessage({
      role: "toolResult",
      toolCallId: "call_1",
      toolName: "read",
      // 500K chars exceeds the persistence hard cap (400K per the guard),
      // so the guard is expected to truncate before storing.
      content: [{ type: "text", text: "x".repeat(500_000) }],
      isError: false,
      timestamp: Date.now(),
    }),
  );
  // Read back what was actually persisted, not what was submitted.
  const entries = sm
    .getEntries()
    .filter((e) => e.type === "message")
    .map((e) => (e as { message: AgentMessage }).message);
  const toolResult = entries.find((m) => m.role === "toolResult") as {
    content: Array<{ type: string; text: string }>;
  };
  expect(toolResult).toBeDefined();
  const textBlock = toolResult.content.find((b: { type: string }) => b.type === "text") as {
    text: string;
  };
  // Guard the cast: a dropped/reshaped text block should fail with a clear
  // assertion message, not a TypeError on the next line.
  expect(textBlock).toBeDefined();
  expect(textBlock.text.length).toBeLessThan(500_000);
  // Truncation keeps the beginning of the content and appends a notice.
  expect(textBlock.text.startsWith("x")).toBe(true);
  expect(textBlock.text).toContain("truncated");
});
it("does not truncate tool results under the limit", () => {
  const sm = SessionManager.inMemory();
  installSessionToolResultGuard(sm);
  const originalText = "small tool result";
  sm.appendMessage(toolCallMessage);
  sm.appendMessage(
    asAppendMessage({
      role: "toolResult",
      toolCallId: "call_1",
      toolName: "read",
      content: [{ type: "text", text: originalText }],
      isError: false,
      timestamp: Date.now(),
    }),
  );
  // Read back what was actually persisted, not what was submitted.
  const entries = sm
    .getEntries()
    .filter((e) => e.type === "message")
    .map((e) => (e as { message: AgentMessage }).message);
  const toolResult = entries.find((m) => m.role === "toolResult") as {
    content: Array<{ type: string; text: string }>;
  };
  // Guard both casts: if the guard drops the message or the text block, we
  // want a clear assertion failure rather than a TypeError dereference.
  expect(toolResult).toBeDefined();
  const textBlock = toolResult.content.find((b: { type: string }) => b.type === "text") as {
    text: string;
  };
  expect(textBlock).toBeDefined();
  // Under the cap: the text must pass through byte-for-byte untouched.
  expect(textBlock.text).toBe(originalText);
});
});