fix: context overflow compaction and subagent announce improvements (#11664) (thanks @tyler6204)

* initial commit

* feat: implement deriveSessionTotalTokens function and update usage tests

* Added deriveSessionTotalTokens function to calculate total tokens based on usage and context tokens.
* Updated usage tests to include cases for derived session total tokens.
* Refactored session usage calculations in multiple files to utilize the new function for improved accuracy.

* fix: restore overflow truncation fallback + changelog/test hardening (#11551) (thanks @tyler6204)
This commit is contained in:
Tyler Yust
2026-02-07 20:02:32 -08:00
committed by GitHub
parent 8fae55e8e0
commit 191da1feb5
31 changed files with 889 additions and 178 deletions

View File

@@ -88,6 +88,7 @@ vi.mock("../failover-error.js", () => ({
vi.mock("../usage.js", () => ({
normalizeUsage: vi.fn(() => undefined),
hasNonzeroUsage: vi.fn(() => false),
}));
vi.mock("./lanes.js", () => ({
@@ -108,6 +109,15 @@ vi.mock("./run/payloads.js", () => ({
buildEmbeddedRunPayloads: vi.fn(() => []),
}));
// Replace the tool-result truncation module with stubs. By default the
// truncation call resolves to a "nothing was truncated" result and the
// oversized-result probe reports false; individual tests override these.
vi.mock("./tool-result-truncation.js", () => ({
truncateOversizedToolResultsInSession: vi.fn(async () => ({
truncated: false,
truncatedCount: 0,
reason: "no oversized tool results",
})),
sessionLikelyHasOversizedToolResults: vi.fn(() => false),
}));
vi.mock("./utils.js", () => ({
describeUnknownError: vi.fn((err: unknown) => {
if (err instanceof Error) {
@@ -140,6 +150,7 @@ vi.mock("../pi-embedded-helpers.js", async () => {
isBillingAssistantError: vi.fn(() => false),
classifyFailoverReason: vi.fn(() => null),
formatAssistantErrorText: vi.fn(() => ""),
parseImageSizeError: vi.fn(() => null),
pickFallbackThinkingLevel: vi.fn(() => null),
isTimeoutErrorMessage: vi.fn(() => false),
parseImageDimensionError: vi.fn(() => null),
@@ -151,9 +162,17 @@ import { compactEmbeddedPiSessionDirect } from "./compact.js";
import { log } from "./logger.js";
import { runEmbeddedPiAgent } from "./run.js";
import { runEmbeddedAttempt } from "./run/attempt.js";
import {
sessionLikelyHasOversizedToolResults,
truncateOversizedToolResultsInSession,
} from "./tool-result-truncation.js";
const mockedRunEmbeddedAttempt = vi.mocked(runEmbeddedAttempt);
const mockedCompactDirect = vi.mocked(compactEmbeddedPiSessionDirect);
const mockedSessionLikelyHasOversizedToolResults = vi.mocked(sessionLikelyHasOversizedToolResults);
const mockedTruncateOversizedToolResultsInSession = vi.mocked(
truncateOversizedToolResultsInSession,
);
function makeAttemptResult(
overrides: Partial<EmbeddedRunAttemptResult> = {},
@@ -188,6 +207,12 @@ const baseParams = {
describe("overflow compaction in run loop", () => {
// Per-test setup: clear the mocks, then re-install the "nothing oversized /
// nothing truncated" defaults for the truncation helpers.
beforeEach(() => {
vi.clearAllMocks();
// Presumably needed because a test may install a persistent override
// (e.g. mockReturnValue(true)) that should not carry over to the next test.
mockedSessionLikelyHasOversizedToolResults.mockReturnValue(false);
mockedTruncateOversizedToolResultsInSession.mockResolvedValue({
truncated: false,
truncatedCount: 0,
reason: "no oversized tool results",
});
});
it("retries after successful compaction on context overflow promptError", async () => {
@@ -244,6 +269,43 @@ describe("overflow compaction in run loop", () => {
expect(log.warn).toHaveBeenCalledWith(expect.stringContaining("auto-compaction failed"));
});
// Scenario: the first attempt fails with a context-overflow prompt error,
// compaction reports nothing to compact, and the truncation fallback succeeds,
// so the run loop is expected to retry and finish without an error.
it("falls back to tool-result truncation and retries when oversized results are detected", async () => {
const overflowError = new Error("request_too_large: Request size exceeds model context window");
// Attempt 1 returns the overflow error with a message snapshot; attempt 2 succeeds.
mockedRunEmbeddedAttempt
.mockResolvedValueOnce(
makeAttemptResult({
promptError: overflowError,
messagesSnapshot: [{ role: "assistant", content: "big tool output" }],
}),
)
.mockResolvedValueOnce(makeAttemptResult({ promptError: null }));
// Compaction is mocked to report that it could not compact anything.
mockedCompactDirect.mockResolvedValueOnce({
ok: false,
compacted: false,
reason: "nothing to compact",
});
// The oversized-result probe says yes, and truncation reports one result truncated.
mockedSessionLikelyHasOversizedToolResults.mockReturnValue(true);
mockedTruncateOversizedToolResultsInSession.mockResolvedValueOnce({
truncated: true,
truncatedCount: 1,
});
const result = await runEmbeddedPiAgent(baseParams);
// Compaction was tried once before the truncation fallback ran.
expect(mockedCompactDirect).toHaveBeenCalledTimes(1);
expect(mockedSessionLikelyHasOversizedToolResults).toHaveBeenCalledWith(
expect.objectContaining({ contextWindowTokens: 200000 }),
);
expect(mockedTruncateOversizedToolResultsInSession).toHaveBeenCalledWith(
expect.objectContaining({ sessionFile: "/tmp/session.json" }),
);
// Two attempts in total: the failing one and the retry after truncation.
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2);
expect(log.info).toHaveBeenCalledWith(expect.stringContaining("Truncated 1 tool result(s)"));
expect(result.meta.error).toBeUndefined();
});
it("retries compaction up to 3 times before giving up", async () => {
const overflowError = new Error("request_too_large: Request size exceeds model context window");
@@ -323,4 +385,52 @@ describe("overflow compaction in run loop", () => {
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(1);
expect(result.meta.error?.kind).toBe("compaction_failure");
});
// Scenario: there is no prompt error, but the last assistant message stopped
// with an overflow error message. Compaction succeeds, so a retry is expected.
it("retries after successful compaction on assistant context overflow errors", async () => {
// Attempt 1 carries the overflow only on lastAssistant; attempt 2 is clean.
mockedRunEmbeddedAttempt
.mockResolvedValueOnce(
makeAttemptResult({
promptError: null,
lastAssistant: {
stopReason: "error",
errorMessage: "request_too_large: Request size exceeds model context window",
} as EmbeddedRunAttemptResult["lastAssistant"],
}),
)
.mockResolvedValueOnce(makeAttemptResult({ promptError: null }));
// Compaction is mocked to succeed.
mockedCompactDirect.mockResolvedValueOnce({
ok: true,
compacted: true,
result: {
summary: "Compacted session",
firstKeptEntryId: "entry-5",
tokensBefore: 150000,
},
});
const result = await runEmbeddedPiAgent(baseParams);
expect(mockedCompactDirect).toHaveBeenCalledTimes(1);
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2);
// The diagnostic warning must name the assistant error as the overflow source.
expect(log.warn).toHaveBeenCalledWith(expect.stringContaining("source=assistantError"));
expect(result.meta.error).toBeUndefined();
});
// Scenario: the prompt fails with a non-overflow error while lastAssistant
// still carries an overflow error message. The run must surface the prompt
// error and must not start overflow recovery.
it("does not treat stale assistant overflow as current-attempt overflow when promptError is non-overflow", async () => {
// Every attempt returns the same result (mockResolvedValue, not ...Once).
mockedRunEmbeddedAttempt.mockResolvedValue(
makeAttemptResult({
promptError: new Error("transport disconnected"),
lastAssistant: {
stopReason: "error",
errorMessage: "request_too_large: Request size exceeds model context window",
} as EmbeddedRunAttemptResult["lastAssistant"],
}),
);
await expect(runEmbeddedPiAgent(baseParams)).rejects.toThrow("transport disconnected");
// No compaction and no assistant-sourced overflow diagnostic.
expect(mockedCompactDirect).not.toHaveBeenCalled();
expect(log.warn).not.toHaveBeenCalledWith(expect.stringContaining("source=assistantError"));
});
});

View File

@@ -74,6 +74,66 @@ function scrubAnthropicRefusalMagic(prompt: string): string {
);
}
/**
 * Running token counters summed across the attempts of a single run.
 * Every counter is a required number (no optional fields).
 */
type UsageAccumulator = Record<
  "input" | "output" | "cacheRead" | "cacheWrite" | "total",
  number
>;
/** Returns a new accumulator object with every counter set to zero. */
const createUsageAccumulator = (): UsageAccumulator => {
  const zeroed: UsageAccumulator = {
    input: 0,
    output: 0,
    cacheRead: 0,
    cacheWrite: 0,
    total: 0,
  };
  return zeroed;
};
/**
 * Type guard: true when `usage` is present and at least one of its counters
 * (input, output, cacheRead, cacheWrite, total) is a finite number above zero.
 */
const hasUsageValues = (
  usage: ReturnType<typeof normalizeUsage>,
): usage is NonNullable<ReturnType<typeof normalizeUsage>> => {
  if (!usage) {
    return false;
  }
  const counters = [usage.input, usage.output, usage.cacheRead, usage.cacheWrite, usage.total];
  for (const counter of counters) {
    if (typeof counter === "number" && Number.isFinite(counter) && counter > 0) {
      return true;
    }
  }
  return false;
};
/**
 * Adds one usage record to `target` in place. Records that fail
 * `hasUsageValues` are ignored. A missing counter counts as 0, and when the
 * record has no `total` the sum of its four counters is added instead.
 */
const mergeUsageIntoAccumulator = (
  target: UsageAccumulator,
  usage: ReturnType<typeof normalizeUsage>,
) => {
  if (hasUsageValues(usage)) {
    const input = usage.input ?? 0;
    const output = usage.output ?? 0;
    const cacheRead = usage.cacheRead ?? 0;
    const cacheWrite = usage.cacheWrite ?? 0;

    target.input += input;
    target.output += output;
    target.cacheRead += cacheRead;
    target.cacheWrite += cacheWrite;
    // `+` binds tighter than `??`, so the fallback is the full component sum.
    target.total += usage.total ?? input + output + cacheRead + cacheWrite;
  }
};
/**
 * Converts the accumulated counters into the optional-field usage shape.
 * Returns `undefined` when no counter is above zero. Otherwise each falsy
 * counter becomes `undefined`, and `total` falls back to the sum of the four
 * component counters when the accumulated total is falsy.
 */
const toNormalizedUsage = (usage: UsageAccumulator) => {
  const { input, output, cacheRead, cacheWrite, total } = usage;
  const anyPositive = [input, output, cacheRead, cacheWrite, total].some((count) => count > 0);
  if (!anyPositive) {
    return undefined;
  }
  const orUndefined = (count: number) => count || undefined;
  return {
    input: orUndefined(input),
    output: orUndefined(output),
    cacheRead: orUndefined(cacheRead),
    cacheWrite: orUndefined(cacheWrite),
    total: total || input + output + cacheRead + cacheWrite || undefined,
  };
};
export async function runEmbeddedPiAgent(
params: RunEmbeddedPiAgentParams,
): Promise<EmbeddedPiRunResult> {
@@ -326,6 +386,8 @@ export async function runEmbeddedPiAgent(
const MAX_OVERFLOW_COMPACTION_ATTEMPTS = 3;
let overflowCompactionAttempts = 0;
let toolResultTruncationAttempted = false;
const usageAccumulator = createUsageAccumulator();
let autoCompactionCount = 0;
try {
while (true) {
attemptedThinking.add(thinkLevel);
@@ -392,119 +454,151 @@ export async function runEmbeddedPiAgent(
});
const { aborted, promptError, timedOut, sessionIdUsed, lastAssistant } = attempt;
mergeUsageIntoAccumulator(
usageAccumulator,
attempt.attemptUsage ?? normalizeUsage(lastAssistant?.usage as UsageLike),
);
autoCompactionCount += Math.max(0, attempt.compactionCount ?? 0);
const formattedAssistantErrorText = lastAssistant
? formatAssistantErrorText(lastAssistant, {
cfg: params.config,
sessionKey: params.sessionKey ?? params.sessionId,
})
: undefined;
const assistantErrorText =
lastAssistant?.stopReason === "error"
? lastAssistant.errorMessage?.trim() || formattedAssistantErrorText
: undefined;
if (promptError && !aborted) {
const errorText = describeUnknownError(promptError);
if (isContextOverflowError(errorText)) {
const msgCount = attempt.messagesSnapshot?.length ?? 0;
const contextOverflowError = !aborted
? (() => {
if (promptError) {
const errorText = describeUnknownError(promptError);
if (isContextOverflowError(errorText)) {
return { text: errorText, source: "promptError" as const };
}
// Prompt submission failed with a non-overflow error. Do not
// inspect prior assistant errors from history for this attempt.
return null;
}
if (assistantErrorText && isContextOverflowError(assistantErrorText)) {
return { text: assistantErrorText, source: "assistantError" as const };
}
return null;
})()
: null;
if (contextOverflowError) {
const errorText = contextOverflowError.text;
const msgCount = attempt.messagesSnapshot?.length ?? 0;
log.warn(
`[context-overflow-diag] sessionKey=${params.sessionKey ?? params.sessionId} ` +
`provider=${provider}/${modelId} source=${contextOverflowError.source} ` +
`messages=${msgCount} sessionFile=${params.sessionFile} ` +
`compactionAttempts=${overflowCompactionAttempts} error=${errorText.slice(0, 200)}`,
);
const isCompactionFailure = isCompactionFailureError(errorText);
// Attempt auto-compaction on context overflow (not compaction_failure)
if (
!isCompactionFailure &&
overflowCompactionAttempts < MAX_OVERFLOW_COMPACTION_ATTEMPTS
) {
overflowCompactionAttempts++;
log.warn(
`[context-overflow-diag] sessionKey=${params.sessionKey ?? params.sessionId} ` +
`provider=${provider}/${modelId} messages=${msgCount} ` +
`sessionFile=${params.sessionFile} compactionAttempts=${overflowCompactionAttempts} ` +
`error=${errorText.slice(0, 200)}`,
`context overflow detected (attempt ${overflowCompactionAttempts}/${MAX_OVERFLOW_COMPACTION_ATTEMPTS}); attempting auto-compaction for ${provider}/${modelId}`,
);
const isCompactionFailure = isCompactionFailureError(errorText);
// Attempt auto-compaction on context overflow (not compaction_failure)
if (
!isCompactionFailure &&
overflowCompactionAttempts < MAX_OVERFLOW_COMPACTION_ATTEMPTS
) {
overflowCompactionAttempts++;
const compactResult = await compactEmbeddedPiSessionDirect({
sessionId: params.sessionId,
sessionKey: params.sessionKey,
messageChannel: params.messageChannel,
messageProvider: params.messageProvider,
agentAccountId: params.agentAccountId,
authProfileId: lastProfileId,
sessionFile: params.sessionFile,
workspaceDir: resolvedWorkspace,
agentDir,
config: params.config,
skillsSnapshot: params.skillsSnapshot,
senderIsOwner: params.senderIsOwner,
provider,
model: modelId,
thinkLevel,
reasoningLevel: params.reasoningLevel,
bashElevated: params.bashElevated,
extraSystemPrompt: params.extraSystemPrompt,
ownerNumbers: params.ownerNumbers,
});
if (compactResult.compacted) {
autoCompactionCount += 1;
log.info(`auto-compaction succeeded for ${provider}/${modelId}; retrying prompt`);
continue;
}
log.warn(
`auto-compaction failed for ${provider}/${modelId}: ${compactResult.reason ?? "nothing to compact"}`,
);
}
// Fallback: try truncating oversized tool results in the session.
// This handles the case where a single tool result exceeds the
// context window and compaction cannot reduce it further.
if (!toolResultTruncationAttempted) {
const contextWindowTokens = ctxInfo.tokens;
const hasOversized = attempt.messagesSnapshot
? sessionLikelyHasOversizedToolResults({
messages: attempt.messagesSnapshot,
contextWindowTokens,
})
: false;
if (hasOversized) {
toolResultTruncationAttempted = true;
log.warn(
`context overflow detected (attempt ${overflowCompactionAttempts}/${MAX_OVERFLOW_COMPACTION_ATTEMPTS}); attempting auto-compaction for ${provider}/${modelId}`,
`[context-overflow-recovery] Attempting tool result truncation for ${provider}/${modelId} ` +
`(contextWindow=${contextWindowTokens} tokens)`,
);
const compactResult = await compactEmbeddedPiSessionDirect({
const truncResult = await truncateOversizedToolResultsInSession({
sessionFile: params.sessionFile,
contextWindowTokens,
sessionId: params.sessionId,
sessionKey: params.sessionKey,
messageChannel: params.messageChannel,
messageProvider: params.messageProvider,
agentAccountId: params.agentAccountId,
authProfileId: lastProfileId,
sessionFile: params.sessionFile,
workspaceDir: resolvedWorkspace,
agentDir,
config: params.config,
skillsSnapshot: params.skillsSnapshot,
senderIsOwner: params.senderIsOwner,
provider,
model: modelId,
thinkLevel,
reasoningLevel: params.reasoningLevel,
bashElevated: params.bashElevated,
extraSystemPrompt: params.extraSystemPrompt,
ownerNumbers: params.ownerNumbers,
});
if (compactResult.compacted) {
log.info(`auto-compaction succeeded for ${provider}/${modelId}; retrying prompt`);
if (truncResult.truncated) {
log.info(
`[context-overflow-recovery] Truncated ${truncResult.truncatedCount} tool result(s); retrying prompt`,
);
// Session is now smaller; allow compaction retries again.
overflowCompactionAttempts = 0;
continue;
}
log.warn(
`auto-compaction failed for ${provider}/${modelId}: ${compactResult.reason ?? "nothing to compact"}`,
`[context-overflow-recovery] Tool result truncation did not help: ${truncResult.reason ?? "unknown"}`,
);
}
// Fallback: try truncating oversized tool results in the session.
// This handles the case where a single tool result (e.g., reading a
// huge file or getting a massive PR diff) exceeds the context window,
// and compaction can't help because there's no older history to compact.
if (!toolResultTruncationAttempted) {
const contextWindowTokens = ctxInfo.tokens;
const hasOversized = attempt.messagesSnapshot
? sessionLikelyHasOversizedToolResults({
messages: attempt.messagesSnapshot,
contextWindowTokens,
})
: false;
if (hasOversized) {
toolResultTruncationAttempted = true;
log.warn(
`[context-overflow-recovery] Attempting tool result truncation for ${provider}/${modelId} ` +
`(contextWindow=${contextWindowTokens} tokens)`,
);
const truncResult = await truncateOversizedToolResultsInSession({
sessionFile: params.sessionFile,
contextWindowTokens,
sessionId: params.sessionId,
sessionKey: params.sessionKey,
});
if (truncResult.truncated) {
log.info(
`[context-overflow-recovery] Truncated ${truncResult.truncatedCount} tool result(s); retrying prompt`,
);
// Reset compaction attempts so compaction can be tried again
// after truncation (the session is now smaller)
overflowCompactionAttempts = 0;
continue;
}
log.warn(
`[context-overflow-recovery] Tool result truncation did not help: ${truncResult.reason ?? "unknown"}`,
);
}
}
const kind = isCompactionFailure ? "compaction_failure" : "context_overflow";
return {
payloads: [
{
text:
"Context overflow: prompt too large for the model. " +
"Try again with less input or a larger-context model.",
isError: true,
},
],
meta: {
durationMs: Date.now() - started,
agentMeta: {
sessionId: sessionIdUsed,
provider,
model: model.id,
},
systemPromptReport: attempt.systemPromptReport,
error: { kind, message: errorText },
},
};
}
const kind = isCompactionFailure ? "compaction_failure" : "context_overflow";
return {
payloads: [
{
text:
"Context overflow: prompt too large for the model. " +
"Try again with less input or a larger-context model.",
isError: true,
},
],
meta: {
durationMs: Date.now() - started,
agentMeta: {
sessionId: sessionIdUsed,
provider,
model: model.id,
},
systemPromptReport: attempt.systemPromptReport,
error: { kind, message: errorText },
},
};
}
if (promptError && !aborted) {
const errorText = describeUnknownError(promptError);
// Handle role ordering errors with a user-friendly message
if (/incorrect role information|roles must alternate/i.test(errorText)) {
return {
@@ -702,12 +796,13 @@ export async function runEmbeddedPiAgent(
}
}
const usage = normalizeUsage(lastAssistant?.usage as UsageLike);
const usage = toNormalizedUsage(usageAccumulator);
const agentMeta: EmbeddedPiAgentMeta = {
sessionId: sessionIdUsed,
provider: lastAssistant?.provider ?? provider,
model: lastAssistant?.model ?? model.id,
usage,
compactionCount: autoCompactionCount > 0 ? autoCompactionCount : undefined,
};
const payloads = buildEmbeddedRunPayloads({

View File

@@ -650,6 +650,8 @@ export async function runEmbeddedAttempt(
getMessagingToolSentTargets,
didSendViaMessagingTool,
getLastToolError,
getUsageTotals,
getCompactionCount,
} = subscription;
const queueHandle: EmbeddedPiQueueHandle = {
@@ -908,6 +910,8 @@ export async function runEmbeddedAttempt(
cloudCodeAssistFormatError: Boolean(
lastAssistant?.errorMessage && isCloudCodeAssistFormatError(lastAssistant.errorMessage),
),
attemptUsage: getUsageTotals(),
compactionCount: getCompactionCount(),
// Client tool call detected (OpenResponses hosted tools)
clientToolCall: clientToolCallDetected ?? undefined,
};

View File

@@ -9,6 +9,7 @@ import type { MessagingToolSend } from "../../pi-embedded-messaging.js";
import type { BlockReplyChunking, ToolResultFormat } from "../../pi-embedded-subscribe.js";
import type { AuthStorage, ModelRegistry } from "../../pi-model-discovery.js";
import type { SkillSnapshot } from "../../skills.js";
import type { NormalizedUsage } from "../../usage.js";
import type { ClientToolDefinition } from "./params.js";
export type EmbeddedRunAttemptParams = {
@@ -106,6 +107,8 @@ export type EmbeddedRunAttemptResult = {
messagingToolSentTexts: string[];
messagingToolSentTargets: MessagingToolSend[];
cloudCodeAssistFormatError: boolean;
attemptUsage?: NormalizedUsage;
compactionCount?: number;
/** Client tool call detected (OpenResponses hosted tools). */
clientToolCall?: { name: string; params: Record<string, unknown> };
};

View File

@@ -5,6 +5,7 @@ export type EmbeddedPiAgentMeta = {
sessionId: string;
provider: string;
model: string;
compactionCount?: number;
usage?: {
input?: number;
output?: number;