fix: context overflow compaction and subagent announce improvements (#11664) (thanks @tyler6204)

* initial commit

* feat: implement deriveSessionTotalTokens function and update usage tests

* Added deriveSessionTotalTokens function to calculate total tokens based on usage and context tokens.
* Updated usage tests to include cases for derived session total tokens.
* Refactored session usage calculations in multiple files to utilize the new function for improved accuracy.

* fix: restore overflow truncation fallback + changelog/test hardening (#11551) (thanks @tyler6204)
This commit is contained in:
Tyler Yust
2026-02-07 20:02:32 -08:00
committed by GitHub
parent 8fae55e8e0
commit 191da1feb5
31 changed files with 889 additions and 178 deletions

View File

@@ -16,6 +16,7 @@ import {
} from "./pi-embedded-helpers.js";
import { createEmbeddedPiSessionEventHandler } from "./pi-embedded-subscribe.handlers.js";
import { formatReasoningMessage } from "./pi-embedded-utils.js";
import { hasNonzeroUsage, normalizeUsage, type UsageLike } from "./usage.js";
// Global, case-insensitive scan for opening or closing reasoning tags
// (<think>, <thinking>, <thought>, <antthinking>), tolerating whitespace inside
// the angle brackets. Capture group 1 is "/" for a closing tag and "" otherwise.
const THINKING_TAG_SCAN_RE = /<\s*(\/?)\s*(?:think(?:ing)?|thought|antthinking)\s*>/gi;
// Global, case-insensitive scan for <final> / </final> tags, with the same
// whitespace tolerance; capture group 1 is "/" for a closing tag and "" otherwise.
const FINAL_TAG_SCAN_RE = /<\s*(\/?)\s*final\s*>/gi;
@@ -69,6 +70,14 @@ export function subscribeEmbeddedPiSession(params: SubscribeEmbeddedPiSessionPar
pendingMessagingTexts: new Map(),
pendingMessagingTargets: new Map(),
};
// Running token counters for this subscription. Every bucket starts at zero and
// is incremented in place by recordAssistantUsage.
const usageTotals = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 };
// Compaction counter; starts at zero and is bumped by incrementCompactionCount.
let compactionCount = 0;
const assistantTexts = state.assistantTexts;
const toolMetas = state.toolMetas;
@@ -222,6 +231,43 @@ export function subscribeEmbeddedPiSession(params: SubscribeEmbeddedPiSessionPar
state.compactionRetryPromise = null;
}
};
/**
 * Adds one usage report to the running `usageTotals`.
 *
 * The raw value (null is treated as undefined) is passed through
 * `normalizeUsage`; if `hasNonzeroUsage` rejects the result, nothing is recorded.
 *
 * @param usageLike - Raw usage payload of unknown shape.
 */
const recordAssistantUsage = (usageLike: unknown) => {
  const normalized = normalizeUsage((usageLike ?? undefined) as UsageLike | undefined);
  if (hasNonzeroUsage(normalized)) {
    // Missing component counts contribute zero.
    const input = normalized.input ?? 0;
    const output = normalized.output ?? 0;
    const cacheRead = normalized.cacheRead ?? 0;
    const cacheWrite = normalized.cacheWrite ?? 0;

    usageTotals.input += input;
    usageTotals.output += output;
    usageTotals.cacheRead += cacheRead;
    usageTotals.cacheWrite += cacheWrite;
    // Use the reported total when present; otherwise sum the four components.
    usageTotals.total += normalized.total ?? (input + output + cacheRead + cacheWrite);
  }
};
/**
 * Returns the accumulated usage counters, or `undefined` if none is above zero.
 *
 * In the returned object a zero bucket is reported as `undefined`. `total` uses
 * the accumulated total when it is non-zero, otherwise the sum of the four
 * component buckets.
 */
const getUsageTotals = () => {
  const { input, output, cacheRead, cacheWrite, total } = usageTotals;
  const anyPositive = [input, output, cacheRead, cacheWrite, total].some((count) => count > 0);
  if (!anyPositive) {
    return undefined;
  }
  const componentSum = input + output + cacheRead + cacheWrite;
  return {
    input: input || undefined,
    output: output || undefined,
    cacheRead: cacheRead || undefined,
    cacheWrite: cacheWrite || undefined,
    total: total || componentSum || undefined,
  };
};
/** Adds one to the `compactionCount` counter. */
const incrementCompactionCount = () => {
  compactionCount++;
};
// Block-reply chunking is optional: a chunker is constructed only when
// params.blockReplyChunking is truthy; otherwise blockChunker is null.
const blockChunking = params.blockReplyChunking;
const blockChunker = blockChunking ? new EmbeddedBlockChunker(blockChunking) : null;
@@ -530,6 +576,10 @@ export function subscribeEmbeddedPiSession(params: SubscribeEmbeddedPiSessionPar
noteCompactionRetry,
resolveCompactionRetry,
maybeResolveCompactionWait,
recordAssistantUsage,
incrementCompactionCount,
getUsageTotals,
getCompactionCount: () => compactionCount,
};
const unsubscribe = params.session.subscribe(createEmbeddedPiSessionEventHandler(ctx));
@@ -546,6 +596,8 @@ export function subscribeEmbeddedPiSession(params: SubscribeEmbeddedPiSessionPar
// which is generated AFTER the tool sends the actual answer.
didSendViaMessagingTool: () => messagingToolSentTexts.length > 0,
getLastToolError: () => (state.lastToolError ? { ...state.lastToolError } : undefined),
getUsageTotals,
getCompactionCount: () => compactionCount,
waitForCompactionRetry: () => {
if (state.compactionInFlight || state.pendingCompactionRetry > 0) {
ensureCompactionPromise();