From 424ca99305e0154b4047378ec33a8952a1b3a055 Mon Sep 17 00:00:00 2001 From: liri-ha <258306986+liri-ha@users.noreply.github.com> Date: Wed, 4 Mar 2026 16:24:49 +0300 Subject: [PATCH] fix(compaction): use full-session token count for post-compaction sanity check Rebased on upstream main. - Estimate full session tokens (including system prompt, bootstrap context, workspace files) before compaction instead of using result.tokensBefore, which only covers the summarizable history subset - Add a 10% margin, applied only when falling back to the heuristic estimate (no provider-observed token count), to account for token counter estimation jitter - Prevent valid token estimates from being discarded in sessions with large system prompts or workspace files --- src/agents/pi-embedded-runner/compact.ts | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/src/agents/pi-embedded-runner/compact.ts b/src/agents/pi-embedded-runner/compact.ts index 1207a0c3b0b..d6d00bddd8c 100644 --- a/src/agents/pi-embedded-runner/compact.ts +++ b/src/agents/pi-embedded-runner/compact.ts @@ -897,6 +897,17 @@ export async function compactEmbeddedPiSessionDirect( // Measure compactedCount from the original pre-limiting transcript so compaction // lifecycle metrics represent total reduction through the compaction pipeline. const messageCountCompactionInput = messageCountOriginal; + // Estimate full session tokens BEFORE compaction (including system prompt, + // bootstrap context, workspace files, and all history). This is needed for + // a correct sanity check — result.tokensBefore only covers the summarizable + // history subset, not the full session. + let fullSessionTokensBefore = 0; + try { + fullSessionTokensBefore = limited.reduce((sum, msg) => sum + estimateTokens(msg), 0); + } catch { + // If token estimation throws on a malformed message, skip the sanity check + // instead of crashing compaction. 
+ } const result = await compactWithSafetyTimeout(() => session.compact(params.customInstructions), ); @@ -912,8 +923,17 @@ export async function compactEmbeddedPiSessionDirect( for (const message of session.messages) { tokensAfter += estimateTokens(message); } - // Sanity check: tokensAfter should be less than tokensBefore - if (tokensAfter > (observedTokenCount ?? result.tokensBefore)) { + // Sanity check: compare against the best full-session pre-compaction baseline. + // Prefer the provider-observed live count when available; otherwise use the + // heuristic full-session estimate with a 10% margin for counter jitter. + const sanityCheckBaseline = observedTokenCount ?? fullSessionTokensBefore; + if ( + sanityCheckBaseline > 0 && + tokensAfter > + (observedTokenCount !== undefined + ? sanityCheckBaseline + : sanityCheckBaseline * 1.1) + ) { tokensAfter = undefined; // Don't trust the estimate } } catch {