diff --git a/src/agents/pi-embedded-runner/compact.ts b/src/agents/pi-embedded-runner/compact.ts index 1207a0c3b0b..d6d00bddd8c 100644 --- a/src/agents/pi-embedded-runner/compact.ts +++ b/src/agents/pi-embedded-runner/compact.ts @@ -897,6 +897,17 @@ export async function compactEmbeddedPiSessionDirect( // Measure compactedCount from the original pre-limiting transcript so compaction // lifecycle metrics represent total reduction through the compaction pipeline. const messageCountCompactionInput = messageCountOriginal; + // Estimate full session tokens BEFORE compaction (including system prompt, + // bootstrap context, workspace files, and all history). This is needed for + // a correct sanity check — result.tokensBefore only covers the summarizable + // history subset, not the full session. + let fullSessionTokensBefore = 0; + try { + fullSessionTokensBefore = limited.reduce((sum, msg) => sum + estimateTokens(msg), 0); + } catch { + // If token estimation throws on a malformed message, skip the sanity check + // instead of crashing compaction. + } const result = await compactWithSafetyTimeout(() => session.compact(params.customInstructions), ); @@ -912,8 +923,17 @@ export async function compactEmbeddedPiSessionDirect( for (const message of session.messages) { tokensAfter += estimateTokens(message); } - // Sanity check: tokensAfter should be less than tokensBefore - if (tokensAfter > (observedTokenCount ?? result.tokensBefore)) { + // Sanity check: compare against the best full-session pre-compaction baseline. + // Prefer the provider-observed live count when available; otherwise use the + // heuristic full-session estimate with a 10% margin for counter jitter. + const sanityCheckBaseline = observedTokenCount ?? fullSessionTokensBefore; + if ( + sanityCheckBaseline > 0 && + tokensAfter > + (observedTokenCount !== undefined + ? sanityCheckBaseline + : sanityCheckBaseline * 1.1) + ) { tokensAfter = undefined; // Don't trust the estimate } } catch {