From 424ca99305e0154b4047378ec33a8952a1b3a055 Mon Sep 17 00:00:00 2001
From: liri-ha <258306986+liri-ha@users.noreply.github.com>
Date: Wed, 4 Mar 2026 16:24:49 +0300
Subject: [PATCH] fix(compaction): use full-session token count for
 post-compaction sanity check

Rebased on upstream main.

- Estimate full session tokens (including system prompt, bootstrap context,
  workspace files) before compaction instead of using result.tokensBefore
  which only covers the summarizable history subset
- Add 10% margin to account for heuristic token counter estimation jitter
- Prevents valid token estimates from being discarded in sessions with
  large system prompts or workspace files
---
 src/agents/pi-embedded-runner/compact.ts | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/src/agents/pi-embedded-runner/compact.ts b/src/agents/pi-embedded-runner/compact.ts
index 1207a0c3b0b..d6d00bddd8c 100644
--- a/src/agents/pi-embedded-runner/compact.ts
+++ b/src/agents/pi-embedded-runner/compact.ts
@@ -897,6 +897,17 @@ export async function compactEmbeddedPiSessionDirect(
         // Measure compactedCount from the original pre-limiting transcript so compaction
         // lifecycle metrics represent total reduction through the compaction pipeline.
         const messageCountCompactionInput = messageCountOriginal;
+        // Estimate full session tokens BEFORE compaction (including system prompt,
+        // bootstrap context, workspace files, and all history). This is needed for
+        // a correct sanity check — result.tokensBefore only covers the summarizable
+        // history subset, not the full session.
+        let fullSessionTokensBefore = 0;
+        try {
+          fullSessionTokensBefore = limited.reduce((sum, msg) => sum + estimateTokens(msg), 0);
+        } catch {
+          // If token estimation throws on a malformed message, skip the sanity check
+          // instead of crashing compaction.
+        }
         const result = await compactWithSafetyTimeout(() =>
           session.compact(params.customInstructions),
         );
@@ -912,8 +923,17 @@ export async function compactEmbeddedPiSessionDirect(
           for (const message of session.messages) {
             tokensAfter += estimateTokens(message);
           }
-          // Sanity check: tokensAfter should be less than tokensBefore
-          if (tokensAfter > (observedTokenCount ?? result.tokensBefore)) {
+          // Sanity check: compare against the best full-session pre-compaction baseline.
+          // Prefer the provider-observed live count when available; otherwise use the
+          // heuristic full-session estimate with a 10% margin for counter jitter.
+          const sanityCheckBaseline = observedTokenCount ?? fullSessionTokensBefore;
+          if (
+            sanityCheckBaseline > 0 &&
+            tokensAfter >
+              (observedTokenCount !== undefined
+                ? sanityCheckBaseline
+                : sanityCheckBaseline * 1.1)
+          ) {
             tokensAfter = undefined; // Don't trust the estimate
           }
         } catch {