fix(compaction): use full-session token count for post-compaction sanity check

Rebased on upstream main.

- Estimate full-session tokens (including system prompt, bootstrap context,
  workspace files) before compaction instead of using result.tokensBefore,
  which only covers the summarizable history subset
- Add a 10% margin to account for estimation jitter in the heuristic token counter
- Prevents valid token estimates from being discarded in sessions with
  large system prompts or workspace files
This commit is contained in:
liri-ha
2026-03-04 16:24:49 +03:00
committed by Josh Lehman
parent fc2b796f02
commit 424ca99305

View File

@@ -897,6 +897,17 @@ export async function compactEmbeddedPiSessionDirect(
// Measure compactedCount from the original pre-limiting transcript so compaction
// lifecycle metrics represent total reduction through the compaction pipeline.
const messageCountCompactionInput = messageCountOriginal;
// Estimate full session tokens BEFORE compaction (including system prompt,
// bootstrap context, workspace files, and all history). This is needed for
// a correct sanity check — result.tokensBefore only covers the summarizable
// history subset, not the full session.
let fullSessionTokensBefore = 0;
try {
fullSessionTokensBefore = limited.reduce((sum, msg) => sum + estimateTokens(msg), 0);
} catch {
// If token estimation throws on a malformed message, skip the sanity check
// instead of crashing compaction.
}
const result = await compactWithSafetyTimeout(() =>
session.compact(params.customInstructions),
);
@@ -912,8 +923,17 @@ export async function compactEmbeddedPiSessionDirect(
for (const message of session.messages) {
tokensAfter += estimateTokens(message);
}
// Sanity check: tokensAfter should be less than tokensBefore
if (tokensAfter > (observedTokenCount ?? result.tokensBefore)) {
// Sanity check: compare against the best full-session pre-compaction baseline.
// Prefer the provider-observed live count when available; otherwise use the
// heuristic full-session estimate with a 10% margin for counter jitter.
const sanityCheckBaseline = observedTokenCount ?? fullSessionTokensBefore;
if (
sanityCheckBaseline > 0 &&
tokensAfter >
(observedTokenCount !== undefined
? sanityCheckBaseline
: sanityCheckBaseline * 1.1)
) {
tokensAfter = undefined; // Don't trust the estimate
}
} catch {