mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-18 02:01:03 +00:00
fix(compaction): use full-session token count for post-compaction sanity check
Rebased on upstream main.

- Estimate full-session tokens (including system prompt, bootstrap context, and workspace files) before compaction instead of using result.tokensBefore, which covers only the summarizable history subset.
- Add a 10% margin to account for estimation jitter in the heuristic token counter.
- Prevent valid token estimates from being discarded in sessions with large system prompts or workspace files.
This commit is contained in:
@@ -897,6 +897,17 @@ export async function compactEmbeddedPiSessionDirect(
|
||||
// Measure compactedCount from the original pre-limiting transcript so compaction
|
||||
// lifecycle metrics represent total reduction through the compaction pipeline.
|
||||
const messageCountCompactionInput = messageCountOriginal;
|
||||
// Estimate full session tokens BEFORE compaction (including system prompt,
|
||||
// bootstrap context, workspace files, and all history). This is needed for
|
||||
// a correct sanity check — result.tokensBefore only covers the summarizable
|
||||
// history subset, not the full session.
|
||||
let fullSessionTokensBefore = 0;
|
||||
try {
|
||||
fullSessionTokensBefore = limited.reduce((sum, msg) => sum + estimateTokens(msg), 0);
|
||||
} catch {
|
||||
// If token estimation throws on a malformed message, skip the sanity check
|
||||
// instead of crashing compaction.
|
||||
}
|
||||
const result = await compactWithSafetyTimeout(() =>
|
||||
session.compact(params.customInstructions),
|
||||
);
|
||||
@@ -912,8 +923,17 @@ export async function compactEmbeddedPiSessionDirect(
|
||||
for (const message of session.messages) {
|
||||
tokensAfter += estimateTokens(message);
|
||||
}
|
||||
// Sanity check: tokensAfter should be less than tokensBefore
|
||||
if (tokensAfter > (observedTokenCount ?? result.tokensBefore)) {
|
||||
// Sanity check: compare against the best full-session pre-compaction baseline.
|
||||
// Prefer the provider-observed live count when available; otherwise use the
|
||||
// heuristic full-session estimate with a 10% margin for counter jitter.
|
||||
const sanityCheckBaseline = observedTokenCount ?? fullSessionTokensBefore;
|
||||
if (
|
||||
sanityCheckBaseline > 0 &&
|
||||
tokensAfter >
|
||||
(observedTokenCount !== undefined
|
||||
? sanityCheckBaseline
|
||||
: sanityCheckBaseline * 1.1)
|
||||
) {
|
||||
tokensAfter = undefined; // Don't trust the estimate
|
||||
}
|
||||
} catch {
|
||||
|
||||
Reference in New Issue
Block a user