fix: update totalTokens after compaction using last-call usage (#15018)

Merged via /review-pr -> /prepare-pr -> /merge-pr.

Prepared head SHA: 9214291bf7
Co-authored-by: shtse8 <8020099+shtse8@users.noreply.github.com>
Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com>
Reviewed-by: @gumadeiras
This commit is contained in:
Kyle Tse
2026-02-12 23:02:30 +00:00
committed by GitHub
parent 033d5b5c15
commit a10f228a5b
10 changed files with 602 additions and 19 deletions

View File

@@ -820,11 +820,18 @@ export async function runEmbeddedPiAgent(
}
const usage = toNormalizedUsage(usageAccumulator);
// Extract the last individual API call's usage for context-window
// utilization display. The accumulated `usage` sums input tokens
// across all calls (tool-use loops, compaction retries), which
// overstates the actual context size. `lastCallUsage` reflects only
// the final call, giving an accurate snapshot of the current context.
// It may be absent: a missing result is stored as `undefined` on the
// meta object, so consumers need a fallback.
const lastCallUsage = normalizeUsage(lastAssistant?.usage as UsageLike);
const agentMeta: EmbeddedPiAgentMeta = {
sessionId: sessionIdUsed,
provider: lastAssistant?.provider ?? provider,
model: lastAssistant?.model ?? model.id,
usage,
lastCallUsage: lastCallUsage ?? undefined,
compactionCount: autoCompactionCount > 0 ? autoCompactionCount : undefined,
};

View File

@@ -13,6 +13,20 @@ export type EmbeddedPiAgentMeta = {
cacheWrite?: number;
total?: number;
};
/**
* Usage from the last individual API call (not accumulated across tool-use
* loops or compaction retries). Used for context-window utilization display
* (`totalTokens` in sessions.json) because the accumulated `usage.input`
* sums input tokens from every API call in the run, which overstates the
* actual context size.
*/
lastCallUsage?: {
input?: number;
output?: number;
cacheRead?: number;
cacheWrite?: number;
total?: number;
};
};
export type EmbeddedPiRunMeta = {