fix(agent): isolate last-turn total in token usage reporting (#17016)

recordAssistantUsage accumulated cacheRead across the entire multi-turn
run, and totalTokens was clamped to contextTokens. Once the accumulated
total exceeded the context window, the clamp pinned it at the maximum,
so session_status reported 100% context usage regardless of actual load.

Changes:
- run.ts: capture lastTurnTotal from the most recent model call and
  inject it into the normalized usage before it reaches agentMeta.
- usage-reporting.test.ts: verify usage.total reflects the current turn,
  not the accumulated total.

Fixes #17016
This commit is contained in:
artale
2026-02-16 13:44:22 +01:00
committed by Peter Steinberger
parent d6acd71576
commit a62ff19a66
2 changed files with 121 additions and 0 deletions

View File

@@ -528,6 +528,7 @@ export async function runEmbeddedPiAgent(
// Keep prompt size from the latest model call so session totalTokens
// reflects current context usage, not accumulated tool-loop usage.
lastRunPromptUsage = lastAssistantUsage ?? attemptUsage;
const lastTurnTotal = lastAssistantUsage?.total ?? attemptUsage?.total;
const attemptCompactionCount = Math.max(0, attempt.compactionCount ?? 0);
autoCompactionCount += attemptCompactionCount;
const formattedAssistantErrorText = lastAssistant
@@ -927,6 +928,9 @@ export async function runEmbeddedPiAgent(
}
const usage = toNormalizedUsage(usageAccumulator);
if (usage && lastTurnTotal && lastTurnTotal > 0) {
usage.total = lastTurnTotal;
}
// Extract the last individual API call's usage for context-window
// utilization display. The accumulated `usage` sums input tokens
// across all calls (tool-use loops, compaction retries), which