fix: update totalTokens after compaction using last-call usage (#15018)

Merged via /review-pr -> /prepare-pr -> /merge-pr.

Prepared head SHA: 9214291bf7
Co-authored-by: shtse8 <8020099+shtse8@users.noreply.github.com>
Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com>
Reviewed-by: @gumadeiras
This commit is contained in:
Kyle Tse
2026-02-12 23:02:30 +00:00
committed by GitHub
parent 033d5b5c15
commit a10f228a5b
10 changed files with 602 additions and 19 deletions

View File

@@ -38,8 +38,7 @@ import { resolveBlockStreamingCoalescing } from "./block-streaming.js";
 import { createFollowupRunner } from "./followup-runner.js";
 import { enqueueFollowupRun, type FollowupRun, type QueueSettings } from "./queue.js";
 import { createReplyToModeFilterForChannel, resolveReplyToMode } from "./reply-threading.js";
-import { incrementCompactionCount } from "./session-updates.js";
-import { persistSessionUsageUpdate } from "./session-usage.js";
+import { incrementRunCompactionCount, persistRunSessionUsage } from "./session-run-accounting.js";
 import { createTypingSignaler } from "./typing-mode.js";
 const BLOCK_REPLY_SEND_TIMEOUT_MS = 15_000;
@@ -384,10 +383,11 @@ export async function runReplyAgent(params: {
 activeSessionEntry?.contextTokens ??
 DEFAULT_CONTEXT_TOKENS;
-await persistSessionUsageUpdate({
+await persistRunSessionUsage({
 storePath,
 sessionKey,
 usage,
+lastCallUsage: runResult.meta.agentMeta?.lastCallUsage,
 modelUsed,
 providerUsed,
 contextTokensUsed,
@@ -495,11 +495,13 @@ export async function runReplyAgent(params: {
 let finalPayloads = replyPayloads;
 const verboseEnabled = resolvedVerboseLevel !== "off";
 if (autoCompactionCompleted) {
-const count = await incrementCompactionCount({
+const count = await incrementRunCompactionCount({
 sessionEntry: activeSessionEntry,
 sessionStore: activeSessionStore,
 sessionKey,
 storePath,
+lastCallUsage: runResult.meta.agentMeta?.lastCallUsage,
+contextTokensUsed,
 });
 if (verboseEnabled) {
 const suffix = typeof count === "number" ? ` (count ${count})` : "";