From a62ff19a6638087d97b10e916a831f32c0b36cd7 Mon Sep 17 00:00:00 2001 From: artale Date: Mon, 16 Feb 2026 13:44:22 +0100 Subject: [PATCH] fix(agent): isolate last-turn total in token usage reporting (#17016) recordAssistantUsage accumulated cacheRead across the entire multi-turn run, and totalTokens was clamped to contextTokens. This caused session_status to report 100% context usage regardless of actual load. Changes: - run.ts: capture lastTurnTotal from the most recent model call and inject it into the normalized usage before it reaches agentMeta. - usage-reporting.test.ts: verify usage.total reflects current turn, not accumulated total. Fixes #17016 --- src/agents/pi-embedded-runner/run.ts | 4 + .../usage-reporting.test.ts | 117 ++++++++++++++++++ 2 files changed, 121 insertions(+) create mode 100644 src/agents/pi-embedded-runner/usage-reporting.test.ts diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index b87d13503e9..ac03047ceb8 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -528,6 +528,7 @@ export async function runEmbeddedPiAgent( // Keep prompt size from the latest model call so session totalTokens // reflects current context usage, not accumulated tool-loop usage. lastRunPromptUsage = lastAssistantUsage ?? attemptUsage; + const lastTurnTotal = lastAssistantUsage?.total ?? attemptUsage?.total; const attemptCompactionCount = Math.max(0, attempt.compactionCount ?? 0); autoCompactionCount += attemptCompactionCount; const formattedAssistantErrorText = lastAssistant @@ -927,6 +928,9 @@ export async function runEmbeddedPiAgent( } const usage = toNormalizedUsage(usageAccumulator); + if (usage && lastTurnTotal && lastTurnTotal > 0) { + usage.total = lastTurnTotal; + } // Extract the last individual API call's usage for context-window // utilization display. The accumulated `usage` sums input tokens // across all calls (tool-use loops, compaction retries), which diff --git a/src/agents/pi-embedded-runner/usage-reporting.test.ts b/src/agents/pi-embedded-runner/usage-reporting.test.ts new file mode 100644 index 00000000000..ae77356d83d --- /dev/null +++ b/src/agents/pi-embedded-runner/usage-reporting.test.ts @@ -0,0 +1,117 @@ +import "./run.overflow-compaction.mocks.shared.js"; +import { beforeEach, describe, expect, it, vi } from "vitest"; + +vi.mock("../auth-profiles.js", () => ({ + isProfileInCooldown: vi.fn(() => false), + markAuthProfileFailure: vi.fn(async () => {}), + markAuthProfileGood: vi.fn(async () => {}), + markAuthProfileUsed: vi.fn(async () => {}), +})); + +vi.mock("../usage.js", () => ({ + normalizeUsage: vi.fn((usage?: unknown) => + usage && typeof usage === "object" ? usage : undefined, + ), + derivePromptTokens: vi.fn( + (usage?: { input?: number; cacheRead?: number; cacheWrite?: number }) => { + if (!usage) { + return undefined; + } + const input = usage.input ?? 0; + const cacheRead = usage.cacheRead ?? 0; + const cacheWrite = usage.cacheWrite ?? 0; + const sum = input + cacheRead + cacheWrite; + return sum > 0 ? sum : undefined; + }, + ), +})); + +vi.mock("../workspace-run.js", () => ({ + resolveRunWorkspaceDir: vi.fn((params: { workspaceDir: string }) => ({ + workspaceDir: params.workspaceDir, + usedFallback: false, + fallbackReason: undefined, + agentId: "main", + })), + redactRunIdentifier: vi.fn((value?: string) => value ?? ""), +})); + +vi.mock("../pi-embedded-helpers.js", () => ({ + formatBillingErrorMessage: vi.fn(() => ""), + classifyFailoverReason: vi.fn(() => null), + formatAssistantErrorText: vi.fn(() => ""), + isAuthAssistantError: vi.fn(() => false), + isBillingAssistantError: vi.fn(() => false), + isCompactionFailureError: vi.fn(() => false), + isLikelyContextOverflowError: vi.fn((msg?: string) => { + const lower = (msg ?? "").toLowerCase(); + return lower.includes("request_too_large") || lower.includes("context window exceeded"); + }), + isFailoverAssistantError: vi.fn(() => false), + isFailoverErrorMessage: vi.fn(() => false), + parseImageSizeError: vi.fn(() => null), + parseImageDimensionError: vi.fn(() => null), + isRateLimitAssistantError: vi.fn(() => false), + isTimeoutErrorMessage: vi.fn(() => false), + pickFallbackThinkingLevel: vi.fn(() => null), +})); + +import { runEmbeddedPiAgent } from "./run.js"; +import { runEmbeddedAttempt } from "./run/attempt.js"; + +const mockedRunEmbeddedAttempt = vi.mocked(runEmbeddedAttempt); + +describe("runEmbeddedPiAgent usage reporting", () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it("reports total usage from the last turn instead of accumulated total", async () => { + // Simulate a multi-turn run result. + // Turn 1: Input 100, Output 50. Total 150. + // Turn 2: Input 150, Output 50. Total 200. + + // The accumulated usage (attemptUsage) will be the sum: + // Input: 100 + 150 = 250 (Note: runEmbeddedAttempt actually returns accumulated usage) + // Output: 50 + 50 = 100 + // Total: 150 + 200 = 350 + + // The last assistant usage (lastAssistant.usage) will be Turn 2: + // Input: 150, Output 50, Total 200. + + // We expect result.meta.agentMeta.usage.total to be 200 (last turn total). + // The bug causes it to be 350 (accumulated total). + + mockedRunEmbeddedAttempt.mockResolvedValueOnce({ + aborted: false, + promptError: null, + timedOut: false, + sessionIdUsed: "test-session", + assistantTexts: ["Response 1", "Response 2"], + lastAssistant: { + usage: { input: 150, output: 50, total: 200 }, + stopReason: "end_turn", + }, + attemptUsage: { input: 250, output: 100, total: 350 }, + // eslint-disable-next-line @typescript-eslint/no-explicit-any + } as any); + + const result = await runEmbeddedPiAgent({ + sessionId: "test-session", + sessionKey: "test-key", + sessionFile: "/tmp/session.json", + workspaceDir: "/tmp/workspace", + prompt: "hello", + timeoutMs: 30000, + runId: "run-1", + }); + + // Check usage in meta + const usage = result.meta.agentMeta.usage; + expect(usage).toBeDefined(); + + // Check if total matches the last turn's total (200) + // If the bug exists, it will likely be 350 + expect(usage?.total).toBe(200); + }); +});