fix(agent): isolate last-turn total in token usage reporting (#17016)

recordAssistantUsage accumulated cacheRead across the entire multi-turn run, and totalTokens was then clamped to contextTokens. As a result, session_status reported 100% context usage regardless of the actual load.

Changes:
- run.ts: capture lastTurnTotal from the most recent model call and inject it into the normalized usage before it reaches agentMeta.
- usage-reporting.test.ts: verify that usage.total reflects the last turn, not the accumulated total.

Fixes #17016
2026-05-08 19:18:26 +00:00 · 2026-02-16 13:44:22 +01:00
parent d6acd71576
commit a62ff19a66
2 changed files with 121 additions and 0 deletions
--- a/src/agents/pi-embedded-runner/run.ts
+++ b/src/agents/pi-embedded-runner/run.ts
@@ -528,6 +528,7 @@ export async function runEmbeddedPiAgent(
          // Keep prompt size from the latest model call so session totalTokens
          // reflects current context usage, not accumulated tool-loop usage.
          lastRunPromptUsage = lastAssistantUsage ?? attemptUsage;
+          const lastTurnTotal = lastAssistantUsage?.total ?? attemptUsage?.total;
          const attemptCompactionCount = Math.max(0, attempt.compactionCount ?? 0);
          autoCompactionCount += attemptCompactionCount;
          const formattedAssistantErrorText = lastAssistant
@@ -927,6 +928,9 @@ export async function runEmbeddedPiAgent(
          }

          const usage = toNormalizedUsage(usageAccumulator);
+          if (usage && lastTurnTotal && lastTurnTotal > 0) {
+            usage.total = lastTurnTotal;
+          }
          // Extract the last individual API call's usage for context-window
          // utilization display. The accumulated `usage` sums input tokens
          // across all calls (tool-use loops, compaction retries), which
--- a/src/agents/pi-embedded-runner/usage-reporting.test.ts
+++ b/src/agents/pi-embedded-runner/usage-reporting.test.ts
@@ -0,0 +1,117 @@
+import "./run.overflow-compaction.mocks.shared.js";
+import { beforeEach, describe, expect, it, vi } from "vitest";
+
+vi.mock("../auth-profiles.js", () => ({
+  isProfileInCooldown: vi.fn(() => false),
+  markAuthProfileFailure: vi.fn(async () => {}),
+  markAuthProfileGood: vi.fn(async () => {}),
+  markAuthProfileUsed: vi.fn(async () => {}),
+}));
+
+vi.mock("../usage.js", () => ({
+  normalizeUsage: vi.fn((usage?: unknown) =>
+    usage && typeof usage === "object" ? usage : undefined,
+  ),
+  derivePromptTokens: vi.fn(
+    (usage?: { input?: number; cacheRead?: number; cacheWrite?: number }) => {
+      if (!usage) {
+        return undefined;
+      }
+      const input = usage.input ?? 0;
+      const cacheRead = usage.cacheRead ?? 0;
+      const cacheWrite = usage.cacheWrite ?? 0;
+      const sum = input + cacheRead + cacheWrite;
+      return sum > 0 ? sum : undefined;
+    },
+  ),
+}));
+
+vi.mock("../workspace-run.js", () => ({
+  resolveRunWorkspaceDir: vi.fn((params: { workspaceDir: string }) => ({
+    workspaceDir: params.workspaceDir,
+    usedFallback: false,
+    fallbackReason: undefined,
+    agentId: "main",
+  })),
+  redactRunIdentifier: vi.fn((value?: string) => value ?? ""),
+}));
+
+vi.mock("../pi-embedded-helpers.js", () => ({
+  formatBillingErrorMessage: vi.fn(() => ""),
+  classifyFailoverReason: vi.fn(() => null),
+  formatAssistantErrorText: vi.fn(() => ""),
+  isAuthAssistantError: vi.fn(() => false),
+  isBillingAssistantError: vi.fn(() => false),
+  isCompactionFailureError: vi.fn(() => false),
+  isLikelyContextOverflowError: vi.fn((msg?: string) => {
+    const lower = (msg ?? "").toLowerCase();
+    return lower.includes("request_too_large") || lower.includes("context window exceeded");
+  }),
+  isFailoverAssistantError: vi.fn(() => false),
+  isFailoverErrorMessage: vi.fn(() => false),
+  parseImageSizeError: vi.fn(() => null),
+  parseImageDimensionError: vi.fn(() => null),
+  isRateLimitAssistantError: vi.fn(() => false),
+  isTimeoutErrorMessage: vi.fn(() => false),
+  pickFallbackThinkingLevel: vi.fn(() => null),
+}));
+
+import { runEmbeddedPiAgent } from "./run.js";
+import { runEmbeddedAttempt } from "./run/attempt.js";
+
+const mockedRunEmbeddedAttempt = vi.mocked(runEmbeddedAttempt);
+
+describe("runEmbeddedPiAgent usage reporting", () => {
+  beforeEach(() => {
+    vi.clearAllMocks();
+  });
+
+  it("reports total usage from the last turn instead of accumulated total", async () => {
+    // Simulate the result of a multi-turn run (two model calls).
+    // Turn 1: Input 100, Output 50, Total 150.
+    // Turn 2: Input 150, Output 50, Total 200.
+
+    // attemptUsage is the sum over both turns (runEmbeddedAttempt returns accumulated usage):
+    // Input: 100 + 150 = 250
+    // Output: 50 + 50 = 100
+    // Total: 150 + 200 = 350
+
+    // lastAssistant.usage is the usage of Turn 2 only:
+    // Input 150, Output 50, Total 200.
+
+    // We expect result.meta.agentMeta.usage.total to be 200 (last turn total).
+    // Without the fix it would be 350 (accumulated total).
+
+    mockedRunEmbeddedAttempt.mockResolvedValueOnce({
+      aborted: false,
+      promptError: null,
+      timedOut: false,
+      sessionIdUsed: "test-session",
+      assistantTexts: ["Response 1", "Response 2"],
+      lastAssistant: {
+        usage: { input: 150, output: 50, total: 200 },
+        stopReason: "end_turn",
+      },
+      attemptUsage: { input: 250, output: 100, total: 350 },
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    } as any);
+
+    const result = await runEmbeddedPiAgent({
+      sessionId: "test-session",
+      sessionKey: "test-key",
+      sessionFile: "/tmp/session.json",
+      workspaceDir: "/tmp/workspace",
+      prompt: "hello",
+      timeoutMs: 30000,
+      runId: "run-1",
+    });
+
+    // Check usage in meta
+    const usage = result.meta.agentMeta.usage;
+    expect(usage).toBeDefined();
+
+    // total must equal the last turn's total (200);
+    // the accumulated total across both turns would be 350.
+    expect(usage?.total).toBe(200);
+  });
+});