mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-08 19:18:26 +00:00
fix(agent): isolate last-turn total in token usage reporting (#17016)
recordAssistantUsage accumulated cacheRead across the entire multi-turn run, and totalTokens was clamped to contextTokens. As a result, session_status reported 100% context usage regardless of the actual load.

Changes:
- run.ts: capture lastTurnTotal from the most recent model call and inject it into the normalized usage before it reaches agentMeta.
- usage-reporting.test.ts: verify that usage.total reflects the current turn, not the accumulated total.

Fixes #17016
This commit is contained in:
committed by
Peter Steinberger
parent
d6acd71576
commit
a62ff19a66
@@ -528,6 +528,7 @@ export async function runEmbeddedPiAgent(
|
||||
// Keep prompt size from the latest model call so session totalTokens
|
||||
// reflects current context usage, not accumulated tool-loop usage.
|
||||
lastRunPromptUsage = lastAssistantUsage ?? attemptUsage;
|
||||
const lastTurnTotal = lastAssistantUsage?.total ?? attemptUsage?.total;
|
||||
const attemptCompactionCount = Math.max(0, attempt.compactionCount ?? 0);
|
||||
autoCompactionCount += attemptCompactionCount;
|
||||
const formattedAssistantErrorText = lastAssistant
|
||||
@@ -927,6 +928,9 @@ export async function runEmbeddedPiAgent(
|
||||
}
|
||||
|
||||
const usage = toNormalizedUsage(usageAccumulator);
|
||||
if (usage && lastTurnTotal && lastTurnTotal > 0) {
|
||||
usage.total = lastTurnTotal;
|
||||
}
|
||||
// Extract the last individual API call's usage for context-window
|
||||
// utilization display. The accumulated `usage` sums input tokens
|
||||
// across all calls (tool-use loops, compaction retries), which
|
||||
|
||||
117
src/agents/pi-embedded-runner/usage-reporting.test.ts
Normal file
117
src/agents/pi-embedded-runner/usage-reporting.test.ts
Normal file
@@ -0,0 +1,117 @@
|
||||
import "./run.overflow-compaction.mocks.shared.js";
|
||||
import { beforeEach, describe, expect, it, vi } from "vitest";
|
||||
|
||||
// Stub the auth-profile module: no profile is ever reported as being in
// cooldown, and the three bookkeeping calls resolve without doing anything.
vi.mock("../auth-profiles.js", () => {
  const isProfileInCooldown = vi.fn(() => false);
  const markAuthProfileFailure = vi.fn(async () => {});
  const markAuthProfileGood = vi.fn(async () => {});
  const markAuthProfileUsed = vi.fn(async () => {});

  return {
    isProfileInCooldown,
    markAuthProfileFailure,
    markAuthProfileGood,
    markAuthProfileUsed,
  };
});
|
||||
|
||||
// Stub the usage helpers with minimal stand-ins:
// - normalizeUsage passes object values through untouched and maps anything
//   else to undefined.
// - derivePromptTokens adds input + cacheRead + cacheWrite (missing fields
//   count as 0) and yields undefined when there is no usage or the sum is
//   not positive.
vi.mock("../usage.js", () => {
  type PromptCounts = { input?: number; cacheRead?: number; cacheWrite?: number };

  const normalizeUsage = vi.fn((usage?: unknown) => {
    if (usage && typeof usage === "object") {
      return usage;
    }
    return undefined;
  });

  const derivePromptTokens = vi.fn((usage?: PromptCounts) => {
    if (!usage) {
      return undefined;
    }
    const promptTotal = (usage.input ?? 0) + (usage.cacheRead ?? 0) + (usage.cacheWrite ?? 0);
    return promptTotal > 0 ? promptTotal : undefined;
  });

  return { normalizeUsage, derivePromptTokens };
});
|
||||
|
||||
// Stub workspace resolution: echo the requested workspaceDir back with no
// fallback and a fixed "main" agent id; identifier redaction returns the
// value unchanged, or an empty string when none is given.
vi.mock("../workspace-run.js", () => {
  const resolveRunWorkspaceDir = vi.fn(({ workspaceDir }: { workspaceDir: string }) => ({
    workspaceDir,
    usedFallback: false,
    fallbackReason: undefined,
    agentId: "main",
  }));

  const redactRunIdentifier = vi.fn((identifier?: string) => identifier ?? "");

  return { resolveRunWorkspaceDir, redactRunIdentifier };
});
|
||||
|
||||
// Replace the pi-embedded helpers with inert stubs: classifiers return
// false, parsers/pickers return null, and formatters return an empty string.
// The only stub with logic is isLikelyContextOverflowError, which does a
// case-insensitive substring match against two overflow markers.
vi.mock("../pi-embedded-helpers.js", () => {
  // Each call builds a fresh mock so call tracking stays per-export.
  const neverMatches = () => vi.fn(() => false);
  const yieldsNull = () => vi.fn(() => null);
  const yieldsEmptyText = () => vi.fn(() => "");

  const overflowMarkers = ["request_too_large", "context window exceeded"];

  return {
    formatBillingErrorMessage: yieldsEmptyText(),
    classifyFailoverReason: yieldsNull(),
    formatAssistantErrorText: yieldsEmptyText(),
    isAuthAssistantError: neverMatches(),
    isBillingAssistantError: neverMatches(),
    isCompactionFailureError: neverMatches(),
    isLikelyContextOverflowError: vi.fn((msg?: string) => {
      const haystack = (msg ?? "").toLowerCase();
      return overflowMarkers.some((marker) => haystack.includes(marker));
    }),
    isFailoverAssistantError: neverMatches(),
    isFailoverErrorMessage: neverMatches(),
    parseImageSizeError: yieldsNull(),
    parseImageDimensionError: yieldsNull(),
    isRateLimitAssistantError: neverMatches(),
    isTimeoutErrorMessage: neverMatches(),
    pickFallbackThinkingLevel: yieldsNull(),
  };
});
|
||||
|
||||
import { runEmbeddedPiAgent } from "./run.js";
|
||||
import { runEmbeddedAttempt } from "./run/attempt.js";
|
||||
|
||||
const mockedRunEmbeddedAttempt = vi.mocked(runEmbeddedAttempt);

describe("runEmbeddedPiAgent usage reporting", () => {
  beforeEach(() => {
    vi.clearAllMocks();
  });

  it("reports total usage from the last turn instead of accumulated total", async () => {
    // Scenario: a single attempt that made two model calls.
    //   Turn 1: input 100, output 50 -> total 150
    //   Turn 2: input 150, output 50 -> total 200
    //
    // runEmbeddedAttempt hands back the accumulated figures as attemptUsage
    // (input 250, output 100, total 350), while lastAssistant.usage holds
    // only Turn 2.
    //
    // result.meta.agentMeta.usage.total should be the last turn's 200; with
    // the bug it comes out as the accumulated 350.
    const lastTurnUsage = { input: 150, output: 50, total: 200 };
    const accumulatedUsage = { input: 250, output: 100, total: 350 };

    const attemptResult = {
      aborted: false,
      promptError: null,
      timedOut: false,
      sessionIdUsed: "test-session",
      assistantTexts: ["Response 1", "Response 2"],
      lastAssistant: {
        usage: lastTurnUsage,
        stopReason: "end_turn",
      },
      attemptUsage: accumulatedUsage,
    };
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    mockedRunEmbeddedAttempt.mockResolvedValueOnce(attemptResult as any);

    const runResult = await runEmbeddedPiAgent({
      sessionId: "test-session",
      sessionKey: "test-key",
      sessionFile: "/tmp/session.json",
      workspaceDir: "/tmp/workspace",
      prompt: "hello",
      timeoutMs: 30000,
      runId: "run-1",
    });

    // Usage must be present in the agent metadata...
    const reportedUsage = runResult.meta.agentMeta.usage;
    expect(reportedUsage).toBeDefined();

    // ...and its total must be the last turn's 200, not the accumulated 350.
    expect(reportedUsage?.total).toBe(200);
  });
});
|
||||
Reference in New Issue
Block a user