fix(usage): parse Kimi K2 cached_tokens from prompt_tokens_details

Kimi K2 models use automatic prefix caching and return cache stats in
a nested field: usage.prompt_tokens_details.cached_tokens

This fixes issue #7073 where cacheRead was showing 0 for K2.5 users.

Also adds cached_tokens (top-level) for moonshot-v1 explicit caching API.

Closes #7073
This commit is contained in:
Elarwei
2026-02-24 21:21:14 +08:00
committed by Peter Steinberger
parent b511a38fc8
commit aa2826b5b1
2 changed files with 45 additions and 1 deletions

View File

@@ -15,6 +15,10 @@ export type UsageLike = {
   completion_tokens?: number;
   cache_read_input_tokens?: number;
   cache_creation_input_tokens?: number;
+  // Moonshot/Kimi uses cached_tokens for cache read count (explicit caching API).
+  cached_tokens?: number;
+  // Kimi K2 uses prompt_tokens_details.cached_tokens for automatic prefix caching.
+  prompt_tokens_details?: { cached_tokens?: number };
   // Some agents/logs emit alternate naming.
   totalTokens?: number;
   total_tokens?: number;
@@ -64,7 +68,13 @@ export function normalizeUsage(raw?: UsageLike | null): NormalizedUsage | undefined {
       raw.completionTokens ??
       raw.completion_tokens,
   );
-  const cacheRead = asFiniteNumber(raw.cacheRead ?? raw.cache_read ?? raw.cache_read_input_tokens);
+  const cacheRead = asFiniteNumber(
+    raw.cacheRead ??
+      raw.cache_read ??
+      raw.cache_read_input_tokens ??
+      raw.cached_tokens ??
+      raw.prompt_tokens_details?.cached_tokens,
+  );
   const cacheWrite = asFiniteNumber(
     raw.cacheWrite ?? raw.cache_write ?? raw.cache_creation_input_tokens,
   );