fix(usage): parse Kimi K2 cached_tokens from prompt_tokens_details

Kimi K2 models use automatic prefix caching and return cache stats in
a nested field: usage.prompt_tokens_details.cached_tokens

This fixes issue #7073 where cacheRead was showing 0 for K2.5 users.

Also adds support for the top-level cached_tokens field returned by the moonshot-v1 explicit caching API.

Closes #7073
This commit is contained in:
Elarwei
2026-02-24 21:21:14 +08:00
committed by Peter Steinberger
parent b511a38fc8
commit aa2826b5b1
2 changed files with 45 additions and 1 deletions

View File

@@ -54,6 +54,40 @@ describe("normalizeUsage", () => {
});
});
it("handles Moonshot/Kimi cached_tokens field", () => {
  // Moonshot v1 reports cache hits through a top-level cached_tokens field
  // rather than cache_read_input_tokens.
  const rawUsage = {
    prompt_tokens: 30,
    completion_tokens: 9,
    total_tokens: 39,
    cached_tokens: 19,
  };

  // cached_tokens is expected to surface as cacheRead; no cache-write figure
  // is supplied, so cacheWrite stays undefined.
  const expected = {
    input: 30,
    output: 9,
    cacheRead: 19,
    cacheWrite: undefined,
    total: 39,
  };

  expect(normalizeUsage(rawUsage)).toEqual(expected);
});
it("handles Kimi K2 prompt_tokens_details.cached_tokens field", () => {
  // Kimi K2 applies prefix caching automatically and nests the cached token
  // count under prompt_tokens_details instead of reporting it at the top level.
  const rawUsage = {
    prompt_tokens: 1113,
    completion_tokens: 5,
    total_tokens: 1118,
    prompt_tokens_details: { cached_tokens: 1024 },
  };

  // The nested cached_tokens value is expected to surface as cacheRead.
  const expected = {
    input: 1113,
    output: 5,
    cacheRead: 1024,
    cacheWrite: undefined,
    total: 1118,
  };

  expect(normalizeUsage(rawUsage)).toEqual(expected);
});
it("returns undefined when no valid fields are provided", () => {
const usage = normalizeUsage(null);
expect(usage).toBeUndefined();