feat: 添加 Claude 计费特性和请求元信息支持,优化长上下文计费逻辑

This commit is contained in:
sczheng189
2026-02-14 21:32:09 +08:00
parent d40c891e4f
commit 3b25cf01ad
4 changed files with 387 additions and 61 deletions

View File

@@ -2,8 +2,11 @@
* PricingService 长上下文200K+)分层计费测试
*
* 测试当 [1m] 模型总输入超过 200K tokens 时的分层计费逻辑:
* - 使用 model_pricing.json 中的 *_above_200k_tokens 字段
* - 所有 token 类型input/output/cache_create/cache_read都切换到高档价格
* - 输入/输出优先使用 model_pricing.json 中的 *_above_200k_tokens 字段
* - Claude 缓存价格按输入价格倍率推导:
* - 5m cache write = input * 1.25
* - 1h cache write = input * 2
* - cache read = input * 0.1
*/
// Mock logger to avoid console output during tests
@@ -44,6 +47,7 @@ describe('PricingService - 200K+ Long Context Pricing', () => {
output_cost_per_token: 0.000015, // $15/MTok
cache_creation_input_token_cost: 0.00000375, // $3.75/MTok
cache_read_input_token_cost: 0.0000003, // $0.30/MTok
max_input_tokens: 1000000,
// 200K+ 高档价格
input_cost_per_token_above_200k_tokens: 0.000006, // $6/MTok (2x)
output_cost_per_token_above_200k_tokens: 0.0000225, // $22.50/MTok (1.5x)
@@ -59,6 +63,15 @@ describe('PricingService - 200K+ Long Context Pricing', () => {
output_cost_per_token: 0.00000125,
cache_creation_input_token_cost: 0.0000003,
cache_read_input_token_cost: 0.00000003
},
// Fast Mode 适配测试模型Opus 4.6
'claude-opus-4-6': {
input_cost_per_token: 0.000005,
output_cost_per_token: 0.000025,
cache_creation_input_token_cost: 0.00000625,
cache_read_input_token_cost: 0.0000005,
input_cost_per_token_above_200k_tokens: 0.00001,
output_cost_per_token_above_200k_tokens: 0.0000375
}
}
@@ -152,7 +165,7 @@ describe('PricingService - 200K+ Long Context Pricing', () => {
expect(result.pricing.input).toBe(0.000006)
expect(result.pricing.output).toBe(0.0000225)
expect(result.pricing.cacheCreate).toBe(0.0000075)
expect(result.pricing.cacheRead).toBe(0.0000006)
expect(result.pricing.cacheRead).toBeCloseTo(0.0000006, 12)
})
it('仅 cache_creation + cache_read 超过 200K 也应触发', () => {
@@ -199,13 +212,13 @@ describe('PricingService - 200K+ Long Context Pricing', () => {
const result = pricingService.calculateCost(usage, 'claude-sonnet-4-20250514[1m]')
// cache_read_input_token_cost_above_200k_tokens = 0.0000006
expect(result.pricing.cacheRead).toBe(0.0000006)
expect(result.pricing.cacheRead).toBeCloseTo(0.0000006, 12)
expect(result.cacheReadCost).toBeCloseTo(60000 * 0.0000006, 10)
})
})
describe('详细缓存创建数据ephemeral_5m / ephemeral_1h', () => {
it('200K+ 时 ephemeral_1h 应使用 cache_creation_input_token_cost_above_1hr_above_200k_tokens', () => {
it('200K+ 时 Claude ephemeral_1h 应按 input * 2 计算', () => {
const usage = {
input_tokens: 200001,
output_tokens: 1000,
@@ -222,26 +235,88 @@ describe('PricingService - 200K+ Long Context Pricing', () => {
expect(result.isLongContextRequest).toBe(true)
// ephemeral_5m: 5000 * 0.0000075 = 0.0000375
expect(result.ephemeral5mCost).toBeCloseTo(5000 * 0.0000075, 10)
// ephemeral_1h: 5000 * 0.000015 (above_1hr_above_200k)
expect(result.ephemeral1hCost).toBeCloseTo(5000 * 0.000015, 10)
// 200K+ input = 0.000006, ephemeral_1h = input * 2 = 0.000012
expect(result.pricing.ephemeral1h).toBeCloseTo(0.000012, 10)
expect(result.ephemeral1hCost).toBeCloseTo(5000 * 0.000012, 10)
})
})
describe('回退测试', () => {
it('模型无 above_200k 字段时回退到基础价格', () => {
it('Claude 模型无 above_200k 字段时200K+ 输入价格按 2 倍并推导缓存价格', () => {
const usage = {
input_tokens: 250000,
output_tokens: 1000,
cache_creation_input_tokens: 0,
cache_read_input_tokens: 0
cache_creation_input_tokens: 10000,
cache_read_input_tokens: 10000
}
const result = pricingService.calculateCost(usage, 'claude-3-haiku-20240307[1m]')
// 模型没有 above_200k 字段,使用基础价格
expect(result.isLongContextRequest).toBe(true) // 超过 200K
expect(result.pricing.input).toBe(0.00000025) // 基础价格(没有 above_200k 字段)
expect(result.pricing.cacheCreate).toBe(0.0000003) // 基础价格
// 模型没有 above_200k 字段,Claude 200K+ 输入按 2 倍兜底
expect(result.isLongContextRequest).toBe(true)
expect(result.pricing.input).toBe(0.0000005) // 0.00000025 * 2
// 缓存价格由输入价格推导
expect(result.pricing.cacheCreate).toBeCloseTo(0.000000625, 12) // input * 1.25
expect(result.pricing.cacheRead).toBeCloseTo(0.00000005, 12) // input * 0.1
})
})
describe('Header 与 Fast Mode 适配', () => {
it('无 [1m] 后缀但带 context-1m beta超过 200K 时应触发长上下文计费', () => {
const usage = {
input_tokens: 210000,
output_tokens: 1000,
cache_creation_input_tokens: 0,
cache_read_input_tokens: 0,
request_anthropic_beta: 'context-1m-2025-08-07'
}
const result = pricingService.calculateCost(usage, 'claude-sonnet-4-20250514')
expect(result.isLongContextRequest).toBe(true)
expect(result.pricing.input).toBe(0.000006)
expect(result.pricing.output).toBe(0.0000225)
})
it('Opus 4.6 在 fast-mode beta + speed=fast 时应用 Fast Mode 6x', () => {
const usage = {
input_tokens: 100000,
output_tokens: 20000,
cache_creation_input_tokens: 10000,
cache_read_input_tokens: 5000,
request_anthropic_beta: 'fast-mode-2026-02-01',
speed: 'fast'
}
const result = pricingService.calculateCost(usage, 'claude-opus-4-6')
// input: 0.000005 * 6 = 0.00003
expect(result.pricing.input).toBeCloseTo(0.00003, 12)
// output: 0.000025 * 6 = 0.00015
expect(result.pricing.output).toBeCloseTo(0.00015, 12)
// cache create/read 由 fast 后 input 推导
expect(result.pricing.cacheCreate).toBeCloseTo(0.0000375, 12) // 0.00003 * 1.25
expect(result.pricing.cacheRead).toBeCloseTo(0.000003, 12) // 0.00003 * 0.1
expect(result.pricing.ephemeral1h).toBeCloseTo(0.00006, 12) // 0.00003 * 2
})
it('Opus 4.6 在 fast-mode + [1m] 且超过 200K 时应叠加计费12x input', () => {
const usage = {
input_tokens: 210000,
output_tokens: 1000,
cache_creation_input_tokens: 10000,
cache_read_input_tokens: 10000,
request_anthropic_beta: 'fast-mode-2026-02-01,context-1m-2025-08-07',
speed: 'fast'
}
const result = pricingService.calculateCost(usage, 'claude-opus-4-6[1m]')
expect(result.isLongContextRequest).toBe(true)
// input: 0.000005 -> long context 0.00001 -> fast 6x => 0.00006 (即标准 12x)
expect(result.pricing.input).toBeCloseTo(0.00006, 12)
// output: 0.000025 -> long context 0.0000375 -> fast 6x => 0.000225 (即标准 9x)
expect(result.pricing.output).toBeCloseTo(0.000225, 12)
})
})
@@ -261,7 +336,7 @@ describe('PricingService - 200K+ Long Context Pricing', () => {
expect(result.pricing.input).toBe(0.000003) // 基础价格
expect(result.pricing.output).toBe(0.000015) // 基础价格
expect(result.pricing.cacheCreate).toBe(0.00000375) // 基础价格
expect(result.pricing.cacheRead).toBe(0.0000003) // 基础价格
expect(result.pricing.cacheRead).toBeCloseTo(0.0000003, 12) // 基础价格
})
it('[1m] 模型未超过 200K 时使用基础价格', () => {