mirror of
https://github.com/Wei-Shaw/claude-relay-service.git
synced 2026-04-25 10:18:39 +00:00
feat: 添加 Claude 计费特性和请求元信息支持,优化长上下文计费逻辑
This commit is contained in:
@@ -2,8 +2,11 @@
|
||||
* PricingService 长上下文(200K+)分层计费测试
|
||||
*
|
||||
* 测试当 [1m] 模型总输入超过 200K tokens 时的分层计费逻辑:
|
||||
* - 使用 model_pricing.json 中的 *_above_200k_tokens 字段
|
||||
* - 所有 token 类型(input/output/cache_create/cache_read)都切换到高档价格
|
||||
* - 输入/输出优先使用 model_pricing.json 中的 *_above_200k_tokens 字段
|
||||
* - Claude 缓存价格按输入价格倍率推导:
|
||||
* - 5m cache write = input * 1.25
|
||||
* - 1h cache write = input * 2
|
||||
* - cache read = input * 0.1
|
||||
*/
|
||||
|
||||
// Mock logger to avoid console output during tests
|
||||
@@ -44,6 +47,7 @@ describe('PricingService - 200K+ Long Context Pricing', () => {
|
||||
output_cost_per_token: 0.000015, // $15/MTok
|
||||
cache_creation_input_token_cost: 0.00000375, // $3.75/MTok
|
||||
cache_read_input_token_cost: 0.0000003, // $0.30/MTok
|
||||
max_input_tokens: 1000000,
|
||||
// 200K+ 高档价格
|
||||
input_cost_per_token_above_200k_tokens: 0.000006, // $6/MTok (2x)
|
||||
output_cost_per_token_above_200k_tokens: 0.0000225, // $22.50/MTok (1.5x)
|
||||
@@ -59,6 +63,15 @@ describe('PricingService - 200K+ Long Context Pricing', () => {
|
||||
output_cost_per_token: 0.00000125,
|
||||
cache_creation_input_token_cost: 0.0000003,
|
||||
cache_read_input_token_cost: 0.00000003
|
||||
},
|
||||
// Fast Mode 适配测试模型(Opus 4.6)
|
||||
'claude-opus-4-6': {
|
||||
input_cost_per_token: 0.000005,
|
||||
output_cost_per_token: 0.000025,
|
||||
cache_creation_input_token_cost: 0.00000625,
|
||||
cache_read_input_token_cost: 0.0000005,
|
||||
input_cost_per_token_above_200k_tokens: 0.00001,
|
||||
output_cost_per_token_above_200k_tokens: 0.0000375
|
||||
}
|
||||
}
|
||||
|
||||
@@ -152,7 +165,7 @@ describe('PricingService - 200K+ Long Context Pricing', () => {
|
||||
expect(result.pricing.input).toBe(0.000006)
|
||||
expect(result.pricing.output).toBe(0.0000225)
|
||||
expect(result.pricing.cacheCreate).toBe(0.0000075)
|
||||
expect(result.pricing.cacheRead).toBe(0.0000006)
|
||||
expect(result.pricing.cacheRead).toBeCloseTo(0.0000006, 12)
|
||||
})
|
||||
|
||||
it('仅 cache_creation + cache_read 超过 200K 也应触发', () => {
|
||||
@@ -199,13 +212,13 @@ describe('PricingService - 200K+ Long Context Pricing', () => {
|
||||
const result = pricingService.calculateCost(usage, 'claude-sonnet-4-20250514[1m]')
|
||||
|
||||
// cache_read_input_token_cost_above_200k_tokens = 0.0000006
|
||||
expect(result.pricing.cacheRead).toBe(0.0000006)
|
||||
expect(result.pricing.cacheRead).toBeCloseTo(0.0000006, 12)
|
||||
expect(result.cacheReadCost).toBeCloseTo(60000 * 0.0000006, 10)
|
||||
})
|
||||
})
|
||||
|
||||
describe('详细缓存创建数据(ephemeral_5m / ephemeral_1h)', () => {
|
||||
it('200K+ 时 ephemeral_1h 应使用 cache_creation_input_token_cost_above_1hr_above_200k_tokens', () => {
|
||||
it('200K+ 时 Claude ephemeral_1h 应按 input * 2 计算', () => {
|
||||
const usage = {
|
||||
input_tokens: 200001,
|
||||
output_tokens: 1000,
|
||||
@@ -222,26 +235,88 @@ describe('PricingService - 200K+ Long Context Pricing', () => {
|
||||
expect(result.isLongContextRequest).toBe(true)
|
||||
// ephemeral_5m: 5000 * 0.0000075 = 0.0000375
|
||||
expect(result.ephemeral5mCost).toBeCloseTo(5000 * 0.0000075, 10)
|
||||
// ephemeral_1h: 5000 * 0.000015 (above_1hr_above_200k)
|
||||
expect(result.ephemeral1hCost).toBeCloseTo(5000 * 0.000015, 10)
|
||||
// 200K+ input = 0.000006, ephemeral_1h = input * 2 = 0.000012
|
||||
expect(result.pricing.ephemeral1h).toBeCloseTo(0.000012, 10)
|
||||
expect(result.ephemeral1hCost).toBeCloseTo(5000 * 0.000012, 10)
|
||||
})
|
||||
})
|
||||
|
||||
describe('回退测试', () => {
|
||||
it('模型无 above_200k 字段时回退到基础价格', () => {
|
||||
it('Claude 模型无 above_200k 字段时,200K+ 输入价格按 2 倍并推导缓存价格', () => {
|
||||
const usage = {
|
||||
input_tokens: 250000,
|
||||
output_tokens: 1000,
|
||||
cache_creation_input_tokens: 0,
|
||||
cache_read_input_tokens: 0
|
||||
cache_creation_input_tokens: 10000,
|
||||
cache_read_input_tokens: 10000
|
||||
}
|
||||
|
||||
const result = pricingService.calculateCost(usage, 'claude-3-haiku-20240307[1m]')
|
||||
|
||||
// 模型没有 above_200k 字段,使用基础价格
|
||||
expect(result.isLongContextRequest).toBe(true) // 超过 200K
|
||||
expect(result.pricing.input).toBe(0.00000025) // 基础价格(没有 above_200k 字段)
|
||||
expect(result.pricing.cacheCreate).toBe(0.0000003) // 基础价格
|
||||
// 模型没有 above_200k 字段,Claude 200K+ 输入按 2 倍兜底
|
||||
expect(result.isLongContextRequest).toBe(true)
|
||||
expect(result.pricing.input).toBe(0.0000005) // 0.00000025 * 2
|
||||
// 缓存价格由输入价格推导
|
||||
expect(result.pricing.cacheCreate).toBeCloseTo(0.000000625, 12) // input * 1.25
|
||||
expect(result.pricing.cacheRead).toBeCloseTo(0.00000005, 12) // input * 0.1
|
||||
})
|
||||
})
|
||||
|
||||
describe('Header 与 Fast Mode 适配', () => {
|
||||
it('无 [1m] 后缀但带 context-1m beta,超过 200K 时应触发长上下文计费', () => {
|
||||
const usage = {
|
||||
input_tokens: 210000,
|
||||
output_tokens: 1000,
|
||||
cache_creation_input_tokens: 0,
|
||||
cache_read_input_tokens: 0,
|
||||
request_anthropic_beta: 'context-1m-2025-08-07'
|
||||
}
|
||||
|
||||
const result = pricingService.calculateCost(usage, 'claude-sonnet-4-20250514')
|
||||
|
||||
expect(result.isLongContextRequest).toBe(true)
|
||||
expect(result.pricing.input).toBe(0.000006)
|
||||
expect(result.pricing.output).toBe(0.0000225)
|
||||
})
|
||||
|
||||
it('Opus 4.6 在 fast-mode beta + speed=fast 时应用 Fast Mode 6x', () => {
|
||||
const usage = {
|
||||
input_tokens: 100000,
|
||||
output_tokens: 20000,
|
||||
cache_creation_input_tokens: 10000,
|
||||
cache_read_input_tokens: 5000,
|
||||
request_anthropic_beta: 'fast-mode-2026-02-01',
|
||||
speed: 'fast'
|
||||
}
|
||||
|
||||
const result = pricingService.calculateCost(usage, 'claude-opus-4-6')
|
||||
|
||||
// input: 0.000005 * 6 = 0.00003
|
||||
expect(result.pricing.input).toBeCloseTo(0.00003, 12)
|
||||
// output: 0.000025 * 6 = 0.00015
|
||||
expect(result.pricing.output).toBeCloseTo(0.00015, 12)
|
||||
// cache create/read 由 fast 后 input 推导
|
||||
expect(result.pricing.cacheCreate).toBeCloseTo(0.0000375, 12) // 0.00003 * 1.25
|
||||
expect(result.pricing.cacheRead).toBeCloseTo(0.000003, 12) // 0.00003 * 0.1
|
||||
expect(result.pricing.ephemeral1h).toBeCloseTo(0.00006, 12) // 0.00003 * 2
|
||||
})
|
||||
|
||||
it('Opus 4.6 在 fast-mode + [1m] 且超过 200K 时应叠加计费(12x input)', () => {
|
||||
const usage = {
|
||||
input_tokens: 210000,
|
||||
output_tokens: 1000,
|
||||
cache_creation_input_tokens: 10000,
|
||||
cache_read_input_tokens: 10000,
|
||||
request_anthropic_beta: 'fast-mode-2026-02-01,context-1m-2025-08-07',
|
||||
speed: 'fast'
|
||||
}
|
||||
|
||||
const result = pricingService.calculateCost(usage, 'claude-opus-4-6[1m]')
|
||||
|
||||
expect(result.isLongContextRequest).toBe(true)
|
||||
// input: 0.000005 -> long context 0.00001 -> fast 6x => 0.00006 (即标准 12x)
|
||||
expect(result.pricing.input).toBeCloseTo(0.00006, 12)
|
||||
// output: 0.000025 -> long context 0.0000375 -> fast 6x => 0.000225 (即标准 9x)
|
||||
expect(result.pricing.output).toBeCloseTo(0.000225, 12)
|
||||
})
|
||||
})
|
||||
|
||||
@@ -261,7 +336,7 @@ describe('PricingService - 200K+ Long Context Pricing', () => {
|
||||
expect(result.pricing.input).toBe(0.000003) // 基础价格
|
||||
expect(result.pricing.output).toBe(0.000015) // 基础价格
|
||||
expect(result.pricing.cacheCreate).toBe(0.00000375) // 基础价格
|
||||
expect(result.pricing.cacheRead).toBe(0.0000003) // 基础价格
|
||||
expect(result.pricing.cacheRead).toBeCloseTo(0.0000003, 12) // 基础价格
|
||||
})
|
||||
|
||||
it('[1m] 模型未超过 200K 时使用基础价格', () => {
|
||||
|
||||
Reference in New Issue
Block a user