feat: 添加 Claude 计费特性和请求元信息支持，优化长上下文计费逻辑

2026-04-25 10:18:39 +00:00 · 2026-02-14 21:32:09 +08:00
parent d40c891e4f
commit 3b25cf01ad
4 changed files with 387 additions and 61 deletions
--- a/tests/pricingService.test.js
+++ b/tests/pricingService.test.js
@@ -2,8 +2,11 @@
 * PricingService 长上下文（200K+）分层计费测试
 *
 * 测试当 [1m] 模型总输入超过 200K tokens 时的分层计费逻辑：
- * - 使用 model_pricing.json 中的 *_above_200k_tokens 字段
- * - 所有 token 类型（input/output/cache_create/cache_read）都切换到高档价格
+ * - 输入/输出优先使用 model_pricing.json 中的 *_above_200k_tokens 字段
+ * - Claude 缓存价格按输入价格倍率推导：
+ *   - 5m cache write = input * 1.25
+ *   - 1h cache write = input * 2
+ *   - cache read = input * 0.1
 */

 // Mock logger to avoid console output during tests
@@ -44,6 +47,7 @@ describe('PricingService - 200K+ Long Context Pricing', () => {
      output_cost_per_token: 0.000015, // $15/MTok
      cache_creation_input_token_cost: 0.00000375, // $3.75/MTok
      cache_read_input_token_cost: 0.0000003, // $0.30/MTok
+      max_input_tokens: 1000000,
      // 200K+ 高档价格
      input_cost_per_token_above_200k_tokens: 0.000006, // $6/MTok (2x)
      output_cost_per_token_above_200k_tokens: 0.0000225, // $22.50/MTok (1.5x)
@@ -59,6 +63,15 @@ describe('PricingService - 200K+ Long Context Pricing', () => {
      output_cost_per_token: 0.00000125,
      cache_creation_input_token_cost: 0.0000003,
      cache_read_input_token_cost: 0.00000003
+    },
+    // Fast Mode 适配测试模型（Opus 4.6）
+    'claude-opus-4-6': {
+      input_cost_per_token: 0.000005,
+      output_cost_per_token: 0.000025,
+      cache_creation_input_token_cost: 0.00000625,
+      cache_read_input_token_cost: 0.0000005,
+      input_cost_per_token_above_200k_tokens: 0.00001,
+      output_cost_per_token_above_200k_tokens: 0.0000375
    }
  }

@@ -152,7 +165,7 @@ describe('PricingService - 200K+ Long Context Pricing', () => {
      expect(result.pricing.input).toBe(0.000006)
      expect(result.pricing.output).toBe(0.0000225)
      expect(result.pricing.cacheCreate).toBe(0.0000075)
-      expect(result.pricing.cacheRead).toBe(0.0000006)
+      expect(result.pricing.cacheRead).toBeCloseTo(0.0000006, 12)
    })

    it('仅 cache_creation + cache_read 超过 200K 也应触发', () => {
@@ -199,13 +212,13 @@ describe('PricingService - 200K+ Long Context Pricing', () => {
      const result = pricingService.calculateCost(usage, 'claude-sonnet-4-20250514[1m]')

      // cache_read_input_token_cost_above_200k_tokens = 0.0000006
-      expect(result.pricing.cacheRead).toBe(0.0000006)
+      expect(result.pricing.cacheRead).toBeCloseTo(0.0000006, 12)
      expect(result.cacheReadCost).toBeCloseTo(60000 * 0.0000006, 10)
    })
  })

  describe('详细缓存创建数据（ephemeral_5m / ephemeral_1h）', () => {
-    it('200K+ 时 ephemeral_1h 应使用 cache_creation_input_token_cost_above_1hr_above_200k_tokens', () => {
+    it('200K+ 时 Claude ephemeral_1h 应按 input * 2 计算', () => {
      const usage = {
        input_tokens: 200001,
        output_tokens: 1000,
@@ -222,26 +235,88 @@ describe('PricingService - 200K+ Long Context Pricing', () => {
      expect(result.isLongContextRequest).toBe(true)
      // ephemeral_5m: 5000 * 0.0000075 = 0.0000375
      expect(result.ephemeral5mCost).toBeCloseTo(5000 * 0.0000075, 10)
-      // ephemeral_1h: 5000 * 0.000015 (above_1hr_above_200k)
-      expect(result.ephemeral1hCost).toBeCloseTo(5000 * 0.000015, 10)
+      // 200K+ input = 0.000006, ephemeral_1h = input * 2 = 0.000012
+      expect(result.pricing.ephemeral1h).toBeCloseTo(0.000012, 10)
+      expect(result.ephemeral1hCost).toBeCloseTo(5000 * 0.000012, 10)
    })
  })

  describe('回退测试', () => {
-    it('模型无 above_200k 字段时回退到基础价格', () => {
+    it('Claude 模型无 above_200k 字段时，200K+ 输入价格按 2 倍并推导缓存价格', () => {
      const usage = {
        input_tokens: 250000,
        output_tokens: 1000,
-        cache_creation_input_tokens: 0,
-        cache_read_input_tokens: 0
+        cache_creation_input_tokens: 10000,
+        cache_read_input_tokens: 10000
      }

      const result = pricingService.calculateCost(usage, 'claude-3-haiku-20240307[1m]')

-      // 模型没有 above_200k 字段，使用基础价格
-      expect(result.isLongContextRequest).toBe(true) // 超过 200K
-      expect(result.pricing.input).toBe(0.00000025) // 基础价格（没有 above_200k 字段）
-      expect(result.pricing.cacheCreate).toBe(0.0000003) // 基础价格
+      // 模型没有 above_200k 字段，Claude 200K+ 输入按 2 倍兜底
+      expect(result.isLongContextRequest).toBe(true)
+      expect(result.pricing.input).toBe(0.0000005) // 0.00000025 * 2
+      // 缓存价格由输入价格推导
+      expect(result.pricing.cacheCreate).toBeCloseTo(0.000000625, 12) // input * 1.25
+      expect(result.pricing.cacheRead).toBeCloseTo(0.00000005, 12) // input * 0.1
+    })
+  })
+
+  describe('Header 与 Fast Mode 适配', () => {
+    it('无 [1m] 后缀但带 context-1m beta，超过 200K 时应触发长上下文计费', () => {
+      const usage = {
+        input_tokens: 210000,
+        output_tokens: 1000,
+        cache_creation_input_tokens: 0,
+        cache_read_input_tokens: 0,
+        request_anthropic_beta: 'context-1m-2025-08-07'
+      }
+
+      const result = pricingService.calculateCost(usage, 'claude-sonnet-4-20250514')
+
+      expect(result.isLongContextRequest).toBe(true)
+      expect(result.pricing.input).toBe(0.000006)
+      expect(result.pricing.output).toBe(0.0000225)
+    })
+
+    it('Opus 4.6 在 fast-mode beta + speed=fast 时应用 Fast Mode 6x', () => {
+      const usage = {
+        input_tokens: 100000,
+        output_tokens: 20000,
+        cache_creation_input_tokens: 10000,
+        cache_read_input_tokens: 5000,
+        request_anthropic_beta: 'fast-mode-2026-02-01',
+        speed: 'fast'
+      }
+
+      const result = pricingService.calculateCost(usage, 'claude-opus-4-6')
+
+      // input: 0.000005 * 6 = 0.00003
+      expect(result.pricing.input).toBeCloseTo(0.00003, 12)
+      // output: 0.000025 * 6 = 0.00015
+      expect(result.pricing.output).toBeCloseTo(0.00015, 12)
+      // cache create/read 由 fast 后 input 推导
+      expect(result.pricing.cacheCreate).toBeCloseTo(0.0000375, 12) // 0.00003 * 1.25
+      expect(result.pricing.cacheRead).toBeCloseTo(0.000003, 12) // 0.00003 * 0.1
+      expect(result.pricing.ephemeral1h).toBeCloseTo(0.00006, 12) // 0.00003 * 2
+    })
+
+    it('Opus 4.6 在 fast-mode + [1m] 且超过 200K 时应叠加计费（12x input）', () => {
+      const usage = {
+        input_tokens: 210000,
+        output_tokens: 1000,
+        cache_creation_input_tokens: 10000,
+        cache_read_input_tokens: 10000,
+        request_anthropic_beta: 'fast-mode-2026-02-01,context-1m-2025-08-07',
+        speed: 'fast'
+      }
+
+      const result = pricingService.calculateCost(usage, 'claude-opus-4-6[1m]')
+
+      expect(result.isLongContextRequest).toBe(true)
+      // input: 0.000005 -> long context 0.00001 -> fast 6x => 0.00006 (即标准 12x)
+      expect(result.pricing.input).toBeCloseTo(0.00006, 12)
+      // output: 0.000025 -> long context 0.0000375 -> fast 6x => 0.000225 (即标准 9x)
+      expect(result.pricing.output).toBeCloseTo(0.000225, 12)
    })
  })

@@ -261,7 +336,7 @@ describe('PricingService - 200K+ Long Context Pricing', () => {
      expect(result.pricing.input).toBe(0.000003) // 基础价格
      expect(result.pricing.output).toBe(0.000015) // 基础价格
      expect(result.pricing.cacheCreate).toBe(0.00000375) // 基础价格
-      expect(result.pricing.cacheRead).toBe(0.0000003) // 基础价格
+      expect(result.pricing.cacheRead).toBeCloseTo(0.0000003, 12) // 基础价格
    })

    it('[1m] 模型未超过 200K 时使用基础价格', () => {