feat: 添加 Claude 计费特性和请求元信息支持，优化长上下文计费逻辑

2026-05-27 03:43:33 +00:00 · 2026-02-14 21:32:09 +08:00
parent d40c891e4f
commit 3b25cf01ad
4 changed files with 387 additions and 61 deletions
--- a/src/routes/api.js
+++ b/src/routes/api.js
@@ -468,6 +468,17 @@ async function handleMessagesRequest(req, res) {
                cache_creation_input_tokens: cacheCreateTokens,
                cache_read_input_tokens: cacheReadTokens
              }
+              const requestBetaHeader =
+                _headers['anthropic-beta'] || _headers['Anthropic-Beta'] || _headers['ANTHROPIC-BETA']
+              if (requestBetaHeader) {
+                usageObject.request_anthropic_beta = requestBetaHeader
+              }
+              if (typeof _requestBody?.speed === 'string' && _requestBody.speed.trim()) {
+                usageObject.request_speed = _requestBody.speed.trim().toLowerCase()
+              }
+              if (typeof usageData.speed === 'string' && usageData.speed.trim()) {
+                usageObject.speed = usageData.speed.trim().toLowerCase()
+              }

              // 如果有详细的缓存创建数据，添加到 usage 对象中
              if (ephemeral5mTokens > 0 || ephemeral1hTokens > 0) {
@@ -562,6 +573,22 @@ async function handleMessagesRequest(req, res) {
                cache_creation_input_tokens: cacheCreateTokens,
                cache_read_input_tokens: cacheReadTokens
              }
+              const requestBetaHeader =
+                _headersConsole['anthropic-beta'] ||
+                _headersConsole['Anthropic-Beta'] ||
+                _headersConsole['ANTHROPIC-BETA']
+              if (requestBetaHeader) {
+                usageObject.request_anthropic_beta = requestBetaHeader
+              }
+              if (
+                typeof _requestBodyConsole?.speed === 'string' &&
+                _requestBodyConsole.speed.trim()
+              ) {
+                usageObject.request_speed = _requestBodyConsole.speed.trim().toLowerCase()
+              }
+              if (typeof usageData.speed === 'string' && usageData.speed.trim()) {
+                usageObject.speed = usageData.speed.trim().toLowerCase()
+              }

              // 如果有详细的缓存创建数据，添加到 usage 对象中
              if (ephemeral5mTokens > 0 || ephemeral1hTokens > 0) {
@@ -728,6 +755,19 @@ async function handleMessagesRequest(req, res) {
                cache_creation_input_tokens: cacheCreateTokens,
                cache_read_input_tokens: cacheReadTokens
              }
+              const requestBetaHeader =
+                _headersCcr['anthropic-beta'] ||
+                _headersCcr['Anthropic-Beta'] ||
+                _headersCcr['ANTHROPIC-BETA']
+              if (requestBetaHeader) {
+                usageObject.request_anthropic_beta = requestBetaHeader
+              }
+              if (typeof _requestBodyCcr?.speed === 'string' && _requestBodyCcr.speed.trim()) {
+                usageObject.request_speed = _requestBodyCcr.speed.trim().toLowerCase()
+              }
+              if (typeof usageData.speed === 'string' && usageData.speed.trim()) {
+                usageObject.speed = usageData.speed.trim().toLowerCase()
+              }

              // 如果有详细的缓存创建数据，添加到 usage 对象中
              if (ephemeral5mTokens > 0 || ephemeral1hTokens > 0) {
--- a/src/routes/openaiClaudeRoutes.js
+++ b/src/routes/openaiClaudeRoutes.js
@@ -285,12 +285,23 @@ async function handleChatCompletion(req, res, apiKeyData) {
                (usage.cache_creation.ephemeral_1h_input_tokens || 0)
              : usage.cache_creation_input_tokens || 0) || 0
          const cacheReadTokens = usage.cache_read_input_tokens || 0
+          const usageWithRequestMeta = { ...usage }
+          const requestBetaHeader =
+            req.headers['anthropic-beta'] ||
+            req.headers['Anthropic-Beta'] ||
+            req.headers['ANTHROPIC-BETA']
+          if (requestBetaHeader) {
+            usageWithRequestMeta.request_anthropic_beta = requestBetaHeader
+          }
+          if (typeof claudeRequest?.speed === 'string' && claudeRequest.speed.trim()) {
+            usageWithRequestMeta.request_speed = claudeRequest.speed.trim().toLowerCase()
+          }

          // 使用新的 recordUsageWithDetails 方法来支持详细的缓存数据
          apiKeyService
            .recordUsageWithDetails(
              apiKeyData.id,
-              usage, // 直接传递整个 usage 对象，包含可能的 cache_creation 详细数据
+              usageWithRequestMeta, // 传递 usage + 请求模式元信息（beta/speed）
              model,
              accountId,
              accountType
@@ -413,11 +424,22 @@ async function handleChatCompletion(req, res, apiKeyData) {
              (usage.cache_creation.ephemeral_1h_input_tokens || 0)
            : usage.cache_creation_input_tokens || 0) || 0
        const cacheReadTokens = usage.cache_read_input_tokens || 0
+        const usageWithRequestMeta = { ...usage }
+        const requestBetaHeader =
+          req.headers['anthropic-beta'] ||
+          req.headers['Anthropic-Beta'] ||
+          req.headers['ANTHROPIC-BETA']
+        if (requestBetaHeader) {
+          usageWithRequestMeta.request_anthropic_beta = requestBetaHeader
+        }
+        if (typeof claudeRequest?.speed === 'string' && claudeRequest.speed.trim()) {
+          usageWithRequestMeta.request_speed = claudeRequest.speed.trim().toLowerCase()
+        }
        // 使用新的 recordUsageWithDetails 方法来支持详细的缓存数据
        apiKeyService
          .recordUsageWithDetails(
            apiKeyData.id,
-            usage, // 直接传递整个 usage 对象，包含可能的 cache_creation 详细数据
+            usageWithRequestMeta, // 传递 usage + 请求模式元信息（beta/speed）
            claudeRequest.model,
            accountId,
            accountType
--- a/src/services/pricingService.js
+++ b/src/services/pricingService.js
@@ -63,6 +63,20 @@ class PricingService {
      'claude-haiku-3': 0.0000016,
      'claude-haiku-3-5': 0.0000016
    }
+
+    // Claude Prompt Caching 官方倍率（基于输入价格）
+    this.claudeCacheMultipliers = {
+      write5m: 1.25,
+      write1h: 2,
+      read: 0.1
+    }
+
+    // Claude 扩展计费特性
+    this.claudeFeatureFlags = {
+      context1mBeta: 'context-1m-2025-08-07',
+      fastModeBeta: 'fast-mode-2026-02-01',
+      fastModeSpeed: 'fast'
+    }
  }

  // 初始化价格服务
@@ -451,14 +465,139 @@ class PricingService {
    return pricing
  }

-  // 获取 1 小时缓存价格
-  getEphemeral1hPricing(modelName) {
+  // 从 usage 对象中提取 beta 特性列表（小写）
+  extractBetaFeatures(usage) {
+    const features = new Set()
+    if (!usage || typeof usage !== 'object') {
+      return features
+    }
+
+    const requestHeaders = usage.request_headers || usage.requestHeaders || null
+    const headerBeta =
+      requestHeaders && typeof requestHeaders === 'object'
+        ? requestHeaders['anthropic-beta'] ||
+          requestHeaders['Anthropic-Beta'] ||
+          requestHeaders['ANTHROPIC-BETA']
+        : null
+
+    const candidates = [
+      usage.anthropic_beta,
+      usage.anthropicBeta,
+      usage.request_anthropic_beta,
+      usage.requestAnthropicBeta,
+      usage.beta_header,
+      usage.betaHeader,
+      usage.beta_features,
+      headerBeta
+    ]
+
+    const addFeature = (value) => {
+      if (!value || typeof value !== 'string') {
+        return
+      }
+      value
+        .split(',')
+        .map((item) => item.trim().toLowerCase())
+        .filter(Boolean)
+        .forEach((item) => features.add(item))
+    }
+
+    for (const candidate of candidates) {
+      if (Array.isArray(candidate)) {
+        candidate.forEach(addFeature)
+      } else {
+        addFeature(candidate)
+      }
+    }
+
+    return features
+  }
+
+  // 提取请求/响应中的 speed 字段（小写）
+  extractSpeedSignal(usage) {
+    if (!usage || typeof usage !== 'object') {
+      return { responseSpeed: '', requestSpeed: '' }
+    }
+
+    const normalize = (value) =>
+      typeof value === 'string' && value.trim() ? value.trim().toLowerCase() : ''
+
+    return {
+      responseSpeed: normalize(usage.speed),
+      requestSpeed: normalize(usage.request_speed || usage.requestSpeed)
+    }
+  }
+
+  // Claude Fast Mode 目前仅适用于 Opus 4.6 系列
+  isFastModeEligibleClaudeModel(modelName) {
+    return typeof modelName === 'string' && modelName.toLowerCase().includes('opus-4-6')
+  }
+
+  // 去掉模型名中的 [1m] 后缀，便于价格查找
+  stripLongContextSuffix(modelName) {
+    if (typeof modelName !== 'string') {
+      return modelName
+    }
+    return modelName.replace(/\[1m\]/gi, '').trim()
+  }
+
+  // 获取 Fast Mode 对应的价格条目（仅匹配 fast/ 前缀）
+  getFastModePricing(modelName) {
+    if (!this.pricingData || !modelName) {
+      return null
+    }
+
+    const cleanedModelName = this.stripLongContextSuffix(modelName)
+    const exactCandidates = new Set([`fast/${cleanedModelName}`])
+
+    if (cleanedModelName.startsWith('fast/')) {
+      exactCandidates.add(cleanedModelName)
+    }
+
+    for (const candidate of exactCandidates) {
+      if (this.pricingData[candidate]) {
+        logger.debug(`💰 Found exact fast pricing for ${modelName}: ${candidate}`)
+        return this.pricingData[candidate]
+      }
+    }
+
+    const normalizedModel = cleanedModelName.toLowerCase().replace(/[_-]/g, '')
+    for (const [key, value] of Object.entries(this.pricingData)) {
+      if (!key.startsWith('fast/')) {
+        continue
+      }
+      const normalizedFastKey = key
+        .slice('fast/'.length)
+        .toLowerCase()
+        .replace(/[_-]/g, '')
+      if (normalizedFastKey.includes(normalizedModel) || normalizedModel.includes(normalizedFastKey)) {
+        logger.debug(`💰 Found fuzzy fast pricing for ${modelName}: ${key}`)
+        return value
+      }
+    }
+
+    logger.debug(`💰 No fast pricing found for model: ${modelName}`)
+    return null
+  }
+
+  // 获取 1 小时缓存价格（优先使用 model_pricing.json 中的模型字段）
+  getEphemeral1hPricing(modelName, pricing = null) {
+    if (
+      pricing?.cache_creation_input_token_cost_above_1hr !== null &&
+      pricing?.cache_creation_input_token_cost_above_1hr !== undefined
+    ) {
+      return pricing.cache_creation_input_token_cost_above_1hr
+    }
+
    if (!modelName) {
      return 0
    }

    // 尝试直接匹配
-    if (this.ephemeral1hPricing[modelName]) {
+    if (
+      this.ephemeral1hPricing[modelName] !== null &&
+      this.ephemeral1hPricing[modelName] !== undefined
+    ) {
      return this.ephemeral1hPricing[modelName]
    }

@@ -487,8 +626,10 @@ class PricingService {

  // 计算使用费用
  calculateCost(usage, modelName) {
+    const normalizedModelName = this.stripLongContextSuffix(modelName)
+
    // 检查是否为 1M 上下文模型（用户通过 [1m] 后缀主动选择长上下文模式）
-    const isLongContextModel = modelName && modelName.includes('[1m]')
+    const isLongContextModel = typeof modelName === 'string' && modelName.includes('[1m]')
    let isLongContextRequest = false
    let useLongContextPricing = false

@@ -498,27 +639,31 @@ class PricingService {
    const cacheReadTokens = usage.cache_read_input_tokens || 0
    const totalInputTokens = inputTokens + cacheCreationTokens + cacheReadTokens

-    // 获取模型定价信息
-    const pricing = this.getModelPricing(modelName)
+    // 识别 Claude 特性标识
+    const betaFeatures = this.extractBetaFeatures(usage)
+    const hasContext1mBeta = betaFeatures.has(this.claudeFeatureFlags.context1mBeta)
+    const hasFastModeBeta = betaFeatures.has(this.claudeFeatureFlags.fastModeBeta)
+    const { responseSpeed, requestSpeed } = this.extractSpeedSignal(usage)
+    const hasFastSpeedSignal =
+      responseSpeed === this.claudeFeatureFlags.fastModeSpeed ||
+      requestSpeed === this.claudeFeatureFlags.fastModeSpeed
+    const isFastModeRequest =
+      hasFastModeBeta &&
+      hasFastSpeedSignal &&
+      this.isFastModeEligibleClaudeModel(normalizedModelName)
+    const standardPricing = this.getModelPricing(modelName)
+    const fastPricing = isFastModeRequest ? this.getFastModePricing(normalizedModelName) : null
+    const pricing = fastPricing || standardPricing
+    const isLongContextModeEnabled = isLongContextModel || hasContext1mBeta

-    // 当 [1m] 模型总输入超过 200K 且 model_pricing.json 有 above_200k 字段时，使用高档价格
+    // 当 [1m] 模型总输入超过 200K 时，进入 200K+ 计费逻辑
    // 根据 Anthropic 官方文档：当总输入超过 200K 时，整个请求所有 token 类型都使用高档价格
-    if (isLongContextModel && totalInputTokens > 200000) {
+    if (isLongContextModeEnabled && totalInputTokens > 200000) {
      isLongContextRequest = true
-      // 检查 model_pricing.json 是否有 above_200k 字段
-      if (
-        pricing?.input_cost_per_token_above_200k_tokens !== null &&
-        pricing?.input_cost_per_token_above_200k_tokens !== undefined
-      ) {
-        useLongContextPricing = true
-        logger.info(
-          `💰 Using 200K+ pricing for ${modelName}: total input tokens = ${totalInputTokens.toLocaleString()}`
-        )
-      } else {
-        logger.warn(
-          `⚠️ Model ${modelName} exceeds 200K tokens but no above_200k pricing found in model_pricing.json`
-        )
-      }
+      useLongContextPricing = true
+      logger.info(
+        `💰 Using 200K+ pricing for ${modelName}: total input tokens = ${totalInputTokens.toLocaleString()}`
+      )
    }

    if (!pricing) {
@@ -535,32 +680,76 @@ class PricingService {
      }
    }

-    // 确定实际使用的价格（普通或 200K+ 高档价格）
-    const actualInputPrice = useLongContextPricing
-      ? pricing.input_cost_per_token_above_200k_tokens
-      : pricing.input_cost_per_token || 0
+    const isClaudeModel =
+      (modelName && modelName.toLowerCase().includes('claude')) ||
+      (typeof pricing?.litellm_provider === 'string' &&
+        pricing.litellm_provider.toLowerCase().includes('anthropic'))

-    const actualOutputPrice = useLongContextPricing
-      ? pricing.output_cost_per_token_above_200k_tokens
-      : pricing.output_cost_per_token || 0
+    if (isFastModeRequest && fastPricing) {
+      logger.info(`🚀 Fast mode pricing profile selected: fast/${normalizedModelName}`)
+    } else if (isFastModeRequest && !fastPricing) {
+      logger.warn(
+        `⚠️ Fast mode request detected but no fast pricing profile found for ${normalizedModelName}; fallback to standard profile`
+      )
+    }

-    const actualCacheCreatePrice = useLongContextPricing
-      ? pricing.cache_creation_input_token_cost_above_200k_tokens ||
-        pricing.cache_creation_input_token_cost ||
-        0
-      : pricing.cache_creation_input_token_cost || 0
+    const baseInputPrice = pricing.input_cost_per_token || 0
+    const hasInput200kPrice =
+      pricing.input_cost_per_token_above_200k_tokens !== null &&
+      pricing.input_cost_per_token_above_200k_tokens !== undefined

-    const actualCacheReadPrice = useLongContextPricing
-      ? pricing.cache_read_input_token_cost_above_200k_tokens ||
-        pricing.cache_read_input_token_cost ||
-        0
-      : pricing.cache_read_input_token_cost || 0
+    // 确定实际使用的输入价格（普通或 200K+ 高档价格）
+    // Claude 模型在 200K+ 场景下如果缺少官方字段，按 2 倍输入价兜底
+    let actualInputPrice = useLongContextPricing
+      ? hasInput200kPrice
+        ? pricing.input_cost_per_token_above_200k_tokens
+        : isClaudeModel
+          ? baseInputPrice * 2
+          : baseInputPrice
+      : baseInputPrice

-    // 1小时缓存的 200K+ 价格
-    const actualEphemeral1hPrice = useLongContextPricing
-      ? pricing.cache_creation_input_token_cost_above_1hr_above_200k_tokens ||
-        this.getEphemeral1hPricing(modelName)
-      : this.getEphemeral1hPricing(modelName)
+    const baseOutputPrice = pricing.output_cost_per_token || 0
+    const hasOutput200kPrice =
+      pricing.output_cost_per_token_above_200k_tokens !== null &&
+      pricing.output_cost_per_token_above_200k_tokens !== undefined
+    let actualOutputPrice = useLongContextPricing
+      ? hasOutput200kPrice
+        ? pricing.output_cost_per_token_above_200k_tokens
+        : baseOutputPrice
+      : baseOutputPrice
+
+    let actualCacheCreatePrice = 0
+    let actualCacheReadPrice = 0
+    let actualEphemeral1hPrice = 0
+
+    if (isClaudeModel) {
+      // Claude 模型缓存价格统一按输入价格倍率推导，避免来源字段不一致导致计费偏差
+      actualCacheCreatePrice = actualInputPrice * this.claudeCacheMultipliers.write5m
+      actualCacheReadPrice = actualInputPrice * this.claudeCacheMultipliers.read
+      actualEphemeral1hPrice = actualInputPrice * this.claudeCacheMultipliers.write1h
+    } else {
+      actualCacheCreatePrice = useLongContextPricing
+        ? pricing.cache_creation_input_token_cost_above_200k_tokens ||
+          pricing.cache_creation_input_token_cost ||
+          0
+        : pricing.cache_creation_input_token_cost || 0
+
+      actualCacheReadPrice = useLongContextPricing
+        ? pricing.cache_read_input_token_cost_above_200k_tokens ||
+          pricing.cache_read_input_token_cost ||
+          0
+        : pricing.cache_read_input_token_cost || 0
+
+      const defaultEphemeral1hPrice = this.getEphemeral1hPricing(modelName, pricing)
+
+      // 非 Claude 模型维持原有字段优先级
+      actualEphemeral1hPrice = useLongContextPricing
+        ? pricing.cache_creation_input_token_cost_above_1hr_above_200k_tokens !== null &&
+          pricing.cache_creation_input_token_cost_above_1hr_above_200k_tokens !== undefined
+          ? pricing.cache_creation_input_token_cost_above_1hr_above_200k_tokens
+          : defaultEphemeral1hPrice
+        : defaultEphemeral1hPrice
+    }

    // 计算各项费用
    const inputCost = inputTokens * actualInputPrice
--- a/tests/pricingService.test.js
+++ b/tests/pricingService.test.js
@@ -2,8 +2,11 @@
 * PricingService 长上下文（200K+）分层计费测试
 *
 * 测试当 [1m] 模型总输入超过 200K tokens 时的分层计费逻辑：
- * - 使用 model_pricing.json 中的 *_above_200k_tokens 字段
- * - 所有 token 类型（input/output/cache_create/cache_read）都切换到高档价格
+ * - 输入/输出优先使用 model_pricing.json 中的 *_above_200k_tokens 字段
+ * - Claude 缓存价格按输入价格倍率推导：
+ *   - 5m cache write = input * 1.25
+ *   - 1h cache write = input * 2
+ *   - cache read = input * 0.1
 */

 // Mock logger to avoid console output during tests
@@ -44,6 +47,7 @@ describe('PricingService - 200K+ Long Context Pricing', () => {
      output_cost_per_token: 0.000015, // $15/MTok
      cache_creation_input_token_cost: 0.00000375, // $3.75/MTok
      cache_read_input_token_cost: 0.0000003, // $0.30/MTok
+      max_input_tokens: 1000000,
      // 200K+ 高档价格
      input_cost_per_token_above_200k_tokens: 0.000006, // $6/MTok (2x)
      output_cost_per_token_above_200k_tokens: 0.0000225, // $22.50/MTok (1.5x)
@@ -59,6 +63,15 @@ describe('PricingService - 200K+ Long Context Pricing', () => {
      output_cost_per_token: 0.00000125,
      cache_creation_input_token_cost: 0.0000003,
      cache_read_input_token_cost: 0.00000003
+    },
+    // Fast Mode 适配测试模型（Opus 4.6）
+    'claude-opus-4-6': {
+      input_cost_per_token: 0.000005,
+      output_cost_per_token: 0.000025,
+      cache_creation_input_token_cost: 0.00000625,
+      cache_read_input_token_cost: 0.0000005,
+      input_cost_per_token_above_200k_tokens: 0.00001,
+      output_cost_per_token_above_200k_tokens: 0.0000375
    }
  }

@@ -152,7 +165,7 @@ describe('PricingService - 200K+ Long Context Pricing', () => {
      expect(result.pricing.input).toBe(0.000006)
      expect(result.pricing.output).toBe(0.0000225)
      expect(result.pricing.cacheCreate).toBe(0.0000075)
-      expect(result.pricing.cacheRead).toBe(0.0000006)
+      expect(result.pricing.cacheRead).toBeCloseTo(0.0000006, 12)
    })

    it('仅 cache_creation + cache_read 超过 200K 也应触发', () => {
@@ -199,13 +212,13 @@ describe('PricingService - 200K+ Long Context Pricing', () => {
      const result = pricingService.calculateCost(usage, 'claude-sonnet-4-20250514[1m]')

      // cache_read_input_token_cost_above_200k_tokens = 0.0000006
-      expect(result.pricing.cacheRead).toBe(0.0000006)
+      expect(result.pricing.cacheRead).toBeCloseTo(0.0000006, 12)
      expect(result.cacheReadCost).toBeCloseTo(60000 * 0.0000006, 10)
    })
  })

  describe('详细缓存创建数据（ephemeral_5m / ephemeral_1h）', () => {
-    it('200K+ 时 ephemeral_1h 应使用 cache_creation_input_token_cost_above_1hr_above_200k_tokens', () => {
+    it('200K+ 时 Claude ephemeral_1h 应按 input * 2 计算', () => {
      const usage = {
        input_tokens: 200001,
        output_tokens: 1000,
@@ -222,26 +235,88 @@ describe('PricingService - 200K+ Long Context Pricing', () => {
      expect(result.isLongContextRequest).toBe(true)
      // ephemeral_5m: 5000 * 0.0000075 = 0.0000375
      expect(result.ephemeral5mCost).toBeCloseTo(5000 * 0.0000075, 10)
-      // ephemeral_1h: 5000 * 0.000015 (above_1hr_above_200k)
-      expect(result.ephemeral1hCost).toBeCloseTo(5000 * 0.000015, 10)
+      // 200K+ input = 0.000006, ephemeral_1h = input * 2 = 0.000012
+      expect(result.pricing.ephemeral1h).toBeCloseTo(0.000012, 10)
+      expect(result.ephemeral1hCost).toBeCloseTo(5000 * 0.000012, 10)
    })
  })

  describe('回退测试', () => {
-    it('模型无 above_200k 字段时回退到基础价格', () => {
+    it('Claude 模型无 above_200k 字段时，200K+ 输入价格按 2 倍并推导缓存价格', () => {
      const usage = {
        input_tokens: 250000,
        output_tokens: 1000,
-        cache_creation_input_tokens: 0,
-        cache_read_input_tokens: 0
+        cache_creation_input_tokens: 10000,
+        cache_read_input_tokens: 10000
      }

      const result = pricingService.calculateCost(usage, 'claude-3-haiku-20240307[1m]')

-      // 模型没有 above_200k 字段，使用基础价格
-      expect(result.isLongContextRequest).toBe(true) // 超过 200K
-      expect(result.pricing.input).toBe(0.00000025) // 基础价格（没有 above_200k 字段）
-      expect(result.pricing.cacheCreate).toBe(0.0000003) // 基础价格
+      // 模型没有 above_200k 字段，Claude 200K+ 输入按 2 倍兜底
+      expect(result.isLongContextRequest).toBe(true)
+      expect(result.pricing.input).toBe(0.0000005) // 0.00000025 * 2
+      // 缓存价格由输入价格推导
+      expect(result.pricing.cacheCreate).toBeCloseTo(0.000000625, 12) // input * 1.25
+      expect(result.pricing.cacheRead).toBeCloseTo(0.00000005, 12) // input * 0.1
+    })
+  })
+
+  describe('Header 与 Fast Mode 适配', () => {
+    it('无 [1m] 后缀但带 context-1m beta，超过 200K 时应触发长上下文计费', () => {
+      const usage = {
+        input_tokens: 210000,
+        output_tokens: 1000,
+        cache_creation_input_tokens: 0,
+        cache_read_input_tokens: 0,
+        request_anthropic_beta: 'context-1m-2025-08-07'
+      }
+
+      const result = pricingService.calculateCost(usage, 'claude-sonnet-4-20250514')
+
+      expect(result.isLongContextRequest).toBe(true)
+      expect(result.pricing.input).toBe(0.000006)
+      expect(result.pricing.output).toBe(0.0000225)
+    })
+
+    it('Opus 4.6 在 fast-mode beta + speed=fast 时应用 Fast Mode 6x', () => {
+      const usage = {
+        input_tokens: 100000,
+        output_tokens: 20000,
+        cache_creation_input_tokens: 10000,
+        cache_read_input_tokens: 5000,
+        request_anthropic_beta: 'fast-mode-2026-02-01',
+        speed: 'fast'
+      }
+
+      const result = pricingService.calculateCost(usage, 'claude-opus-4-6')
+
+      // input: 0.000005 * 6 = 0.00003
+      expect(result.pricing.input).toBeCloseTo(0.00003, 12)
+      // output: 0.000025 * 6 = 0.00015
+      expect(result.pricing.output).toBeCloseTo(0.00015, 12)
+      // cache create/read 由 fast 后 input 推导
+      expect(result.pricing.cacheCreate).toBeCloseTo(0.0000375, 12) // 0.00003 * 1.25
+      expect(result.pricing.cacheRead).toBeCloseTo(0.000003, 12) // 0.00003 * 0.1
+      expect(result.pricing.ephemeral1h).toBeCloseTo(0.00006, 12) // 0.00003 * 2
+    })
+
+    it('Opus 4.6 在 fast-mode + [1m] 且超过 200K 时应叠加计费（12x input）', () => {
+      const usage = {
+        input_tokens: 210000,
+        output_tokens: 1000,
+        cache_creation_input_tokens: 10000,
+        cache_read_input_tokens: 10000,
+        request_anthropic_beta: 'fast-mode-2026-02-01,context-1m-2025-08-07',
+        speed: 'fast'
+      }
+
+      const result = pricingService.calculateCost(usage, 'claude-opus-4-6[1m]')
+
+      expect(result.isLongContextRequest).toBe(true)
+      // input: 0.000005 -> long context 0.00001 -> fast 6x => 0.00006 (即标准 12x)
+      expect(result.pricing.input).toBeCloseTo(0.00006, 12)
+      // output: 0.000025 -> long context 0.0000375 -> fast 6x => 0.000225 (即标准 9x)
+      expect(result.pricing.output).toBeCloseTo(0.000225, 12)
    })
  })

@@ -261,7 +336,7 @@ describe('PricingService - 200K+ Long Context Pricing', () => {
      expect(result.pricing.input).toBe(0.000003) // 基础价格
      expect(result.pricing.output).toBe(0.000015) // 基础价格
      expect(result.pricing.cacheCreate).toBe(0.00000375) // 基础价格
-      expect(result.pricing.cacheRead).toBe(0.0000003) // 基础价格
+      expect(result.pricing.cacheRead).toBeCloseTo(0.0000003, 12) // 基础价格
    })

    it('[1m] 模型未超过 200K 时使用基础价格', () => {