From 1d90500de91610cf703c87dac43b11bd38110551 Mon Sep 17 00:00:00 2001
From: sczheng <yangkaiyi@itiger.com>
Date: Fri, 6 Feb 2026 11:58:54 +0800
Subject: [PATCH 1/6] =?UTF-8?q?mod:=20=E4=BF=AE=E6=94=B91m=E4=B8=8A?=
 =?UTF-8?q?=E4=B8=8B=E6=96=87=E7=9A=84=E7=A1=AC=E7=BC=96=E7=A0=81=E8=B4=B9?=
 =?UTF-8?q?=E7=94=A8=E8=AE=A1=E7=AE=97?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/services/pricingService.js | 152 +++++++---------
 tests/pricingService.test.js   | 322 +++++++++++++++++++++++++++++++++
 2 files changed, 391 insertions(+), 83 deletions(-)
 create mode 100644 tests/pricingService.test.js

diff --git a/src/services/pricingService.js b/src/services/pricingService.js
index a8f37667..6f513f13 100644
--- a/src/services/pricingService.js
+++ b/src/services/pricingService.js
@@ -63,17 +63,6 @@ class PricingService {
       'claude-haiku-3': 0.0000016,
       'claude-haiku-3-5': 0.0000016
     }
-
-    // 硬编码的 1M 上下文模型价格（美元/token）
-    // 当总输入 tokens 超过 200k 时使用这些价格
-    this.longContextPricing = {
-      // claude-sonnet-4-20250514[1m] 模型的 1M 上下文价格
-      'claude-sonnet-4-20250514[1m]': {
-        input: 0.000006, // $6/MTok
-        output: 0.0000225 // $22.50/MTok
-      }
-      // 未来可以添加更多 1M 模型的价格
-    }
   }
 
   // 初始化价格服务
@@ -498,40 +487,41 @@ class PricingService {
 
   // 计算使用费用
   calculateCost(usage, modelName) {
-    // 检查是否为 1M 上下文模型
+    // 检查是否为 1M 上下文模型（用户通过 [1m] 后缀主动选择长上下文模式）
     const isLongContextModel = modelName && modelName.includes('[1m]')
     let isLongContextRequest = false
     let useLongContextPricing = false
 
-    if (isLongContextModel) {
-      // 计算总输入 tokens
-      const inputTokens = usage.input_tokens || 0
-      const cacheCreationTokens = usage.cache_creation_input_tokens || 0
-      const cacheReadTokens = usage.cache_read_input_tokens || 0
-      const totalInputTokens = inputTokens + cacheCreationTokens + cacheReadTokens
+    // 计算总输入 tokens（用于判断是否超过 200K 阈值）
+    const inputTokens = usage.input_tokens || 0
+    const cacheCreationTokens = usage.cache_creation_input_tokens || 0
+    const cacheReadTokens = usage.cache_read_input_tokens || 0
+    const totalInputTokens = inputTokens + cacheCreationTokens + cacheReadTokens
 
-      // 如果总输入超过 200k，使用 1M 上下文价格
-      if (totalInputTokens > 200000) {
-        isLongContextRequest = true
-        // 检查是否有硬编码的 1M 价格
-        if (this.longContextPricing[modelName]) {
-          useLongContextPricing = true
-        } else {
-          // 如果没有找到硬编码价格，使用第一个 1M 模型的价格作为默认
-          const defaultLongContextModel = Object.keys(this.longContextPricing)[0]
-          if (defaultLongContextModel) {
-            useLongContextPricing = true
-            logger.warn(
-              `⚠️ No specific 1M pricing for ${modelName}, using default from ${defaultLongContextModel}`
-            )
-          }
-        }
+    // 获取模型定价信息
+    const pricing = this.getModelPricing(modelName)
+
+    // 当 [1m] 模型总输入超过 200K 且 model_pricing.json 有 above_200k 字段时，使用高档价格
+    // 根据 Anthropic 官方文档：当总输入超过 200K 时，整个请求所有 token 类型都使用高档价格
+    if (isLongContextModel && totalInputTokens > 200000) {
+      isLongContextRequest = true
+      // 检查 model_pricing.json 是否有 above_200k 字段
+      if (
+        pricing?.input_cost_per_token_above_200k_tokens !== null &&
+        pricing?.input_cost_per_token_above_200k_tokens !== undefined
+      ) {
+        useLongContextPricing = true
+        logger.info(
+          `💰 Using 200K+ pricing for ${modelName}: total input tokens = ${totalInputTokens.toLocaleString()}`
+        )
+      } else {
+        logger.warn(
+          `⚠️ Model ${modelName} exceeds 200K tokens but no above_200k pricing found in model_pricing.json`
+        )
       }
     }
 
-    const pricing = this.getModelPricing(modelName)
-
-    if (!pricing && !useLongContextPricing) {
+    if (!pricing) {
       return {
         inputCost: 0,
         outputCost: 0,
@@ -545,59 +535,65 @@ class PricingService {
       }
     }
 
-    let inputCost = 0
-    let outputCost = 0
+    // 确定实际使用的价格（普通或 200K+ 高档价格）
+    const actualInputPrice = useLongContextPricing
+      ? pricing.input_cost_per_token_above_200k_tokens
+      : pricing.input_cost_per_token || 0
 
-    if (useLongContextPricing) {
-      // 使用 1M 上下文特殊价格（仅输入和输出价格改变）
-      const longContextPrices =
-        this.longContextPricing[modelName] ||
-        this.longContextPricing[Object.keys(this.longContextPricing)[0]]
+    const actualOutputPrice = useLongContextPricing
+      ? pricing.output_cost_per_token_above_200k_tokens
+      : pricing.output_cost_per_token || 0
 
-      inputCost = (usage.input_tokens || 0) * longContextPrices.input
-      outputCost = (usage.output_tokens || 0) * longContextPrices.output
+    const actualCacheCreatePrice = useLongContextPricing
+      ? pricing.cache_creation_input_token_cost_above_200k_tokens ||
+        pricing.cache_creation_input_token_cost ||
+        0
+      : pricing.cache_creation_input_token_cost || 0
 
-      logger.info(
-        `💰 Using 1M context pricing for ${modelName}: input=$${longContextPrices.input}/token, output=$${longContextPrices.output}/token`
-      )
-    } else {
-      // 使用正常价格
-      inputCost = (usage.input_tokens || 0) * (pricing?.input_cost_per_token || 0)
-      outputCost = (usage.output_tokens || 0) * (pricing?.output_cost_per_token || 0)
-    }
+    const actualCacheReadPrice = useLongContextPricing
+      ? pricing.cache_read_input_token_cost_above_200k_tokens ||
+        pricing.cache_read_input_token_cost ||
+        0
+      : pricing.cache_read_input_token_cost || 0
 
-    // 缓存价格保持不变（即使对于 1M 模型）
-    const cacheReadCost =
-      (usage.cache_read_input_tokens || 0) * (pricing?.cache_read_input_token_cost || 0)
+    // 1小时缓存的 200K+ 价格
+    const actualEphemeral1hPrice = useLongContextPricing
+      ? pricing.cache_creation_input_token_cost_above_1hr_above_200k_tokens ||
+        this.getEphemeral1hPricing(modelName)
+      : this.getEphemeral1hPricing(modelName)
 
-    // 处理缓存创建费用：
-    // 1. 如果有详细的 cache_creation 对象，使用它
-    // 2. 否则使用总的 cache_creation_input_tokens（向后兼容）
+    // 计算各项费用
+    const inputCost = inputTokens * actualInputPrice
+    const outputCost = (usage.output_tokens || 0) * actualOutputPrice
+
+    // 处理缓存费用
     let ephemeral5mCost = 0
     let ephemeral1hCost = 0
     let cacheCreateCost = 0
+    let cacheReadCost = 0
 
     if (usage.cache_creation && typeof usage.cache_creation === 'object') {
       // 有详细的缓存创建数据
       const ephemeral5mTokens = usage.cache_creation.ephemeral_5m_input_tokens || 0
       const ephemeral1hTokens = usage.cache_creation.ephemeral_1h_input_tokens || 0
 
-      // 5分钟缓存使用标准的 cache_creation_input_token_cost
-      ephemeral5mCost = ephemeral5mTokens * (pricing?.cache_creation_input_token_cost || 0)
+      // 5分钟缓存使用 cache_creation 价格
+      ephemeral5mCost = ephemeral5mTokens * actualCacheCreatePrice
 
-      // 1小时缓存使用硬编码的价格
-      const ephemeral1hPrice = this.getEphemeral1hPricing(modelName)
-      ephemeral1hCost = ephemeral1hTokens * ephemeral1hPrice
+      // 1小时缓存使用 ephemeral_1h 价格
+      ephemeral1hCost = ephemeral1hTokens * actualEphemeral1hPrice
 
       // 总的缓存创建费用
       cacheCreateCost = ephemeral5mCost + ephemeral1hCost
-    } else if (usage.cache_creation_input_tokens) {
+    } else if (cacheCreationTokens) {
       // 旧格式，所有缓存创建 tokens 都按 5 分钟价格计算（向后兼容）
-      cacheCreateCost =
-        (usage.cache_creation_input_tokens || 0) * (pricing?.cache_creation_input_token_cost || 0)
+      cacheCreateCost = cacheCreationTokens * actualCacheCreatePrice
       ephemeral5mCost = cacheCreateCost
     }
 
+    // 缓存读取费用
+    cacheReadCost = cacheReadTokens * actualCacheReadPrice
+
     return {
       inputCost,
       outputCost,
@@ -609,21 +605,11 @@ class PricingService {
       hasPricing: true,
       isLongContextRequest,
       pricing: {
-        input: useLongContextPricing
-          ? (
-              this.longContextPricing[modelName] ||
-              this.longContextPricing[Object.keys(this.longContextPricing)[0]]
-            )?.input || 0
-          : pricing?.input_cost_per_token || 0,
-        output: useLongContextPricing
-          ? (
-              this.longContextPricing[modelName] ||
-              this.longContextPricing[Object.keys(this.longContextPricing)[0]]
-            )?.output || 0
-          : pricing?.output_cost_per_token || 0,
-        cacheCreate: pricing?.cache_creation_input_token_cost || 0,
-        cacheRead: pricing?.cache_read_input_token_cost || 0,
-        ephemeral1h: this.getEphemeral1hPricing(modelName)
+        input: actualInputPrice,
+        output: actualOutputPrice,
+        cacheCreate: actualCacheCreatePrice,
+        cacheRead: actualCacheReadPrice,
+        ephemeral1h: actualEphemeral1hPrice
       }
     }
   }
diff --git a/tests/pricingService.test.js b/tests/pricingService.test.js
new file mode 100644
index 00000000..60e71281
--- /dev/null
+++ b/tests/pricingService.test.js
@@ -0,0 +1,322 @@
+/**
+ * PricingService 长上下文（200K+）分层计费测试
+ *
+ * 测试当 [1m] 模型总输入超过 200K tokens 时的分层计费逻辑：
+ * - 使用 model_pricing.json 中的 *_above_200k_tokens 字段
+ * - 所有 token 类型（input/output/cache_create/cache_read）都切换到高档价格
+ */
+
+// Mock logger to avoid console output during tests
+jest.mock('../src/utils/logger', () => ({
+  api: jest.fn(),
+  warn: jest.fn(),
+  error: jest.fn(),
+  info: jest.fn(),
+  debug: jest.fn(),
+  success: jest.fn(),
+  database: jest.fn(),
+  security: jest.fn()
+}))
+
+// Mock fs to control pricing data
+jest.mock('fs', () => {
+  const actual = jest.requireActual('fs')
+  return {
+    ...actual,
+    existsSync: jest.fn(),
+    readFileSync: jest.fn(),
+    writeFileSync: jest.fn(),
+    mkdirSync: jest.fn(),
+    statSync: jest.fn(),
+    watchFile: jest.fn(),
+    unwatchFile: jest.fn()
+  }
+})
+
+describe('PricingService - 200K+ Long Context Pricing', () => {
+  let pricingService
+  const fs = require('fs')
+
+  // 模拟 claude-sonnet-4-20250514 的完整价格数据（来自 model_pricing.json）
+  const mockPricingData = {
+    'claude-sonnet-4-20250514': {
+      input_cost_per_token: 0.000003, // $3/MTok
+      output_cost_per_token: 0.000015, // $15/MTok
+      cache_creation_input_token_cost: 0.00000375, // $3.75/MTok
+      cache_read_input_token_cost: 0.0000003, // $0.30/MTok
+      // 200K+ 高档价格
+      input_cost_per_token_above_200k_tokens: 0.000006, // $6/MTok (2x)
+      output_cost_per_token_above_200k_tokens: 0.0000225, // $22.50/MTok (1.5x)
+      cache_creation_input_token_cost_above_200k_tokens: 0.0000075, // $7.50/MTok (2x)
+      cache_read_input_token_cost_above_200k_tokens: 0.0000006, // $0.60/MTok (2x)
+      // 1小时缓存价格
+      cache_creation_input_token_cost_above_1hr: 0.0000075,
+      cache_creation_input_token_cost_above_1hr_above_200k_tokens: 0.000015
+    },
+    // 没有 above_200k 字段的模型
+    'claude-3-haiku-20240307': {
+      input_cost_per_token: 0.00000025,
+      output_cost_per_token: 0.00000125,
+      cache_creation_input_token_cost: 0.0000003,
+      cache_read_input_token_cost: 0.00000003
+    }
+  }
+
+  beforeEach(() => {
+    // 清除缓存的模块
+    jest.resetModules()
+
+    // 配置 fs mock
+    fs.existsSync.mockReturnValue(true)
+    fs.readFileSync.mockReturnValue(JSON.stringify(mockPricingData))
+    fs.statSync.mockReturnValue({ mtime: new Date(), mtimeMs: Date.now() })
+    fs.watchFile.mockImplementation(() => {})
+    fs.unwatchFile.mockImplementation(() => {})
+
+    // 重新加载 pricingService
+    pricingService = require('../src/services/pricingService')
+
+    // 直接设置价格数据（绕过初始化）
+    pricingService.pricingData = mockPricingData
+    pricingService.lastUpdated = new Date()
+  })
+
+  afterEach(() => {
+    // 清理定时器
+    if (pricingService.cleanup) {
+      pricingService.cleanup()
+    }
+    jest.clearAllMocks()
+  })
+
+  describe('阈值边界测试', () => {
+    it('199999 tokens - 应使用基础价格', () => {
+      const usage = {
+        input_tokens: 199999,
+        output_tokens: 1000,
+        cache_creation_input_tokens: 0,
+        cache_read_input_tokens: 0
+      }
+
+      const result = pricingService.calculateCost(usage, 'claude-sonnet-4-20250514[1m]')
+
+      expect(result.isLongContextRequest).toBe(false)
+      expect(result.pricing.input).toBe(0.000003) // 基础价格
+      expect(result.pricing.output).toBe(0.000015) // 基础价格
+    })
+
+    it('200000 tokens - 应使用基础价格（边界不触发）', () => {
+      const usage = {
+        input_tokens: 200000,
+        output_tokens: 1000,
+        cache_creation_input_tokens: 0,
+        cache_read_input_tokens: 0
+      }
+
+      const result = pricingService.calculateCost(usage, 'claude-sonnet-4-20250514[1m]')
+
+      // 200000 不大于 200000，所以不触发高档价格
+      expect(result.isLongContextRequest).toBe(false)
+      expect(result.pricing.input).toBe(0.000003) // 基础价格
+    })
+
+    it('200001 tokens - 应使用 200K+ 高档价格', () => {
+      const usage = {
+        input_tokens: 200001,
+        output_tokens: 1000,
+        cache_creation_input_tokens: 0,
+        cache_read_input_tokens: 0
+      }
+
+      const result = pricingService.calculateCost(usage, 'claude-sonnet-4-20250514[1m]')
+
+      expect(result.isLongContextRequest).toBe(true)
+      expect(result.pricing.input).toBe(0.000006) // 200K+ 高档价格
+      expect(result.pricing.output).toBe(0.0000225) // 200K+ 高档价格
+    })
+  })
+
+  describe('总输入计算（input + cache_creation + cache_read）', () => {
+    it('分散在各类 token 中总计超过 200K 应触发高档价格', () => {
+      const usage = {
+        input_tokens: 150000,
+        output_tokens: 10000,
+        cache_creation_input_tokens: 40000,
+        cache_read_input_tokens: 20000
+      }
+      // Total: 150000 + 40000 + 20000 = 210000 > 200000
+
+      const result = pricingService.calculateCost(usage, 'claude-sonnet-4-20250514[1m]')
+
+      expect(result.isLongContextRequest).toBe(true)
+      expect(result.pricing.input).toBe(0.000006)
+      expect(result.pricing.output).toBe(0.0000225)
+      expect(result.pricing.cacheCreate).toBe(0.0000075)
+      expect(result.pricing.cacheRead).toBe(0.0000006)
+    })
+
+    it('仅 cache_creation + cache_read 超过 200K 也应触发', () => {
+      const usage = {
+        input_tokens: 50000,
+        output_tokens: 5000,
+        cache_creation_input_tokens: 100000,
+        cache_read_input_tokens: 60000
+      }
+      // Total: 50000 + 100000 + 60000 = 210000 > 200000
+
+      const result = pricingService.calculateCost(usage, 'claude-sonnet-4-20250514[1m]')
+
+      expect(result.isLongContextRequest).toBe(true)
+    })
+  })
+
+  describe('Cache 高档价格测试', () => {
+    it('cache_creation 应使用 cache_creation_input_token_cost_above_200k_tokens', () => {
+      const usage = {
+        input_tokens: 150000,
+        output_tokens: 1000,
+        cache_creation_input_tokens: 60000, // 60K cache creation
+        cache_read_input_tokens: 0
+      }
+      // Total: 210000 > 200000
+
+      const result = pricingService.calculateCost(usage, 'claude-sonnet-4-20250514[1m]')
+
+      // cache_creation_input_token_cost_above_200k_tokens = 0.0000075
+      expect(result.pricing.cacheCreate).toBe(0.0000075)
+      expect(result.cacheCreateCost).toBeCloseTo(60000 * 0.0000075, 10)
+    })
+
+    it('cache_read 应使用 cache_read_input_token_cost_above_200k_tokens', () => {
+      const usage = {
+        input_tokens: 150000,
+        output_tokens: 1000,
+        cache_creation_input_tokens: 0,
+        cache_read_input_tokens: 60000 // 60K cache read
+      }
+      // Total: 210000 > 200000
+
+      const result = pricingService.calculateCost(usage, 'claude-sonnet-4-20250514[1m]')
+
+      // cache_read_input_token_cost_above_200k_tokens = 0.0000006
+      expect(result.pricing.cacheRead).toBe(0.0000006)
+      expect(result.cacheReadCost).toBeCloseTo(60000 * 0.0000006, 10)
+    })
+  })
+
+  describe('详细缓存创建数据（ephemeral_5m / ephemeral_1h）', () => {
+    it('200K+ 时 ephemeral_1h 应使用 cache_creation_input_token_cost_above_1hr_above_200k_tokens', () => {
+      const usage = {
+        input_tokens: 200001,
+        output_tokens: 1000,
+        cache_creation_input_tokens: 10000, // 向后兼容字段
+        cache_read_input_tokens: 0,
+        cache_creation: {
+          ephemeral_5m_input_tokens: 5000,
+          ephemeral_1h_input_tokens: 5000
+        }
+      }
+
+      const result = pricingService.calculateCost(usage, 'claude-sonnet-4-20250514[1m]')
+
+      expect(result.isLongContextRequest).toBe(true)
+      // ephemeral_5m: 5000 * 0.0000075 = 0.0375
+      expect(result.ephemeral5mCost).toBeCloseTo(5000 * 0.0000075, 10)
+      // ephemeral_1h: 5000 * 0.000015 (above_1hr_above_200k)
+      expect(result.ephemeral1hCost).toBeCloseTo(5000 * 0.000015, 10)
+    })
+  })
+
+  describe('回退测试', () => {
+    it('模型无 above_200k 字段时回退到基础价格', () => {
+      const usage = {
+        input_tokens: 250000,
+        output_tokens: 1000,
+        cache_creation_input_tokens: 0,
+        cache_read_input_tokens: 0
+      }
+
+      const result = pricingService.calculateCost(usage, 'claude-3-haiku-20240307[1m]')
+
+      // 模型没有 above_200k 字段，使用基础价格
+      expect(result.isLongContextRequest).toBe(true) // 超过 200K
+      expect(result.pricing.input).toBe(0.00000025) // 基础价格（没有 above_200k 字段）
+      expect(result.pricing.cacheCreate).toBe(0.0000003) // 基础价格
+    })
+  })
+
+  describe('兼容性测试', () => {
+    it('非 [1m] 模型不受影响，始终使用基础价格', () => {
+      const usage = {
+        input_tokens: 250000,
+        output_tokens: 1000,
+        cache_creation_input_tokens: 0,
+        cache_read_input_tokens: 0
+      }
+
+      // 不带 [1m] 后缀
+      const result = pricingService.calculateCost(usage, 'claude-sonnet-4-20250514')
+
+      expect(result.isLongContextRequest).toBe(false)
+      expect(result.pricing.input).toBe(0.000003) // 基础价格
+      expect(result.pricing.output).toBe(0.000015) // 基础价格
+      expect(result.pricing.cacheCreate).toBe(0.00000375) // 基础价格
+      expect(result.pricing.cacheRead).toBe(0.0000003) // 基础价格
+    })
+
+    it('[1m] 模型未超过 200K 时使用基础价格', () => {
+      const usage = {
+        input_tokens: 100000,
+        output_tokens: 1000,
+        cache_creation_input_tokens: 50000,
+        cache_read_input_tokens: 49000
+      }
+      // Total: 199000 < 200000
+
+      const result = pricingService.calculateCost(usage, 'claude-sonnet-4-20250514[1m]')
+
+      expect(result.isLongContextRequest).toBe(false)
+      expect(result.pricing.input).toBe(0.000003) // 基础价格
+    })
+
+    it('无定价数据时返回 hasPricing=false', () => {
+      const usage = {
+        input_tokens: 250000,
+        output_tokens: 1000
+      }
+
+      const result = pricingService.calculateCost(usage, 'unknown-model[1m]')
+
+      expect(result.hasPricing).toBe(false)
+      expect(result.totalCost).toBe(0)
+    })
+  })
+
+  describe('成本计算准确性', () => {
+    it('应正确计算 200K+ 场景下的总成本', () => {
+      const usage = {
+        input_tokens: 150000,
+        output_tokens: 10000,
+        cache_creation_input_tokens: 40000,
+        cache_read_input_tokens: 20000
+      }
+      // Total input: 210000 > 200000 → 使用 200K+ 价格
+
+      const result = pricingService.calculateCost(usage, 'claude-sonnet-4-20250514[1m]')
+
+      // 手动计算预期成本
+      const expectedInputCost = 150000 * 0.000006 // $0.9
+      const expectedOutputCost = 10000 * 0.0000225 // $0.225
+      const expectedCacheCreateCost = 40000 * 0.0000075 // $0.3
+      const expectedCacheReadCost = 20000 * 0.0000006 // $0.012
+      const expectedTotal =
+        expectedInputCost + expectedOutputCost + expectedCacheCreateCost + expectedCacheReadCost
+
+      expect(result.inputCost).toBeCloseTo(expectedInputCost, 10)
+      expect(result.outputCost).toBeCloseTo(expectedOutputCost, 10)
+      expect(result.cacheCreateCost).toBeCloseTo(expectedCacheCreateCost, 10)
+      expect(result.cacheReadCost).toBeCloseTo(expectedCacheReadCost, 10)
+      expect(result.totalCost).toBeCloseTo(expectedTotal, 10)
+    })
+  })
+})

From 3b25cf01ade509e4d2e1d2a3d9713f894dd860ce Mon Sep 17 00:00:00 2001
From: sczheng189 <724100151@qq.com>
Date: Sat, 14 Feb 2026 21:32:09 +0800
Subject: [PATCH 2/6] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0=20Claude=20?=
 =?UTF-8?q?=E8=AE=A1=E8=B4=B9=E7=89=B9=E6=80=A7=E5=92=8C=E8=AF=B7=E6=B1=82?=
 =?UTF-8?q?=E5=85=83=E4=BF=A1=E6=81=AF=E6=94=AF=E6=8C=81=EF=BC=8C=E4=BC=98?=
 =?UTF-8?q?=E5=8C=96=E9=95=BF=E4=B8=8A=E4=B8=8B=E6=96=87=E8=AE=A1=E8=B4=B9?=
 =?UTF-8?q?=E9=80=BB=E8=BE=91?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/routes/api.js                |  40 +++++
 src/routes/openaiClaudeRoutes.js |  26 ++-
 src/services/pricingService.js   | 277 ++++++++++++++++++++++++++-----
 tests/pricingService.test.js     | 105 ++++++++++--
 4 files changed, 387 insertions(+), 61 deletions(-)

diff --git a/src/routes/api.js b/src/routes/api.js
index 4d5647e4..337cec5c 100644
--- a/src/routes/api.js
+++ b/src/routes/api.js
@@ -468,6 +468,17 @@ async function handleMessagesRequest(req, res) {
                 cache_creation_input_tokens: cacheCreateTokens,
                 cache_read_input_tokens: cacheReadTokens
               }
+              const requestBetaHeader =
+                _headers['anthropic-beta'] || _headers['Anthropic-Beta'] || _headers['ANTHROPIC-BETA']
+              if (requestBetaHeader) {
+                usageObject.request_anthropic_beta = requestBetaHeader
+              }
+              if (typeof _requestBody?.speed === 'string' && _requestBody.speed.trim()) {
+                usageObject.request_speed = _requestBody.speed.trim().toLowerCase()
+              }
+              if (typeof usageData.speed === 'string' && usageData.speed.trim()) {
+                usageObject.speed = usageData.speed.trim().toLowerCase()
+              }
 
               // 如果有详细的缓存创建数据，添加到 usage 对象中
               if (ephemeral5mTokens > 0 || ephemeral1hTokens > 0) {
@@ -562,6 +573,22 @@ async function handleMessagesRequest(req, res) {
                 cache_creation_input_tokens: cacheCreateTokens,
                 cache_read_input_tokens: cacheReadTokens
               }
+              const requestBetaHeader =
+                _headersConsole['anthropic-beta'] ||
+                _headersConsole['Anthropic-Beta'] ||
+                _headersConsole['ANTHROPIC-BETA']
+              if (requestBetaHeader) {
+                usageObject.request_anthropic_beta = requestBetaHeader
+              }
+              if (
+                typeof _requestBodyConsole?.speed === 'string' &&
+                _requestBodyConsole.speed.trim()
+              ) {
+                usageObject.request_speed = _requestBodyConsole.speed.trim().toLowerCase()
+              }
+              if (typeof usageData.speed === 'string' && usageData.speed.trim()) {
+                usageObject.speed = usageData.speed.trim().toLowerCase()
+              }
 
               // 如果有详细的缓存创建数据，添加到 usage 对象中
               if (ephemeral5mTokens > 0 || ephemeral1hTokens > 0) {
@@ -728,6 +755,19 @@ async function handleMessagesRequest(req, res) {
                 cache_creation_input_tokens: cacheCreateTokens,
                 cache_read_input_tokens: cacheReadTokens
               }
+              const requestBetaHeader =
+                _headersCcr['anthropic-beta'] ||
+                _headersCcr['Anthropic-Beta'] ||
+                _headersCcr['ANTHROPIC-BETA']
+              if (requestBetaHeader) {
+                usageObject.request_anthropic_beta = requestBetaHeader
+              }
+              if (typeof _requestBodyCcr?.speed === 'string' && _requestBodyCcr.speed.trim()) {
+                usageObject.request_speed = _requestBodyCcr.speed.trim().toLowerCase()
+              }
+              if (typeof usageData.speed === 'string' && usageData.speed.trim()) {
+                usageObject.speed = usageData.speed.trim().toLowerCase()
+              }
 
               // 如果有详细的缓存创建数据，添加到 usage 对象中
               if (ephemeral5mTokens > 0 || ephemeral1hTokens > 0) {
diff --git a/src/routes/openaiClaudeRoutes.js b/src/routes/openaiClaudeRoutes.js
index ae791b56..fc910b6a 100644
--- a/src/routes/openaiClaudeRoutes.js
+++ b/src/routes/openaiClaudeRoutes.js
@@ -285,12 +285,23 @@ async function handleChatCompletion(req, res, apiKeyData) {
                 (usage.cache_creation.ephemeral_1h_input_tokens || 0)
               : usage.cache_creation_input_tokens || 0) || 0
           const cacheReadTokens = usage.cache_read_input_tokens || 0
+          const usageWithRequestMeta = { ...usage }
+          const requestBetaHeader =
+            req.headers['anthropic-beta'] ||
+            req.headers['Anthropic-Beta'] ||
+            req.headers['ANTHROPIC-BETA']
+          if (requestBetaHeader) {
+            usageWithRequestMeta.request_anthropic_beta = requestBetaHeader
+          }
+          if (typeof claudeRequest?.speed === 'string' && claudeRequest.speed.trim()) {
+            usageWithRequestMeta.request_speed = claudeRequest.speed.trim().toLowerCase()
+          }
 
           // 使用新的 recordUsageWithDetails 方法来支持详细的缓存数据
           apiKeyService
             .recordUsageWithDetails(
               apiKeyData.id,
-              usage, // 直接传递整个 usage 对象，包含可能的 cache_creation 详细数据
+              usageWithRequestMeta, // 传递 usage + 请求模式元信息（beta/speed）
               model,
               accountId,
               accountType
@@ -413,11 +424,22 @@ async function handleChatCompletion(req, res, apiKeyData) {
               (usage.cache_creation.ephemeral_1h_input_tokens || 0)
             : usage.cache_creation_input_tokens || 0) || 0
         const cacheReadTokens = usage.cache_read_input_tokens || 0
+        const usageWithRequestMeta = { ...usage }
+        const requestBetaHeader =
+          req.headers['anthropic-beta'] ||
+          req.headers['Anthropic-Beta'] ||
+          req.headers['ANTHROPIC-BETA']
+        if (requestBetaHeader) {
+          usageWithRequestMeta.request_anthropic_beta = requestBetaHeader
+        }
+        if (typeof claudeRequest?.speed === 'string' && claudeRequest.speed.trim()) {
+          usageWithRequestMeta.request_speed = claudeRequest.speed.trim().toLowerCase()
+        }
         // 使用新的 recordUsageWithDetails 方法来支持详细的缓存数据
         apiKeyService
           .recordUsageWithDetails(
             apiKeyData.id,
-            usage, // 直接传递整个 usage 对象，包含可能的 cache_creation 详细数据
+            usageWithRequestMeta, // 传递 usage + 请求模式元信息（beta/speed）
             claudeRequest.model,
             accountId,
             accountType
diff --git a/src/services/pricingService.js b/src/services/pricingService.js
index 6f513f13..55423361 100644
--- a/src/services/pricingService.js
+++ b/src/services/pricingService.js
@@ -63,6 +63,20 @@ class PricingService {
       'claude-haiku-3': 0.0000016,
       'claude-haiku-3-5': 0.0000016
     }
+
+    // Claude Prompt Caching 官方倍率（基于输入价格）
+    this.claudeCacheMultipliers = {
+      write5m: 1.25,
+      write1h: 2,
+      read: 0.1
+    }
+
+    // Claude 扩展计费特性
+    this.claudeFeatureFlags = {
+      context1mBeta: 'context-1m-2025-08-07',
+      fastModeBeta: 'fast-mode-2026-02-01',
+      fastModeSpeed: 'fast'
+    }
   }
 
   // 初始化价格服务
@@ -451,14 +465,139 @@ class PricingService {
     return pricing
   }
 
-  // 获取 1 小时缓存价格
-  getEphemeral1hPricing(modelName) {
+  // 从 usage 对象中提取 beta 特性列表（小写）
+  extractBetaFeatures(usage) {
+    const features = new Set()
+    if (!usage || typeof usage !== 'object') {
+      return features
+    }
+
+    const requestHeaders = usage.request_headers || usage.requestHeaders || null
+    const headerBeta =
+      requestHeaders && typeof requestHeaders === 'object'
+        ? requestHeaders['anthropic-beta'] ||
+          requestHeaders['Anthropic-Beta'] ||
+          requestHeaders['ANTHROPIC-BETA']
+        : null
+
+    const candidates = [
+      usage.anthropic_beta,
+      usage.anthropicBeta,
+      usage.request_anthropic_beta,
+      usage.requestAnthropicBeta,
+      usage.beta_header,
+      usage.betaHeader,
+      usage.beta_features,
+      headerBeta
+    ]
+
+    const addFeature = (value) => {
+      if (!value || typeof value !== 'string') {
+        return
+      }
+      value
+        .split(',')
+        .map((item) => item.trim().toLowerCase())
+        .filter(Boolean)
+        .forEach((item) => features.add(item))
+    }
+
+    for (const candidate of candidates) {
+      if (Array.isArray(candidate)) {
+        candidate.forEach(addFeature)
+      } else {
+        addFeature(candidate)
+      }
+    }
+
+    return features
+  }
+
+  // 提取请求/响应中的 speed 字段（小写）
+  extractSpeedSignal(usage) {
+    if (!usage || typeof usage !== 'object') {
+      return { responseSpeed: '', requestSpeed: '' }
+    }
+
+    const normalize = (value) =>
+      typeof value === 'string' && value.trim() ? value.trim().toLowerCase() : ''
+
+    return {
+      responseSpeed: normalize(usage.speed),
+      requestSpeed: normalize(usage.request_speed || usage.requestSpeed)
+    }
+  }
+
+  // Claude Fast Mode 目前仅适用于 Opus 4.6 系列
+  isFastModeEligibleClaudeModel(modelName) {
+    return typeof modelName === 'string' && modelName.toLowerCase().includes('opus-4-6')
+  }
+
+  // 去掉模型名中的 [1m] 后缀，便于价格查找
+  stripLongContextSuffix(modelName) {
+    if (typeof modelName !== 'string') {
+      return modelName
+    }
+    return modelName.replace(/\[1m\]/gi, '').trim()
+  }
+
+  // 获取 Fast Mode 对应的价格条目（仅匹配 fast/ 前缀）
+  getFastModePricing(modelName) {
+    if (!this.pricingData || !modelName) {
+      return null
+    }
+
+    const cleanedModelName = this.stripLongContextSuffix(modelName)
+    const exactCandidates = new Set([`fast/${cleanedModelName}`])
+
+    if (cleanedModelName.startsWith('fast/')) {
+      exactCandidates.add(cleanedModelName)
+    }
+
+    for (const candidate of exactCandidates) {
+      if (this.pricingData[candidate]) {
+        logger.debug(`💰 Found exact fast pricing for ${modelName}: ${candidate}`)
+        return this.pricingData[candidate]
+      }
+    }
+
+    const normalizedModel = cleanedModelName.toLowerCase().replace(/[_-]/g, '')
+    for (const [key, value] of Object.entries(this.pricingData)) {
+      if (!key.startsWith('fast/')) {
+        continue
+      }
+      const normalizedFastKey = key
+        .slice('fast/'.length)
+        .toLowerCase()
+        .replace(/[_-]/g, '')
+      if (normalizedFastKey.includes(normalizedModel) || normalizedModel.includes(normalizedFastKey)) {
+        logger.debug(`💰 Found fuzzy fast pricing for ${modelName}: ${key}`)
+        return value
+      }
+    }
+
+    logger.debug(`💰 No fast pricing found for model: ${modelName}`)
+    return null
+  }
+
+  // 获取 1 小时缓存价格（优先使用 model_pricing.json 中的模型字段）
+  getEphemeral1hPricing(modelName, pricing = null) {
+    if (
+      pricing?.cache_creation_input_token_cost_above_1hr !== null &&
+      pricing?.cache_creation_input_token_cost_above_1hr !== undefined
+    ) {
+      return pricing.cache_creation_input_token_cost_above_1hr
+    }
+
     if (!modelName) {
       return 0
     }
 
     // 尝试直接匹配
-    if (this.ephemeral1hPricing[modelName]) {
+    if (
+      this.ephemeral1hPricing[modelName] !== null &&
+      this.ephemeral1hPricing[modelName] !== undefined
+    ) {
       return this.ephemeral1hPricing[modelName]
     }
 
@@ -487,8 +626,10 @@ class PricingService {
 
   // 计算使用费用
   calculateCost(usage, modelName) {
+    const normalizedModelName = this.stripLongContextSuffix(modelName)
+
     // 检查是否为 1M 上下文模型（用户通过 [1m] 后缀主动选择长上下文模式）
-    const isLongContextModel = modelName && modelName.includes('[1m]')
+    const isLongContextModel = typeof modelName === 'string' && modelName.includes('[1m]')
     let isLongContextRequest = false
     let useLongContextPricing = false
 
@@ -498,27 +639,31 @@ class PricingService {
     const cacheReadTokens = usage.cache_read_input_tokens || 0
     const totalInputTokens = inputTokens + cacheCreationTokens + cacheReadTokens
 
-    // 获取模型定价信息
-    const pricing = this.getModelPricing(modelName)
+    // 识别 Claude 特性标识
+    const betaFeatures = this.extractBetaFeatures(usage)
+    const hasContext1mBeta = betaFeatures.has(this.claudeFeatureFlags.context1mBeta)
+    const hasFastModeBeta = betaFeatures.has(this.claudeFeatureFlags.fastModeBeta)
+    const { responseSpeed, requestSpeed } = this.extractSpeedSignal(usage)
+    const hasFastSpeedSignal =
+      responseSpeed === this.claudeFeatureFlags.fastModeSpeed ||
+      requestSpeed === this.claudeFeatureFlags.fastModeSpeed
+    const isFastModeRequest =
+      hasFastModeBeta &&
+      hasFastSpeedSignal &&
+      this.isFastModeEligibleClaudeModel(normalizedModelName)
+    const standardPricing = this.getModelPricing(modelName)
+    const fastPricing = isFastModeRequest ? this.getFastModePricing(normalizedModelName) : null
+    const pricing = fastPricing || standardPricing
+    const isLongContextModeEnabled = isLongContextModel || hasContext1mBeta
 
-    // 当 [1m] 模型总输入超过 200K 且 model_pricing.json 有 above_200k 字段时，使用高档价格
+    // 当请求启用长上下文模式（[1m] 后缀或 context-1m beta）且总输入超过 200K 时，进入 200K+ 计费逻辑
     // 根据 Anthropic 官方文档：当总输入超过 200K 时，整个请求所有 token 类型都使用高档价格
-    if (isLongContextModel && totalInputTokens > 200000) {
+    if (isLongContextModeEnabled && totalInputTokens > 200000) {
       isLongContextRequest = true
-      // 检查 model_pricing.json 是否有 above_200k 字段
-      if (
-        pricing?.input_cost_per_token_above_200k_tokens !== null &&
-        pricing?.input_cost_per_token_above_200k_tokens !== undefined
-      ) {
-        useLongContextPricing = true
-        logger.info(
-          `💰 Using 200K+ pricing for ${modelName}: total input tokens = ${totalInputTokens.toLocaleString()}`
-        )
-      } else {
-        logger.warn(
-          `⚠️ Model ${modelName} exceeds 200K tokens but no above_200k pricing found in model_pricing.json`
-        )
-      }
+      useLongContextPricing = true
+      logger.info(
+        `💰 Using 200K+ pricing for ${modelName}: total input tokens = ${totalInputTokens.toLocaleString()}`
+      )
     }
 
     if (!pricing) {
@@ -535,32 +680,76 @@ class PricingService {
       }
     }
 
-    // 确定实际使用的价格（普通或 200K+ 高档价格）
-    const actualInputPrice = useLongContextPricing
-      ? pricing.input_cost_per_token_above_200k_tokens
-      : pricing.input_cost_per_token || 0
+    const isClaudeModel =
+      (modelName && modelName.toLowerCase().includes('claude')) ||
+      (typeof pricing?.litellm_provider === 'string' &&
+        pricing.litellm_provider.toLowerCase().includes('anthropic'))
 
-    const actualOutputPrice = useLongContextPricing
-      ? pricing.output_cost_per_token_above_200k_tokens
-      : pricing.output_cost_per_token || 0
+    if (isFastModeRequest && fastPricing) {
+      logger.info(`🚀 Fast mode pricing profile selected: fast/${normalizedModelName}`)
+    } else if (isFastModeRequest && !fastPricing) {
+      logger.warn(
+        `⚠️ Fast mode request detected but no fast pricing profile found for ${normalizedModelName}; fallback to standard profile`
+      )
+    }
 
-    const actualCacheCreatePrice = useLongContextPricing
-      ? pricing.cache_creation_input_token_cost_above_200k_tokens ||
-        pricing.cache_creation_input_token_cost ||
-        0
-      : pricing.cache_creation_input_token_cost || 0
+    const baseInputPrice = pricing.input_cost_per_token || 0
+    const hasInput200kPrice =
+      pricing.input_cost_per_token_above_200k_tokens !== null &&
+      pricing.input_cost_per_token_above_200k_tokens !== undefined
 
-    const actualCacheReadPrice = useLongContextPricing
-      ? pricing.cache_read_input_token_cost_above_200k_tokens ||
-        pricing.cache_read_input_token_cost ||
-        0
-      : pricing.cache_read_input_token_cost || 0
+    // 确定实际使用的输入价格（普通或 200K+ 高档价格）
+    // Claude 模型在 200K+ 场景下如果缺少官方字段，按 2 倍输入价兜底
+    let actualInputPrice = useLongContextPricing
+      ? hasInput200kPrice
+        ? pricing.input_cost_per_token_above_200k_tokens
+        : isClaudeModel
+          ? baseInputPrice * 2
+          : baseInputPrice
+      : baseInputPrice
 
-    // 1小时缓存的 200K+ 价格
-    const actualEphemeral1hPrice = useLongContextPricing
-      ? pricing.cache_creation_input_token_cost_above_1hr_above_200k_tokens ||
-        this.getEphemeral1hPricing(modelName)
-      : this.getEphemeral1hPricing(modelName)
+    const baseOutputPrice = pricing.output_cost_per_token || 0
+    const hasOutput200kPrice =
+      pricing.output_cost_per_token_above_200k_tokens !== null &&
+      pricing.output_cost_per_token_above_200k_tokens !== undefined
+    let actualOutputPrice = useLongContextPricing
+      ? hasOutput200kPrice
+        ? pricing.output_cost_per_token_above_200k_tokens
+        : baseOutputPrice
+      : baseOutputPrice
+
+    let actualCacheCreatePrice = 0
+    let actualCacheReadPrice = 0
+    let actualEphemeral1hPrice = 0
+
+    if (isClaudeModel) {
+      // Claude 模型缓存价格统一按输入价格倍率推导，避免来源字段不一致导致计费偏差
+      actualCacheCreatePrice = actualInputPrice * this.claudeCacheMultipliers.write5m
+      actualCacheReadPrice = actualInputPrice * this.claudeCacheMultipliers.read
+      actualEphemeral1hPrice = actualInputPrice * this.claudeCacheMultipliers.write1h
+    } else {
+      actualCacheCreatePrice = useLongContextPricing
+        ? pricing.cache_creation_input_token_cost_above_200k_tokens ||
+          pricing.cache_creation_input_token_cost ||
+          0
+        : pricing.cache_creation_input_token_cost || 0
+
+      actualCacheReadPrice = useLongContextPricing
+        ? pricing.cache_read_input_token_cost_above_200k_tokens ||
+          pricing.cache_read_input_token_cost ||
+          0
+        : pricing.cache_read_input_token_cost || 0
+
+      const defaultEphemeral1hPrice = this.getEphemeral1hPricing(modelName, pricing)
+
+      // 非 Claude 模型维持原有字段优先级
+      actualEphemeral1hPrice = useLongContextPricing
+        ? pricing.cache_creation_input_token_cost_above_1hr_above_200k_tokens !== null &&
+          pricing.cache_creation_input_token_cost_above_1hr_above_200k_tokens !== undefined
+          ? pricing.cache_creation_input_token_cost_above_1hr_above_200k_tokens
+          : defaultEphemeral1hPrice
+        : defaultEphemeral1hPrice
+    }
 
     // 计算各项费用
     const inputCost = inputTokens * actualInputPrice
diff --git a/tests/pricingService.test.js b/tests/pricingService.test.js
index 60e71281..40dc0dd7 100644
--- a/tests/pricingService.test.js
+++ b/tests/pricingService.test.js
@@ -2,8 +2,11 @@
  * PricingService 长上下文（200K+）分层计费测试
  *
  * 测试当 [1m] 模型总输入超过 200K tokens 时的分层计费逻辑：
- * - 使用 model_pricing.json 中的 *_above_200k_tokens 字段
- * - 所有 token 类型（input/output/cache_create/cache_read）都切换到高档价格
+ * - 输入/输出优先使用 model_pricing.json 中的 *_above_200k_tokens 字段
+ * - Claude 缓存价格按输入价格倍率推导：
+ *   - 5m cache write = input * 1.25
+ *   - 1h cache write = input * 2
+ *   - cache read = input * 0.1
  */
 
 // Mock logger to avoid console output during tests
@@ -44,6 +47,7 @@ describe('PricingService - 200K+ Long Context Pricing', () => {
       output_cost_per_token: 0.000015, // $15/MTok
       cache_creation_input_token_cost: 0.00000375, // $3.75/MTok
       cache_read_input_token_cost: 0.0000003, // $0.30/MTok
+      max_input_tokens: 1000000,
       // 200K+ 高档价格
       input_cost_per_token_above_200k_tokens: 0.000006, // $6/MTok (2x)
       output_cost_per_token_above_200k_tokens: 0.0000225, // $22.50/MTok (1.5x)
@@ -59,6 +63,15 @@ describe('PricingService - 200K+ Long Context Pricing', () => {
       output_cost_per_token: 0.00000125,
       cache_creation_input_token_cost: 0.0000003,
       cache_read_input_token_cost: 0.00000003
+    },
+    // Fast Mode 适配测试模型（Opus 4.6）
+    'claude-opus-4-6': {
+      input_cost_per_token: 0.000005,
+      output_cost_per_token: 0.000025,
+      cache_creation_input_token_cost: 0.00000625,
+      cache_read_input_token_cost: 0.0000005,
+      input_cost_per_token_above_200k_tokens: 0.00001,
+      output_cost_per_token_above_200k_tokens: 0.0000375
     }
   }
 
@@ -152,7 +165,7 @@ describe('PricingService - 200K+ Long Context Pricing', () => {
       expect(result.pricing.input).toBe(0.000006)
       expect(result.pricing.output).toBe(0.0000225)
       expect(result.pricing.cacheCreate).toBe(0.0000075)
-      expect(result.pricing.cacheRead).toBe(0.0000006)
+      expect(result.pricing.cacheRead).toBeCloseTo(0.0000006, 12)
     })
 
     it('仅 cache_creation + cache_read 超过 200K 也应触发', () => {
@@ -199,13 +212,13 @@ describe('PricingService - 200K+ Long Context Pricing', () => {
       const result = pricingService.calculateCost(usage, 'claude-sonnet-4-20250514[1m]')
 
       // cache_read_input_token_cost_above_200k_tokens = 0.0000006
-      expect(result.pricing.cacheRead).toBe(0.0000006)
+      expect(result.pricing.cacheRead).toBeCloseTo(0.0000006, 12)
       expect(result.cacheReadCost).toBeCloseTo(60000 * 0.0000006, 10)
     })
   })
 
   describe('详细缓存创建数据（ephemeral_5m / ephemeral_1h）', () => {
-    it('200K+ 时 ephemeral_1h 应使用 cache_creation_input_token_cost_above_1hr_above_200k_tokens', () => {
+    it('200K+ 时 Claude ephemeral_1h 应按 input * 2 计算', () => {
       const usage = {
         input_tokens: 200001,
         output_tokens: 1000,
@@ -222,26 +235,88 @@ describe('PricingService - 200K+ Long Context Pricing', () => {
       expect(result.isLongContextRequest).toBe(true)
       // ephemeral_5m: 5000 * 0.0000075 = 0.0000375
       expect(result.ephemeral5mCost).toBeCloseTo(5000 * 0.0000075, 10)
-      // ephemeral_1h: 5000 * 0.000015 (above_1hr_above_200k)
-      expect(result.ephemeral1hCost).toBeCloseTo(5000 * 0.000015, 10)
+      // 200K+ input = 0.000006, ephemeral_1h = input * 2 = 0.000012
+      expect(result.pricing.ephemeral1h).toBeCloseTo(0.000012, 10)
+      expect(result.ephemeral1hCost).toBeCloseTo(5000 * 0.000012, 10)
     })
   })
 
   describe('回退测试', () => {
-    it('模型无 above_200k 字段时回退到基础价格', () => {
+    it('Claude 模型无 above_200k 字段时，200K+ 输入价格按 2 倍并推导缓存价格', () => {
       const usage = {
         input_tokens: 250000,
         output_tokens: 1000,
-        cache_creation_input_tokens: 0,
-        cache_read_input_tokens: 0
+        cache_creation_input_tokens: 10000,
+        cache_read_input_tokens: 10000
       }
 
       const result = pricingService.calculateCost(usage, 'claude-3-haiku-20240307[1m]')
 
-      // 模型没有 above_200k 字段，使用基础价格
-      expect(result.isLongContextRequest).toBe(true) // 超过 200K
-      expect(result.pricing.input).toBe(0.00000025) // 基础价格（没有 above_200k 字段）
-      expect(result.pricing.cacheCreate).toBe(0.0000003) // 基础价格
+      // 模型没有 above_200k 字段，Claude 200K+ 输入按 2 倍兜底
+      expect(result.isLongContextRequest).toBe(true)
+      expect(result.pricing.input).toBe(0.0000005) // 0.00000025 * 2
+      // 缓存价格由输入价格推导
+      expect(result.pricing.cacheCreate).toBeCloseTo(0.000000625, 12) // input * 1.25
+      expect(result.pricing.cacheRead).toBeCloseTo(0.00000005, 12) // input * 0.1
+    })
+  })
+
+  describe('Header 与 Fast Mode 适配', () => {
+    it('无 [1m] 后缀但带 context-1m beta，超过 200K 时应触发长上下文计费', () => {
+      const usage = {
+        input_tokens: 210000,
+        output_tokens: 1000,
+        cache_creation_input_tokens: 0,
+        cache_read_input_tokens: 0,
+        request_anthropic_beta: 'context-1m-2025-08-07'
+      }
+
+      const result = pricingService.calculateCost(usage, 'claude-sonnet-4-20250514')
+
+      expect(result.isLongContextRequest).toBe(true)
+      expect(result.pricing.input).toBe(0.000006)
+      expect(result.pricing.output).toBe(0.0000225)
+    })
+
+    it('Opus 4.6 在 fast-mode beta + speed=fast 时应用 Fast Mode 6x', () => {
+      const usage = {
+        input_tokens: 100000,
+        output_tokens: 20000,
+        cache_creation_input_tokens: 10000,
+        cache_read_input_tokens: 5000,
+        request_anthropic_beta: 'fast-mode-2026-02-01',
+        speed: 'fast'
+      }
+
+      const result = pricingService.calculateCost(usage, 'claude-opus-4-6')
+
+      // input: 0.000005 * 6 = 0.00003
+      expect(result.pricing.input).toBeCloseTo(0.00003, 12)
+      // output: 0.000025 * 6 = 0.00015
+      expect(result.pricing.output).toBeCloseTo(0.00015, 12)
+      // cache create/read 由 fast 后 input 推导
+      expect(result.pricing.cacheCreate).toBeCloseTo(0.0000375, 12) // 0.00003 * 1.25
+      expect(result.pricing.cacheRead).toBeCloseTo(0.000003, 12) // 0.00003 * 0.1
+      expect(result.pricing.ephemeral1h).toBeCloseTo(0.00006, 12) // 0.00003 * 2
+    })
+
+    it('Opus 4.6 在 fast-mode + [1m] 且超过 200K 时应叠加计费（12x input）', () => {
+      const usage = {
+        input_tokens: 210000,
+        output_tokens: 1000,
+        cache_creation_input_tokens: 10000,
+        cache_read_input_tokens: 10000,
+        request_anthropic_beta: 'fast-mode-2026-02-01,context-1m-2025-08-07',
+        speed: 'fast'
+      }
+
+      const result = pricingService.calculateCost(usage, 'claude-opus-4-6[1m]')
+
+      expect(result.isLongContextRequest).toBe(true)
+      // input: 0.000005 -> long context 0.00001 -> fast 6x => 0.00006 (即标准 12x)
+      expect(result.pricing.input).toBeCloseTo(0.00006, 12)
+      // output: 0.000025 -> long context 0.0000375 -> fast 6x => 0.000225 (即标准 9x)
+      expect(result.pricing.output).toBeCloseTo(0.000225, 12)
     })
   })
 
@@ -261,7 +336,7 @@ describe('PricingService - 200K+ Long Context Pricing', () => {
       expect(result.pricing.input).toBe(0.000003) // 基础价格
       expect(result.pricing.output).toBe(0.000015) // 基础价格
       expect(result.pricing.cacheCreate).toBe(0.00000375) // 基础价格
-      expect(result.pricing.cacheRead).toBe(0.0000003) // 基础价格
+      expect(result.pricing.cacheRead).toBeCloseTo(0.0000003, 12) // 基础价格
     })
 
     it('[1m] 模型未超过 200K 时使用基础价格', () => {

From 5376428dd98e02e77c1625def690416bbabdb31f Mon Sep 17 00:00:00 2001
From: sczheng189 <724100151@qq.com>
Date: Mon, 23 Feb 2026 20:12:42 +0800
Subject: [PATCH 3/6] =?UTF-8?q?fix:=20=E6=B7=BB=E5=8A=A0=E5=AF=B9=20epheme?=
 =?UTF-8?q?ral=205m=20=E5=92=8C=201h=20=E4=BB=A4=E7=89=8C=E7=9A=84?=
 =?UTF-8?q?=E6=94=AF=E6=8C=81=EF=BC=8C=E4=BC=98=E5=8C=96=E8=B4=B9=E7=94=A8?=
 =?UTF-8?q?=E8=AE=A1=E7=AE=97=E9=80=BB=E8=BE=91?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .gitignore                            |  2 ++
 src/models/redis.js                   | 24 ++++++++++++++
 src/routes/admin/apiKeys.js           | 31 +++++++++++++-----
 src/routes/admin/dashboard.js         | 14 +++++++-
 src/routes/admin/usageStats.js        | 47 +++++++++++++++++++++++++++
 src/routes/apiStats.js                | 14 +++++++-
 src/services/accountBalanceService.js | 10 ++++++
 7 files changed, 131 insertions(+), 11 deletions(-)

diff --git a/.gitignore b/.gitignore
index e4c9e9c1..71121e38 100644
--- a/.gitignore
+++ b/.gitignore
@@ -247,3 +247,5 @@ web/apiStats/
 
 # Admin SPA build files
 web/admin-spa/dist/
+
+.serena/
diff --git a/src/models/redis.js b/src/models/redis.js
index 2f139d6a..dbac2a7d 100644
--- a/src/models/redis.js
+++ b/src/models/redis.js
@@ -1084,6 +1084,9 @@ class RedisClient {
     pipeline.hincrby(modelDaily, 'cacheReadTokens', finalCacheReadTokens)
     pipeline.hincrby(modelDaily, 'allTokens', totalTokens)
     pipeline.hincrby(modelDaily, 'requests', 1)
+    // 详细缓存类型统计
+    pipeline.hincrby(modelDaily, 'ephemeral5mTokens', ephemeral5mTokens)
+    pipeline.hincrby(modelDaily, 'ephemeral1hTokens', ephemeral1hTokens)
 
     // 按模型统计 - 每月
     pipeline.hincrby(modelMonthly, 'inputTokens', finalInputTokens)
@@ -1092,6 +1095,9 @@ class RedisClient {
     pipeline.hincrby(modelMonthly, 'cacheReadTokens', finalCacheReadTokens)
     pipeline.hincrby(modelMonthly, 'allTokens', totalTokens)
     pipeline.hincrby(modelMonthly, 'requests', 1)
+    // 详细缓存类型统计
+    pipeline.hincrby(modelMonthly, 'ephemeral5mTokens', ephemeral5mTokens)
+    pipeline.hincrby(modelMonthly, 'ephemeral1hTokens', ephemeral1hTokens)
 
     // API Key级别的模型统计 - 每日
     pipeline.hincrby(keyModelDaily, 'inputTokens', finalInputTokens)
@@ -1136,6 +1142,9 @@ class RedisClient {
     pipeline.hincrby(keyModelAlltime, 'cacheCreateTokens', finalCacheCreateTokens)
     pipeline.hincrby(keyModelAlltime, 'cacheReadTokens', finalCacheReadTokens)
     pipeline.hincrby(keyModelAlltime, 'requests', 1)
+    // 详细缓存类型统计
+    pipeline.hincrby(keyModelAlltime, 'ephemeral5mTokens', ephemeral5mTokens)
+    pipeline.hincrby(keyModelAlltime, 'ephemeral1hTokens', ephemeral1hTokens)
     // 费用统计
     if (realCost > 0) {
       pipeline.hincrby(keyModelAlltime, 'realCostMicro', Math.round(realCost * 1000000))
@@ -1152,6 +1161,9 @@ class RedisClient {
     pipeline.hincrby(hourly, 'cacheReadTokens', finalCacheReadTokens)
     pipeline.hincrby(hourly, 'allTokens', totalTokens)
     pipeline.hincrby(hourly, 'requests', 1)
+    // 详细缓存类型统计
+    pipeline.hincrby(hourly, 'ephemeral5mTokens', ephemeral5mTokens)
+    pipeline.hincrby(hourly, 'ephemeral1hTokens', ephemeral1hTokens)
 
     // 按模型统计 - 每小时
     pipeline.hincrby(modelHourly, 'inputTokens', finalInputTokens)
@@ -1160,6 +1172,9 @@ class RedisClient {
     pipeline.hincrby(modelHourly, 'cacheReadTokens', finalCacheReadTokens)
     pipeline.hincrby(modelHourly, 'allTokens', totalTokens)
     pipeline.hincrby(modelHourly, 'requests', 1)
+    // 详细缓存类型统计
+    pipeline.hincrby(modelHourly, 'ephemeral5mTokens', ephemeral5mTokens)
+    pipeline.hincrby(modelHourly, 'ephemeral1hTokens', ephemeral1hTokens)
 
     // API Key级别的模型统计 - 每小时
     pipeline.hincrby(keyModelHourly, 'inputTokens', finalInputTokens)
@@ -1168,6 +1183,9 @@ class RedisClient {
     pipeline.hincrby(keyModelHourly, 'cacheReadTokens', finalCacheReadTokens)
     pipeline.hincrby(keyModelHourly, 'allTokens', totalTokens)
     pipeline.hincrby(keyModelHourly, 'requests', 1)
+    // 详细缓存类型统计
+    pipeline.hincrby(keyModelHourly, 'ephemeral5mTokens', ephemeral5mTokens)
+    pipeline.hincrby(keyModelHourly, 'ephemeral1hTokens', ephemeral1hTokens)
     // 费用统计
     if (realCost > 0) {
       pipeline.hincrby(keyModelHourly, 'realCostMicro', Math.round(realCost * 1000000))
@@ -1235,18 +1253,24 @@ class RedisClient {
     pipeline.hincrby('usage:global:total', 'cacheCreateTokens', finalCacheCreateTokens)
     pipeline.hincrby('usage:global:total', 'cacheReadTokens', finalCacheReadTokens)
     pipeline.hincrby('usage:global:total', 'allTokens', totalTokens)
+    pipeline.hincrby('usage:global:total', 'ephemeral5mTokens', ephemeral5mTokens)
+    pipeline.hincrby('usage:global:total', 'ephemeral1hTokens', ephemeral1hTokens)
     pipeline.hincrby(globalDaily, 'requests', 1)
     pipeline.hincrby(globalDaily, 'inputTokens', finalInputTokens)
     pipeline.hincrby(globalDaily, 'outputTokens', finalOutputTokens)
     pipeline.hincrby(globalDaily, 'cacheCreateTokens', finalCacheCreateTokens)
     pipeline.hincrby(globalDaily, 'cacheReadTokens', finalCacheReadTokens)
     pipeline.hincrby(globalDaily, 'allTokens', totalTokens)
+    pipeline.hincrby(globalDaily, 'ephemeral5mTokens', ephemeral5mTokens)
+    pipeline.hincrby(globalDaily, 'ephemeral1hTokens', ephemeral1hTokens)
     pipeline.hincrby(globalMonthly, 'requests', 1)
     pipeline.hincrby(globalMonthly, 'inputTokens', finalInputTokens)
     pipeline.hincrby(globalMonthly, 'outputTokens', finalOutputTokens)
     pipeline.hincrby(globalMonthly, 'cacheCreateTokens', finalCacheCreateTokens)
     pipeline.hincrby(globalMonthly, 'cacheReadTokens', finalCacheReadTokens)
     pipeline.hincrby(globalMonthly, 'allTokens', totalTokens)
+    pipeline.hincrby(globalMonthly, 'ephemeral5mTokens', ephemeral5mTokens)
+    pipeline.hincrby(globalMonthly, 'ephemeral1hTokens', ephemeral1hTokens)
     pipeline.expire(globalDaily, 86400 * 32)
     pipeline.expire(globalMonthly, 86400 * 365)
 
diff --git a/src/routes/admin/apiKeys.js b/src/routes/admin/apiKeys.js
index 7bbc4f8a..d348493c 100644
--- a/src/routes/admin/apiKeys.js
+++ b/src/routes/admin/apiKeys.js
@@ -1289,6 +1289,8 @@ async function calculateKeyStats(keyId, timeRange, startDate, endDate) {
         outputTokens: 0,
         cacheCreateTokens: 0,
         cacheReadTokens: 0,
+        ephemeral5mTokens: 0,
+        ephemeral1hTokens: 0,
         requests: 0
       })
     }
@@ -1300,6 +1302,10 @@ async function calculateKeyStats(keyId, timeRange, startDate, endDate) {
       parseInt(data.totalCacheCreateTokens) || parseInt(data.cacheCreateTokens) || 0
     stats.cacheReadTokens +=
       parseInt(data.totalCacheReadTokens) || parseInt(data.cacheReadTokens) || 0
+    stats.ephemeral5mTokens +=
+      parseInt(data.totalEphemeral5mTokens) || parseInt(data.ephemeral5mTokens) || 0
+    stats.ephemeral1hTokens +=
+      parseInt(data.totalEphemeral1hTokens) || parseInt(data.ephemeral1hTokens) || 0
     stats.requests += parseInt(data.totalRequests) || parseInt(data.requests) || 0
 
     totalRequests += parseInt(data.totalRequests) || parseInt(data.requests) || 0
@@ -1318,15 +1324,22 @@ async function calculateKeyStats(keyId, timeRange, startDate, endDate) {
     cacheCreateTokens += stats.cacheCreateTokens
     cacheReadTokens += stats.cacheReadTokens
 
-    const costResult = CostCalculator.calculateCost(
-      {
-        input_tokens: stats.inputTokens,
-        output_tokens: stats.outputTokens,
-        cache_creation_input_tokens: stats.cacheCreateTokens,
-        cache_read_input_tokens: stats.cacheReadTokens
-      },
-      model
-    )
+    const costUsage = {
+      input_tokens: stats.inputTokens,
+      output_tokens: stats.outputTokens,
+      cache_creation_input_tokens: stats.cacheCreateTokens,
+      cache_read_input_tokens: stats.cacheReadTokens
+    }
+
+    // 如果有 ephemeral 5m/1h 拆分数据，添加 cache_creation 子对象以实现精确计费
+    if (stats.ephemeral5mTokens > 0 || stats.ephemeral1hTokens > 0) {
+      costUsage.cache_creation = {
+        ephemeral_5m_input_tokens: stats.ephemeral5mTokens,
+        ephemeral_1h_input_tokens: stats.ephemeral1hTokens
+      }
+    }
+
+    const costResult = CostCalculator.calculateCost(costUsage, model)
     totalCost += costResult.costs.total
   }
 
diff --git a/src/routes/admin/dashboard.js b/src/routes/admin/dashboard.js
index fb47f98e..52f7f09e 100644
--- a/src/routes/admin/dashboard.js
+++ b/src/routes/admin/dashboard.js
@@ -472,7 +472,9 @@ router.get('/model-stats', authenticateAdmin, async (req, res) => {
           outputTokens: 0,
           cacheCreateTokens: 0,
           cacheReadTokens: 0,
-          allTokens: 0
+          allTokens: 0,
+          ephemeral5mTokens: 0,
+          ephemeral1hTokens: 0
         }
 
         stats.requests += parseInt(data.requests) || 0
@@ -481,6 +483,8 @@ router.get('/model-stats', authenticateAdmin, async (req, res) => {
         stats.cacheCreateTokens += parseInt(data.cacheCreateTokens) || 0
         stats.cacheReadTokens += parseInt(data.cacheReadTokens) || 0
         stats.allTokens += parseInt(data.allTokens) || 0
+        stats.ephemeral5mTokens += parseInt(data.ephemeral5mTokens) || 0
+        stats.ephemeral1hTokens += parseInt(data.ephemeral1hTokens) || 0
 
         modelStatsMap.set(normalizedModel, stats)
       }
@@ -497,6 +501,14 @@ router.get('/model-stats', authenticateAdmin, async (req, res) => {
         cache_read_input_tokens: stats.cacheReadTokens
       }
 
+      // 如果有 ephemeral 5m/1h 拆分数据，添加 cache_creation 子对象以实现精确计费
+      if (stats.ephemeral5mTokens > 0 || stats.ephemeral1hTokens > 0) {
+        usage.cache_creation = {
+          ephemeral_5m_input_tokens: stats.ephemeral5mTokens,
+          ephemeral_1h_input_tokens: stats.ephemeral1hTokens
+        }
+      }
+
       // 计算费用
       const costData = CostCalculator.calculateCost(usage, model)
 
diff --git a/src/routes/admin/usageStats.js b/src/routes/admin/usageStats.js
index cfa61bd4..5bd9ea79 100644
--- a/src/routes/admin/usageStats.js
+++ b/src/routes/admin/usageStats.js
@@ -786,6 +786,8 @@ router.get('/usage-trend', authenticateAdmin, async (req, res) => {
           const modelOutputTokens = parseInt(data.outputTokens) || 0
           const modelCacheCreateTokens = parseInt(data.cacheCreateTokens) || 0
           const modelCacheReadTokens = parseInt(data.cacheReadTokens) || 0
+          const modelEphemeral5mTokens = parseInt(data.ephemeral5mTokens) || 0
+          const modelEphemeral1hTokens = parseInt(data.ephemeral1hTokens) || 0
           const modelRequests = parseInt(data.requests) || 0
 
           dayInputTokens += modelInputTokens
@@ -800,6 +802,15 @@ router.get('/usage-trend', authenticateAdmin, async (req, res) => {
             cache_creation_input_tokens: modelCacheCreateTokens,
             cache_read_input_tokens: modelCacheReadTokens
           }
+
+          // 如果有 ephemeral 5m/1h 拆分数据，添加 cache_creation 子对象以实现精确计费
+          if (modelEphemeral5mTokens > 0 || modelEphemeral1hTokens > 0) {
+            modelUsage.cache_creation = {
+              ephemeral_5m_input_tokens: modelEphemeral5mTokens,
+              ephemeral_1h_input_tokens: modelEphemeral1hTokens
+            }
+          }
+
           const modelCostResult = CostCalculator.calculateCost(modelUsage, model)
           dayCost += modelCostResult.costs.total
         }
@@ -948,6 +959,8 @@ router.get('/api-keys/:keyId/model-stats', authenticateAdmin, async (req, res) =
               outputTokens: 0,
               cacheCreateTokens: 0,
               cacheReadTokens: 0,
+              ephemeral5mTokens: 0,
+              ephemeral1hTokens: 0,
               allTokens: 0
             })
           }
@@ -957,6 +970,8 @@ router.get('/api-keys/:keyId/model-stats', authenticateAdmin, async (req, res) =
           stats.outputTokens += parseInt(data.outputTokens) || 0
           stats.cacheCreateTokens += parseInt(data.cacheCreateTokens) || 0
           stats.cacheReadTokens += parseInt(data.cacheReadTokens) || 0
+          stats.ephemeral5mTokens += parseInt(data.ephemeral5mTokens) || 0
+          stats.ephemeral1hTokens += parseInt(data.ephemeral1hTokens) || 0
           stats.allTokens += parseInt(data.allTokens) || 0
         }
       }
@@ -992,6 +1007,8 @@ router.get('/api-keys/:keyId/model-stats', authenticateAdmin, async (req, res) =
             outputTokens: 0,
             cacheCreateTokens: 0,
             cacheReadTokens: 0,
+            ephemeral5mTokens: 0,
+            ephemeral1hTokens: 0,
             allTokens: 0
           })
         }
@@ -1001,6 +1018,8 @@ router.get('/api-keys/:keyId/model-stats', authenticateAdmin, async (req, res) =
         stats.outputTokens += parseInt(data.outputTokens) || 0
         stats.cacheCreateTokens += parseInt(data.cacheCreateTokens) || 0
         stats.cacheReadTokens += parseInt(data.cacheReadTokens) || 0
+        stats.ephemeral5mTokens += parseInt(data.ephemeral5mTokens) || 0
+        stats.ephemeral1hTokens += parseInt(data.ephemeral1hTokens) || 0
         stats.allTokens += parseInt(data.allTokens) || 0
       }
     }
@@ -1016,6 +1035,14 @@ router.get('/api-keys/:keyId/model-stats', authenticateAdmin, async (req, res) =
         cache_read_input_tokens: stats.cacheReadTokens
       }
 
+      // 如果有 ephemeral 5m/1h 拆分数据，添加 cache_creation 子对象以实现精确计费
+      if (stats.ephemeral5mTokens > 0 || stats.ephemeral1hTokens > 0) {
+        usage.cache_creation = {
+          ephemeral_5m_input_tokens: stats.ephemeral5mTokens,
+          ephemeral_1h_input_tokens: stats.ephemeral1hTokens
+        }
+      }
+
       // 使用CostCalculator计算费用
       const costData = CostCalculator.calculateCost(usage, model)
 
@@ -1424,6 +1451,16 @@ router.get('/account-usage-trend', authenticateAdmin, async (req, res) => {
               cache_read_input_tokens: parseInt(modelData.cacheReadTokens) || 0
             }
 
+            // 如果有 ephemeral 5m/1h 拆分数据，添加 cache_creation 子对象以实现精确计费
+            const eph5m = parseInt(modelData.ephemeral5mTokens) || 0
+            const eph1h = parseInt(modelData.ephemeral1hTokens) || 0
+            if (eph5m > 0 || eph1h > 0) {
+              usage.cache_creation = {
+                ephemeral_5m_input_tokens: eph5m,
+                ephemeral_1h_input_tokens: eph1h
+              }
+            }
+
             const costResult = CostCalculator.calculateCost(usage, modelName)
             cost += costResult.costs.total
           }
@@ -1582,6 +1619,16 @@ router.get('/account-usage-trend', authenticateAdmin, async (req, res) => {
               cache_read_input_tokens: parseInt(modelData.cacheReadTokens) || 0
             }
 
+            // 如果有 ephemeral 5m/1h 拆分数据，添加 cache_creation 子对象以实现精确计费
+            const eph5m = parseInt(modelData.ephemeral5mTokens) || 0
+            const eph1h = parseInt(modelData.ephemeral1hTokens) || 0
+            if (eph5m > 0 || eph1h > 0) {
+              usage.cache_creation = {
+                ephemeral_5m_input_tokens: eph5m,
+                ephemeral_1h_input_tokens: eph1h
+              }
+            }
+
             const costResult = CostCalculator.calculateCost(usage, modelName)
             cost += costResult.costs.total
           }
diff --git a/src/routes/apiStats.js b/src/routes/apiStats.js
index 9d7b2ca6..a156df65 100644
--- a/src/routes/apiStats.js
+++ b/src/routes/apiStats.js
@@ -270,7 +270,9 @@ router.post('/api/user-stats', async (req, res) => {
                 inputTokens: 0,
                 outputTokens: 0,
                 cacheCreateTokens: 0,
-                cacheReadTokens: 0
+                cacheReadTokens: 0,
+                ephemeral5mTokens: 0,
+                ephemeral1hTokens: 0
               })
             }
 
@@ -279,6 +281,8 @@ router.post('/api/user-stats', async (req, res) => {
             modelUsage.outputTokens += parseInt(data.outputTokens) || 0
             modelUsage.cacheCreateTokens += parseInt(data.cacheCreateTokens) || 0
             modelUsage.cacheReadTokens += parseInt(data.cacheReadTokens) || 0
+            modelUsage.ephemeral5mTokens += parseInt(data.ephemeral5mTokens) || 0
+            modelUsage.ephemeral1hTokens += parseInt(data.ephemeral1hTokens) || 0
           }
         }
 
@@ -291,6 +295,14 @@ router.post('/api/user-stats', async (req, res) => {
             cache_read_input_tokens: usage.cacheReadTokens
           }
 
+          // 如果有 ephemeral 5m/1h 拆分数据，添加 cache_creation 子对象以实现精确计费
+          if (usage.ephemeral5mTokens > 0 || usage.ephemeral1hTokens > 0) {
+            usageData.cache_creation = {
+              ephemeral_5m_input_tokens: usage.ephemeral5mTokens,
+              ephemeral_1h_input_tokens: usage.ephemeral1hTokens
+            }
+          }
+
           const costResult = CostCalculator.calculateCost(usageData, model)
           totalCost += costResult.costs.total
         }
diff --git a/src/services/accountBalanceService.js b/src/services/accountBalanceService.js
index ec25f171..81fd0501 100644
--- a/src/services/accountBalanceService.js
+++ b/src/services/accountBalanceService.js
@@ -607,6 +607,16 @@ class AccountBalanceService {
             cache_read_input_tokens: parseInt(data.cacheReadTokens || 0)
           }
 
+          // 如果有 ephemeral 5m/1h 拆分数据，添加 cache_creation 子对象以实现精确计费
+          const eph5m = parseInt(data.ephemeral5mTokens || 0)
+          const eph1h = parseInt(data.ephemeral1hTokens || 0)
+          if (eph5m > 0 || eph1h > 0) {
+            usage.cache_creation = {
+              ephemeral_5m_input_tokens: eph5m,
+              ephemeral_1h_input_tokens: eph1h
+            }
+          }
+
           const costResult = CostCalculator.calculateCost(usage, model)
           totalCost += costResult.costs.total || 0
         }

From bfae62bfeb294a519d4d2ebe78cd1ee145899cb9 Mon Sep 17 00:00:00 2001
From: sczheng189 <724100151@qq.com>
Date: Mon, 23 Feb 2026 21:20:18 +0800
Subject: [PATCH 4/6] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0=E5=AF=B9=20ephem?=
 =?UTF-8?q?eral=205m=20=E5=92=8C=201h=20=E4=BB=A4=E7=89=8C=E7=9A=84?=
 =?UTF-8?q?=E6=94=AF=E6=8C=81=EF=BC=8C=E4=BC=98=E5=8C=96=E8=B4=B9=E7=94=A8?=
 =?UTF-8?q?=E8=AE=A1=E7=AE=97=E9=80=BB=E8=BE=91?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/models/redis.js                |  65 +++++++++
 src/routes/admin/claudeAccounts.js |   8 ++
 src/routes/admin/usageStats.js     | 222 ++++++++++++++++++++++++++---
 src/routes/apiStats.js             |  38 +++++
 src/services/apiKeyService.js      |   4 +
 src/services/costInitService.js    |  10 ++
 src/services/droidRelayService.js  |   2 +
 7 files changed, 331 insertions(+), 18 deletions(-)

diff --git a/src/models/redis.js b/src/models/redis.js
index dbac2a7d..bdffc9c6 100644
--- a/src/models/redis.js
+++ b/src/models/redis.js
@@ -1286,6 +1286,8 @@ class RedisClient {
     outputTokens = 0,
     cacheCreateTokens = 0,
     cacheReadTokens = 0,
+    ephemeral5mTokens = 0,
+    ephemeral1hTokens = 0,
     model = 'unknown',
     isLongContextRequest = false
   ) {
@@ -1317,6 +1319,8 @@ class RedisClient {
     const finalOutputTokens = outputTokens || 0
     const finalCacheCreateTokens = cacheCreateTokens || 0
     const finalCacheReadTokens = cacheReadTokens || 0
+    const finalEphemeral5mTokens = ephemeral5mTokens || 0
+    const finalEphemeral1hTokens = ephemeral1hTokens || 0
     const actualTotalTokens =
       finalInputTokens + finalOutputTokens + finalCacheCreateTokens + finalCacheReadTokens
     const coreTokens = finalInputTokens + finalOutputTokens
@@ -1329,6 +1333,8 @@ class RedisClient {
       this.client.hincrby(accountKey, 'totalOutputTokens', finalOutputTokens),
       this.client.hincrby(accountKey, 'totalCacheCreateTokens', finalCacheCreateTokens),
       this.client.hincrby(accountKey, 'totalCacheReadTokens', finalCacheReadTokens),
+      this.client.hincrby(accountKey, 'totalEphemeral5mTokens', finalEphemeral5mTokens),
+      this.client.hincrby(accountKey, 'totalEphemeral1hTokens', finalEphemeral1hTokens),
       this.client.hincrby(accountKey, 'totalAllTokens', actualTotalTokens),
       this.client.hincrby(accountKey, 'totalRequests', 1),
 
@@ -1338,6 +1344,8 @@ class RedisClient {
       this.client.hincrby(accountDaily, 'outputTokens', finalOutputTokens),
       this.client.hincrby(accountDaily, 'cacheCreateTokens', finalCacheCreateTokens),
       this.client.hincrby(accountDaily, 'cacheReadTokens', finalCacheReadTokens),
+      this.client.hincrby(accountDaily, 'ephemeral5mTokens', finalEphemeral5mTokens),
+      this.client.hincrby(accountDaily, 'ephemeral1hTokens', finalEphemeral1hTokens),
       this.client.hincrby(accountDaily, 'allTokens', actualTotalTokens),
       this.client.hincrby(accountDaily, 'requests', 1),
 
@@ -1347,6 +1355,8 @@ class RedisClient {
       this.client.hincrby(accountMonthly, 'outputTokens', finalOutputTokens),
       this.client.hincrby(accountMonthly, 'cacheCreateTokens', finalCacheCreateTokens),
       this.client.hincrby(accountMonthly, 'cacheReadTokens', finalCacheReadTokens),
+      this.client.hincrby(accountMonthly, 'ephemeral5mTokens', finalEphemeral5mTokens),
+      this.client.hincrby(accountMonthly, 'ephemeral1hTokens', finalEphemeral1hTokens),
       this.client.hincrby(accountMonthly, 'allTokens', actualTotalTokens),
       this.client.hincrby(accountMonthly, 'requests', 1),
 
@@ -1356,6 +1366,8 @@ class RedisClient {
       this.client.hincrby(accountHourly, 'outputTokens', finalOutputTokens),
       this.client.hincrby(accountHourly, 'cacheCreateTokens', finalCacheCreateTokens),
       this.client.hincrby(accountHourly, 'cacheReadTokens', finalCacheReadTokens),
+      this.client.hincrby(accountHourly, 'ephemeral5mTokens', finalEphemeral5mTokens),
+      this.client.hincrby(accountHourly, 'ephemeral1hTokens', finalEphemeral1hTokens),
       this.client.hincrby(accountHourly, 'allTokens', actualTotalTokens),
       this.client.hincrby(accountHourly, 'requests', 1),
 
@@ -1376,6 +1388,16 @@ class RedisClient {
         `model:${normalizedModel}:cacheReadTokens`,
         finalCacheReadTokens
       ),
+      this.client.hincrby(
+        accountHourly,
+        `model:${normalizedModel}:ephemeral5mTokens`,
+        finalEphemeral5mTokens
+      ),
+      this.client.hincrby(
+        accountHourly,
+        `model:${normalizedModel}:ephemeral1hTokens`,
+        finalEphemeral1hTokens
+      ),
       this.client.hincrby(accountHourly, `model:${normalizedModel}:allTokens`, actualTotalTokens),
       this.client.hincrby(accountHourly, `model:${normalizedModel}:requests`, 1),
 
@@ -1384,6 +1406,8 @@ class RedisClient {
       this.client.hincrby(accountModelDaily, 'outputTokens', finalOutputTokens),
       this.client.hincrby(accountModelDaily, 'cacheCreateTokens', finalCacheCreateTokens),
       this.client.hincrby(accountModelDaily, 'cacheReadTokens', finalCacheReadTokens),
+      this.client.hincrby(accountModelDaily, 'ephemeral5mTokens', finalEphemeral5mTokens),
+      this.client.hincrby(accountModelDaily, 'ephemeral1hTokens', finalEphemeral1hTokens),
       this.client.hincrby(accountModelDaily, 'allTokens', actualTotalTokens),
       this.client.hincrby(accountModelDaily, 'requests', 1),
 
@@ -1392,6 +1416,8 @@ class RedisClient {
       this.client.hincrby(accountModelMonthly, 'outputTokens', finalOutputTokens),
       this.client.hincrby(accountModelMonthly, 'cacheCreateTokens', finalCacheCreateTokens),
       this.client.hincrby(accountModelMonthly, 'cacheReadTokens', finalCacheReadTokens),
+      this.client.hincrby(accountModelMonthly, 'ephemeral5mTokens', finalEphemeral5mTokens),
+      this.client.hincrby(accountModelMonthly, 'ephemeral1hTokens', finalEphemeral1hTokens),
       this.client.hincrby(accountModelMonthly, 'allTokens', actualTotalTokens),
       this.client.hincrby(accountModelMonthly, 'requests', 1),
 
@@ -1400,6 +1426,8 @@ class RedisClient {
       this.client.hincrby(accountModelHourly, 'outputTokens', finalOutputTokens),
       this.client.hincrby(accountModelHourly, 'cacheCreateTokens', finalCacheCreateTokens),
       this.client.hincrby(accountModelHourly, 'cacheReadTokens', finalCacheReadTokens),
+      this.client.hincrby(accountModelHourly, 'ephemeral5mTokens', finalEphemeral5mTokens),
+      this.client.hincrby(accountModelHourly, 'ephemeral1hTokens', finalEphemeral1hTokens),
       this.client.hincrby(accountModelHourly, 'allTokens', actualTotalTokens),
       this.client.hincrby(accountModelHourly, 'requests', 1),
 
@@ -1867,6 +1895,16 @@ class RedisClient {
           cache_read_input_tokens: parseInt(modelUsage.cacheReadTokens || 0)
         }
 
+        // 添加 cache_creation 子对象以支持精确 ephemeral 定价
+        const eph5m = parseInt(modelUsage.ephemeral5mTokens) || 0
+        const eph1h = parseInt(modelUsage.ephemeral1hTokens) || 0
+        if (eph5m > 0 || eph1h > 0) {
+          usage.cache_creation = {
+            ephemeral_5m_input_tokens: eph5m,
+            ephemeral_1h_input_tokens: eph1h
+          }
+        }
+
         const costResult = CostCalculator.calculateCost(usage, model)
         totalCost += costResult.costs.total
 
@@ -1955,6 +1993,16 @@ class RedisClient {
           cache_read_input_tokens: parseInt(modelUsage.cacheReadTokens || 0)
         }
 
+        // 添加 cache_creation 子对象以支持精确 ephemeral 定价
+        const eph5m = parseInt(modelUsage.ephemeral5mTokens) || 0
+        const eph1h = parseInt(modelUsage.ephemeral1hTokens) || 0
+        if (eph5m > 0 || eph1h > 0) {
+          usage.cache_creation = {
+            ephemeral_5m_input_tokens: eph5m,
+            ephemeral_1h_input_tokens: eph1h
+          }
+        }
+
         const costResult = CostCalculator.calculateCost(usage, model)
         costMap.set(accountId, costMap.get(accountId) + costResult.costs.total)
       }
@@ -1996,6 +2044,17 @@ class RedisClient {
           cache_creation_input_tokens: parseInt(modelUsage.cacheCreateTokens || 0),
           cache_read_input_tokens: parseInt(modelUsage.cacheReadTokens || 0)
         }
+
+        // 添加 cache_creation 子对象以支持精确 ephemeral 定价
+        const eph5m = parseInt(modelUsage.ephemeral5mTokens) || 0
+        const eph1h = parseInt(modelUsage.ephemeral1hTokens) || 0
+        if (eph5m > 0 || eph1h > 0) {
+          usage.cache_creation = {
+            ephemeral_5m_input_tokens: eph5m,
+            ephemeral_1h_input_tokens: eph1h
+          }
+        }
+
         const costResult = CostCalculator.calculateCost(usage, model)
         totalCost += costResult.costs.total
       }
@@ -3646,6 +3705,8 @@ class RedisClient {
                   outputTokens: 0,
                   cacheCreateTokens: 0,
                   cacheReadTokens: 0,
+                  ephemeral5mTokens: 0,
+                  ephemeral1hTokens: 0,
                   allTokens: 0,
                   requests: 0
                 }
@@ -3659,6 +3720,10 @@ class RedisClient {
                 modelUsage[modelName].cacheCreateTokens += parseInt(value || 0)
               } else if (metric === 'cacheReadTokens') {
                 modelUsage[modelName].cacheReadTokens += parseInt(value || 0)
+              } else if (metric === 'ephemeral5mTokens') {
+                modelUsage[modelName].ephemeral5mTokens += parseInt(value || 0)
+              } else if (metric === 'ephemeral1hTokens') {
+                modelUsage[modelName].ephemeral1hTokens += parseInt(value || 0)
               } else if (metric === 'allTokens') {
                 modelUsage[modelName].allTokens += parseInt(value || 0)
               } else if (metric === 'requests') {
diff --git a/src/routes/admin/claudeAccounts.js b/src/routes/admin/claudeAccounts.js
index 590e919d..699aa3ac 100644
--- a/src/routes/admin/claudeAccounts.js
+++ b/src/routes/admin/claudeAccounts.js
@@ -417,6 +417,14 @@ router.get('/claude-accounts', authenticateAdmin, async (req, res) => {
                 cache_read_input_tokens: usage.cacheReadTokens
               }
 
+              // 添加 cache_creation 子对象以支持精确 ephemeral 定价
+              if (usage.ephemeral5mTokens > 0 || usage.ephemeral1hTokens > 0) {
+                usageData.cache_creation = {
+                  ephemeral_5m_input_tokens: usage.ephemeral5mTokens,
+                  ephemeral_1h_input_tokens: usage.ephemeral1hTokens
+                }
+              }
+
               logger.debug(`💰 Calculating cost for model ${modelName}:`, JSON.stringify(usageData))
               const costResult = CostCalculator.calculateCost(usageData, modelName)
               logger.debug(`💰 Cost result for ${modelName}: total=${costResult.costs.total}`)
diff --git a/src/routes/admin/usageStats.js b/src/routes/admin/usageStats.js
index 5bd9ea79..849ad54b 100644
--- a/src/routes/admin/usageStats.js
+++ b/src/routes/admin/usageStats.js
@@ -362,6 +362,16 @@ router.get('/accounts/:accountId/usage-history', authenticateAdmin, async (req,
           cache_read_input_tokens: parseInt(modelData.cacheReadTokens) || 0
         }
 
+        // 如果有 ephemeral 5m/1h 拆分数据，添加 cache_creation 子对象以实现精确计费
+        const eph5m = parseInt(modelData.ephemeral5mTokens) || 0
+        const eph1h = parseInt(modelData.ephemeral1hTokens) || 0
+        if (eph5m > 0 || eph1h > 0) {
+          usage.cache_creation = {
+            ephemeral_5m_input_tokens: eph5m,
+            ephemeral_1h_input_tokens: eph1h
+          }
+        }
+
         const costResult = CostCalculator.calculateCost(usage, modelName)
         summedCost += costResult.costs.total
       }
@@ -403,6 +413,15 @@ router.get('/accounts/:accountId/usage-history', authenticateAdmin, async (req,
           cache_creation_input_tokens: cacheCreateTokens,
           cache_read_input_tokens: cacheReadTokens
         }
+        // 如果有 ephemeral 5m/1h 拆分数据，添加 cache_creation 子对象以实现精确计费
+        const fbEph5m = parseInt(dailyData?.ephemeral5mTokens) || 0
+        const fbEph1h = parseInt(dailyData?.ephemeral1hTokens) || 0
+        if (fbEph5m > 0 || fbEph1h > 0) {
+          fallbackUsage.cache_creation = {
+            ephemeral_5m_input_tokens: fbEph5m,
+            ephemeral_1h_input_tokens: fbEph1h
+          }
+        }
         const fallbackResult = CostCalculator.calculateCost(fallbackUsage, fallbackModel)
         cost = fallbackResult.costs.total
       }
@@ -653,12 +672,23 @@ router.get('/usage-trend', authenticateAdmin, async (req, res) => {
             cache_creation_input_tokens: modelCacheCreateTokens,
             cache_read_input_tokens: modelCacheReadTokens
           }
+          // 如果有 ephemeral 5m/1h 拆分数据，添加 cache_creation 子对象以实现精确计费
+          const mEph5m = parseInt(data.ephemeral5mTokens) || 0
+          const mEph1h = parseInt(data.ephemeral1hTokens) || 0
+          if (mEph5m > 0 || mEph1h > 0) {
+            modelUsage.cache_creation = {
+              ephemeral_5m_input_tokens: mEph5m,
+              ephemeral_1h_input_tokens: mEph1h
+            }
+          }
           const modelCostResult = CostCalculator.calculateCost(modelUsage, model)
           hourCost += modelCostResult.costs.total
         }
 
         // 如果没有模型级别的数据，尝试API Key级别的数据
         if (modelKeys.length === 0) {
+          let hourEph5m = 0
+          let hourEph1h = 0
           for (const key of usageKeys) {
             const data = usageDataMap.get(key)
             if (data) {
@@ -667,6 +697,8 @@ router.get('/usage-trend', authenticateAdmin, async (req, res) => {
               hourRequests += parseInt(data.requests) || 0
               hourCacheCreateTokens += parseInt(data.cacheCreateTokens) || 0
               hourCacheReadTokens += parseInt(data.cacheReadTokens) || 0
+              hourEph5m += parseInt(data.ephemeral5mTokens) || 0
+              hourEph1h += parseInt(data.ephemeral1hTokens) || 0
             }
           }
 
@@ -676,6 +708,13 @@ router.get('/usage-trend', authenticateAdmin, async (req, res) => {
             cache_creation_input_tokens: hourCacheCreateTokens,
             cache_read_input_tokens: hourCacheReadTokens
           }
+          // 如果有 ephemeral 5m/1h 拆分数据，添加 cache_creation 子对象以实现精确计费
+          if (hourEph5m > 0 || hourEph1h > 0) {
+            usage.cache_creation = {
+              ephemeral_5m_input_tokens: hourEph5m,
+              ephemeral_1h_input_tokens: hourEph1h
+            }
+          }
           const costResult = CostCalculator.calculateCost(usage, 'unknown')
           hourCost = costResult.costs.total
         }
@@ -817,6 +856,8 @@ router.get('/usage-trend', authenticateAdmin, async (req, res) => {
 
         // 如果没有模型级别的数据，回退到原始方法
         if (modelKeys.length === 0 && usageKeys.length > 0) {
+          let dayEph5m = 0
+          let dayEph1h = 0
           for (const key of usageKeys) {
             const data = usageDataMap.get(key)
             if (data) {
@@ -825,6 +866,8 @@ router.get('/usage-trend', authenticateAdmin, async (req, res) => {
               dayRequests += parseInt(data.requests) || 0
               dayCacheCreateTokens += parseInt(data.cacheCreateTokens) || 0
               dayCacheReadTokens += parseInt(data.cacheReadTokens) || 0
+              dayEph5m += parseInt(data.ephemeral5mTokens) || 0
+              dayEph1h += parseInt(data.ephemeral1hTokens) || 0
             }
           }
 
@@ -834,6 +877,13 @@ router.get('/usage-trend', authenticateAdmin, async (req, res) => {
             cache_creation_input_tokens: dayCacheCreateTokens,
             cache_read_input_tokens: dayCacheReadTokens
           }
+          // 如果有 ephemeral 5m/1h 拆分数据，添加 cache_creation 子对象以实现精确计费
+          if (dayEph5m > 0 || dayEph1h > 0) {
+            usage.cache_creation = {
+              ephemeral_5m_input_tokens: dayEph5m,
+              ephemeral_1h_input_tokens: dayEph1h
+            }
+          }
           const costResult = CostCalculator.calculateCost(usage, 'unknown')
           dayCost = costResult.costs.total
         }
@@ -1097,6 +1147,16 @@ router.get('/api-keys/:keyId/model-stats', authenticateAdmin, async (req, res) =
               cache_read_input_tokens: usageData.cacheReadTokens || 0
             }
 
+            // 如果有 ephemeral 5m/1h 拆分数据，添加 cache_creation 子对象以实现精确计费
+            const histEph5m = usageData.ephemeral5mTokens || 0
+            const histEph1h = usageData.ephemeral1hTokens || 0
+            if (histEph5m > 0 || histEph1h > 0) {
+              usage.cache_creation = {
+                ephemeral_5m_input_tokens: histEph5m,
+                ephemeral_1h_input_tokens: histEph1h
+              }
+            }
+
             // 对于汇总数据，使用默认模型计算费用
             const costData = CostCalculator.calculateCost(usage, 'claude-3-5-sonnet-20241022')
 
@@ -1472,6 +1532,15 @@ router.get('/account-usage-trend', authenticateAdmin, async (req, res) => {
               cache_creation_input_tokens: cacheCreateTokens,
               cache_read_input_tokens: cacheReadTokens
             }
+            // 如果有 ephemeral 5m/1h 拆分数据，添加 cache_creation 子对象以实现精确计费
+            const fbEph5m = parseInt(data.ephemeral5mTokens) || 0
+            const fbEph1h = parseInt(data.ephemeral1hTokens) || 0
+            if (fbEph5m > 0 || fbEph1h > 0) {
+              fallbackUsage.cache_creation = {
+                ephemeral_5m_input_tokens: fbEph5m,
+                ephemeral_1h_input_tokens: fbEph1h
+              }
+            }
             const fallbackResult = CostCalculator.calculateCost(fallbackUsage, fallbackModel)
             cost = fallbackResult.costs.total
           }
@@ -1640,6 +1709,15 @@ router.get('/account-usage-trend', authenticateAdmin, async (req, res) => {
               cache_creation_input_tokens: cacheCreateTokens,
               cache_read_input_tokens: cacheReadTokens
             }
+            // 如果有 ephemeral 5m/1h 拆分数据，添加 cache_creation 子对象以实现精确计费
+            const fbEph5m = parseInt(data.ephemeral5mTokens) || 0
+            const fbEph1h = parseInt(data.ephemeral1hTokens) || 0
+            if (fbEph5m > 0 || fbEph1h > 0) {
+              fallbackUsage.cache_creation = {
+                ephemeral_5m_input_tokens: fbEph5m,
+                ephemeral_1h_input_tokens: fbEph1h
+              }
+            }
             const fallbackResult = CostCalculator.calculateCost(fallbackUsage, fallbackModel)
             cost = fallbackResult.costs.total
           }
@@ -1834,7 +1912,9 @@ router.get('/api-keys-usage-trend', authenticateAdmin, async (req, res) => {
             inputTokens,
             outputTokens,
             cacheCreateTokens,
-            cacheReadTokens
+            cacheReadTokens,
+            ephemeral5mTokens: parseInt(data.ephemeral5mTokens) || 0,
+            ephemeral1hTokens: parseInt(data.ephemeral1hTokens) || 0
           })
         }
 
@@ -1860,6 +1940,16 @@ router.get('/api-keys-usage-trend', authenticateAdmin, async (req, res) => {
             cache_read_input_tokens: parseInt(modelData.cacheReadTokens) || 0
           }
 
+          // 如果有 ephemeral 5m/1h 拆分数据，添加 cache_creation 子对象以实现精确计费
+          const eph5m = parseInt(modelData.ephemeral5mTokens) || 0
+          const eph1h = parseInt(modelData.ephemeral1hTokens) || 0
+          if (eph5m > 0 || eph1h > 0) {
+            usage.cache_creation = {
+              ephemeral_5m_input_tokens: eph5m,
+              ephemeral_1h_input_tokens: eph1h
+            }
+          }
+
           const costResult = CostCalculator.calculateCost(usage, model)
           const currentCost = apiKeyCostMap.get(apiKeyId) || 0
           apiKeyCostMap.set(apiKeyId, currentCost + costResult.costs.total)
@@ -1878,6 +1968,12 @@ router.get('/api-keys-usage-trend', authenticateAdmin, async (req, res) => {
               cache_creation_input_tokens: data.cacheCreateTokens,
               cache_read_input_tokens: data.cacheReadTokens
             }
+            if (data.ephemeral5mTokens > 0 || data.ephemeral1hTokens > 0) {
+              usage.cache_creation = {
+                ephemeral_5m_input_tokens: data.ephemeral5mTokens,
+                ephemeral_1h_input_tokens: data.ephemeral1hTokens
+              }
+            }
             const fallbackResult = CostCalculator.calculateCost(usage, 'claude-3-5-sonnet-20241022')
             cost = fallbackResult.costs.total
             formattedCost = fallbackResult.formatted.total
@@ -1994,7 +2090,9 @@ router.get('/api-keys-usage-trend', authenticateAdmin, async (req, res) => {
             inputTokens,
             outputTokens,
             cacheCreateTokens,
-            cacheReadTokens
+            cacheReadTokens,
+            ephemeral5mTokens: parseInt(data.ephemeral5mTokens) || 0,
+            ephemeral1hTokens: parseInt(data.ephemeral1hTokens) || 0
           })
         }
 
@@ -2020,6 +2118,16 @@ router.get('/api-keys-usage-trend', authenticateAdmin, async (req, res) => {
             cache_read_input_tokens: parseInt(modelData.cacheReadTokens) || 0
           }
 
+          // 如果有 ephemeral 5m/1h 拆分数据，添加 cache_creation 子对象以实现精确计费
+          const eph5m = parseInt(modelData.ephemeral5mTokens) || 0
+          const eph1h = parseInt(modelData.ephemeral1hTokens) || 0
+          if (eph5m > 0 || eph1h > 0) {
+            usage.cache_creation = {
+              ephemeral_5m_input_tokens: eph5m,
+              ephemeral_1h_input_tokens: eph1h
+            }
+          }
+
           const costResult = CostCalculator.calculateCost(usage, model)
           const currentCost = apiKeyCostMap.get(apiKeyId) || 0
           apiKeyCostMap.set(apiKeyId, currentCost + costResult.costs.total)
@@ -2038,6 +2146,12 @@ router.get('/api-keys-usage-trend', authenticateAdmin, async (req, res) => {
               cache_creation_input_tokens: data.cacheCreateTokens,
               cache_read_input_tokens: data.cacheReadTokens
             }
+            if (data.ephemeral5mTokens > 0 || data.ephemeral1hTokens > 0) {
+              usage.cache_creation = {
+                ephemeral_5m_input_tokens: data.ephemeral5mTokens,
+                ephemeral_1h_input_tokens: data.ephemeral1hTokens
+              }
+            }
             const fallbackResult = CostCalculator.calculateCost(usage, 'claude-3-5-sonnet-20241022')
             cost = fallbackResult.costs.total
             formattedCost = fallbackResult.formatted.total
@@ -2189,7 +2303,9 @@ router.get('/usage-costs', authenticateAdmin, async (req, res) => {
             inputTokens: 0,
             outputTokens: 0,
             cacheCreateTokens: 0,
-            cacheReadTokens: 0
+            cacheReadTokens: 0,
+            ephemeral5mTokens: 0,
+            ephemeral1hTokens: 0
           })
         }
 
@@ -2198,6 +2314,8 @@ router.get('/usage-costs', authenticateAdmin, async (req, res) => {
         modelUsage.outputTokens += parseInt(data.outputTokens) || 0
         modelUsage.cacheCreateTokens += parseInt(data.cacheCreateTokens) || 0
         modelUsage.cacheReadTokens += parseInt(data.cacheReadTokens) || 0
+        modelUsage.ephemeral5mTokens += parseInt(data.ephemeral5mTokens) || 0
+        modelUsage.ephemeral1hTokens += parseInt(data.ephemeral1hTokens) || 0
       }
 
       // 计算7天统计的费用
@@ -2211,6 +2329,14 @@ router.get('/usage-costs', authenticateAdmin, async (req, res) => {
           cache_read_input_tokens: usage.cacheReadTokens
         }
 
+        // 如果有 ephemeral 5m/1h 拆分数据，添加 cache_creation 子对象以实现精确计费
+        if (usage.ephemeral5mTokens > 0 || usage.ephemeral1hTokens > 0) {
+          usageData.cache_creation = {
+            ephemeral_5m_input_tokens: usage.ephemeral5mTokens,
+            ephemeral_1h_input_tokens: usage.ephemeral1hTokens
+          }
+        }
+
         const costResult = CostCalculator.calculateCost(usageData, model)
         totalCosts.inputCost += costResult.costs.input
         totalCosts.outputCost += costResult.costs.output
@@ -2290,7 +2416,9 @@ router.get('/usage-costs', authenticateAdmin, async (req, res) => {
               inputTokens: 0,
               outputTokens: 0,
               cacheCreateTokens: 0,
-              cacheReadTokens: 0
+              cacheReadTokens: 0,
+              ephemeral5mTokens: 0,
+              ephemeral1hTokens: 0
             })
           }
 
@@ -2299,6 +2427,8 @@ router.get('/usage-costs', authenticateAdmin, async (req, res) => {
           modelUsage.outputTokens += parseInt(data.outputTokens) || 0
           modelUsage.cacheCreateTokens += parseInt(data.cacheCreateTokens) || 0
           modelUsage.cacheReadTokens += parseInt(data.cacheReadTokens) || 0
+          modelUsage.ephemeral5mTokens += parseInt(data.ephemeral5mTokens) || 0
+          modelUsage.ephemeral1hTokens += parseInt(data.ephemeral1hTokens) || 0
         }
 
         // 使用模型级别的数据计算费用
@@ -2312,6 +2442,14 @@ router.get('/usage-costs', authenticateAdmin, async (req, res) => {
             cache_read_input_tokens: usage.cacheReadTokens
           }
 
+          // 如果有 ephemeral 5m/1h 拆分数据，添加 cache_creation 子对象以实现精确计费
+          if (usage.ephemeral5mTokens > 0 || usage.ephemeral1hTokens > 0) {
+            usageData.cache_creation = {
+              ephemeral_5m_input_tokens: usage.ephemeral5mTokens,
+              ephemeral_1h_input_tokens: usage.ephemeral1hTokens
+            }
+          }
+
           const costResult = CostCalculator.calculateCost(usageData, model)
           totalCosts.inputCost += costResult.costs.input
           totalCosts.outputCost += costResult.costs.output
@@ -2352,6 +2490,16 @@ router.get('/usage-costs', authenticateAdmin, async (req, res) => {
               cache_read_input_tokens: apiKey.usage.total.cacheReadTokens || 0
             }
 
+            // 如果有 ephemeral 5m/1h 拆分数据，添加 cache_creation 子对象以实现精确计费
+            const totalEph5m = apiKey.usage.total.ephemeral5mTokens || 0
+            const totalEph1h = apiKey.usage.total.ephemeral1hTokens || 0
+            if (totalEph5m > 0 || totalEph1h > 0) {
+              usage.cache_creation = {
+                ephemeral_5m_input_tokens: totalEph5m,
+                ephemeral_1h_input_tokens: totalEph1h
+              }
+            }
+
             // 使用加权平均价格计算（基于当前活跃模型的价格分布）
             const costResult = CostCalculator.calculateCost(usage, 'claude-3-5-haiku-20241022')
             totalCosts.inputCost += costResult.costs.input
@@ -2424,6 +2572,16 @@ router.get('/usage-costs', authenticateAdmin, async (req, res) => {
         cache_read_input_tokens: parseInt(data.cacheReadTokens) || 0
       }
 
+      // 如果有 ephemeral 5m/1h 拆分数据，添加 cache_creation 子对象以实现精确计费
+      const eph5m = parseInt(data.ephemeral5mTokens) || 0
+      const eph1h = parseInt(data.ephemeral1hTokens) || 0
+      if (eph5m > 0 || eph1h > 0) {
+        usage.cache_creation = {
+          ephemeral_5m_input_tokens: eph5m,
+          ephemeral_1h_input_tokens: eph1h
+        }
+      }
+
       const costResult = CostCalculator.calculateCost(usage, model)
 
       // 累加总费用
@@ -2564,13 +2722,27 @@ router.get('/api-keys/:keyId/usage-records', authenticateAdmin, async (req, res)
       return null
     }
 
-    const toUsageObject = (record) => ({
-      input_tokens: record.inputTokens || 0,
-      output_tokens: record.outputTokens || 0,
-      cache_creation_input_tokens: record.cacheCreateTokens || 0,
-      cache_read_input_tokens: record.cacheReadTokens || 0,
-      cache_creation: record.cacheCreation || record.cache_creation || null
-    })
+    const toUsageObject = (record) => { // Map a stored record's camelCase token fields to a snake_case usage object
+      const usage = {
+        input_tokens: record.inputTokens || 0,
+        output_tokens: record.outputTokens || 0,
+        cache_creation_input_tokens: record.cacheCreateTokens || 0,
+        cache_read_input_tokens: record.cacheReadTokens || 0,
+        cache_creation: record.cacheCreation || record.cache_creation || null // accept either key spelling; null when the record has neither
+      }
+      // No cache_creation on the record: rebuild the sub-object from the separately stored ephemeral 5m/1h fields, if either is positive
+      if (!usage.cache_creation) {
+        const eph5m = parseInt(record.ephemeral5mTokens) || 0 // missing or non-numeric values fall back to 0
+        const eph1h = parseInt(record.ephemeral1hTokens) || 0
+        if (eph5m > 0 || eph1h > 0) {
+          usage.cache_creation = {
+            ephemeral_5m_input_tokens: eph5m,
+            ephemeral_1h_input_tokens: eph1h
+          }
+        }
+      }
+      return usage
+    }
 
     const withinRange = (record) => {
       if (!record.timestamp) {
@@ -2863,13 +3035,27 @@ router.get('/accounts/:accountId/usage-records', authenticateAdmin, async (req,
       keysToUse = [{ id: apiKeyId }]
     }
 
-    const toUsageObject = (record) => ({
-      input_tokens: record.inputTokens || 0,
-      output_tokens: record.outputTokens || 0,
-      cache_creation_input_tokens: record.cacheCreateTokens || 0,
-      cache_read_input_tokens: record.cacheReadTokens || 0,
-      cache_creation: record.cacheCreation || record.cache_creation || null
-    })
+    const toUsageObject = (record) => { // Map a stored record's camelCase token fields to a snake_case usage object
+      const usage = {
+        input_tokens: record.inputTokens || 0,
+        output_tokens: record.outputTokens || 0,
+        cache_creation_input_tokens: record.cacheCreateTokens || 0,
+        cache_read_input_tokens: record.cacheReadTokens || 0,
+        cache_creation: record.cacheCreation || record.cache_creation || null // accept either key spelling; null when the record has neither
+      }
+      // No cache_creation on the record: rebuild the sub-object from the separately stored ephemeral 5m/1h fields, if either is positive
+      if (!usage.cache_creation) {
+        const eph5m = parseInt(record.ephemeral5mTokens) || 0 // missing or non-numeric values fall back to 0
+        const eph1h = parseInt(record.ephemeral1hTokens) || 0
+        if (eph5m > 0 || eph1h > 0) {
+          usage.cache_creation = {
+            ephemeral_5m_input_tokens: eph5m,
+            ephemeral_1h_input_tokens: eph1h
+          }
+        }
+      }
+      return usage
+    }
 
     const withinRange = (record) => {
       if (!record.timestamp) {
diff --git a/src/routes/apiStats.js b/src/routes/apiStats.js
index a156df65..86f1738b 100644
--- a/src/routes/apiStats.js
+++ b/src/routes/apiStats.js
@@ -317,6 +317,14 @@ router.post('/api/user-stats', async (req, res) => {
             cache_read_input_tokens: usage.cacheReadTokens || 0
           }
 
+          // 如果有 ephemeral 5m/1h 拆分数据，添加 cache_creation 子对象以实现精确计费
+          if (usage.ephemeral5mTokens > 0 || usage.ephemeral1hTokens > 0) {
+            costUsage.cache_creation = {
+              ephemeral_5m_input_tokens: usage.ephemeral5mTokens,
+              ephemeral_1h_input_tokens: usage.ephemeral1hTokens
+            }
+          }
+
           const costResult = CostCalculator.calculateCost(costUsage, 'claude-3-5-sonnet-20241022')
           totalCost = costResult.costs.total
         }
@@ -335,6 +343,14 @@ router.post('/api/user-stats', async (req, res) => {
           cache_read_input_tokens: usage.cacheReadTokens || 0
         }
 
+        // 如果有 ephemeral 5m/1h 拆分数据，添加 cache_creation 子对象以实现精确计费
+        if (usage.ephemeral5mTokens > 0 || usage.ephemeral1hTokens > 0) {
+          costUsage.cache_creation = {
+            ephemeral_5m_input_tokens: usage.ephemeral5mTokens,
+            ephemeral_1h_input_tokens: usage.ephemeral1hTokens
+          }
+        }
+
         const costResult = CostCalculator.calculateCost(costUsage, 'claude-3-5-sonnet-20241022')
         totalCost = costResult.costs.total
         formattedCost = costResult.formatted.total
@@ -804,6 +820,8 @@ router.post('/api/batch-model-stats', async (req, res) => {
                 outputTokens: 0,
                 cacheCreateTokens: 0,
                 cacheReadTokens: 0,
+                ephemeral5mTokens: 0,
+                ephemeral1hTokens: 0,
                 allTokens: 0,
                 realCostMicro: 0,
                 ratedCostMicro: 0,
@@ -817,6 +835,8 @@ router.post('/api/batch-model-stats', async (req, res) => {
             modelUsage.outputTokens += parseInt(data.outputTokens) || 0
             modelUsage.cacheCreateTokens += parseInt(data.cacheCreateTokens) || 0
             modelUsage.cacheReadTokens += parseInt(data.cacheReadTokens) || 0
+            modelUsage.ephemeral5mTokens += parseInt(data.ephemeral5mTokens) || 0
+            modelUsage.ephemeral1hTokens += parseInt(data.ephemeral1hTokens) || 0
             modelUsage.allTokens += parseInt(data.allTokens) || 0
             modelUsage.realCostMicro += parseInt(data.realCostMicro) || 0
             modelUsage.ratedCostMicro += parseInt(data.ratedCostMicro) || 0
@@ -839,6 +859,14 @@ router.post('/api/batch-model-stats', async (req, res) => {
         cache_read_input_tokens: usage.cacheReadTokens
       }
 
+      // 如果有 ephemeral 5m/1h 拆分数据，添加 cache_creation 子对象以实现精确计费
+      if (usage.ephemeral5mTokens > 0 || usage.ephemeral1hTokens > 0) {
+        usageData.cache_creation = {
+          ephemeral_5m_input_tokens: usage.ephemeral5mTokens,
+          ephemeral_1h_input_tokens: usage.ephemeral1hTokens
+        }
+      }
+
       // 优先使用存储的费用，否则回退到重新计算
       const { hasStoredCost } = usage
       const costData = CostCalculator.calculateCost(usageData, model)
@@ -1368,6 +1396,8 @@ router.post('/api/user-model-stats', async (req, res) => {
       const model = match[1]
 
       if (data && Object.keys(data).length > 0) {
+        const ephemeral5m = parseInt(data.ephemeral5mTokens) || 0
+        const ephemeral1h = parseInt(data.ephemeral1hTokens) || 0
         const usage = {
           input_tokens: parseInt(data.inputTokens) || 0,
           output_tokens: parseInt(data.outputTokens) || 0,
@@ -1375,6 +1405,14 @@ router.post('/api/user-model-stats', async (req, res) => {
           cache_read_input_tokens: parseInt(data.cacheReadTokens) || 0
         }
 
+        // 如果有 ephemeral 5m/1h 拆分数据，添加 cache_creation 子对象以实现精确计费
+        if (ephemeral5m > 0 || ephemeral1h > 0) {
+          usage.cache_creation = {
+            ephemeral_5m_input_tokens: ephemeral5m,
+            ephemeral_1h_input_tokens: ephemeral1h
+          }
+        }
+
         // 优先使用存储的费用，否则回退到重新计算
         // 检查字段是否存在（而非 > 0），以支持真正的零成本场景
         const realCostMicro = parseInt(data.realCostMicro) || 0
diff --git a/src/services/apiKeyService.js b/src/services/apiKeyService.js
index 0ca80b9f..605707aa 100644
--- a/src/services/apiKeyService.js
+++ b/src/services/apiKeyService.js
@@ -1599,6 +1599,8 @@ class ApiKeyService {
             outputTokens,
             cacheCreateTokens,
             cacheReadTokens,
+            0, // ephemeral5mTokens - recordUsage 不含详细缓存数据
+            0, // ephemeral1hTokens - recordUsage 不含详细缓存数据
             model,
             isLongContextRequest
           )
@@ -1834,6 +1836,8 @@ class ApiKeyService {
             outputTokens,
             cacheCreateTokens,
             cacheReadTokens,
+            ephemeral5mTokens,
+            ephemeral1hTokens,
             model,
             costInfo.isLongContextRequest || false
           )
diff --git a/src/services/costInitService.js b/src/services/costInitService.js
index 5463871f..3207f996 100644
--- a/src/services/costInitService.js
+++ b/src/services/costInitService.js
@@ -201,6 +201,16 @@ class CostInitService {
           parseInt(data.totalCacheReadTokens) || parseInt(data.cacheReadTokens) || 0
       }
 
+      // 添加 cache_creation 子对象以支持精确 ephemeral 定价
+      const eph5m = parseInt(data.ephemeral5mTokens) || 0
+      const eph1h = parseInt(data.ephemeral1hTokens) || 0
+      if (eph5m > 0 || eph1h > 0) {
+        usage.cache_creation = {
+          ephemeral_5m_input_tokens: eph5m,
+          ephemeral_1h_input_tokens: eph1h
+        }
+      }
+
       const costResult = CostCalculator.calculateCost(usage, model)
       const cost = costResult.costs.total
 
diff --git a/src/services/droidRelayService.js b/src/services/droidRelayService.js
index 8e663611..0ed4e37e 100644
--- a/src/services/droidRelayService.js
+++ b/src/services/droidRelayService.js
@@ -1275,6 +1275,8 @@ class DroidRelayService {
           usageObject.output_tokens || 0,
           usageObject.cache_creation_input_tokens || 0,
           usageObject.cache_read_input_tokens || 0,
+          0, // ephemeral5mTokens - Droid 不含详细缓存数据
+          0, // ephemeral1hTokens - Droid 不含详细缓存数据
           model,
           false
         )

From 93b655d65c20a999af46756a99ff75520f5f92a2 Mon Sep 17 00:00:00 2001
From: sczheng189 <724100151@qq.com>
Date: Mon, 23 Feb 2026 21:23:38 +0800
Subject: [PATCH 5/6] Revert "fix: update the limit logic, and if the daily
 limit is not reached after reaching the opus weekly limit, other claude
 models can be used"

This reverts commit f444af49bfb0159f6aa0835b40a7049c2e4199d6.
---
 src/middleware/auth.js                      |  4 ++--
 src/services/apiKeyService.js               |  4 ++--
 src/services/weeklyClaudeCostInitService.js |  4 ++--
 src/utils/modelHelper.js                    | 17 -----------------
 4 files changed, 6 insertions(+), 23 deletions(-)

diff --git a/src/middleware/auth.js b/src/middleware/auth.js
index 66fa1387..cf76b0b7 100644
--- a/src/middleware/auth.js
+++ b/src/middleware/auth.js
@@ -9,7 +9,7 @@ const ClientValidator = require('../validators/clientValidator')
 const ClaudeCodeValidator = require('../validators/clients/claudeCodeValidator')
 const claudeRelayConfigService = require('../services/claudeRelayConfigService')
 const { calculateWaitTimeStats } = require('../utils/statsHelper')
-const { isOpusModel } = require('../utils/modelHelper')
+const { isClaudeFamilyModel } = require('../utils/modelHelper')
 
 // 工具函数
 function sleep(ms) {
@@ -1256,7 +1256,7 @@ const authenticateApiKey = async (req, res, next) => {
       const model = requestBody.model || ''
 
       // 判断是否为 Claude 模型
-      if (isOpusModel(model)) {
+      if (isClaudeFamilyModel(model)) {
         const weeklyOpusCost = validation.keyData.weeklyOpusCost || 0
 
         if (weeklyOpusCost >= weeklyOpusCostLimit) {
diff --git a/src/services/apiKeyService.js b/src/services/apiKeyService.js
index b51e6637..605707aa 100644
--- a/src/services/apiKeyService.js
+++ b/src/services/apiKeyService.js
@@ -4,7 +4,7 @@ const config = require('../../config/config')
 const redis = require('../models/redis')
 const logger = require('../utils/logger')
 const serviceRatesService = require('./serviceRatesService')
-const { isOpusModel } = require('../utils/modelHelper')
+const { isClaudeFamilyModel } = require('../utils/modelHelper')
 
 const ACCOUNT_TYPE_CONFIG = {
   claude: { prefix: 'claude:account:' },
@@ -1651,7 +1651,7 @@ class ApiKeyService {
   async recordOpusCost(keyId, ratedCost, realCost, model, accountType) {
     try {
       // 判断是否为 Claude 系列模型（包含 Bedrock 格式等）
-      if (!isOpusModel(model)) {
+      if (!isClaudeFamilyModel(model)) {
         return
       }
 
diff --git a/src/services/weeklyClaudeCostInitService.js b/src/services/weeklyClaudeCostInitService.js
index 2dfb1470..1268329f 100644
--- a/src/services/weeklyClaudeCostInitService.js
+++ b/src/services/weeklyClaudeCostInitService.js
@@ -2,7 +2,7 @@ const redis = require('../models/redis')
 const logger = require('../utils/logger')
 const pricingService = require('./pricingService')
 const serviceRatesService = require('./serviceRatesService')
-const { isOpusModel } = require('../utils/modelHelper')
+const { isClaudeFamilyModel } = require('../utils/modelHelper')
 
 function pad2(n) {
   return String(n).padStart(2, '0')
@@ -151,7 +151,7 @@ class WeeklyClaudeCostInitService {
             }
             const keyId = match[1]
             const model = match[2]
-            if (!isOpusModel(model)) {
+            if (!isClaudeFamilyModel(model)) {
               continue
             }
             matchedClaudeKeys++
diff --git a/src/utils/modelHelper.js b/src/utils/modelHelper.js
index 91fda718..c3fecc98 100644
--- a/src/utils/modelHelper.js
+++ b/src/utils/modelHelper.js
@@ -188,22 +188,6 @@ function isOpus45OrNewer(modelName) {
   return false
 }
 
-/**
- * 判断是否为 Opus 模型（任意版本）
- * 匹配所有包含 "opus" 关键词的 Claude 模型
- */
-function isOpusModel(modelName) {
-  if (!modelName || typeof modelName !== 'string') {
-    return false
-  }
-  const { baseModel } = parseVendorPrefixedModel(modelName)
-  const m = (baseModel || '').trim().toLowerCase()
-  if (!m) {
-    return false
-  }
-  return m.includes('opus')
-}
-
 /**
  * 判断某个 model 名称是否属于 Anthropic Claude 系列模型。
  *
@@ -253,6 +237,5 @@ module.exports = {
   getEffectiveModel,
   getVendorType,
   isOpus45OrNewer,
-  isOpusModel,
   isClaudeFamilyModel
 }

From 823693afda2ff8354974568ef28f6d33e0fc687b Mon Sep 17 00:00:00 2001
From: sczheng189 <724100151@qq.com>
Date: Mon, 23 Feb 2026 23:27:19 +0800
Subject: [PATCH 6/6] =?UTF-8?q?=E6=A0=BC=E5=BC=8F=E5=8C=96?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/routes/api.js              |  4 +++-
 src/services/pricingService.js | 14 +++++++-------
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/src/routes/api.js b/src/routes/api.js
index c469dd2f..85ad01f5 100644
--- a/src/routes/api.js
+++ b/src/routes/api.js
@@ -469,7 +469,9 @@ async function handleMessagesRequest(req, res) {
                 cache_read_input_tokens: cacheReadTokens
               }
               const requestBetaHeader =
-                _headers['anthropic-beta'] || _headers['Anthropic-Beta'] || _headers['ANTHROPIC-BETA']
+                _headers['anthropic-beta'] ||
+                _headers['Anthropic-Beta'] ||
+                _headers['ANTHROPIC-BETA']
               if (requestBetaHeader) {
                 usageObject.request_anthropic_beta = requestBetaHeader
               }
diff --git a/src/services/pricingService.js b/src/services/pricingService.js
index 55423361..0eb3b2f6 100644
--- a/src/services/pricingService.js
+++ b/src/services/pricingService.js
@@ -566,11 +566,11 @@ class PricingService {
       if (!key.startsWith('fast/')) {
         continue
       }
-      const normalizedFastKey = key
-        .slice('fast/'.length)
-        .toLowerCase()
-        .replace(/[_-]/g, '')
-      if (normalizedFastKey.includes(normalizedModel) || normalizedModel.includes(normalizedFastKey)) {
+      const normalizedFastKey = key.slice('fast/'.length).toLowerCase().replace(/[_-]/g, '')
+      if (
+        normalizedFastKey.includes(normalizedModel) ||
+        normalizedModel.includes(normalizedFastKey)
+      ) {
         logger.debug(`💰 Found fuzzy fast pricing for ${modelName}: ${key}`)
         return value
       }
@@ -700,7 +700,7 @@ class PricingService {
 
     // 确定实际使用的输入价格（普通或 200K+ 高档价格）
     // Claude 模型在 200K+ 场景下如果缺少官方字段，按 2 倍输入价兜底
-    let actualInputPrice = useLongContextPricing
+    const actualInputPrice = useLongContextPricing
       ? hasInput200kPrice
         ? pricing.input_cost_per_token_above_200k_tokens
         : isClaudeModel
@@ -712,7 +712,7 @@ class PricingService {
     const hasOutput200kPrice =
       pricing.output_cost_per_token_above_200k_tokens !== null &&
       pricing.output_cost_per_token_above_200k_tokens !== undefined
-    let actualOutputPrice = useLongContextPricing
+    const actualOutputPrice = useLongContextPricing
       ? hasOutput200kPrice
         ? pricing.output_cost_per_token_above_200k_tokens
         : baseOutputPrice