fix: 修复1h缓存定价与sub2api项目对齐

2026-03-29 21:56:18 +00:00 · 2026-03-03 11:08:13 +08:00
parent d7b4a54a4c
commit 99dd19b32d
5 changed files with 6376 additions and 22539 deletions
--- a/resources/model-pricing/model_prices_and_context_window.json
+++ b/resources/model-pricing/model_prices_and_context_window.json
--- a/src/routes/api.js
+++ b/src/routes/api.js
@@ -1222,21 +1222,61 @@ async function handleMessagesRequest(req, res) {
        ) {
          const inputTokens = jsonData.usage.input_tokens || 0
          const outputTokens = jsonData.usage.output_tokens || 0
-          const cacheCreateTokens = jsonData.usage.cache_creation_input_tokens || 0
+          let cacheCreateTokens = jsonData.usage.cache_creation_input_tokens || 0
+          let ephemeral5mTokens = 0
+          let ephemeral1hTokens = 0
+
+          if (jsonData.usage.cache_creation && typeof jsonData.usage.cache_creation === 'object') {
+            ephemeral5mTokens = jsonData.usage.cache_creation.ephemeral_5m_input_tokens || 0
+            ephemeral1hTokens = jsonData.usage.cache_creation.ephemeral_1h_input_tokens || 0
+            cacheCreateTokens = ephemeral5mTokens + ephemeral1hTokens
+          }
+
          const cacheReadTokens = jsonData.usage.cache_read_input_tokens || 0
          // Parse the model to remove vendor prefix if present (e.g., "ccr,gemini-2.5-pro" -> "gemini-2.5-pro")
          const rawModel = jsonData.model || _requestBodyNonStream.model || 'unknown'
          const { baseModel: usageBaseModel } = parseVendorPrefixedModel(rawModel)
          const model = usageBaseModel || rawModel

+          // 构建 usage 对象以传递给 recordUsageWithDetails
+          const usageObject = {
+            input_tokens: inputTokens,
+            output_tokens: outputTokens,
+            cache_creation_input_tokens: cacheCreateTokens,
+            cache_read_input_tokens: cacheReadTokens
+          }
+
+          // 添加请求元信息
+          const requestBetaHeader =
+            _headersNonStream['anthropic-beta'] ||
+            _headersNonStream['Anthropic-Beta'] ||
+            _headersNonStream['ANTHROPIC-BETA']
+          if (requestBetaHeader) {
+            usageObject.request_anthropic_beta = requestBetaHeader
+          }
+          if (
+            typeof _requestBodyNonStream?.speed === 'string' &&
+            _requestBodyNonStream.speed.trim()
+          ) {
+            usageObject.request_speed = _requestBodyNonStream.speed.trim().toLowerCase()
+          }
+          if (typeof jsonData.usage.speed === 'string' && jsonData.usage.speed.trim()) {
+            usageObject.speed = jsonData.usage.speed.trim().toLowerCase()
+          }
+
+          // 添加 cache_creation 子对象
+          if (ephemeral5mTokens > 0 || ephemeral1hTokens > 0) {
+            usageObject.cache_creation = {
+              ephemeral_5m_input_tokens: ephemeral5mTokens,
+              ephemeral_1h_input_tokens: ephemeral1hTokens
+            }
+          }
+
          // 记录真实的token使用量（包含模型信息和所有4种token以及账户ID）
          const { accountId: responseAccountId } = response
-          const nonStreamCosts = await apiKeyService.recordUsage(
+          const nonStreamCosts = await apiKeyService.recordUsageWithDetails(
            _apiKeyIdNonStream,
-            inputTokens,
-            outputTokens,
-            cacheCreateTokens,
-            cacheReadTokens,
+            usageObject,
            model,
            responseAccountId,
            accountType
@@ -1259,7 +1299,7 @@ async function handleMessagesRequest(req, res) {

          usageRecorded = true
          logger.api(
-            `📊 Non-stream usage recorded (real) - Model: ${model}, Input: ${inputTokens}, Output: ${outputTokens}, Cache Create: ${cacheCreateTokens}, Cache Read: ${cacheReadTokens}, Total: ${inputTokens + outputTokens + cacheCreateTokens + cacheReadTokens} tokens`
+            `📊 Non-stream usage recorded (real) - Model: ${model}, Input: ${inputTokens}, Output: ${outputTokens}, Cache Create: ${cacheCreateTokens} (5m: ${ephemeral5mTokens}, 1h: ${ephemeral1hTokens}), Cache Read: ${cacheReadTokens}, Total: ${inputTokens + outputTokens + cacheCreateTokens + cacheReadTokens} tokens`
          )
        } else {
          logger.warn('⚠️ No usage data found in Claude API JSON response')
--- a/src/services/costInitService.js
+++ b/src/services/costInitService.js
@@ -11,7 +11,11 @@ const USAGE_FIELDS = [
  'totalCacheCreateTokens',
  'cacheCreateTokens',
  'totalCacheReadTokens',
-  'cacheReadTokens'
+  'cacheReadTokens',
+  'ephemeral5mTokens',
+  'ephemeral1hTokens',
+  'totalEphemeral5mTokens',
+  'totalEphemeral1hTokens'
 ]

 class CostInitService {
@@ -202,8 +206,8 @@ class CostInitService {
      }

      // 添加 cache_creation 子对象以支持精确 ephemeral 定价
-      const eph5m = parseInt(data.ephemeral5mTokens) || 0
-      const eph1h = parseInt(data.ephemeral1hTokens) || 0
+      const eph5m = parseInt(data.totalEphemeral5mTokens) || parseInt(data.ephemeral5mTokens) || 0
+      const eph1h = parseInt(data.totalEphemeral1hTokens) || parseInt(data.ephemeral1hTokens) || 0
      if (eph5m > 0 || eph1h > 0) {
        usage.cache_creation = {
          ephemeral_5m_input_tokens: eph5m,
--- a/src/services/pricingService.js
+++ b/src/services/pricingService.js
@@ -28,43 +28,7 @@ class PricingService {
    this.updateTimer = null // 定时更新任务句柄
    this.hashSyncInProgress = false // 哈希同步状态

-    // 硬编码的 1 小时缓存价格（美元/百万 token）
-    // ephemeral_5m 的价格使用 model_pricing.json 中的 cache_creation_input_token_cost
-    // ephemeral_1h 的价格需要硬编码
-    this.ephemeral1hPricing = {
-      // Opus 系列: $30/MTok
-      'claude-opus-4-1': 0.00003,
-      'claude-opus-4-1-20250805': 0.00003,
-      'claude-opus-4': 0.00003,
-      'claude-opus-4-20250514': 0.00003,
-      'claude-3-opus': 0.00003,
-      'claude-3-opus-latest': 0.00003,
-      'claude-3-opus-20240229': 0.00003,
-
-      // Sonnet 系列: $6/MTok
-      'claude-3-5-sonnet': 0.000006,
-      'claude-3-5-sonnet-latest': 0.000006,
-      'claude-3-5-sonnet-20241022': 0.000006,
-      'claude-3-5-sonnet-20240620': 0.000006,
-      'claude-3-sonnet': 0.000006,
-      'claude-3-sonnet-20240307': 0.000006,
-      'claude-sonnet-3': 0.000006,
-      'claude-sonnet-3-5': 0.000006,
-      'claude-sonnet-3-7': 0.000006,
-      'claude-sonnet-4': 0.000006,
-      'claude-sonnet-4-20250514': 0.000006,
-
-      // Haiku 系列: $1.6/MTok
-      'claude-3-5-haiku': 0.0000016,
-      'claude-3-5-haiku-latest': 0.0000016,
-      'claude-3-5-haiku-20241022': 0.0000016,
-      'claude-3-haiku': 0.0000016,
-      'claude-3-haiku-20240307': 0.0000016,
-      'claude-haiku-3': 0.0000016,
-      'claude-haiku-3-5': 0.0000016
-    }
-
-    // Claude Prompt Caching 官方倍率（基于输入价格）
+    // Claude Prompt Caching 官方倍率（基于输入价格）— 仅作为 model_pricing.json 缺失字段时的兜底
    this.claudeCacheMultipliers = {
      write5m: 1.25,
      write1h: 2,
@@ -536,50 +500,6 @@ class PricingService {
    return modelName.replace(/\[1m\]/gi, '').trim()
  }

-  // 获取 1 小时缓存价格（优先使用 model_pricing.json 中的模型字段）
-  getEphemeral1hPricing(modelName, pricing = null) {
-    if (
-      pricing?.cache_creation_input_token_cost_above_1hr !== null &&
-      pricing?.cache_creation_input_token_cost_above_1hr !== undefined
-    ) {
-      return pricing.cache_creation_input_token_cost_above_1hr
-    }
-
-    if (!modelName) {
-      return 0
-    }
-
-    // 尝试直接匹配
-    if (
-      this.ephemeral1hPricing[modelName] !== null &&
-      this.ephemeral1hPricing[modelName] !== undefined
-    ) {
-      return this.ephemeral1hPricing[modelName]
-    }
-
-    // 处理各种模型名称变体
-    const modelLower = modelName.toLowerCase()
-
-    // 检查是否是 Opus 系列
-    if (modelLower.includes('opus')) {
-      return 0.00001 // $10/MTok
-    }
-
-    // 检查是否是 Sonnet 系列
-    if (modelLower.includes('sonnet')) {
-      return 0.000006 // $6/MTok
-    }
-
-    // 检查是否是 Haiku 系列
-    if (modelLower.includes('haiku')) {
-      return 0.000002 // $2/MTok
-    }
-
-    // 默认返回 0（未知模型）
-    logger.debug(`💰 No 1h cache pricing found for model: ${modelName}`)
-    return 0
-  }
-
  // 计算使用费用
  calculateCost(usage, modelName) {
    const normalizedModelName = this.stripLongContextSuffix(modelName)
@@ -675,43 +595,58 @@ class PricingService {
        : baseOutputPrice
      : baseOutputPrice

-    // 应用 Fast Mode 倍率（在 200K+ 价格之上叠加）
-    if (fastMultiplier > 1) {
-      actualInputPrice *= fastMultiplier
-      actualOutputPrice *= fastMultiplier
-    }
-
+    // 缓存价格：优先从 model_pricing.json 取，Claude 缺失时用倍率兜底
    let actualCacheCreatePrice = 0
    let actualCacheReadPrice = 0
    let actualEphemeral1hPrice = 0

-    if (isClaudeModel) {
-      // Claude 模型缓存价格统一按输入价格倍率推导，避免来源字段不一致导致计费偏差
-      actualCacheCreatePrice = actualInputPrice * this.claudeCacheMultipliers.write5m
-      actualCacheReadPrice = actualInputPrice * this.claudeCacheMultipliers.read
-      actualEphemeral1hPrice = actualInputPrice * this.claudeCacheMultipliers.write1h
-    } else {
-      actualCacheCreatePrice = useLongContextPricing
-        ? pricing.cache_creation_input_token_cost_above_200k_tokens ||
+    if (useLongContextPricing) {
+      // 200K+：Claude 仅用 above_200k 专用字段，缺失留 0 让下方兜底从 actualInputPrice 推导
+      actualCacheCreatePrice = isClaudeModel
+        ? pricing.cache_creation_input_token_cost_above_200k_tokens || 0
+        : pricing.cache_creation_input_token_cost_above_200k_tokens ||
          pricing.cache_creation_input_token_cost ||
          0
-        : pricing.cache_creation_input_token_cost || 0
-
-      actualCacheReadPrice = useLongContextPricing
-        ? pricing.cache_read_input_token_cost_above_200k_tokens ||
+      actualCacheReadPrice = isClaudeModel
+        ? pricing.cache_read_input_token_cost_above_200k_tokens || 0
+        : pricing.cache_read_input_token_cost_above_200k_tokens ||
          pricing.cache_read_input_token_cost ||
          0
-        : pricing.cache_read_input_token_cost || 0
+      const has1h200k =
+        pricing.cache_creation_input_token_cost_above_1hr_above_200k_tokens !== null &&
+        pricing.cache_creation_input_token_cost_above_1hr_above_200k_tokens !== undefined
+      actualEphemeral1hPrice = has1h200k
+        ? pricing.cache_creation_input_token_cost_above_1hr_above_200k_tokens
+        : isClaudeModel
+          ? 0
+          : pricing.cache_creation_input_token_cost_above_1hr || 0
+    } else {
+      actualCacheCreatePrice = pricing.cache_creation_input_token_cost || 0
+      actualCacheReadPrice = pricing.cache_read_input_token_cost || 0
+      actualEphemeral1hPrice = pricing.cache_creation_input_token_cost_above_1hr || 0
+    }

-      const defaultEphemeral1hPrice = this.getEphemeral1hPricing(modelName, pricing)
+    // Claude 兜底：pricing 字段缺失时用倍率从 actualInputPrice 推导
+    // 此时 actualInputPrice 尚未含 fastMultiplier，下方统一应用
+    if (isClaudeModel) {
+      if (!actualCacheCreatePrice) {
+        actualCacheCreatePrice = actualInputPrice * this.claudeCacheMultipliers.write5m
+      }
+      if (!actualCacheReadPrice) {
+        actualCacheReadPrice = actualInputPrice * this.claudeCacheMultipliers.read
+      }
+      if (!actualEphemeral1hPrice) {
+        actualEphemeral1hPrice = actualInputPrice * this.claudeCacheMultipliers.write1h
+      }
+    }

-      // 非 Claude 模型维持原有字段优先级
-      actualEphemeral1hPrice = useLongContextPricing
-        ? pricing.cache_creation_input_token_cost_above_1hr_above_200k_tokens !== null &&
-          pricing.cache_creation_input_token_cost_above_1hr_above_200k_tokens !== undefined
-          ? pricing.cache_creation_input_token_cost_above_1hr_above_200k_tokens
-          : defaultEphemeral1hPrice
-        : defaultEphemeral1hPrice
+    // Fast Mode 倍率：统一一次性应用于所有价格
+    if (fastMultiplier > 1) {
+      actualInputPrice *= fastMultiplier
+      actualOutputPrice *= fastMultiplier
+      actualCacheCreatePrice *= fastMultiplier
+      actualCacheReadPrice *= fastMultiplier
+      actualEphemeral1hPrice *= fastMultiplier
    }

    // 计算各项费用
--- a/src/utils/costCalculator.js
+++ b/src/utils/costCalculator.js
@@ -239,6 +239,16 @@ class CostCalculator {
        aggregatedUsage.cacheReadTokens || aggregatedUsage.totalCacheReadTokens || 0
    }

+    // 如果有 ephemeral 拆分数据，构建 cache_creation 子对象
+    const eph5m = aggregatedUsage.ephemeral5mTokens || aggregatedUsage.totalEphemeral5mTokens || 0
+    const eph1h = aggregatedUsage.ephemeral1hTokens || aggregatedUsage.totalEphemeral1hTokens || 0
+    if (eph5m > 0 || eph1h > 0) {
+      usage.cache_creation = {
+        ephemeral_5m_input_tokens: eph5m,
+        ephemeral_1h_input_tokens: eph1h
+      }
+    }
+
    return this.calculateCost(usage, model)
  }