fix: 修复1h缓存定价与sub2api项目对齐

2026-03-30 00:51:04 +00:00 · 2026-03-03 11:08:13 +08:00
parent d7b4a54a4c
commit 99dd19b32d
5 changed files with 6376 additions and 22539 deletions
--- a/resources/model-pricing/model_prices_and_context_window.json
+++ b/resources/model-pricing/model_prices_and_context_window.json
--- a/src/routes/api.js
+++ b/src/routes/api.js
@@ -1222,21 +1222,61 @@ async function handleMessagesRequest(req, res) {
        ) {
          const inputTokens = jsonData.usage.input_tokens || 0
          const outputTokens = jsonData.usage.output_tokens || 0
-          const cacheCreateTokens = jsonData.usage.cache_creation_input_tokens || 0
+          let cacheCreateTokens = jsonData.usage.cache_creation_input_tokens || 0
          let ephemeral5mTokens = 0
          let ephemeral1hTokens = 0
          if (jsonData.usage.cache_creation && typeof jsonData.usage.cache_creation === 'object') {
            ephemeral5mTokens = jsonData.usage.cache_creation.ephemeral_5m_input_tokens || 0
            ephemeral1hTokens = jsonData.usage.cache_creation.ephemeral_1h_input_tokens || 0
            cacheCreateTokens = ephemeral5mTokens + ephemeral1hTokens
          }
          const cacheReadTokens = jsonData.usage.cache_read_input_tokens || 0
          // Parse the model to remove vendor prefix if present (e.g., "ccr,gemini-2.5-pro" -> "gemini-2.5-pro")
          const rawModel = jsonData.model || _requestBodyNonStream.model || 'unknown'
          const { baseModel: usageBaseModel } = parseVendorPrefixedModel(rawModel)
          const model = usageBaseModel || rawModel
          // 构建 usage 对象以传递给 recordUsageWithDetails
          const usageObject = {
            input_tokens: inputTokens,
            output_tokens: outputTokens,
            cache_creation_input_tokens: cacheCreateTokens,
            cache_read_input_tokens: cacheReadTokens
          }
          // 添加请求元信息
          const requestBetaHeader =
            _headersNonStream['anthropic-beta'] ||
            _headersNonStream['Anthropic-Beta'] ||
            _headersNonStream['ANTHROPIC-BETA']
          if (requestBetaHeader) {
            usageObject.request_anthropic_beta = requestBetaHeader
          }
          if (
            typeof _requestBodyNonStream?.speed === 'string' &&
            _requestBodyNonStream.speed.trim()
          ) {
            usageObject.request_speed = _requestBodyNonStream.speed.trim().toLowerCase()
          }
          if (typeof jsonData.usage.speed === 'string' && jsonData.usage.speed.trim()) {
            usageObject.speed = jsonData.usage.speed.trim().toLowerCase()
          }
          // 添加 cache_creation 子对象
          if (ephemeral5mTokens > 0 || ephemeral1hTokens > 0) {
            usageObject.cache_creation = {
              ephemeral_5m_input_tokens: ephemeral5mTokens,
              ephemeral_1h_input_tokens: ephemeral1hTokens
            }
          }
          // 记录真实的token使用量（包含模型信息和所有4种token以及账户ID）
          const { accountId: responseAccountId } = response
-          const nonStreamCosts = await apiKeyService.recordUsage(
+          const nonStreamCosts = await apiKeyService.recordUsageWithDetails(
            _apiKeyIdNonStream,
-            inputTokens,
+            usageObject,
            outputTokens,
            cacheCreateTokens,
            cacheReadTokens,
            model,
            responseAccountId,
            accountType
@@ -1259,7 +1299,7 @@ async function handleMessagesRequest(req, res) {
          usageRecorded = true
          logger.api(
-            `📊 Non-stream usage recorded (real) - Model: ${model}, Input: ${inputTokens}, Output: ${outputTokens}, Cache Create: ${cacheCreateTokens}, Cache Read: ${cacheReadTokens}, Total: ${inputTokens + outputTokens + cacheCreateTokens + cacheReadTokens} tokens`
+            `📊 Non-stream usage recorded (real) - Model: ${model}, Input: ${inputTokens}, Output: ${outputTokens}, Cache Create: ${cacheCreateTokens} (5m: ${ephemeral5mTokens}, 1h: ${ephemeral1hTokens}), Cache Read: ${cacheReadTokens}, Total: ${inputTokens + outputTokens + cacheCreateTokens + cacheReadTokens} tokens`
          )
        } else {
          logger.warn('⚠️ No usage data found in Claude API JSON response')
--- a/src/services/costInitService.js
+++ b/src/services/costInitService.js
@@ -11,7 +11,11 @@ const USAGE_FIELDS = [
  'totalCacheCreateTokens',
  'cacheCreateTokens',
  'totalCacheReadTokens',
-  'cacheReadTokens'
+  'cacheReadTokens',
  'ephemeral5mTokens',
  'ephemeral1hTokens',
  'totalEphemeral5mTokens',
  'totalEphemeral1hTokens'
 ]
 class CostInitService {
@@ -202,8 +206,8 @@ class CostInitService {
      }
      // 添加 cache_creation 子对象以支持精确 ephemeral 定价
-      const eph5m = parseInt(data.ephemeral5mTokens) || 0
+      const eph5m = parseInt(data.totalEphemeral5mTokens) || parseInt(data.ephemeral5mTokens) || 0
-      const eph1h = parseInt(data.ephemeral1hTokens) || 0
+      const eph1h = parseInt(data.totalEphemeral1hTokens) || parseInt(data.ephemeral1hTokens) || 0
      if (eph5m > 0 || eph1h > 0) {
        usage.cache_creation = {
          ephemeral_5m_input_tokens: eph5m,
--- a/src/services/pricingService.js
+++ b/src/services/pricingService.js
@@ -28,43 +28,7 @@ class PricingService {
    this.updateTimer = null // 定时更新任务句柄
    this.hashSyncInProgress = false // 哈希同步状态
-    // 硬编码的 1 小时缓存价格（美元/百万 token）
+    // Claude Prompt Caching 官方倍率（基于输入价格）— 仅作为 model_pricing.json 缺失字段时的兜底
    // ephemeral_5m 的价格使用 model_pricing.json 中的 cache_creation_input_token_cost
    // ephemeral_1h 的价格需要硬编码
    this.ephemeral1hPricing = {
      // Opus 系列: $30/MTok
      'claude-opus-4-1': 0.00003,
      'claude-opus-4-1-20250805': 0.00003,
      'claude-opus-4': 0.00003,
      'claude-opus-4-20250514': 0.00003,
      'claude-3-opus': 0.00003,
      'claude-3-opus-latest': 0.00003,
      'claude-3-opus-20240229': 0.00003,
      // Sonnet 系列: $6/MTok
      'claude-3-5-sonnet': 0.000006,
      'claude-3-5-sonnet-latest': 0.000006,
      'claude-3-5-sonnet-20241022': 0.000006,
      'claude-3-5-sonnet-20240620': 0.000006,
      'claude-3-sonnet': 0.000006,
      'claude-3-sonnet-20240307': 0.000006,
      'claude-sonnet-3': 0.000006,
      'claude-sonnet-3-5': 0.000006,
      'claude-sonnet-3-7': 0.000006,
      'claude-sonnet-4': 0.000006,
      'claude-sonnet-4-20250514': 0.000006,
      // Haiku 系列: $1.6/MTok
      'claude-3-5-haiku': 0.0000016,
      'claude-3-5-haiku-latest': 0.0000016,
      'claude-3-5-haiku-20241022': 0.0000016,
      'claude-3-haiku': 0.0000016,
      'claude-3-haiku-20240307': 0.0000016,
      'claude-haiku-3': 0.0000016,
      'claude-haiku-3-5': 0.0000016
    }
    // Claude Prompt Caching 官方倍率（基于输入价格）
    this.claudeCacheMultipliers = {
      write5m: 1.25,
      write1h: 2,
@@ -536,50 +500,6 @@ class PricingService {
    return modelName.replace(/\[1m\]/gi, '').trim()
  }
  // 获取 1 小时缓存价格（优先使用 model_pricing.json 中的模型字段）
  getEphemeral1hPricing(modelName, pricing = null) {
    if (
      pricing?.cache_creation_input_token_cost_above_1hr !== null &&
      pricing?.cache_creation_input_token_cost_above_1hr !== undefined
    ) {
      return pricing.cache_creation_input_token_cost_above_1hr
    }
    if (!modelName) {
      return 0
    }
    // 尝试直接匹配
    if (
      this.ephemeral1hPricing[modelName] !== null &&
      this.ephemeral1hPricing[modelName] !== undefined
    ) {
      return this.ephemeral1hPricing[modelName]
    }
    // 处理各种模型名称变体
    const modelLower = modelName.toLowerCase()
    // 检查是否是 Opus 系列
    if (modelLower.includes('opus')) {
      return 0.00001 // $10/MTok
    }
    // 检查是否是 Sonnet 系列
    if (modelLower.includes('sonnet')) {
      return 0.000006 // $6/MTok
    }
    // 检查是否是 Haiku 系列
    if (modelLower.includes('haiku')) {
      return 0.000002 // $2/MTok
    }
    // 默认返回 0（未知模型）
    logger.debug(`💰 No 1h cache pricing found for model: ${modelName}`)
    return 0
  }
  // 计算使用费用
  calculateCost(usage, modelName) {
    const normalizedModelName = this.stripLongContextSuffix(modelName)
@@ -675,43 +595,58 @@ class PricingService {
        : baseOutputPrice
      : baseOutputPrice
-    // 应用 Fast Mode 倍率（在 200K+ 价格之上叠加）
+    // 缓存价格：优先从 model_pricing.json 取，Claude 缺失时用倍率兜底
    if (fastMultiplier > 1) {
      actualInputPrice *= fastMultiplier
      actualOutputPrice *= fastMultiplier
    }
    let actualCacheCreatePrice = 0
    let actualCacheReadPrice = 0
    let actualEphemeral1hPrice = 0
-    if (isClaudeModel) {
+    if (useLongContextPricing) {
-      // Claude 模型缓存价格统一按输入价格倍率推导，避免来源字段不一致导致计费偏差
+      // 200K+：Claude 仅用 above_200k 专用字段，缺失留 0 让下方兜底从 actualInputPrice 推导
-      actualCacheCreatePrice = actualInputPrice * this.claudeCacheMultipliers.write5m
+      actualCacheCreatePrice = isClaudeModel
-      actualCacheReadPrice = actualInputPrice * this.claudeCacheMultipliers.read
+        ? pricing.cache_creation_input_token_cost_above_200k_tokens || 0
-      actualEphemeral1hPrice = actualInputPrice * this.claudeCacheMultipliers.write1h
+        : pricing.cache_creation_input_token_cost_above_200k_tokens ||
    } else {
      actualCacheCreatePrice = useLongContextPricing
        ? pricing.cache_creation_input_token_cost_above_200k_tokens ||
          pricing.cache_creation_input_token_cost ||
          0
-        : pricing.cache_creation_input_token_cost || 0
+      actualCacheReadPrice = isClaudeModel
-
+        ? pricing.cache_read_input_token_cost_above_200k_tokens || 0
-      actualCacheReadPrice = useLongContextPricing
+        : pricing.cache_read_input_token_cost_above_200k_tokens ||
        ? pricing.cache_read_input_token_cost_above_200k_tokens ||
          pricing.cache_read_input_token_cost ||
          0
-        : pricing.cache_read_input_token_cost || 0
+      const has1h200k =
        pricing.cache_creation_input_token_cost_above_1hr_above_200k_tokens !== null &&
        pricing.cache_creation_input_token_cost_above_1hr_above_200k_tokens !== undefined
      actualEphemeral1hPrice = has1h200k
        ? pricing.cache_creation_input_token_cost_above_1hr_above_200k_tokens
        : isClaudeModel
          ? 0
          : pricing.cache_creation_input_token_cost_above_1hr || 0
    } else {
      actualCacheCreatePrice = pricing.cache_creation_input_token_cost || 0
      actualCacheReadPrice = pricing.cache_read_input_token_cost || 0
      actualEphemeral1hPrice = pricing.cache_creation_input_token_cost_above_1hr || 0
    }
-      const defaultEphemeral1hPrice = this.getEphemeral1hPricing(modelName, pricing)
+    // Claude 兜底：pricing 字段缺失时用倍率从 actualInputPrice 推导
    // 此时 actualInputPrice 尚未含 fastMultiplier，下方统一应用
    if (isClaudeModel) {
      if (!actualCacheCreatePrice) {
        actualCacheCreatePrice = actualInputPrice * this.claudeCacheMultipliers.write5m
      }
      if (!actualCacheReadPrice) {
        actualCacheReadPrice = actualInputPrice * this.claudeCacheMultipliers.read
      }
      if (!actualEphemeral1hPrice) {
        actualEphemeral1hPrice = actualInputPrice * this.claudeCacheMultipliers.write1h
      }
    }
-      // 非 Claude 模型维持原有字段优先级
+    // Fast Mode 倍率：统一一次性应用于所有价格
-      actualEphemeral1hPrice = useLongContextPricing
+    if (fastMultiplier > 1) {
-        ? pricing.cache_creation_input_token_cost_above_1hr_above_200k_tokens !== null &&
+      actualInputPrice *= fastMultiplier
-          pricing.cache_creation_input_token_cost_above_1hr_above_200k_tokens !== undefined
+      actualOutputPrice *= fastMultiplier
-          ? pricing.cache_creation_input_token_cost_above_1hr_above_200k_tokens
+      actualCacheCreatePrice *= fastMultiplier
-          : defaultEphemeral1hPrice
+      actualCacheReadPrice *= fastMultiplier
-        : defaultEphemeral1hPrice
+      actualEphemeral1hPrice *= fastMultiplier
    }
    // 计算各项费用
--- a/src/utils/costCalculator.js
+++ b/src/utils/costCalculator.js
@@ -239,6 +239,16 @@ class CostCalculator {
        aggregatedUsage.cacheReadTokens || aggregatedUsage.totalCacheReadTokens || 0
    }
    // 如果有 ephemeral 拆分数据，构建 cache_creation 子对象
    const eph5m = aggregatedUsage.ephemeral5mTokens || aggregatedUsage.totalEphemeral5mTokens || 0
    const eph1h = aggregatedUsage.ephemeral1hTokens || aggregatedUsage.totalEphemeral1hTokens || 0
    if (eph5m > 0 || eph1h > 0) {
      usage.cache_creation = {
        ephemeral_5m_input_tokens: eph5m,
        ephemeral_1h_input_tokens: eph1h
      }
    }
    return this.calculateCost(usage, model)
  }