diff --git a/src/handlers/geminiHandlers.js b/src/handlers/geminiHandlers.js index 40e4a703..9d4d2e26 100644 --- a/src/handlers/geminiHandlers.js +++ b/src/handlers/geminiHandlers.js @@ -209,7 +209,13 @@ function ensureGeminiPermissionMiddleware(req, res, next) { /** * 应用速率限制跟踪 */ -async function applyRateLimitTracking(req, usageSummary, model, context = '') { +async function applyRateLimitTracking( + req, + usageSummary, + model, + context = '', + preCalculatedCost = null +) { if (!req.rateLimitInfo) { return } @@ -222,7 +228,8 @@ async function applyRateLimitTracking(req, usageSummary, model, context = '') { usageSummary, model, req.apiKey?.id, - 'gemini' + 'gemini', + preCalculatedCost ) if (totalTokens > 0) { @@ -1705,7 +1712,7 @@ async function handleGenerateContent(req, res) { if (response?.response?.usageMetadata) { try { const usage = response.response.usageMetadata - await apiKeyService.recordUsage( + const geminiNonStreamCosts = await apiKeyService.recordUsage( req.apiKey.id, usage.promptTokenCount || 0, usage.candidatesTokenCount || 0, @@ -1728,7 +1735,8 @@ async function handleGenerateContent(req, res) { cacheReadTokens: 0 }, model, - 'gemini-non-stream' + 'gemini-non-stream', + geminiNonStreamCosts ) } catch (error) { logger.error('Failed to record Gemini usage:', error) @@ -2053,8 +2061,8 @@ async function handleStreamGenerateContent(req, res) { // 异步记录使用统计 if (!usageReported && totalUsage.totalTokenCount > 0) { - Promise.all([ - apiKeyService.recordUsage( + apiKeyService + .recordUsage( req.apiKey.id, totalUsage.promptTokenCount || 0, totalUsage.candidatesTokenCount || 0, @@ -2063,19 +2071,21 @@ async function handleStreamGenerateContent(req, res) { model, account.id, 'gemini' - ), - applyRateLimitTracking( - req, - { - inputTokens: totalUsage.promptTokenCount || 0, - outputTokens: totalUsage.candidatesTokenCount || 0, - cacheCreateTokens: 0, - cacheReadTokens: 0 - }, - model, - 'gemini-stream' ) - ]) + .then((costs) => + applyRateLimitTracking( + 
req, + { + inputTokens: totalUsage.promptTokenCount || 0, + outputTokens: totalUsage.candidatesTokenCount || 0, + cacheCreateTokens: 0, + cacheReadTokens: 0 + }, + model, + 'gemini-stream', + costs + ) + ) .then(() => { logger.info( `📊 Recorded Gemini stream usage - Input: ${totalUsage.promptTokenCount}, Output: ${totalUsage.candidatesTokenCount}, Total: ${totalUsage.totalTokenCount}` diff --git a/src/routes/admin/apiKeys.js b/src/routes/admin/apiKeys.js index c374eb15..cff0fb65 100644 --- a/src/routes/admin/apiKeys.js +++ b/src/routes/admin/apiKeys.js @@ -1093,9 +1093,8 @@ async function calculateKeyStats(keyId, timeRange, startDate, endDate) { const currentMonth = `${tzDate.getUTCFullYear()}-${String(tzDate.getUTCMonth() + 1).padStart(2, '0')}` searchPatterns.push(`usage:${keyId}:model:monthly:*:${currentMonth}`) } else { - // all - 获取所有数据(日和月数据都查) - searchPatterns.push(`usage:${keyId}:model:daily:*`) - searchPatterns.push(`usage:${keyId}:model:monthly:*`) + // all - 使用 alltime key(无 TTL,数据完整),避免 daily/monthly 键过期导致数据丢失 + searchPatterns.push(`usage:${keyId}:model:alltime:*`) } // 使用 SCAN 收集所有匹配的 keys @@ -1109,7 +1108,7 @@ async function calculateKeyStats(keyId, timeRange, startDate, endDate) { } while (cursor !== '0') } - // 去重(避免日数据和月数据重复计算) + // 去重 const uniqueKeys = [...new Set(allKeys)] // 获取实时限制数据(窗口数据不受时间范围筛选影响,始终获取当前窗口状态) @@ -1128,7 +1127,6 @@ async function calculateKeyStats(keyId, timeRange, startDate, endDate) { const apiKey = await redis.getApiKey(keyId) const rateLimitWindow = parseInt(apiKey?.rateLimitWindow) || 0 const dailyCostLimit = parseFloat(apiKey?.dailyCostLimit) || 0 - const totalCostLimit = parseFloat(apiKey?.totalCostLimit) || 0 const weeklyOpusCostLimit = parseFloat(apiKey?.weeklyOpusCostLimit) || 0 // 只在启用了每日费用限制时查询 @@ -1136,11 +1134,9 @@ async function calculateKeyStats(keyId, timeRange, startDate, endDate) { dailyCost = await redis.getDailyCost(keyId) } - // 只在启用了总费用限制时查询 - if (totalCostLimit > 0) { - const totalCostKey = 
`usage:cost:total:${keyId}` - allTimeCost = parseFloat((await client.get(totalCostKey)) || '0') - } + // 始终查询 allTimeCost(用于展示和限额校验) + const totalCostKey = `usage:cost:total:${keyId}` + allTimeCost = parseFloat((await client.get(totalCostKey)) || '0') // 只在启用了 Claude 周费用限制时查询(字段名沿用 weeklyOpusCostLimit) if (weeklyOpusCostLimit > 0) { @@ -1149,7 +1145,7 @@ async function calculateKeyStats(keyId, timeRange, startDate, endDate) { weeklyOpusCost = await redis.getWeeklyOpusCost(keyId, resetDay, resetHour) } - // 只在启用了窗口限制时查询窗口数据(移到早期返回之前,确保窗口数据始终被获取) + // 只在启用了窗口限制时查询窗口数据 if (rateLimitWindow > 0) { const requestCountKey = `rate_limit:requests:${keyId}` const tokenCountKey = `rate_limit:tokens:${keyId}` @@ -1180,37 +1176,23 @@ async function calculateKeyStats(keyId, timeRange, startDate, endDate) { } } } - - // 🔧 FIX: 对于 "全部时间" 时间范围,直接使用 allTimeCost - // 因为 usage:*:model:daily:* 键有 30 天 TTL,旧数据已经过期 - if (timeRange === 'all' && allTimeCost > 0) { - logger.debug(`📊 使用 allTimeCost 计算 timeRange='all': ${allTimeCost}`) - - return { - requests: 0, // 旧数据详情不可用 - tokens: 0, - inputTokens: 0, - outputTokens: 0, - cacheCreateTokens: 0, - cacheReadTokens: 0, - cost: allTimeCost, - formattedCost: CostCalculator.formatCost(allTimeCost), - // 实时限制数据(始终返回,不受时间范围影响) - dailyCost, - weeklyOpusCost, - currentWindowCost, - currentWindowRequests, - currentWindowTokens, - windowRemainingSeconds, - windowStartTime, - windowEndTime, - allTimeCost - } - } } catch (error) { logger.warn(`⚠️ 获取实时限制数据失败 (key: ${keyId}):`, error.message) } + // 构建实时限制数据对象(各分支复用) + const limitData = { + dailyCost, + weeklyOpusCost, + currentWindowCost, + currentWindowRequests, + currentWindowTokens, + windowRemainingSeconds, + windowStartTime, + windowEndTime, + allTimeCost + } + // 如果没有使用数据,返回零值但包含窗口数据 if (uniqueKeys.length === 0) { return { @@ -1221,17 +1203,9 @@ async function calculateKeyStats(keyId, timeRange, startDate, endDate) { cacheCreateTokens: 0, cacheReadTokens: 0, cost: 0, + realCost: 0, formattedCost: 
'$0.00', - // 实时限制数据(始终返回,不受时间范围影响) - dailyCost, - weeklyOpusCost, - currentWindowCost, - currentWindowRequests, - currentWindowTokens, - windowRemainingSeconds, - windowStartTime, - windowEndTime, - allTimeCost + ...limitData } } @@ -1246,10 +1220,13 @@ async function calculateKeyStats(keyId, timeRange, startDate, endDate) { const modelStatsMap = new Map() let totalRequests = 0 + // alltime key 的模式:usage:{keyId}:model:alltime:{model} + const alltimeKeyPattern = /usage:.+:model:alltime:(.+)$/ // 用于去重:先统计月数据,避免与日数据重复 const dailyKeyPattern = /usage:.+:model:daily:(.+):\d{4}-\d{2}-\d{2}$/ const monthlyKeyPattern = /usage:.+:model:monthly:(.+):\d{4}-\d{2}$/ const currentMonth = `${tzDate.getUTCFullYear()}-${String(tzDate.getUTCMonth() + 1).padStart(2, '0')}` + const isAlltimeQuery = timeRange === 'all' for (let i = 0; i < results.length; i++) { const [err, data] = results[i] @@ -1262,27 +1239,37 @@ async function calculateKeyStats(keyId, timeRange, startDate, endDate) { let isMonthly = false // 提取模型名称 - const dailyMatch = key.match(dailyKeyPattern) - const monthlyMatch = key.match(monthlyKeyPattern) + if (isAlltimeQuery) { + const alltimeMatch = key.match(alltimeKeyPattern) + if (alltimeMatch) { + model = alltimeMatch[1] + } + } else { + const dailyMatch = key.match(dailyKeyPattern) + const monthlyMatch = key.match(monthlyKeyPattern) - if (dailyMatch) { - model = dailyMatch[1] - } else if (monthlyMatch) { - model = monthlyMatch[1] - isMonthly = true + if (dailyMatch) { + model = dailyMatch[1] + } else if (monthlyMatch) { + model = monthlyMatch[1] + isMonthly = true + } } if (!model) { continue } - // 跳过当前月的月数据 - if (isMonthly && key.includes(`:${currentMonth}`)) { - continue - } - // 跳过非当前月的日数据 - if (!isMonthly && !key.includes(`:${currentMonth}-`)) { - continue + // 日/月去重逻辑(alltime 不需要去重) + if (!isAlltimeQuery) { + // 跳过当前月的月数据(当前月用日数据更精确) + if (isMonthly && key.includes(`:${currentMonth}`)) { + continue + } + // 跳过非当前月的日数据(非当前月用月数据) + if (!isMonthly && 
!key.includes(`:${currentMonth}-`)) { + continue + } } if (!modelStatsMap.has(model)) { @@ -1293,7 +1280,10 @@ async function calculateKeyStats(keyId, timeRange, startDate, endDate) { cacheReadTokens: 0, ephemeral5mTokens: 0, ephemeral1hTokens: 0, - requests: 0 + requests: 0, + realCostMicro: 0, + ratedCostMicro: 0, + hasStoredCost: false }) } @@ -1310,11 +1300,19 @@ async function calculateKeyStats(keyId, timeRange, startDate, endDate) { parseInt(data.totalEphemeral1hTokens) || parseInt(data.ephemeral1hTokens) || 0 stats.requests += parseInt(data.totalRequests) || parseInt(data.requests) || 0 + // 累加已存储的费用(微美元) + if ('realCostMicro' in data || 'ratedCostMicro' in data) { + stats.realCostMicro += parseInt(data.realCostMicro) || 0 + stats.ratedCostMicro += parseInt(data.ratedCostMicro) || 0 + stats.hasStoredCost = true + } + totalRequests += parseInt(data.totalRequests) || parseInt(data.requests) || 0 } - // 计算费用 - let totalCost = 0 + // 汇总费用:优先使用已存储的费用,仅对无存储费用的旧数据 fallback 到 token 重算 + let totalRatedCost = 0 + let totalRealCost = 0 let inputTokens = 0 let outputTokens = 0 let cacheCreateTokens = 0 @@ -1326,23 +1324,30 @@ async function calculateKeyStats(keyId, timeRange, startDate, endDate) { cacheCreateTokens += stats.cacheCreateTokens cacheReadTokens += stats.cacheReadTokens - const costUsage = { - input_tokens: stats.inputTokens, - output_tokens: stats.outputTokens, - cache_creation_input_tokens: stats.cacheCreateTokens, - cache_read_input_tokens: stats.cacheReadTokens - } - - // 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费 - if (stats.ephemeral5mTokens > 0 || stats.ephemeral1hTokens > 0) { - costUsage.cache_creation = { - ephemeral_5m_input_tokens: stats.ephemeral5mTokens, - ephemeral_1h_input_tokens: stats.ephemeral1hTokens + if (stats.hasStoredCost) { + // 使用请求时已计算并存储的费用(精确,包含 1M 上下文、特殊计费等) + totalRatedCost += stats.ratedCostMicro / 1000000 + totalRealCost += stats.realCostMicro / 1000000 + } else { + // Legacy fallback:旧数据没有存储费用,从 token 重算(不精确但聊胜于无) 
+ const costUsage = { + input_tokens: stats.inputTokens, + output_tokens: stats.outputTokens, + cache_creation_input_tokens: stats.cacheCreateTokens, + cache_read_input_tokens: stats.cacheReadTokens } - } - const costResult = CostCalculator.calculateCost(costUsage, model) - totalCost += costResult.costs.total + if (stats.ephemeral5mTokens > 0 || stats.ephemeral1hTokens > 0) { + costUsage.cache_creation = { + ephemeral_5m_input_tokens: stats.ephemeral5mTokens, + ephemeral_1h_input_tokens: stats.ephemeral1hTokens + } + } + + const costResult = CostCalculator.calculateCost(costUsage, model) + totalRatedCost += costResult.costs.total + totalRealCost += costResult.costs.total + } } const tokens = inputTokens + outputTokens + cacheCreateTokens + cacheReadTokens @@ -1354,18 +1359,10 @@ async function calculateKeyStats(keyId, timeRange, startDate, endDate) { outputTokens, cacheCreateTokens, cacheReadTokens, - cost: totalCost, - formattedCost: CostCalculator.formatCost(totalCost), - // 实时限制数据 - dailyCost, - weeklyOpusCost, - currentWindowCost, - currentWindowRequests, - currentWindowTokens, - windowRemainingSeconds, - windowStartTime, - windowEndTime, - allTimeCost // 历史总费用(用于总费用限制) + cost: totalRatedCost, + realCost: totalRealCost, + formattedCost: CostCalculator.formatCost(totalRatedCost), + ...limitData } } diff --git a/src/routes/admin/usageStats.js b/src/routes/admin/usageStats.js index c83aad4e..c8b5660b 100644 --- a/src/routes/admin/usageStats.js +++ b/src/routes/admin/usageStats.js @@ -1011,7 +1011,10 @@ router.get('/api-keys/:keyId/model-stats', authenticateAdmin, async (req, res) = cacheReadTokens: 0, ephemeral5mTokens: 0, ephemeral1hTokens: 0, - allTokens: 0 + allTokens: 0, + realCostMicro: 0, + ratedCostMicro: 0, + hasStoredCost: false }) } const stats = modelStatsMap.get(model) @@ -1023,6 +1026,11 @@ router.get('/api-keys/:keyId/model-stats', authenticateAdmin, async (req, res) = stats.ephemeral5mTokens += parseInt(data.ephemeral5mTokens) || 0 
stats.ephemeral1hTokens += parseInt(data.ephemeral1hTokens) || 0 stats.allTokens += parseInt(data.allTokens) || 0 + if ('realCostMicro' in data || 'ratedCostMicro' in data) { + stats.realCostMicro += parseInt(data.realCostMicro) || 0 + stats.ratedCostMicro += parseInt(data.ratedCostMicro) || 0 + stats.hasStoredCost = true + } } } } else { @@ -1059,7 +1067,10 @@ router.get('/api-keys/:keyId/model-stats', authenticateAdmin, async (req, res) = cacheReadTokens: 0, ephemeral5mTokens: 0, ephemeral1hTokens: 0, - allTokens: 0 + allTokens: 0, + realCostMicro: 0, + ratedCostMicro: 0, + hasStoredCost: false }) } const stats = modelStatsMap.get(model) @@ -1071,6 +1082,11 @@ router.get('/api-keys/:keyId/model-stats', authenticateAdmin, async (req, res) = stats.ephemeral5mTokens += parseInt(data.ephemeral5mTokens) || 0 stats.ephemeral1hTokens += parseInt(data.ephemeral1hTokens) || 0 stats.allTokens += parseInt(data.allTokens) || 0 + if ('realCostMicro' in data || 'ratedCostMicro' in data) { + stats.realCostMicro += parseInt(data.realCostMicro) || 0 + stats.ratedCostMicro += parseInt(data.ratedCostMicro) || 0 + stats.hasStoredCost = true + } } } @@ -1078,23 +1094,36 @@ router.get('/api-keys/:keyId/model-stats', authenticateAdmin, async (req, res) = for (const [model, stats] of modelStatsMap) { logger.info(`📊 Model ${model} aggregated data:`, stats) - const usage = { - input_tokens: stats.inputTokens, - output_tokens: stats.outputTokens, - cache_creation_input_tokens: stats.cacheCreateTokens, - cache_read_input_tokens: stats.cacheReadTokens - } - - // 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费 - if (stats.ephemeral5mTokens > 0 || stats.ephemeral1hTokens > 0) { - usage.cache_creation = { - ephemeral_5m_input_tokens: stats.ephemeral5mTokens, - ephemeral_1h_input_tokens: stats.ephemeral1hTokens + let costData + if (stats.hasStoredCost) { + // 使用请求时已计算并存储的费用(精确,包含 1M 上下文、Fast Mode 等特殊计费) + const ratedCost = stats.ratedCostMicro / 1000000 + const realCost = 
stats.realCostMicro / 1000000 + costData = { + costs: { total: ratedCost, real: realCost }, + formatted: { total: CostCalculator.formatCost(ratedCost) }, + pricing: null, + usingDynamicPricing: false, + usingStoredCost: true + } + } else { + // Legacy fallback:旧数据没有存储费用,从 token 重算 + const usage = { + input_tokens: stats.inputTokens, + output_tokens: stats.outputTokens, + cache_creation_input_tokens: stats.cacheCreateTokens, + cache_read_input_tokens: stats.cacheReadTokens } - } - // 使用CostCalculator计算费用 - const costData = CostCalculator.calculateCost(usage, model) + if (stats.ephemeral5mTokens > 0 || stats.ephemeral1hTokens > 0) { + usage.cache_creation = { + ephemeral_5m_input_tokens: stats.ephemeral5mTokens, + ephemeral_1h_input_tokens: stats.ephemeral1hTokens + } + } + + costData = CostCalculator.calculateCost(usage, model) + } modelStats.push({ model, @@ -1933,26 +1962,37 @@ router.get('/api-keys-usage-trend', authenticateAdmin, async (req, res) => { continue } - const usage = { - input_tokens: parseInt(modelData.inputTokens) || 0, - output_tokens: parseInt(modelData.outputTokens) || 0, - cache_creation_input_tokens: parseInt(modelData.cacheCreateTokens) || 0, - cache_read_input_tokens: parseInt(modelData.cacheReadTokens) || 0 - } + // 优先使用已存储的费用 + const hasStoredCost = 'realCostMicro' in modelData || 'ratedCostMicro' in modelData + let modelCost = 0 - // 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费 - const eph5m = parseInt(modelData.ephemeral5mTokens) || 0 - const eph1h = parseInt(modelData.ephemeral1hTokens) || 0 - if (eph5m > 0 || eph1h > 0) { - usage.cache_creation = { - ephemeral_5m_input_tokens: eph5m, - ephemeral_1h_input_tokens: eph1h + if (hasStoredCost) { + modelCost = (parseInt(modelData.ratedCostMicro) || 0) / 1000000 + } else { + // Legacy fallback:旧数据没有存储费用,从 token 重算 + const usage = { + input_tokens: parseInt(modelData.inputTokens) || 0, + output_tokens: parseInt(modelData.outputTokens) || 0, + cache_creation_input_tokens: 
parseInt(modelData.cacheCreateTokens) || 0, + cache_read_input_tokens: parseInt(modelData.cacheReadTokens) || 0 } + + // 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费 + const eph5m = parseInt(modelData.ephemeral5mTokens) || 0 + const eph1h = parseInt(modelData.ephemeral1hTokens) || 0 + if (eph5m > 0 || eph1h > 0) { + usage.cache_creation = { + ephemeral_5m_input_tokens: eph5m, + ephemeral_1h_input_tokens: eph1h + } + } + + const costResult = CostCalculator.calculateCost(usage, model) + modelCost = costResult.costs.total } - const costResult = CostCalculator.calculateCost(usage, model) const currentCost = apiKeyCostMap.get(apiKeyId) || 0 - apiKeyCostMap.set(apiKeyId, currentCost + costResult.costs.total) + apiKeyCostMap.set(apiKeyId, currentCost + modelCost) } // 组合数据 @@ -2111,26 +2151,37 @@ router.get('/api-keys-usage-trend', authenticateAdmin, async (req, res) => { continue } - const usage = { - input_tokens: parseInt(modelData.inputTokens) || 0, - output_tokens: parseInt(modelData.outputTokens) || 0, - cache_creation_input_tokens: parseInt(modelData.cacheCreateTokens) || 0, - cache_read_input_tokens: parseInt(modelData.cacheReadTokens) || 0 - } + // 优先使用已存储的费用 + const hasStoredCost = 'realCostMicro' in modelData || 'ratedCostMicro' in modelData + let modelCost = 0 - // 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费 - const eph5m = parseInt(modelData.ephemeral5mTokens) || 0 - const eph1h = parseInt(modelData.ephemeral1hTokens) || 0 - if (eph5m > 0 || eph1h > 0) { - usage.cache_creation = { - ephemeral_5m_input_tokens: eph5m, - ephemeral_1h_input_tokens: eph1h + if (hasStoredCost) { + modelCost = (parseInt(modelData.ratedCostMicro) || 0) / 1000000 + } else { + // Legacy fallback:旧数据没有存储费用,从 token 重算 + const usage = { + input_tokens: parseInt(modelData.inputTokens) || 0, + output_tokens: parseInt(modelData.outputTokens) || 0, + cache_creation_input_tokens: parseInt(modelData.cacheCreateTokens) || 0, + cache_read_input_tokens: 
parseInt(modelData.cacheReadTokens) || 0 } + + // 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费 + const eph5m = parseInt(modelData.ephemeral5mTokens) || 0 + const eph1h = parseInt(modelData.ephemeral1hTokens) || 0 + if (eph5m > 0 || eph1h > 0) { + usage.cache_creation = { + ephemeral_5m_input_tokens: eph5m, + ephemeral_1h_input_tokens: eph1h + } + } + + const costResult = CostCalculator.calculateCost(usage, model) + modelCost = costResult.costs.total } - const costResult = CostCalculator.calculateCost(usage, model) const currentCost = apiKeyCostMap.get(apiKeyId) || 0 - apiKeyCostMap.set(apiKeyId, currentCost + costResult.costs.total) + apiKeyCostMap.set(apiKeyId, currentCost + modelCost) } // 组合数据 @@ -2628,7 +2679,7 @@ router.get('/usage-costs', authenticateAdmin, async (req, res) => { } }) -// 获取 API Key 的请求记录时间线 +// 获取 API Key 的请求记录时间线 router.get('/api-keys/:keyId/usage-records', authenticateAdmin, async (req, res) => { try { const { keyId } = req.params diff --git a/src/routes/api.js b/src/routes/api.js index 85ad01f5..67b0ff8b 100644 --- a/src/routes/api.js +++ b/src/routes/api.js @@ -33,7 +33,8 @@ function queueRateLimitUpdate( model, context = '', keyId = null, - accountType = null + accountType = null, + preCalculatedCost = null ) { if (!rateLimitInfo) { return Promise.resolve({ totalTokens: 0, totalCost: 0 }) } const label = context ? 
` (${context})` : '' - return updateRateLimitCounters(rateLimitInfo, usageSummary, model, keyId, accountType) + return updateRateLimitCounters( + rateLimitInfo, + usageSummary, + model, + keyId, + accountType, + preCalculatedCost + ) .then(({ totalTokens, totalCost }) => { if (totalTokens > 0) { logger.api(`📊 Updated rate limit token count${label}: +${totalTokens} tokens`) @@ -492,24 +500,40 @@ async function handleMessagesRequest(req, res) { apiKeyService .recordUsageWithDetails(_apiKeyId, usageObject, model, usageAccountId, accountType) + .then((costs) => { + queueRateLimitUpdate( + _rateLimitInfo, + { + inputTokens, + outputTokens, + cacheCreateTokens, + cacheReadTokens + }, + model, + 'claude-stream', + _apiKeyId, + accountType, + costs + ) + }) .catch((error) => { logger.error('❌ Failed to record stream usage:', error) + // Fallback: 仍然更新限流计数(使用 legacy 计算) + queueRateLimitUpdate( + _rateLimitInfo, + { + inputTokens, + outputTokens, + cacheCreateTokens, + cacheReadTokens + }, + model, + 'claude-stream', + _apiKeyId, + accountType + ) }) - queueRateLimitUpdate( - _rateLimitInfo, - { - inputTokens, - outputTokens, - cacheCreateTokens, - cacheReadTokens - }, - model, - 'claude-stream', - _apiKeyId, - accountType - ) - usageDataCaptured = true logger.api( `📊 Stream usage recorded (real) - Model: ${model}, Input: ${inputTokens}, Output: ${outputTokens}, Cache Create: ${cacheCreateTokens}, Cache Read: ${cacheReadTokens}, Total: ${inputTokens + outputTokens + cacheCreateTokens + cacheReadTokens} tokens` @@ -608,24 +632,39 @@ async function handleMessagesRequest(req, res) { usageAccountId, 'claude-console' ) + .then((costs) => { + queueRateLimitUpdate( + _rateLimitInfoConsole, + { + inputTokens, + outputTokens, + cacheCreateTokens, + cacheReadTokens + }, + model, + 'claude-console-stream', + _apiKeyIdConsole, + accountType, + costs + ) + }) .catch((error) => { logger.error('❌ Failed to record stream usage:', error) + queueRateLimitUpdate( + _rateLimitInfoConsole, + { + 
inputTokens, + outputTokens, + cacheCreateTokens, + cacheReadTokens + }, + model, + 'claude-console-stream', + _apiKeyIdConsole, + accountType + ) }) - queueRateLimitUpdate( - _rateLimitInfoConsole, - { - inputTokens, - outputTokens, - cacheCreateTokens, - cacheReadTokens - }, - model, - 'claude-console-stream', - _apiKeyIdConsole, - accountType - ) - usageDataCaptured = true logger.api( `📊 Stream usage recorded (real) - Model: ${model}, Input: ${inputTokens}, Output: ${outputTokens}, Cache Create: ${cacheCreateTokens}, Cache Read: ${cacheReadTokens}, Total: ${inputTokens + outputTokens + cacheCreateTokens + cacheReadTokens} tokens` @@ -674,24 +713,39 @@ async function handleMessagesRequest(req, res) { accountId, 'bedrock' ) + .then((costs) => { + queueRateLimitUpdate( + _rateLimitInfoBedrock, + { + inputTokens, + outputTokens, + cacheCreateTokens: 0, + cacheReadTokens: 0 + }, + result.model, + 'bedrock-stream', + _apiKeyIdBedrock, + 'bedrock', + costs + ) + }) .catch((error) => { logger.error('❌ Failed to record Bedrock stream usage:', error) + queueRateLimitUpdate( + _rateLimitInfoBedrock, + { + inputTokens, + outputTokens, + cacheCreateTokens: 0, + cacheReadTokens: 0 + }, + result.model, + 'bedrock-stream', + _apiKeyIdBedrock, + 'bedrock' + ) }) - queueRateLimitUpdate( - _rateLimitInfoBedrock, - { - inputTokens, - outputTokens, - cacheCreateTokens: 0, - cacheReadTokens: 0 - }, - result.model, - 'bedrock-stream', - _apiKeyIdBedrock, - 'bedrock' - ) - usageDataCaptured = true logger.api( `📊 Bedrock stream usage recorded - Model: ${result.model}, Input: ${inputTokens}, Output: ${outputTokens}, Total: ${inputTokens + outputTokens} tokens` @@ -781,24 +835,39 @@ async function handleMessagesRequest(req, res) { apiKeyService .recordUsageWithDetails(_apiKeyIdCcr, usageObject, model, usageAccountId, 'ccr') + .then((costs) => { + queueRateLimitUpdate( + _rateLimitInfoCcr, + { + inputTokens, + outputTokens, + cacheCreateTokens, + cacheReadTokens + }, + model, + 
'ccr-stream', + _apiKeyIdCcr, + 'ccr', + costs + ) + }) .catch((error) => { logger.error('❌ Failed to record CCR stream usage:', error) + queueRateLimitUpdate( + _rateLimitInfoCcr, + { + inputTokens, + outputTokens, + cacheCreateTokens, + cacheReadTokens + }, + model, + 'ccr-stream', + _apiKeyIdCcr, + 'ccr' + ) }) - queueRateLimitUpdate( - _rateLimitInfoCcr, - { - inputTokens, - outputTokens, - cacheCreateTokens, - cacheReadTokens - }, - model, - 'ccr-stream', - _apiKeyIdCcr, - 'ccr' - ) - usageDataCaptured = true logger.api( `📊 CCR stream usage recorded (real) - Model: ${model}, Input: ${inputTokens}, Output: ${outputTokens}, Cache Create: ${cacheCreateTokens}, Cache Read: ${cacheReadTokens}, Total: ${inputTokens + outputTokens + cacheCreateTokens + cacheReadTokens} tokens` @@ -1143,7 +1212,7 @@ async function handleMessagesRequest(req, res) { // 记录真实的token使用量(包含模型信息和所有4种token以及账户ID) const { accountId: responseAccountId } = response - await apiKeyService.recordUsage( + const nonStreamCosts = await apiKeyService.recordUsage( _apiKeyIdNonStream, inputTokens, outputTokens, @@ -1165,7 +1234,8 @@ async function handleMessagesRequest(req, res) { model, 'claude-non-stream', _apiKeyIdNonStream, - accountType + accountType, + nonStreamCosts ) usageRecorded = true diff --git a/src/routes/apiStats.js b/src/routes/apiStats.js index 83645892..776259cb 100644 --- a/src/routes/apiStats.js +++ b/src/routes/apiStats.js @@ -277,7 +277,10 @@ router.post('/api/user-stats', async (req, res) => { cacheCreateTokens: 0, cacheReadTokens: 0, ephemeral5mTokens: 0, - ephemeral1hTokens: 0 + ephemeral1hTokens: 0, + realCostMicro: 0, + ratedCostMicro: 0, + hasStoredCost: false }) } @@ -288,28 +291,39 @@ router.post('/api/user-stats', async (req, res) => { modelUsage.cacheReadTokens += parseInt(data.cacheReadTokens) || 0 modelUsage.ephemeral5mTokens += parseInt(data.ephemeral5mTokens) || 0 modelUsage.ephemeral1hTokens += parseInt(data.ephemeral1hTokens) || 0 + if ('realCostMicro' in data || 
'ratedCostMicro' in data) { + modelUsage.realCostMicro += parseInt(data.realCostMicro) || 0 + modelUsage.ratedCostMicro += parseInt(data.ratedCostMicro) || 0 + modelUsage.hasStoredCost = true + } } } // 按模型计算费用并汇总 for (const [model, usage] of modelUsageMap) { - const usageData = { - input_tokens: usage.inputTokens, - output_tokens: usage.outputTokens, - cache_creation_input_tokens: usage.cacheCreateTokens, - cache_read_input_tokens: usage.cacheReadTokens - } - - // 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费 - if (usage.ephemeral5mTokens > 0 || usage.ephemeral1hTokens > 0) { - usageData.cache_creation = { - ephemeral_5m_input_tokens: usage.ephemeral5mTokens, - ephemeral_1h_input_tokens: usage.ephemeral1hTokens + if (usage.hasStoredCost) { + // 使用请求时已存储的费用(精确) + totalCost += usage.ratedCostMicro / 1000000 + } else { + // Legacy fallback:旧数据没有存储费用,从 token 重算 + const usageData = { + input_tokens: usage.inputTokens, + output_tokens: usage.outputTokens, + cache_creation_input_tokens: usage.cacheCreateTokens, + cache_read_input_tokens: usage.cacheReadTokens } - } - const costResult = CostCalculator.calculateCost(usageData, model) - totalCost += costResult.costs.total + // 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费 + if (usage.ephemeral5mTokens > 0 || usage.ephemeral1hTokens > 0) { + usageData.cache_creation = { + ephemeral_5m_input_tokens: usage.ephemeral5mTokens, + ephemeral_1h_input_tokens: usage.ephemeral1hTokens + } + } + + const costResult = CostCalculator.calculateCost(usageData, model) + totalCost += costResult.costs.total + } } // 如果没有模型级别的详细数据,回退到总体数据计算 diff --git a/src/routes/openaiClaudeRoutes.js b/src/routes/openaiClaudeRoutes.js index 9689b296..8086de25 100644 --- a/src/routes/openaiClaudeRoutes.js +++ b/src/routes/openaiClaudeRoutes.js @@ -30,7 +30,8 @@ function queueRateLimitUpdate( model, context = '', keyId = null, - accountType = null + accountType = null, + preCalculatedCost = null ) { if (!rateLimitInfo) { return @@ -38,7 +39,7 @@ 
function queueRateLimitUpdate( const label = context ? ` (${context})` : '' - updateRateLimitCounters(rateLimitInfo, usageSummary, model, keyId, accountType) + updateRateLimitCounters(rateLimitInfo, usageSummary, model, keyId, accountType, preCalculatedCost) .then(({ totalTokens, totalCost }) => { if (totalTokens > 0) { logger.api(`📊 Updated rate limit token count${label}: +${totalTokens} tokens`) @@ -306,23 +307,38 @@ async function handleChatCompletion(req, res, apiKeyData) { accountId, accountType ) + .then((costs) => { + queueRateLimitUpdate( + req.rateLimitInfo, + { + inputTokens: usage.input_tokens || 0, + outputTokens: usage.output_tokens || 0, + cacheCreateTokens, + cacheReadTokens + }, + model, + `openai-${accountType}-stream`, + req.apiKey?.id, + accountType, + costs + ) + }) .catch((error) => { logger.error('❌ Failed to record usage:', error) + queueRateLimitUpdate( + req.rateLimitInfo, + { + inputTokens: usage.input_tokens || 0, + outputTokens: usage.output_tokens || 0, + cacheCreateTokens, + cacheReadTokens + }, + model, + `openai-${accountType}-stream`, + req.apiKey?.id, + accountType + ) }) - - queueRateLimitUpdate( - req.rateLimitInfo, - { - inputTokens: usage.input_tokens || 0, - outputTokens: usage.output_tokens || 0, - cacheCreateTokens, - cacheReadTokens - }, - model, - `openai-${accountType}-stream`, - req.apiKey?.id, - accountType - ) } } @@ -444,23 +460,38 @@ async function handleChatCompletion(req, res, apiKeyData) { accountId, accountType ) + .then((costs) => { + queueRateLimitUpdate( + req.rateLimitInfo, + { + inputTokens: usage.input_tokens || 0, + outputTokens: usage.output_tokens || 0, + cacheCreateTokens, + cacheReadTokens + }, + claudeRequest.model, + `openai-${accountType}-non-stream`, + req.apiKey?.id, + accountType, + costs + ) + }) .catch((error) => { logger.error('❌ Failed to record usage:', error) + queueRateLimitUpdate( + req.rateLimitInfo, + { + inputTokens: usage.input_tokens || 0, + outputTokens: usage.output_tokens || 0, + 
cacheCreateTokens, + cacheReadTokens + }, + claudeRequest.model, + `openai-${accountType}-non-stream`, + req.apiKey?.id, + accountType + ) }) - - queueRateLimitUpdate( - req.rateLimitInfo, - { - inputTokens: usage.input_tokens || 0, - outputTokens: usage.output_tokens || 0, - cacheCreateTokens, - cacheReadTokens - }, - claudeRequest.model, - `openai-${accountType}-non-stream`, - req.apiKey?.id, - accountType - ) } // 返回 OpenAI 格式响应 diff --git a/src/routes/openaiRoutes.js b/src/routes/openaiRoutes.js index b912a253..b3f4105a 100644 --- a/src/routes/openaiRoutes.js +++ b/src/routes/openaiRoutes.js @@ -70,7 +70,14 @@ function extractCodexUsageHeaders(headers) { return hasData ? snapshot : null } -async function applyRateLimitTracking(req, usageSummary, model, context = '', accountType = null) { +async function applyRateLimitTracking( + req, + usageSummary, + model, + context = '', + accountType = null, + preCalculatedCost = null +) { if (!req.rateLimitInfo) { return } @@ -83,7 +90,8 @@ async function applyRateLimitTracking(req, usageSummary, model, context = '', ac usageSummary, model, req.apiKey?.id, - accountType + accountType, + preCalculatedCost ) if (totalTokens > 0) { @@ -613,7 +621,7 @@ const handleResponses = async (req, res) => { // 计算实际输入token(总输入减去缓存部分) const actualInputTokens = Math.max(0, totalInputTokens - cacheReadTokens) - await apiKeyService.recordUsage( + const nonStreamCosts = await apiKeyService.recordUsage( apiKeyData.id, actualInputTokens, // 传递实际输入(不含缓存) outputTokens, @@ -638,7 +646,8 @@ const handleResponses = async (req, res) => { }, actualModel, 'openai-non-stream', - 'openai' + 'openai', + nonStreamCosts ) } @@ -729,7 +738,7 @@ const handleResponses = async (req, res) => { // 使用响应中的真实 model,如果没有则使用请求中的 model,最后回退到默认值 const modelToRecord = actualModel || requestedModel || 'gpt-4' - await apiKeyService.recordUsage( + const streamCosts = await apiKeyService.recordUsage( apiKeyData.id, actualInputTokens, // 传递实际输入(不含缓存) outputTokens, @@ -755,7 
+764,8 @@ const handleResponses = async (req, res) => { }, modelToRecord, 'openai-stream', - 'openai' + 'openai', + streamCosts ) } catch (error) { logger.error('Failed to record OpenAI usage:', error) diff --git a/src/services/anthropicGeminiBridgeService.js b/src/services/anthropicGeminiBridgeService.js index c4a855a0..f6ecf40d 100644 --- a/src/services/anthropicGeminiBridgeService.js +++ b/src/services/anthropicGeminiBridgeService.js @@ -1805,7 +1805,8 @@ async function applyRateLimitTracking( usageSummary, model, context = '', - keyId = null + keyId = null, + preCalculatedCost = null ) { if (!rateLimitInfo) { return @@ -1819,7 +1820,8 @@ async function applyRateLimitTracking( usageSummary, model, keyId, - 'gemini' + 'gemini', + preCalculatedCost ) if (totalTokens > 0) { logger.api(`📊 Updated rate limit token count${label}: +${totalTokens} tokens`) @@ -2135,7 +2137,7 @@ async function handleAnthropicMessagesToGemini(req, res, { vendor, baseModel }) : mapGeminiFinishReasonToAnthropicStopReason(finishReason) if (req.apiKey?.id && (inputTokens > 0 || outputTokens > 0)) { - await apiKeyService.recordUsage( + const bridgeCosts = await apiKeyService.recordUsage( req.apiKey.id, inputTokens, outputTokens, @@ -2150,7 +2152,8 @@ async function handleAnthropicMessagesToGemini(req, res, { vendor, baseModel }) { inputTokens, outputTokens, cacheCreateTokens: 0, cacheReadTokens: 0 }, effectiveModel, 'anthropic-messages', - req.apiKey?.id + req.apiKey?.id, + bridgeCosts ) } @@ -2675,7 +2678,7 @@ async function handleAnthropicMessagesToGemini(req, res, { vendor, baseModel }) } if (req.apiKey?.id && (inputTokens > 0 || outputTokens > 0)) { - await apiKeyService.recordUsage( + const bridgeStreamCosts = await apiKeyService.recordUsage( req.apiKey.id, inputTokens, outputTokens, @@ -2689,7 +2692,9 @@ async function handleAnthropicMessagesToGemini(req, res, { vendor, baseModel }) req.rateLimitInfo, { inputTokens, outputTokens, cacheCreateTokens: 0, cacheReadTokens: 0 }, 
effectiveModel, - 'anthropic-messages-stream' + 'anthropic-messages-stream', + req.apiKey?.id, + bridgeStreamCosts ) } } diff --git a/src/services/apiKeyService.js b/src/services/apiKeyService.js index 2fa88191..53f873a7 100644 --- a/src/services/apiKeyService.js +++ b/src/services/apiKeyService.js @@ -1662,8 +1662,11 @@ class ApiKeyService { logParts.push(`Total: ${totalTokens} tokens`) logger.database(`📊 Recorded usage: ${keyId} - ${logParts.join(', ')}`) + + return { realCost, ratedCost } } catch (error) { logger.error('❌ Failed to record usage:', error) + return { realCost: 0, ratedCost: 0 } } } @@ -1958,8 +1961,11 @@ class ApiKeyService { // 发布失败不影响主流程,只记录错误 logger.warn('⚠️ Failed to publish billing event:', err.message) }) + + return { realCost: realCostWithDetails, ratedCost: ratedCostWithDetails } } catch (error) { logger.error('❌ Failed to record usage:', error) + return { realCost: 0, ratedCost: 0 } } } diff --git a/src/services/pricingService.js b/src/services/pricingService.js index 0eb3b2f6..cd04b5f2 100644 --- a/src/services/pricingService.js +++ b/src/services/pricingService.js @@ -528,11 +528,6 @@ class PricingService { } } - // Claude Fast Mode 目前仅适用于 Opus 4.6 系列 - isFastModeEligibleClaudeModel(modelName) { - return typeof modelName === 'string' && modelName.toLowerCase().includes('opus-4-6') - } - // 去掉模型名中的 [1m] 后缀,便于价格查找 stripLongContextSuffix(modelName) { if (typeof modelName !== 'string') { @@ -541,45 +536,6 @@ class PricingService { return modelName.replace(/\[1m\]/gi, '').trim() } - // 获取 Fast Mode 对应的价格条目(仅匹配 fast/ 前缀) - getFastModePricing(modelName) { - if (!this.pricingData || !modelName) { - return null - } - - const cleanedModelName = this.stripLongContextSuffix(modelName) - const exactCandidates = new Set([`fast/${cleanedModelName}`]) - - if (cleanedModelName.startsWith('fast/')) { - exactCandidates.add(cleanedModelName) - } - - for (const candidate of exactCandidates) { - if (this.pricingData[candidate]) { - logger.debug(`💰 Found 
exact fast pricing for ${modelName}: ${candidate}`) - return this.pricingData[candidate] - } - } - - const normalizedModel = cleanedModelName.toLowerCase().replace(/[_-]/g, '') - for (const [key, value] of Object.entries(this.pricingData)) { - if (!key.startsWith('fast/')) { - continue - } - const normalizedFastKey = key.slice('fast/'.length).toLowerCase().replace(/[_-]/g, '') - if ( - normalizedFastKey.includes(normalizedModel) || - normalizedModel.includes(normalizedFastKey) - ) { - logger.debug(`💰 Found fuzzy fast pricing for ${modelName}: ${key}`) - return value - } - } - - logger.debug(`💰 No fast pricing found for model: ${modelName}`) - return null - } - // 获取 1 小时缓存价格(优先使用 model_pricing.json 中的模型字段) getEphemeral1hPricing(modelName, pricing = null) { if ( @@ -606,7 +562,7 @@ class PricingService { // 检查是否是 Opus 系列 if (modelLower.includes('opus')) { - return 0.00003 // $30/MTok + return 0.00001 // $10/MTok } // 检查是否是 Sonnet 系列 @@ -616,7 +572,7 @@ class PricingService { // 检查是否是 Haiku 系列 if (modelLower.includes('haiku')) { - return 0.0000016 // $1.6/MTok + return 0.000002 // $2/MTok } // 默认返回 0(未知模型) @@ -647,15 +603,14 @@ class PricingService { const hasFastSpeedSignal = responseSpeed === this.claudeFeatureFlags.fastModeSpeed || requestSpeed === this.claudeFeatureFlags.fastModeSpeed - const isFastModeRequest = - hasFastModeBeta && - hasFastSpeedSignal && - this.isFastModeEligibleClaudeModel(normalizedModelName) + const isFastModeRequest = hasFastModeBeta && hasFastSpeedSignal const standardPricing = this.getModelPricing(modelName) - const fastPricing = isFastModeRequest ? this.getFastModePricing(normalizedModelName) : null - const pricing = fastPricing || standardPricing + const pricing = standardPricing const isLongContextModeEnabled = isLongContextModel || hasContext1mBeta + // Fast Mode 倍率:优先从 provider_specific_entry.fast 读取,默认 6 倍 + const fastMultiplier = isFastModeRequest ? 
pricing?.provider_specific_entry?.fast || 6 : 1 + // 当 [1m] 模型总输入超过 200K 时,进入 200K+ 计费逻辑 // 根据 Anthropic 官方文档:当总输入超过 200K 时,整个请求所有 token 类型都使用高档价格 if (isLongContextModeEnabled && totalInputTokens > 200000) { @@ -685,11 +640,13 @@ class PricingService { (typeof pricing?.litellm_provider === 'string' && pricing.litellm_provider.toLowerCase().includes('anthropic')) - if (isFastModeRequest && fastPricing) { - logger.info(`🚀 Fast mode pricing profile selected: fast/${normalizedModelName}`) - } else if (isFastModeRequest && !fastPricing) { + if (isFastModeRequest && fastMultiplier > 1) { + logger.info( + `🚀 Fast mode ${fastMultiplier}x multiplier applied for ${normalizedModelName} (from provider_specific_entry)` + ) + } else if (isFastModeRequest) { logger.warn( - `⚠️ Fast mode request detected but no fast pricing profile found for ${normalizedModelName}; fallback to standard profile` + `⚠️ Fast mode request detected but no fast pricing found for ${normalizedModelName}; fallback to standard profile` ) } @@ -700,7 +657,7 @@ class PricingService { // 确定实际使用的输入价格(普通或 200K+ 高档价格) // Claude 模型在 200K+ 场景下如果缺少官方字段,按 2 倍输入价兜底 - const actualInputPrice = useLongContextPricing + let actualInputPrice = useLongContextPricing ? hasInput200kPrice ? pricing.input_cost_per_token_above_200k_tokens : isClaudeModel @@ -712,12 +669,18 @@ class PricingService { const hasOutput200kPrice = pricing.output_cost_per_token_above_200k_tokens !== null && pricing.output_cost_per_token_above_200k_tokens !== undefined - const actualOutputPrice = useLongContextPricing + let actualOutputPrice = useLongContextPricing ? hasOutput200kPrice ? 
pricing.output_cost_per_token_above_200k_tokens : baseOutputPrice : baseOutputPrice + // 应用 Fast Mode 倍率(在 200K+ 价格之上叠加) + if (fastMultiplier > 1) { + actualInputPrice *= fastMultiplier + actualOutputPrice *= fastMultiplier + } + let actualCacheCreatePrice = 0 let actualCacheReadPrice = 0 let actualEphemeral1hPrice = 0 diff --git a/src/services/relay/droidRelayService.js b/src/services/relay/droidRelayService.js index 6e907651..499fc46d 100644 --- a/src/services/relay/droidRelayService.js +++ b/src/services/relay/droidRelayService.js @@ -91,7 +91,14 @@ class DroidRelayService { return normalizedBody } - async _applyRateLimitTracking(rateLimitInfo, usageSummary, model, context = '', keyId = null) { + async _applyRateLimitTracking( + rateLimitInfo, + usageSummary, + model, + context = '', + keyId = null, + preCalculatedCost = null + ) { if (!rateLimitInfo) { return } @@ -102,7 +109,8 @@ class DroidRelayService { usageSummary, model, keyId, - 'droid' + 'droid', + preCalculatedCost ) if (totalTokens > 0) { @@ -616,7 +624,7 @@ class DroidRelayService { // 记录 usage 数据 if (!skipUsageRecord) { - const normalizedUsage = await this._recordUsageFromStreamData( + const { normalizedUsage, costs: streamCosts } = await this._recordUsageFromStreamData( currentUsageData, apiKeyData, account, @@ -635,7 +643,8 @@ class DroidRelayService { usageSummary, model, ' [stream]', - keyId + keyId, + streamCosts ) logger.success(`Droid stream completed - Account: ${account.name}`) @@ -871,8 +880,8 @@ class DroidRelayService { */ async _recordUsageFromStreamData(usageData, apiKeyData, account, model) { const normalizedUsage = this._normalizeUsageSnapshot(usageData) - await this._recordUsage(apiKeyData, account, model, normalizedUsage) - return normalizedUsage + const costs = await this._recordUsage(apiKeyData, account, model, normalizedUsage) + return { normalizedUsage, costs } } /** @@ -1234,7 +1243,7 @@ class DroidRelayService { const normalizedUsage = this._normalizeUsageSnapshot(usage) if 
(!skipUsageRecord) { - await this._recordUsage(apiKeyData, account, model, normalizedUsage) + const droidCosts = await this._recordUsage(apiKeyData, account, model, normalizedUsage) const totalTokens = this._getTotalTokens(normalizedUsage) @@ -1256,7 +1265,8 @@ class DroidRelayService { usageSummary, model, endpointLabel, - keyId + keyId, + droidCosts ) logger.success( @@ -1283,15 +1293,22 @@ class DroidRelayService { if (totalTokens <= 0) { logger.debug('🪙 Droid usage 数据为空,跳过记录') - return + return { realCost: 0, ratedCost: 0 } } try { const keyId = apiKeyData?.id const accountId = this._extractAccountId(account) + let costs = { realCost: 0, ratedCost: 0 } if (keyId) { - await apiKeyService.recordUsageWithDetails(keyId, usageObject, model, accountId, 'droid') + costs = await apiKeyService.recordUsageWithDetails( + keyId, + usageObject, + model, + accountId, + 'droid' + ) } else if (accountId) { await redis.incrementAccountUsage( accountId, @@ -1307,14 +1324,17 @@ class DroidRelayService { ) } else { logger.warn('⚠️ 无法记录 Droid usage:缺少 API Key 和账户标识') - return + return { realCost: 0, ratedCost: 0 } } logger.debug( `📊 Droid usage recorded - Key: ${keyId || 'unknown'}, Account: ${accountId || 'unknown'}, Model: ${model}, Input: ${usageObject.input_tokens || 0}, Output: ${usageObject.output_tokens || 0}, Cache Create: ${usageObject.cache_creation_input_tokens || 0}, Cache Read: ${usageObject.cache_read_input_tokens || 0}, Total: ${totalTokens}` ) + + return costs } catch (error) { logger.error('❌ Failed to record Droid usage:', error) + return { realCost: 0, ratedCost: 0 } } } diff --git a/src/utils/rateLimitHelper.js b/src/utils/rateLimitHelper.js index a7f4db3c..4de07d99 100644 --- a/src/utils/rateLimitHelper.js +++ b/src/utils/rateLimitHelper.js @@ -8,12 +8,14 @@ function toNumber(value) { } // keyId 和 accountType 用于计算倍率成本 +// preCalculatedCost: 可选的 { realCost, ratedCost },由调用方提供以避免重复计算 async function updateRateLimitCounters( rateLimitInfo, usageSummary, model, 
keyId = null, - accountType = null + accountType = null, + preCalculatedCost = null ) { if (!rateLimitInfo) { return { totalTokens: 0, totalCost: 0, ratedCost: 0 } @@ -36,47 +38,68 @@ async function updateRateLimitCounters( } let totalCost = 0 - const usagePayload = { - input_tokens: inputTokens, - output_tokens: outputTokens, - cache_creation_input_tokens: cacheCreateTokens, - cache_read_input_tokens: cacheReadTokens - } + let ratedCost = 0 - try { - const costInfo = pricingService.calculateCost(usagePayload, model) - const { totalCost: calculatedCost } = costInfo || {} - if (typeof calculatedCost === 'number') { - totalCost = calculatedCost + if ( + preCalculatedCost && + typeof preCalculatedCost.ratedCost === 'number' && + preCalculatedCost.ratedCost > 0 + ) { + // 使用调用方已计算好的费用(避免重复计算,且能正确处理 1h 缓存、Fast Mode 等特殊计费) + // eslint-disable-next-line prefer-destructuring + ratedCost = preCalculatedCost.ratedCost + totalCost = preCalculatedCost.realCost || 0 + } else if ( + preCalculatedCost && + typeof preCalculatedCost.realCost === 'number' && + preCalculatedCost.realCost > 0 + ) { + // 有 realCost 但 ratedCost 为 0 或缺失,使用 realCost + totalCost = preCalculatedCost.realCost + ratedCost = preCalculatedCost.realCost + } else { + // Legacy fallback:调用方未提供费用时自行计算(不支持 1h 缓存等特殊计费) + const usagePayload = { + input_tokens: inputTokens, + output_tokens: outputTokens, + cache_creation_input_tokens: cacheCreateTokens, + cache_read_input_tokens: cacheReadTokens } - } catch (error) { - // 忽略此处错误,后续使用备用计算 - totalCost = 0 - } - if (totalCost === 0) { try { - const fallback = CostCalculator.calculateCost(usagePayload, model) - const { costs } = fallback || {} - if (costs && typeof costs.total === 'number') { - totalCost = costs.total + const costInfo = pricingService.calculateCost(usagePayload, model) + const { totalCost: calculatedCost } = costInfo || {} + if (typeof calculatedCost === 'number') { + totalCost = calculatedCost } } catch (error) { + // 忽略此处错误,后续使用备用计算 totalCost = 0 } - } - 
// 计算倍率成本(用于限流计数) - let ratedCost = totalCost - if (totalCost > 0 && keyId) { - try { - const apiKeyService = require('../services/apiKeyService') - const serviceRatesService = require('../services/serviceRatesService') - const service = serviceRatesService.getService(accountType, model) - ratedCost = await apiKeyService.calculateRatedCost(keyId, service, totalCost) - } catch (error) { - // 倍率计算失败时使用真实成本 - ratedCost = totalCost + if (totalCost === 0) { + try { + const fallback = CostCalculator.calculateCost(usagePayload, model) + const { costs } = fallback || {} + if (costs && typeof costs.total === 'number') { + totalCost = costs.total + } + } catch (error) { + totalCost = 0 + } + } + + // 计算倍率成本(用于限流计数) + ratedCost = totalCost + if (totalCost > 0 && keyId) { + try { + const apiKeyService = require('../services/apiKeyService') + const serviceRatesService = require('../services/serviceRatesService') + const service = serviceRatesService.getService(accountType, model) + ratedCost = await apiKeyService.calculateRatedCost(keyId, service, totalCost) + } catch (error) { + ratedCost = totalCost + } } } diff --git a/tests/pricingService.test.js b/tests/pricingService.test.js index 40dc0dd7..f0bc8687 100644 --- a/tests/pricingService.test.js +++ b/tests/pricingService.test.js @@ -39,49 +39,27 @@ jest.mock('fs', () => { describe('PricingService - 200K+ Long Context Pricing', () => { let pricingService const fs = require('fs') + const path = require('path') - // 模拟 claude-sonnet-4-20250514 的完整价格数据(来自 model_pricing.json) - const mockPricingData = { - 'claude-sonnet-4-20250514': { - input_cost_per_token: 0.000003, // $3/MTok - output_cost_per_token: 0.000015, // $15/MTok - cache_creation_input_token_cost: 0.00000375, // $3.75/MTok - cache_read_input_token_cost: 0.0000003, // $0.30/MTok - max_input_tokens: 1000000, - // 200K+ 高档价格 - input_cost_per_token_above_200k_tokens: 0.000006, // $6/MTok (2x) - output_cost_per_token_above_200k_tokens: 0.0000225, // $22.50/MTok (1.5x) - 
cache_creation_input_token_cost_above_200k_tokens: 0.0000075, // $7.50/MTok (2x) - cache_read_input_token_cost_above_200k_tokens: 0.0000006, // $0.60/MTok (2x) - // 1小时缓存价格 - cache_creation_input_token_cost_above_1hr: 0.0000075, - cache_creation_input_token_cost_above_1hr_above_200k_tokens: 0.000015 - }, - // 没有 above_200k 字段的模型 - 'claude-3-haiku-20240307': { - input_cost_per_token: 0.00000025, - output_cost_per_token: 0.00000125, - cache_creation_input_token_cost: 0.0000003, - cache_read_input_token_cost: 0.00000003 - }, - // Fast Mode 适配测试模型(Opus 4.6) - 'claude-opus-4-6': { - input_cost_per_token: 0.000005, - output_cost_per_token: 0.000025, - cache_creation_input_token_cost: 0.00000625, - cache_read_input_token_cost: 0.0000005, - input_cost_per_token_above_200k_tokens: 0.00001, - output_cost_per_token_above_200k_tokens: 0.0000375 - } - } + // 使用真实的 model_pricing.json 数据(优先 data/,fallback 到 resources/) + const realFs = jest.requireActual('fs') + const primaryPath = path.join(process.cwd(), 'data', 'model_pricing.json') + const fallbackPath = path.join( + process.cwd(), + 'resources', + 'model-pricing', + 'model_prices_and_context_window.json' + ) + const pricingFilePath = realFs.existsSync(primaryPath) ? 
primaryPath : fallbackPath + const pricingData = JSON.parse(realFs.readFileSync(pricingFilePath, 'utf8')) beforeEach(() => { // 清除缓存的模块 jest.resetModules() - // 配置 fs mock + // 配置 fs mock(防止 pricingService 初始化时的文件副作用) fs.existsSync.mockReturnValue(true) - fs.readFileSync.mockReturnValue(JSON.stringify(mockPricingData)) + fs.readFileSync.mockReturnValue(JSON.stringify(pricingData)) fs.statSync.mockReturnValue({ mtime: new Date(), mtimeMs: Date.now() }) fs.watchFile.mockImplementation(() => {}) fs.unwatchFile.mockImplementation(() => {}) @@ -89,8 +67,8 @@ describe('PricingService - 200K+ Long Context Pricing', () => { // 重新加载 pricingService pricingService = require('../src/services/pricingService') - // 直接设置价格数据(绕过初始化) - pricingService.pricingData = mockPricingData + // 直接设置真实价格数据(绕过网络初始化) + pricingService.pricingData = pricingData pricingService.lastUpdated = new Date() })