diff --git a/src/handlers/geminiHandlers.js b/src/handlers/geminiHandlers.js index 40e4a703..9d4d2e26 100644 --- a/src/handlers/geminiHandlers.js +++ b/src/handlers/geminiHandlers.js @@ -209,7 +209,13 @@ function ensureGeminiPermissionMiddleware(req, res, next) { /** * 应用速率限制跟踪 */ -async function applyRateLimitTracking(req, usageSummary, model, context = '') { +async function applyRateLimitTracking( + req, + usageSummary, + model, + context = '', + preCalculatedCost = null +) { if (!req.rateLimitInfo) { return } @@ -222,7 +228,8 @@ async function applyRateLimitTracking(req, usageSummary, model, context = '') { usageSummary, model, req.apiKey?.id, - 'gemini' + 'gemini', + preCalculatedCost ) if (totalTokens > 0) { @@ -1705,7 +1712,7 @@ async function handleGenerateContent(req, res) { if (response?.response?.usageMetadata) { try { const usage = response.response.usageMetadata - await apiKeyService.recordUsage( + const geminiNonStreamCosts = await apiKeyService.recordUsage( req.apiKey.id, usage.promptTokenCount || 0, usage.candidatesTokenCount || 0, @@ -1728,7 +1735,8 @@ async function handleGenerateContent(req, res) { cacheReadTokens: 0 }, model, - 'gemini-non-stream' + 'gemini-non-stream', + geminiNonStreamCosts ) } catch (error) { logger.error('Failed to record Gemini usage:', error) @@ -2053,8 +2061,8 @@ async function handleStreamGenerateContent(req, res) { // 异步记录使用统计 if (!usageReported && totalUsage.totalTokenCount > 0) { - Promise.all([ - apiKeyService.recordUsage( + apiKeyService + .recordUsage( req.apiKey.id, totalUsage.promptTokenCount || 0, totalUsage.candidatesTokenCount || 0, @@ -2063,19 +2071,21 @@ async function handleStreamGenerateContent(req, res) { model, account.id, 'gemini' - ), - applyRateLimitTracking( - req, - { - inputTokens: totalUsage.promptTokenCount || 0, - outputTokens: totalUsage.candidatesTokenCount || 0, - cacheCreateTokens: 0, - cacheReadTokens: 0 - }, - model, - 'gemini-stream' ) - ]) + .then((costs) => + applyRateLimitTracking( + 
req, + { + inputTokens: totalUsage.promptTokenCount || 0, + outputTokens: totalUsage.candidatesTokenCount || 0, + cacheCreateTokens: 0, + cacheReadTokens: 0 + }, + model, + 'gemini-stream', + costs + ) + ) .then(() => { logger.info( `📊 Recorded Gemini stream usage - Input: ${totalUsage.promptTokenCount}, Output: ${totalUsage.candidatesTokenCount}, Total: ${totalUsage.totalTokenCount}` diff --git a/src/routes/admin/apiKeys.js b/src/routes/admin/apiKeys.js index c374eb15..cff0fb65 100644 --- a/src/routes/admin/apiKeys.js +++ b/src/routes/admin/apiKeys.js @@ -1093,9 +1093,8 @@ async function calculateKeyStats(keyId, timeRange, startDate, endDate) { const currentMonth = `${tzDate.getUTCFullYear()}-${String(tzDate.getUTCMonth() + 1).padStart(2, '0')}` searchPatterns.push(`usage:${keyId}:model:monthly:*:${currentMonth}`) } else { - // all - 获取所有数据(日和月数据都查) - searchPatterns.push(`usage:${keyId}:model:daily:*`) - searchPatterns.push(`usage:${keyId}:model:monthly:*`) + // all - 使用 alltime key(无 TTL,数据完整),避免 daily/monthly 键过期导致数据丢失 + searchPatterns.push(`usage:${keyId}:model:alltime:*`) } // 使用 SCAN 收集所有匹配的 keys @@ -1109,7 +1108,7 @@ async function calculateKeyStats(keyId, timeRange, startDate, endDate) { } while (cursor !== '0') } - // 去重(避免日数据和月数据重复计算) + // 去重 const uniqueKeys = [...new Set(allKeys)] // 获取实时限制数据(窗口数据不受时间范围筛选影响,始终获取当前窗口状态) @@ -1128,7 +1127,6 @@ async function calculateKeyStats(keyId, timeRange, startDate, endDate) { const apiKey = await redis.getApiKey(keyId) const rateLimitWindow = parseInt(apiKey?.rateLimitWindow) || 0 const dailyCostLimit = parseFloat(apiKey?.dailyCostLimit) || 0 - const totalCostLimit = parseFloat(apiKey?.totalCostLimit) || 0 const weeklyOpusCostLimit = parseFloat(apiKey?.weeklyOpusCostLimit) || 0 // 只在启用了每日费用限制时查询 @@ -1136,11 +1134,9 @@ async function calculateKeyStats(keyId, timeRange, startDate, endDate) { dailyCost = await redis.getDailyCost(keyId) } - // 只在启用了总费用限制时查询 - if (totalCostLimit > 0) { - const totalCostKey = 
`usage:cost:total:${keyId}` - allTimeCost = parseFloat((await client.get(totalCostKey)) || '0') - } + // 始终查询 allTimeCost(用于展示和限额校验) + const totalCostKey = `usage:cost:total:${keyId}` + allTimeCost = parseFloat((await client.get(totalCostKey)) || '0') // 只在启用了 Claude 周费用限制时查询(字段名沿用 weeklyOpusCostLimit) if (weeklyOpusCostLimit > 0) { @@ -1149,7 +1145,7 @@ async function calculateKeyStats(keyId, timeRange, startDate, endDate) { weeklyOpusCost = await redis.getWeeklyOpusCost(keyId, resetDay, resetHour) } - // 只在启用了窗口限制时查询窗口数据(移到早期返回之前,确保窗口数据始终被获取) + // 只在启用了窗口限制时查询窗口数据 if (rateLimitWindow > 0) { const requestCountKey = `rate_limit:requests:${keyId}` const tokenCountKey = `rate_limit:tokens:${keyId}` @@ -1180,37 +1176,23 @@ async function calculateKeyStats(keyId, timeRange, startDate, endDate) { } } } - - // 🔧 FIX: 对于 "全部时间" 时间范围,直接使用 allTimeCost - // 因为 usage:*:model:daily:* 键有 30 天 TTL,旧数据已经过期 - if (timeRange === 'all' && allTimeCost > 0) { - logger.debug(`📊 使用 allTimeCost 计算 timeRange='all': ${allTimeCost}`) - - return { - requests: 0, // 旧数据详情不可用 - tokens: 0, - inputTokens: 0, - outputTokens: 0, - cacheCreateTokens: 0, - cacheReadTokens: 0, - cost: allTimeCost, - formattedCost: CostCalculator.formatCost(allTimeCost), - // 实时限制数据(始终返回,不受时间范围影响) - dailyCost, - weeklyOpusCost, - currentWindowCost, - currentWindowRequests, - currentWindowTokens, - windowRemainingSeconds, - windowStartTime, - windowEndTime, - allTimeCost - } - } } catch (error) { logger.warn(`⚠️ 获取实时限制数据失败 (key: ${keyId}):`, error.message) } + // 构建实时限制数据对象(各分支复用) + const limitData = { + dailyCost, + weeklyOpusCost, + currentWindowCost, + currentWindowRequests, + currentWindowTokens, + windowRemainingSeconds, + windowStartTime, + windowEndTime, + allTimeCost + } + // 如果没有使用数据,返回零值但包含窗口数据 if (uniqueKeys.length === 0) { return { @@ -1221,17 +1203,9 @@ async function calculateKeyStats(keyId, timeRange, startDate, endDate) { cacheCreateTokens: 0, cacheReadTokens: 0, cost: 0, + realCost: 0, formattedCost: 
'$0.00', - // 实时限制数据(始终返回,不受时间范围影响) - dailyCost, - weeklyOpusCost, - currentWindowCost, - currentWindowRequests, - currentWindowTokens, - windowRemainingSeconds, - windowStartTime, - windowEndTime, - allTimeCost + ...limitData } } @@ -1246,10 +1220,13 @@ async function calculateKeyStats(keyId, timeRange, startDate, endDate) { const modelStatsMap = new Map() let totalRequests = 0 + // alltime key 的模式:usage:{keyId}:model:alltime:{model} + const alltimeKeyPattern = /usage:.+:model:alltime:(.+)$/ // 用于去重:先统计月数据,避免与日数据重复 const dailyKeyPattern = /usage:.+:model:daily:(.+):\d{4}-\d{2}-\d{2}$/ const monthlyKeyPattern = /usage:.+:model:monthly:(.+):\d{4}-\d{2}$/ const currentMonth = `${tzDate.getUTCFullYear()}-${String(tzDate.getUTCMonth() + 1).padStart(2, '0')}` + const isAlltimeQuery = timeRange === 'all' for (let i = 0; i < results.length; i++) { const [err, data] = results[i] @@ -1262,27 +1239,37 @@ async function calculateKeyStats(keyId, timeRange, startDate, endDate) { let isMonthly = false // 提取模型名称 - const dailyMatch = key.match(dailyKeyPattern) - const monthlyMatch = key.match(monthlyKeyPattern) + if (isAlltimeQuery) { + const alltimeMatch = key.match(alltimeKeyPattern) + if (alltimeMatch) { + model = alltimeMatch[1] + } + } else { + const dailyMatch = key.match(dailyKeyPattern) + const monthlyMatch = key.match(monthlyKeyPattern) - if (dailyMatch) { - model = dailyMatch[1] - } else if (monthlyMatch) { - model = monthlyMatch[1] - isMonthly = true + if (dailyMatch) { + model = dailyMatch[1] + } else if (monthlyMatch) { + model = monthlyMatch[1] + isMonthly = true + } } if (!model) { continue } - // 跳过当前月的月数据 - if (isMonthly && key.includes(`:${currentMonth}`)) { - continue - } - // 跳过非当前月的日数据 - if (!isMonthly && !key.includes(`:${currentMonth}-`)) { - continue + // 日/月去重逻辑(alltime 不需要去重) + if (!isAlltimeQuery) { + // 跳过当前月的月数据(当前月用日数据更精确) + if (isMonthly && key.includes(`:${currentMonth}`)) { + continue + } + // 跳过非当前月的日数据(非当前月用月数据) + if (!isMonthly && 
!key.includes(`:${currentMonth}-`)) { + continue + } } if (!modelStatsMap.has(model)) { @@ -1293,7 +1280,10 @@ async function calculateKeyStats(keyId, timeRange, startDate, endDate) { cacheReadTokens: 0, ephemeral5mTokens: 0, ephemeral1hTokens: 0, - requests: 0 + requests: 0, + realCostMicro: 0, + ratedCostMicro: 0, + hasStoredCost: false }) } @@ -1310,11 +1300,19 @@ async function calculateKeyStats(keyId, timeRange, startDate, endDate) { parseInt(data.totalEphemeral1hTokens) || parseInt(data.ephemeral1hTokens) || 0 stats.requests += parseInt(data.totalRequests) || parseInt(data.requests) || 0 + // 累加已存储的费用(微美元) + if ('realCostMicro' in data || 'ratedCostMicro' in data) { + stats.realCostMicro += parseInt(data.realCostMicro) || 0 + stats.ratedCostMicro += parseInt(data.ratedCostMicro) || 0 + stats.hasStoredCost = true + } + totalRequests += parseInt(data.totalRequests) || parseInt(data.requests) || 0 } - // 计算费用 - let totalCost = 0 + // 汇总费用:优先使用已存储的费用,仅对无存储费用的旧数据 fallback 到 token 重算 + let totalRatedCost = 0 + let totalRealCost = 0 let inputTokens = 0 let outputTokens = 0 let cacheCreateTokens = 0 @@ -1326,23 +1324,30 @@ async function calculateKeyStats(keyId, timeRange, startDate, endDate) { cacheCreateTokens += stats.cacheCreateTokens cacheReadTokens += stats.cacheReadTokens - const costUsage = { - input_tokens: stats.inputTokens, - output_tokens: stats.outputTokens, - cache_creation_input_tokens: stats.cacheCreateTokens, - cache_read_input_tokens: stats.cacheReadTokens - } - - // 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费 - if (stats.ephemeral5mTokens > 0 || stats.ephemeral1hTokens > 0) { - costUsage.cache_creation = { - ephemeral_5m_input_tokens: stats.ephemeral5mTokens, - ephemeral_1h_input_tokens: stats.ephemeral1hTokens + if (stats.hasStoredCost) { + // 使用请求时已计算并存储的费用(精确,包含 1M 上下文、特殊计费等) + totalRatedCost += stats.ratedCostMicro / 1000000 + totalRealCost += stats.realCostMicro / 1000000 + } else { + // Legacy fallback:旧数据没有存储费用,从 token 重算(不精确但聊胜于无) 
+ const costUsage = { + input_tokens: stats.inputTokens, + output_tokens: stats.outputTokens, + cache_creation_input_tokens: stats.cacheCreateTokens, + cache_read_input_tokens: stats.cacheReadTokens } - } - const costResult = CostCalculator.calculateCost(costUsage, model) - totalCost += costResult.costs.total + if (stats.ephemeral5mTokens > 0 || stats.ephemeral1hTokens > 0) { + costUsage.cache_creation = { + ephemeral_5m_input_tokens: stats.ephemeral5mTokens, + ephemeral_1h_input_tokens: stats.ephemeral1hTokens + } + } + + const costResult = CostCalculator.calculateCost(costUsage, model) + totalRatedCost += costResult.costs.total + totalRealCost += costResult.costs.total + } } const tokens = inputTokens + outputTokens + cacheCreateTokens + cacheReadTokens @@ -1354,18 +1359,10 @@ async function calculateKeyStats(keyId, timeRange, startDate, endDate) { outputTokens, cacheCreateTokens, cacheReadTokens, - cost: totalCost, - formattedCost: CostCalculator.formatCost(totalCost), - // 实时限制数据 - dailyCost, - weeklyOpusCost, - currentWindowCost, - currentWindowRequests, - currentWindowTokens, - windowRemainingSeconds, - windowStartTime, - windowEndTime, - allTimeCost // 历史总费用(用于总费用限制) + cost: totalRatedCost, + realCost: totalRealCost, + formattedCost: CostCalculator.formatCost(totalRatedCost), + ...limitData } } diff --git a/src/routes/admin/usageStats.js b/src/routes/admin/usageStats.js index c83aad4e..c8b5660b 100644 --- a/src/routes/admin/usageStats.js +++ b/src/routes/admin/usageStats.js @@ -1011,7 +1011,10 @@ router.get('/api-keys/:keyId/model-stats', authenticateAdmin, async (req, res) = cacheReadTokens: 0, ephemeral5mTokens: 0, ephemeral1hTokens: 0, - allTokens: 0 + allTokens: 0, + realCostMicro: 0, + ratedCostMicro: 0, + hasStoredCost: false }) } const stats = modelStatsMap.get(model) @@ -1023,6 +1026,11 @@ router.get('/api-keys/:keyId/model-stats', authenticateAdmin, async (req, res) = stats.ephemeral5mTokens += parseInt(data.ephemeral5mTokens) || 0 
stats.ephemeral1hTokens += parseInt(data.ephemeral1hTokens) || 0 stats.allTokens += parseInt(data.allTokens) || 0 + if ('realCostMicro' in data || 'ratedCostMicro' in data) { + stats.realCostMicro += parseInt(data.realCostMicro) || 0 + stats.ratedCostMicro += parseInt(data.ratedCostMicro) || 0 + stats.hasStoredCost = true + } } } } else { @@ -1059,7 +1067,10 @@ router.get('/api-keys/:keyId/model-stats', authenticateAdmin, async (req, res) = cacheReadTokens: 0, ephemeral5mTokens: 0, ephemeral1hTokens: 0, - allTokens: 0 + allTokens: 0, + realCostMicro: 0, + ratedCostMicro: 0, + hasStoredCost: false }) } const stats = modelStatsMap.get(model) @@ -1071,6 +1082,11 @@ router.get('/api-keys/:keyId/model-stats', authenticateAdmin, async (req, res) = stats.ephemeral5mTokens += parseInt(data.ephemeral5mTokens) || 0 stats.ephemeral1hTokens += parseInt(data.ephemeral1hTokens) || 0 stats.allTokens += parseInt(data.allTokens) || 0 + if ('realCostMicro' in data || 'ratedCostMicro' in data) { + stats.realCostMicro += parseInt(data.realCostMicro) || 0 + stats.ratedCostMicro += parseInt(data.ratedCostMicro) || 0 + stats.hasStoredCost = true + } } } @@ -1078,23 +1094,36 @@ router.get('/api-keys/:keyId/model-stats', authenticateAdmin, async (req, res) = for (const [model, stats] of modelStatsMap) { logger.info(`📊 Model ${model} aggregated data:`, stats) - const usage = { - input_tokens: stats.inputTokens, - output_tokens: stats.outputTokens, - cache_creation_input_tokens: stats.cacheCreateTokens, - cache_read_input_tokens: stats.cacheReadTokens - } - - // 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费 - if (stats.ephemeral5mTokens > 0 || stats.ephemeral1hTokens > 0) { - usage.cache_creation = { - ephemeral_5m_input_tokens: stats.ephemeral5mTokens, - ephemeral_1h_input_tokens: stats.ephemeral1hTokens + let costData + if (stats.hasStoredCost) { + // 使用请求时已计算并存储的费用(精确,包含 1M 上下文、Fast Mode 等特殊计费) + const ratedCost = stats.ratedCostMicro / 1000000 + const realCost = 
stats.realCostMicro / 1000000 + costData = { + costs: { total: ratedCost, real: realCost }, + formatted: { total: CostCalculator.formatCost(ratedCost) }, + pricing: null, + usingDynamicPricing: false, + usingStoredCost: true + } + } else { + // Legacy fallback:旧数据没有存储费用,从 token 重算 + const usage = { + input_tokens: stats.inputTokens, + output_tokens: stats.outputTokens, + cache_creation_input_tokens: stats.cacheCreateTokens, + cache_read_input_tokens: stats.cacheReadTokens } - } - // 使用CostCalculator计算费用 - const costData = CostCalculator.calculateCost(usage, model) + if (stats.ephemeral5mTokens > 0 || stats.ephemeral1hTokens > 0) { + usage.cache_creation = { + ephemeral_5m_input_tokens: stats.ephemeral5mTokens, + ephemeral_1h_input_tokens: stats.ephemeral1hTokens + } + } + + costData = CostCalculator.calculateCost(usage, model) + } modelStats.push({ model, @@ -1933,26 +1962,37 @@ router.get('/api-keys-usage-trend', authenticateAdmin, async (req, res) => { continue } - const usage = { - input_tokens: parseInt(modelData.inputTokens) || 0, - output_tokens: parseInt(modelData.outputTokens) || 0, - cache_creation_input_tokens: parseInt(modelData.cacheCreateTokens) || 0, - cache_read_input_tokens: parseInt(modelData.cacheReadTokens) || 0 - } + // 优先使用已存储的费用 + const hasStoredCost = 'realCostMicro' in modelData || 'ratedCostMicro' in modelData + let modelCost = 0 - // 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费 - const eph5m = parseInt(modelData.ephemeral5mTokens) || 0 - const eph1h = parseInt(modelData.ephemeral1hTokens) || 0 - if (eph5m > 0 || eph1h > 0) { - usage.cache_creation = { - ephemeral_5m_input_tokens: eph5m, - ephemeral_1h_input_tokens: eph1h + if (hasStoredCost) { + modelCost = (parseInt(modelData.ratedCostMicro) || 0) / 1000000 + } else { + // Legacy fallback:旧数据没有存储费用,从 token 重算 + const usage = { + input_tokens: parseInt(modelData.inputTokens) || 0, + output_tokens: parseInt(modelData.outputTokens) || 0, + cache_creation_input_tokens: 
parseInt(modelData.cacheCreateTokens) || 0, + cache_read_input_tokens: parseInt(modelData.cacheReadTokens) || 0 } + + // 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费 + const eph5m = parseInt(modelData.ephemeral5mTokens) || 0 + const eph1h = parseInt(modelData.ephemeral1hTokens) || 0 + if (eph5m > 0 || eph1h > 0) { + usage.cache_creation = { + ephemeral_5m_input_tokens: eph5m, + ephemeral_1h_input_tokens: eph1h + } + } + + const costResult = CostCalculator.calculateCost(usage, model) + modelCost = costResult.costs.total } - const costResult = CostCalculator.calculateCost(usage, model) const currentCost = apiKeyCostMap.get(apiKeyId) || 0 - apiKeyCostMap.set(apiKeyId, currentCost + costResult.costs.total) + apiKeyCostMap.set(apiKeyId, currentCost + modelCost) } // 组合数据 @@ -2111,26 +2151,37 @@ router.get('/api-keys-usage-trend', authenticateAdmin, async (req, res) => { continue } - const usage = { - input_tokens: parseInt(modelData.inputTokens) || 0, - output_tokens: parseInt(modelData.outputTokens) || 0, - cache_creation_input_tokens: parseInt(modelData.cacheCreateTokens) || 0, - cache_read_input_tokens: parseInt(modelData.cacheReadTokens) || 0 - } + // 优先使用已存储的费用 + const hasStoredCost = 'realCostMicro' in modelData || 'ratedCostMicro' in modelData + let modelCost = 0 - // 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费 - const eph5m = parseInt(modelData.ephemeral5mTokens) || 0 - const eph1h = parseInt(modelData.ephemeral1hTokens) || 0 - if (eph5m > 0 || eph1h > 0) { - usage.cache_creation = { - ephemeral_5m_input_tokens: eph5m, - ephemeral_1h_input_tokens: eph1h + if (hasStoredCost) { + modelCost = (parseInt(modelData.ratedCostMicro) || 0) / 1000000 + } else { + // Legacy fallback:旧数据没有存储费用,从 token 重算 + const usage = { + input_tokens: parseInt(modelData.inputTokens) || 0, + output_tokens: parseInt(modelData.outputTokens) || 0, + cache_creation_input_tokens: parseInt(modelData.cacheCreateTokens) || 0, + cache_read_input_tokens: 
parseInt(modelData.cacheReadTokens) || 0 } + + // 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费 + const eph5m = parseInt(modelData.ephemeral5mTokens) || 0 + const eph1h = parseInt(modelData.ephemeral1hTokens) || 0 + if (eph5m > 0 || eph1h > 0) { + usage.cache_creation = { + ephemeral_5m_input_tokens: eph5m, + ephemeral_1h_input_tokens: eph1h + } + } + + const costResult = CostCalculator.calculateCost(usage, model) + modelCost = costResult.costs.total } - const costResult = CostCalculator.calculateCost(usage, model) const currentCost = apiKeyCostMap.get(apiKeyId) || 0 - apiKeyCostMap.set(apiKeyId, currentCost + costResult.costs.total) + apiKeyCostMap.set(apiKeyId, currentCost + modelCost) } // 组合数据 @@ -2628,7 +2679,7 @@ router.get('/usage-costs', authenticateAdmin, async (req, res) => { } }) -// 获取 API Key 的请求记录时间线 +// 获取 API Key 的请求记录时间线 router.get('/api-keys/:keyId/usage-records', authenticateAdmin, async (req, res) => { try { const { keyId } = req.params diff --git a/src/routes/api.js b/src/routes/api.js index 85ad01f5..67b0ff8b 100644 --- a/src/routes/api.js +++ b/src/routes/api.js @@ -33,7 +33,8 @@ function queueRateLimitUpdate( model, context = '', keyId = null, - accountType = null + accountType = null, + preCalculatedCost = null ) { if (!rateLimitInfo) { return Promise.resolve({ totalTokens: 0, totalCost: 0 }) } const label = context ? 
` (${context})` : '' - return updateRateLimitCounters(rateLimitInfo, usageSummary, model, keyId, accountType) + return updateRateLimitCounters( + rateLimitInfo, + usageSummary, + model, + keyId, + accountType, + preCalculatedCost + ) .then(({ totalTokens, totalCost }) => { if (totalTokens > 0) { logger.api(`📊 Updated rate limit token count${label}: +${totalTokens} tokens`) @@ -492,24 +500,40 @@ async function handleMessagesRequest(req, res) { apiKeyService .recordUsageWithDetails(_apiKeyId, usageObject, model, usageAccountId, accountType) + .then((costs) => { + queueRateLimitUpdate( + _rateLimitInfo, + { + inputTokens, + outputTokens, + cacheCreateTokens, + cacheReadTokens + }, + model, + 'claude-stream', + _apiKeyId, + accountType, + costs + ) + }) .catch((error) => { logger.error('❌ Failed to record stream usage:', error) + // Fallback: 仍然更新限流计数(使用 legacy 计算) + queueRateLimitUpdate( + _rateLimitInfo, + { + inputTokens, + outputTokens, + cacheCreateTokens, + cacheReadTokens + }, + model, + 'claude-stream', + _apiKeyId, + accountType + ) }) - queueRateLimitUpdate( - _rateLimitInfo, - { - inputTokens, - outputTokens, - cacheCreateTokens, - cacheReadTokens - }, - model, - 'claude-stream', - _apiKeyId, - accountType - ) - usageDataCaptured = true logger.api( `📊 Stream usage recorded (real) - Model: ${model}, Input: ${inputTokens}, Output: ${outputTokens}, Cache Create: ${cacheCreateTokens}, Cache Read: ${cacheReadTokens}, Total: ${inputTokens + outputTokens + cacheCreateTokens + cacheReadTokens} tokens` @@ -608,24 +632,39 @@ async function handleMessagesRequest(req, res) { usageAccountId, 'claude-console' ) + .then((costs) => { + queueRateLimitUpdate( + _rateLimitInfoConsole, + { + inputTokens, + outputTokens, + cacheCreateTokens, + cacheReadTokens + }, + model, + 'claude-console-stream', + _apiKeyIdConsole, + accountType, + costs + ) + }) .catch((error) => { logger.error('❌ Failed to record stream usage:', error) + queueRateLimitUpdate( + _rateLimitInfoConsole, + { + 
inputTokens, + outputTokens, + cacheCreateTokens, + cacheReadTokens + }, + model, + 'claude-console-stream', + _apiKeyIdConsole, + accountType + ) }) - queueRateLimitUpdate( - _rateLimitInfoConsole, - { - inputTokens, - outputTokens, - cacheCreateTokens, - cacheReadTokens - }, - model, - 'claude-console-stream', - _apiKeyIdConsole, - accountType - ) - usageDataCaptured = true logger.api( `📊 Stream usage recorded (real) - Model: ${model}, Input: ${inputTokens}, Output: ${outputTokens}, Cache Create: ${cacheCreateTokens}, Cache Read: ${cacheReadTokens}, Total: ${inputTokens + outputTokens + cacheCreateTokens + cacheReadTokens} tokens` @@ -674,24 +713,39 @@ async function handleMessagesRequest(req, res) { accountId, 'bedrock' ) + .then((costs) => { + queueRateLimitUpdate( + _rateLimitInfoBedrock, + { + inputTokens, + outputTokens, + cacheCreateTokens: 0, + cacheReadTokens: 0 + }, + result.model, + 'bedrock-stream', + _apiKeyIdBedrock, + 'bedrock', + costs + ) + }) .catch((error) => { logger.error('❌ Failed to record Bedrock stream usage:', error) + queueRateLimitUpdate( + _rateLimitInfoBedrock, + { + inputTokens, + outputTokens, + cacheCreateTokens: 0, + cacheReadTokens: 0 + }, + result.model, + 'bedrock-stream', + _apiKeyIdBedrock, + 'bedrock' + ) }) - queueRateLimitUpdate( - _rateLimitInfoBedrock, - { - inputTokens, - outputTokens, - cacheCreateTokens: 0, - cacheReadTokens: 0 - }, - result.model, - 'bedrock-stream', - _apiKeyIdBedrock, - 'bedrock' - ) - usageDataCaptured = true logger.api( `📊 Bedrock stream usage recorded - Model: ${result.model}, Input: ${inputTokens}, Output: ${outputTokens}, Total: ${inputTokens + outputTokens} tokens` @@ -781,24 +835,39 @@ async function handleMessagesRequest(req, res) { apiKeyService .recordUsageWithDetails(_apiKeyIdCcr, usageObject, model, usageAccountId, 'ccr') + .then((costs) => { + queueRateLimitUpdate( + _rateLimitInfoCcr, + { + inputTokens, + outputTokens, + cacheCreateTokens, + cacheReadTokens + }, + model, + 
'ccr-stream', + _apiKeyIdCcr, + 'ccr', + costs + ) + }) .catch((error) => { logger.error('❌ Failed to record CCR stream usage:', error) + queueRateLimitUpdate( + _rateLimitInfoCcr, + { + inputTokens, + outputTokens, + cacheCreateTokens, + cacheReadTokens + }, + model, + 'ccr-stream', + _apiKeyIdCcr, + 'ccr' + ) }) - queueRateLimitUpdate( - _rateLimitInfoCcr, - { - inputTokens, - outputTokens, - cacheCreateTokens, - cacheReadTokens - }, - model, - 'ccr-stream', - _apiKeyIdCcr, - 'ccr' - ) - usageDataCaptured = true logger.api( `📊 CCR stream usage recorded (real) - Model: ${model}, Input: ${inputTokens}, Output: ${outputTokens}, Cache Create: ${cacheCreateTokens}, Cache Read: ${cacheReadTokens}, Total: ${inputTokens + outputTokens + cacheCreateTokens + cacheReadTokens} tokens` @@ -1143,7 +1212,7 @@ async function handleMessagesRequest(req, res) { // 记录真实的token使用量(包含模型信息和所有4种token以及账户ID) const { accountId: responseAccountId } = response - await apiKeyService.recordUsage( + const nonStreamCosts = await apiKeyService.recordUsage( _apiKeyIdNonStream, inputTokens, outputTokens, @@ -1165,7 +1234,8 @@ async function handleMessagesRequest(req, res) { model, 'claude-non-stream', _apiKeyIdNonStream, - accountType + accountType, + nonStreamCosts ) usageRecorded = true diff --git a/src/routes/apiStats.js b/src/routes/apiStats.js index 83645892..776259cb 100644 --- a/src/routes/apiStats.js +++ b/src/routes/apiStats.js @@ -277,7 +277,10 @@ router.post('/api/user-stats', async (req, res) => { cacheCreateTokens: 0, cacheReadTokens: 0, ephemeral5mTokens: 0, - ephemeral1hTokens: 0 + ephemeral1hTokens: 0, + realCostMicro: 0, + ratedCostMicro: 0, + hasStoredCost: false }) } @@ -288,28 +291,39 @@ router.post('/api/user-stats', async (req, res) => { modelUsage.cacheReadTokens += parseInt(data.cacheReadTokens) || 0 modelUsage.ephemeral5mTokens += parseInt(data.ephemeral5mTokens) || 0 modelUsage.ephemeral1hTokens += parseInt(data.ephemeral1hTokens) || 0 + if ('realCostMicro' in data || 
'ratedCostMicro' in data) { + modelUsage.realCostMicro += parseInt(data.realCostMicro) || 0 + modelUsage.ratedCostMicro += parseInt(data.ratedCostMicro) || 0 + modelUsage.hasStoredCost = true + } } } // 按模型计算费用并汇总 for (const [model, usage] of modelUsageMap) { - const usageData = { - input_tokens: usage.inputTokens, - output_tokens: usage.outputTokens, - cache_creation_input_tokens: usage.cacheCreateTokens, - cache_read_input_tokens: usage.cacheReadTokens - } - - // 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费 - if (usage.ephemeral5mTokens > 0 || usage.ephemeral1hTokens > 0) { - usageData.cache_creation = { - ephemeral_5m_input_tokens: usage.ephemeral5mTokens, - ephemeral_1h_input_tokens: usage.ephemeral1hTokens + if (usage.hasStoredCost) { + // 使用请求时已存储的费用(精确) + totalCost += usage.ratedCostMicro / 1000000 + } else { + // Legacy fallback:旧数据没有存储费用,从 token 重算 + const usageData = { + input_tokens: usage.inputTokens, + output_tokens: usage.outputTokens, + cache_creation_input_tokens: usage.cacheCreateTokens, + cache_read_input_tokens: usage.cacheReadTokens } - } - const costResult = CostCalculator.calculateCost(usageData, model) - totalCost += costResult.costs.total + // 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费 + if (usage.ephemeral5mTokens > 0 || usage.ephemeral1hTokens > 0) { + usageData.cache_creation = { + ephemeral_5m_input_tokens: usage.ephemeral5mTokens, + ephemeral_1h_input_tokens: usage.ephemeral1hTokens + } + } + + const costResult = CostCalculator.calculateCost(usageData, model) + totalCost += costResult.costs.total + } } // 如果没有模型级别的详细数据,回退到总体数据计算 diff --git a/src/routes/openaiClaudeRoutes.js b/src/routes/openaiClaudeRoutes.js index 9689b296..8086de25 100644 --- a/src/routes/openaiClaudeRoutes.js +++ b/src/routes/openaiClaudeRoutes.js @@ -30,7 +30,8 @@ function queueRateLimitUpdate( model, context = '', keyId = null, - accountType = null + accountType = null, + preCalculatedCost = null ) { if (!rateLimitInfo) { return @@ -38,7 +39,7 @@ 
function queueRateLimitUpdate( const label = context ? ` (${context})` : '' - updateRateLimitCounters(rateLimitInfo, usageSummary, model, keyId, accountType) + updateRateLimitCounters(rateLimitInfo, usageSummary, model, keyId, accountType, preCalculatedCost) .then(({ totalTokens, totalCost }) => { if (totalTokens > 0) { logger.api(`📊 Updated rate limit token count${label}: +${totalTokens} tokens`) @@ -306,23 +307,38 @@ async function handleChatCompletion(req, res, apiKeyData) { accountId, accountType ) + .then((costs) => { + queueRateLimitUpdate( + req.rateLimitInfo, + { + inputTokens: usage.input_tokens || 0, + outputTokens: usage.output_tokens || 0, + cacheCreateTokens, + cacheReadTokens + }, + model, + `openai-${accountType}-stream`, + req.apiKey?.id, + accountType, + costs + ) + }) .catch((error) => { logger.error('❌ Failed to record usage:', error) + queueRateLimitUpdate( + req.rateLimitInfo, + { + inputTokens: usage.input_tokens || 0, + outputTokens: usage.output_tokens || 0, + cacheCreateTokens, + cacheReadTokens + }, + model, + `openai-${accountType}-stream`, + req.apiKey?.id, + accountType + ) }) - - queueRateLimitUpdate( - req.rateLimitInfo, - { - inputTokens: usage.input_tokens || 0, - outputTokens: usage.output_tokens || 0, - cacheCreateTokens, - cacheReadTokens - }, - model, - `openai-${accountType}-stream`, - req.apiKey?.id, - accountType - ) } } @@ -444,23 +460,38 @@ async function handleChatCompletion(req, res, apiKeyData) { accountId, accountType ) + .then((costs) => { + queueRateLimitUpdate( + req.rateLimitInfo, + { + inputTokens: usage.input_tokens || 0, + outputTokens: usage.output_tokens || 0, + cacheCreateTokens, + cacheReadTokens + }, + claudeRequest.model, + `openai-${accountType}-non-stream`, + req.apiKey?.id, + accountType, + costs + ) + }) .catch((error) => { logger.error('❌ Failed to record usage:', error) + queueRateLimitUpdate( + req.rateLimitInfo, + { + inputTokens: usage.input_tokens || 0, + outputTokens: usage.output_tokens || 0, + 
cacheCreateTokens, + cacheReadTokens + }, + claudeRequest.model, + `openai-${accountType}-non-stream`, + req.apiKey?.id, + accountType + ) }) - - queueRateLimitUpdate( - req.rateLimitInfo, - { - inputTokens: usage.input_tokens || 0, - outputTokens: usage.output_tokens || 0, - cacheCreateTokens, - cacheReadTokens - }, - claudeRequest.model, - `openai-${accountType}-non-stream`, - req.apiKey?.id, - accountType - ) } // 返回 OpenAI 格式响应 diff --git a/src/routes/openaiRoutes.js b/src/routes/openaiRoutes.js index b912a253..b3f4105a 100644 --- a/src/routes/openaiRoutes.js +++ b/src/routes/openaiRoutes.js @@ -70,7 +70,14 @@ function extractCodexUsageHeaders(headers) { return hasData ? snapshot : null } -async function applyRateLimitTracking(req, usageSummary, model, context = '', accountType = null) { +async function applyRateLimitTracking( + req, + usageSummary, + model, + context = '', + accountType = null, + preCalculatedCost = null +) { if (!req.rateLimitInfo) { return } @@ -83,7 +90,8 @@ async function applyRateLimitTracking(req, usageSummary, model, context = '', ac usageSummary, model, req.apiKey?.id, - accountType + accountType, + preCalculatedCost ) if (totalTokens > 0) { @@ -613,7 +621,7 @@ const handleResponses = async (req, res) => { // 计算实际输入token(总输入减去缓存部分) const actualInputTokens = Math.max(0, totalInputTokens - cacheReadTokens) - await apiKeyService.recordUsage( + const nonStreamCosts = await apiKeyService.recordUsage( apiKeyData.id, actualInputTokens, // 传递实际输入(不含缓存) outputTokens, @@ -638,7 +646,8 @@ const handleResponses = async (req, res) => { }, actualModel, 'openai-non-stream', - 'openai' + 'openai', + nonStreamCosts ) } @@ -729,7 +738,7 @@ const handleResponses = async (req, res) => { // 使用响应中的真实 model,如果没有则使用请求中的 model,最后回退到默认值 const modelToRecord = actualModel || requestedModel || 'gpt-4' - await apiKeyService.recordUsage( + const streamCosts = await apiKeyService.recordUsage( apiKeyData.id, actualInputTokens, // 传递实际输入(不含缓存) outputTokens, @@ -755,7 
+764,8 @@ const handleResponses = async (req, res) => { }, modelToRecord, 'openai-stream', - 'openai' + 'openai', + streamCosts ) } catch (error) { logger.error('Failed to record OpenAI usage:', error) diff --git a/src/services/anthropicGeminiBridgeService.js b/src/services/anthropicGeminiBridgeService.js index c4a855a0..f6ecf40d 100644 --- a/src/services/anthropicGeminiBridgeService.js +++ b/src/services/anthropicGeminiBridgeService.js @@ -1805,7 +1805,8 @@ async function applyRateLimitTracking( usageSummary, model, context = '', - keyId = null + keyId = null, + preCalculatedCost = null ) { if (!rateLimitInfo) { return @@ -1819,7 +1820,8 @@ async function applyRateLimitTracking( usageSummary, model, keyId, - 'gemini' + 'gemini', + preCalculatedCost ) if (totalTokens > 0) { logger.api(`📊 Updated rate limit token count${label}: +${totalTokens} tokens`) @@ -2135,7 +2137,7 @@ async function handleAnthropicMessagesToGemini(req, res, { vendor, baseModel }) : mapGeminiFinishReasonToAnthropicStopReason(finishReason) if (req.apiKey?.id && (inputTokens > 0 || outputTokens > 0)) { - await apiKeyService.recordUsage( + const bridgeCosts = await apiKeyService.recordUsage( req.apiKey.id, inputTokens, outputTokens, @@ -2150,7 +2152,8 @@ async function handleAnthropicMessagesToGemini(req, res, { vendor, baseModel }) { inputTokens, outputTokens, cacheCreateTokens: 0, cacheReadTokens: 0 }, effectiveModel, 'anthropic-messages', - req.apiKey?.id + req.apiKey?.id, + bridgeCosts ) } @@ -2675,7 +2678,7 @@ async function handleAnthropicMessagesToGemini(req, res, { vendor, baseModel }) } if (req.apiKey?.id && (inputTokens > 0 || outputTokens > 0)) { - await apiKeyService.recordUsage( + const bridgeStreamCosts = await apiKeyService.recordUsage( req.apiKey.id, inputTokens, outputTokens, @@ -2689,7 +2692,9 @@ async function handleAnthropicMessagesToGemini(req, res, { vendor, baseModel }) req.rateLimitInfo, { inputTokens, outputTokens, cacheCreateTokens: 0, cacheReadTokens: 0 }, 
effectiveModel, - 'anthropic-messages-stream' + 'anthropic-messages-stream', + req.apiKey?.id, + bridgeStreamCosts ) } } diff --git a/src/services/apiKeyService.js b/src/services/apiKeyService.js index 2fa88191..53f873a7 100644 --- a/src/services/apiKeyService.js +++ b/src/services/apiKeyService.js @@ -1662,8 +1662,11 @@ class ApiKeyService { logParts.push(`Total: ${totalTokens} tokens`) logger.database(`📊 Recorded usage: ${keyId} - ${logParts.join(', ')}`) + + return { realCost, ratedCost } } catch (error) { logger.error('❌ Failed to record usage:', error) + return { realCost: 0, ratedCost: 0 } } } @@ -1958,8 +1961,11 @@ class ApiKeyService { // 发布失败不影响主流程,只记录错误 logger.warn('⚠️ Failed to publish billing event:', err.message) }) + + return { realCost: realCostWithDetails, ratedCost: ratedCostWithDetails } } catch (error) { logger.error('❌ Failed to record usage:', error) + return { realCost: 0, ratedCost: 0 } } } diff --git a/src/services/pricingService.js b/src/services/pricingService.js index 0eb3b2f6..cd04b5f2 100644 --- a/src/services/pricingService.js +++ b/src/services/pricingService.js @@ -528,11 +528,6 @@ class PricingService { } } - // Claude Fast Mode 目前仅适用于 Opus 4.6 系列 - isFastModeEligibleClaudeModel(modelName) { - return typeof modelName === 'string' && modelName.toLowerCase().includes('opus-4-6') - } - // 去掉模型名中的 [1m] 后缀,便于价格查找 stripLongContextSuffix(modelName) { if (typeof modelName !== 'string') { @@ -541,45 +536,6 @@ class PricingService { return modelName.replace(/\[1m\]/gi, '').trim() } - // 获取 Fast Mode 对应的价格条目(仅匹配 fast/ 前缀) - getFastModePricing(modelName) { - if (!this.pricingData || !modelName) { - return null - } - - const cleanedModelName = this.stripLongContextSuffix(modelName) - const exactCandidates = new Set([`fast/${cleanedModelName}`]) - - if (cleanedModelName.startsWith('fast/')) { - exactCandidates.add(cleanedModelName) - } - - for (const candidate of exactCandidates) { - if (this.pricingData[candidate]) { - logger.debug(`💰 Found 
exact fast pricing for ${modelName}: ${candidate}`) - return this.pricingData[candidate] - } - } - - const normalizedModel = cleanedModelName.toLowerCase().replace(/[_-]/g, '') - for (const [key, value] of Object.entries(this.pricingData)) { - if (!key.startsWith('fast/')) { - continue - } - const normalizedFastKey = key.slice('fast/'.length).toLowerCase().replace(/[_-]/g, '') - if ( - normalizedFastKey.includes(normalizedModel) || - normalizedModel.includes(normalizedFastKey) - ) { - logger.debug(`💰 Found fuzzy fast pricing for ${modelName}: ${key}`) - return value - } - } - - logger.debug(`💰 No fast pricing found for model: ${modelName}`) - return null - } - // 获取 1 小时缓存价格(优先使用 model_pricing.json 中的模型字段) getEphemeral1hPricing(modelName, pricing = null) { if ( @@ -606,7 +562,7 @@ class PricingService { // 检查是否是 Opus 系列 if (modelLower.includes('opus')) { - return 0.00003 // $30/MTok + return 0.00001 // $10/MTok } // 检查是否是 Sonnet 系列 @@ -616,7 +572,7 @@ class PricingService { // 检查是否是 Haiku 系列 if (modelLower.includes('haiku')) { - return 0.0000016 // $1.6/MTok + return 0.000002 // $2/MTok } // 默认返回 0(未知模型) @@ -647,15 +603,14 @@ class PricingService { const hasFastSpeedSignal = responseSpeed === this.claudeFeatureFlags.fastModeSpeed || requestSpeed === this.claudeFeatureFlags.fastModeSpeed - const isFastModeRequest = - hasFastModeBeta && - hasFastSpeedSignal && - this.isFastModeEligibleClaudeModel(normalizedModelName) + const isFastModeRequest = hasFastModeBeta && hasFastSpeedSignal const standardPricing = this.getModelPricing(modelName) - const fastPricing = isFastModeRequest ? this.getFastModePricing(normalizedModelName) : null - const pricing = fastPricing || standardPricing + const pricing = standardPricing const isLongContextModeEnabled = isLongContextModel || hasContext1mBeta + // Fast Mode 倍率:优先从 provider_specific_entry.fast 读取,默认 6 倍 + const fastMultiplier = isFastModeRequest ? 
pricing?.provider_specific_entry?.fast || 6 : 1 + // 当 [1m] 模型总输入超过 200K 时,进入 200K+ 计费逻辑 // 根据 Anthropic 官方文档:当总输入超过 200K 时,整个请求所有 token 类型都使用高档价格 if (isLongContextModeEnabled && totalInputTokens > 200000) { @@ -685,11 +640,13 @@ class PricingService { (typeof pricing?.litellm_provider === 'string' && pricing.litellm_provider.toLowerCase().includes('anthropic')) - if (isFastModeRequest && fastPricing) { - logger.info(`🚀 Fast mode pricing profile selected: fast/${normalizedModelName}`) - } else if (isFastModeRequest && !fastPricing) { + if (isFastModeRequest && fastMultiplier > 1) { + logger.info( + `🚀 Fast mode ${fastMultiplier}x multiplier applied for ${normalizedModelName} (from provider_specific_entry)` + ) + } else if (isFastModeRequest) { logger.warn( - `⚠️ Fast mode request detected but no fast pricing profile found for ${normalizedModelName}; fallback to standard profile` + `⚠️ Fast mode request detected but no fast pricing found for ${normalizedModelName}; fallback to standard profile` ) } @@ -700,7 +657,7 @@ class PricingService { // 确定实际使用的输入价格(普通或 200K+ 高档价格) // Claude 模型在 200K+ 场景下如果缺少官方字段,按 2 倍输入价兜底 - const actualInputPrice = useLongContextPricing + let actualInputPrice = useLongContextPricing ? hasInput200kPrice ? pricing.input_cost_per_token_above_200k_tokens : isClaudeModel @@ -712,12 +669,18 @@ class PricingService { const hasOutput200kPrice = pricing.output_cost_per_token_above_200k_tokens !== null && pricing.output_cost_per_token_above_200k_tokens !== undefined - const actualOutputPrice = useLongContextPricing + let actualOutputPrice = useLongContextPricing ? hasOutput200kPrice ? 
pricing.output_cost_per_token_above_200k_tokens : baseOutputPrice : baseOutputPrice + // 应用 Fast Mode 倍率(在 200K+ 价格之上叠加) + if (fastMultiplier > 1) { + actualInputPrice *= fastMultiplier + actualOutputPrice *= fastMultiplier + } + let actualCacheCreatePrice = 0 let actualCacheReadPrice = 0 let actualEphemeral1hPrice = 0 diff --git a/src/services/relay/droidRelayService.js b/src/services/relay/droidRelayService.js index 6e907651..499fc46d 100644 --- a/src/services/relay/droidRelayService.js +++ b/src/services/relay/droidRelayService.js @@ -91,7 +91,14 @@ class DroidRelayService { return normalizedBody } - async _applyRateLimitTracking(rateLimitInfo, usageSummary, model, context = '', keyId = null) { + async _applyRateLimitTracking( + rateLimitInfo, + usageSummary, + model, + context = '', + keyId = null, + preCalculatedCost = null + ) { if (!rateLimitInfo) { return } @@ -102,7 +109,8 @@ class DroidRelayService { usageSummary, model, keyId, - 'droid' + 'droid', + preCalculatedCost ) if (totalTokens > 0) { @@ -616,7 +624,7 @@ class DroidRelayService { // 记录 usage 数据 if (!skipUsageRecord) { - const normalizedUsage = await this._recordUsageFromStreamData( + const { normalizedUsage, costs: streamCosts } = await this._recordUsageFromStreamData( currentUsageData, apiKeyData, account, @@ -635,7 +643,8 @@ class DroidRelayService { usageSummary, model, ' [stream]', - keyId + keyId, + streamCosts ) logger.success(`Droid stream completed - Account: ${account.name}`) @@ -871,8 +880,8 @@ class DroidRelayService { */ async _recordUsageFromStreamData(usageData, apiKeyData, account, model) { const normalizedUsage = this._normalizeUsageSnapshot(usageData) - await this._recordUsage(apiKeyData, account, model, normalizedUsage) - return normalizedUsage + const costs = await this._recordUsage(apiKeyData, account, model, normalizedUsage) + return { normalizedUsage, costs } } /** @@ -1234,7 +1243,7 @@ class DroidRelayService { const normalizedUsage = this._normalizeUsageSnapshot(usage) if 
(!skipUsageRecord) { - await this._recordUsage(apiKeyData, account, model, normalizedUsage) + const droidCosts = await this._recordUsage(apiKeyData, account, model, normalizedUsage) const totalTokens = this._getTotalTokens(normalizedUsage) @@ -1256,7 +1265,8 @@ class DroidRelayService { usageSummary, model, endpointLabel, - keyId + keyId, + droidCosts ) logger.success( @@ -1283,15 +1293,22 @@ class DroidRelayService { if (totalTokens <= 0) { logger.debug('🪙 Droid usage 数据为空,跳过记录') - return + return { realCost: 0, ratedCost: 0 } } try { const keyId = apiKeyData?.id const accountId = this._extractAccountId(account) + let costs = { realCost: 0, ratedCost: 0 } if (keyId) { - await apiKeyService.recordUsageWithDetails(keyId, usageObject, model, accountId, 'droid') + costs = await apiKeyService.recordUsageWithDetails( + keyId, + usageObject, + model, + accountId, + 'droid' + ) } else if (accountId) { await redis.incrementAccountUsage( accountId, @@ -1307,14 +1324,17 @@ class DroidRelayService { ) } else { logger.warn('⚠️ 无法记录 Droid usage:缺少 API Key 和账户标识') - return + return { realCost: 0, ratedCost: 0 } } logger.debug( `📊 Droid usage recorded - Key: ${keyId || 'unknown'}, Account: ${accountId || 'unknown'}, Model: ${model}, Input: ${usageObject.input_tokens || 0}, Output: ${usageObject.output_tokens || 0}, Cache Create: ${usageObject.cache_creation_input_tokens || 0}, Cache Read: ${usageObject.cache_read_input_tokens || 0}, Total: ${totalTokens}` ) + + return costs } catch (error) { logger.error('❌ Failed to record Droid usage:', error) + return { realCost: 0, ratedCost: 0 } } } diff --git a/src/utils/rateLimitHelper.js b/src/utils/rateLimitHelper.js index a7f4db3c..4de07d99 100644 --- a/src/utils/rateLimitHelper.js +++ b/src/utils/rateLimitHelper.js @@ -8,12 +8,14 @@ function toNumber(value) { } // keyId 和 accountType 用于计算倍率成本 +// preCalculatedCost: 可选的 { realCost, ratedCost },由调用方提供以避免重复计算 async function updateRateLimitCounters( rateLimitInfo, usageSummary, model, 
keyId = null, - accountType = null + accountType = null, + preCalculatedCost = null ) { if (!rateLimitInfo) { return { totalTokens: 0, totalCost: 0, ratedCost: 0 } @@ -36,47 +38,68 @@ async function updateRateLimitCounters( } let totalCost = 0 - const usagePayload = { - input_tokens: inputTokens, - output_tokens: outputTokens, - cache_creation_input_tokens: cacheCreateTokens, - cache_read_input_tokens: cacheReadTokens - } + let ratedCost = 0 - try { - const costInfo = pricingService.calculateCost(usagePayload, model) - const { totalCost: calculatedCost } = costInfo || {} - if (typeof calculatedCost === 'number') { - totalCost = calculatedCost + if ( + preCalculatedCost && + typeof preCalculatedCost.ratedCost === 'number' && + preCalculatedCost.ratedCost > 0 + ) { + // 使用调用方已计算好的费用(避免重复计算,且能正确处理 1h 缓存、Fast Mode 等特殊计费) + // eslint-disable-next-line prefer-destructuring + ratedCost = preCalculatedCost.ratedCost + totalCost = preCalculatedCost.realCost || 0 + } else if ( + preCalculatedCost && + typeof preCalculatedCost.realCost === 'number' && + preCalculatedCost.realCost > 0 + ) { + // 有 realCost 但 ratedCost 为 0 或缺失,使用 realCost + totalCost = preCalculatedCost.realCost + ratedCost = preCalculatedCost.realCost + } else { + // Legacy fallback:调用方未提供费用时自行计算(不支持 1h 缓存等特殊计费) + const usagePayload = { + input_tokens: inputTokens, + output_tokens: outputTokens, + cache_creation_input_tokens: cacheCreateTokens, + cache_read_input_tokens: cacheReadTokens } - } catch (error) { - // 忽略此处错误,后续使用备用计算 - totalCost = 0 - } - if (totalCost === 0) { try { - const fallback = CostCalculator.calculateCost(usagePayload, model) - const { costs } = fallback || {} - if (costs && typeof costs.total === 'number') { - totalCost = costs.total + const costInfo = pricingService.calculateCost(usagePayload, model) + const { totalCost: calculatedCost } = costInfo || {} + if (typeof calculatedCost === 'number') { + totalCost = calculatedCost } } catch (error) { + // 忽略此处错误,后续使用备用计算 totalCost = 0 } - } - 
// 计算倍率成本(用于限流计数) - let ratedCost = totalCost - if (totalCost > 0 && keyId) { - try { - const apiKeyService = require('../services/apiKeyService') - const serviceRatesService = require('../services/serviceRatesService') - const service = serviceRatesService.getService(accountType, model) - ratedCost = await apiKeyService.calculateRatedCost(keyId, service, totalCost) - } catch (error) { - // 倍率计算失败时使用真实成本 - ratedCost = totalCost + if (totalCost === 0) { + try { + const fallback = CostCalculator.calculateCost(usagePayload, model) + const { costs } = fallback || {} + if (costs && typeof costs.total === 'number') { + totalCost = costs.total + } + } catch (error) { + totalCost = 0 + } + } + + // 计算倍率成本(用于限流计数) + ratedCost = totalCost + if (totalCost > 0 && keyId) { + try { + const apiKeyService = require('../services/apiKeyService') + const serviceRatesService = require('../services/serviceRatesService') + const service = serviceRatesService.getService(accountType, model) + ratedCost = await apiKeyService.calculateRatedCost(keyId, service, totalCost) + } catch (error) { + ratedCost = totalCost + } } } diff --git a/tests/pricingService.test.js b/tests/pricingService.test.js index 40dc0dd7..f0bc8687 100644 --- a/tests/pricingService.test.js +++ b/tests/pricingService.test.js @@ -39,49 +39,27 @@ jest.mock('fs', () => { describe('PricingService - 200K+ Long Context Pricing', () => { let pricingService const fs = require('fs') + const path = require('path') - // 模拟 claude-sonnet-4-20250514 的完整价格数据(来自 model_pricing.json) - const mockPricingData = { - 'claude-sonnet-4-20250514': { - input_cost_per_token: 0.000003, // $3/MTok - output_cost_per_token: 0.000015, // $15/MTok - cache_creation_input_token_cost: 0.00000375, // $3.75/MTok - cache_read_input_token_cost: 0.0000003, // $0.30/MTok - max_input_tokens: 1000000, - // 200K+ 高档价格 - input_cost_per_token_above_200k_tokens: 0.000006, // $6/MTok (2x) - output_cost_per_token_above_200k_tokens: 0.0000225, // $22.50/MTok (1.5x) - 
cache_creation_input_token_cost_above_200k_tokens: 0.0000075, // $7.50/MTok (2x) - cache_read_input_token_cost_above_200k_tokens: 0.0000006, // $0.60/MTok (2x) - // 1小时缓存价格 - cache_creation_input_token_cost_above_1hr: 0.0000075, - cache_creation_input_token_cost_above_1hr_above_200k_tokens: 0.000015 - }, - // 没有 above_200k 字段的模型 - 'claude-3-haiku-20240307': { - input_cost_per_token: 0.00000025, - output_cost_per_token: 0.00000125, - cache_creation_input_token_cost: 0.0000003, - cache_read_input_token_cost: 0.00000003 - }, - // Fast Mode 适配测试模型(Opus 4.6) - 'claude-opus-4-6': { - input_cost_per_token: 0.000005, - output_cost_per_token: 0.000025, - cache_creation_input_token_cost: 0.00000625, - cache_read_input_token_cost: 0.0000005, - input_cost_per_token_above_200k_tokens: 0.00001, - output_cost_per_token_above_200k_tokens: 0.0000375 - } - } + // 使用真实的 model_pricing.json 数据(优先 data/,fallback 到 resources/) + const realFs = jest.requireActual('fs') + const primaryPath = path.join(process.cwd(), 'data', 'model_pricing.json') + const fallbackPath = path.join( + process.cwd(), + 'resources', + 'model-pricing', + 'model_prices_and_context_window.json' + ) + const pricingFilePath = realFs.existsSync(primaryPath) ? 
primaryPath : fallbackPath + const pricingData = JSON.parse(realFs.readFileSync(pricingFilePath, 'utf8')) beforeEach(() => { // 清除缓存的模块 jest.resetModules() - // 配置 fs mock + // 配置 fs mock(防止 pricingService 初始化时的文件副作用) fs.existsSync.mockReturnValue(true) - fs.readFileSync.mockReturnValue(JSON.stringify(mockPricingData)) + fs.readFileSync.mockReturnValue(JSON.stringify(pricingData)) fs.statSync.mockReturnValue({ mtime: new Date(), mtimeMs: Date.now() }) fs.watchFile.mockImplementation(() => {}) fs.unwatchFile.mockImplementation(() => {}) @@ -89,8 +67,8 @@ describe('PricingService - 200K+ Long Context Pricing', () => { // 重新加载 pricingService pricingService = require('../src/services/pricingService') - // 直接设置价格数据(绕过初始化) - pricingService.pricingData = mockPricingData + // 直接设置真实价格数据(绕过网络初始化) + pricingService.pricingData = pricingData pricingService.lastUpdated = new Date() })