mirror of
https://github.com/Wei-Shaw/claude-relay-service.git
synced 2026-04-19 13:28:40 +00:00
Merge pull request #1009 from sczheng189/feature/claude_price_count
claude计费相关修改
This commit is contained in:
@@ -209,7 +209,13 @@ function ensureGeminiPermissionMiddleware(req, res, next) {
|
|||||||
/**
|
/**
|
||||||
* 应用速率限制跟踪
|
* 应用速率限制跟踪
|
||||||
*/
|
*/
|
||||||
async function applyRateLimitTracking(req, usageSummary, model, context = '') {
|
async function applyRateLimitTracking(
|
||||||
|
req,
|
||||||
|
usageSummary,
|
||||||
|
model,
|
||||||
|
context = '',
|
||||||
|
preCalculatedCost = null
|
||||||
|
) {
|
||||||
if (!req.rateLimitInfo) {
|
if (!req.rateLimitInfo) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@@ -222,7 +228,8 @@ async function applyRateLimitTracking(req, usageSummary, model, context = '') {
|
|||||||
usageSummary,
|
usageSummary,
|
||||||
model,
|
model,
|
||||||
req.apiKey?.id,
|
req.apiKey?.id,
|
||||||
'gemini'
|
'gemini',
|
||||||
|
preCalculatedCost
|
||||||
)
|
)
|
||||||
|
|
||||||
if (totalTokens > 0) {
|
if (totalTokens > 0) {
|
||||||
@@ -1705,7 +1712,7 @@ async function handleGenerateContent(req, res) {
|
|||||||
if (response?.response?.usageMetadata) {
|
if (response?.response?.usageMetadata) {
|
||||||
try {
|
try {
|
||||||
const usage = response.response.usageMetadata
|
const usage = response.response.usageMetadata
|
||||||
await apiKeyService.recordUsage(
|
const geminiNonStreamCosts = await apiKeyService.recordUsage(
|
||||||
req.apiKey.id,
|
req.apiKey.id,
|
||||||
usage.promptTokenCount || 0,
|
usage.promptTokenCount || 0,
|
||||||
usage.candidatesTokenCount || 0,
|
usage.candidatesTokenCount || 0,
|
||||||
@@ -1728,7 +1735,8 @@ async function handleGenerateContent(req, res) {
|
|||||||
cacheReadTokens: 0
|
cacheReadTokens: 0
|
||||||
},
|
},
|
||||||
model,
|
model,
|
||||||
'gemini-non-stream'
|
'gemini-non-stream',
|
||||||
|
geminiNonStreamCosts
|
||||||
)
|
)
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
logger.error('Failed to record Gemini usage:', error)
|
logger.error('Failed to record Gemini usage:', error)
|
||||||
@@ -2053,8 +2061,8 @@ async function handleStreamGenerateContent(req, res) {
|
|||||||
|
|
||||||
// 异步记录使用统计
|
// 异步记录使用统计
|
||||||
if (!usageReported && totalUsage.totalTokenCount > 0) {
|
if (!usageReported && totalUsage.totalTokenCount > 0) {
|
||||||
Promise.all([
|
apiKeyService
|
||||||
apiKeyService.recordUsage(
|
.recordUsage(
|
||||||
req.apiKey.id,
|
req.apiKey.id,
|
||||||
totalUsage.promptTokenCount || 0,
|
totalUsage.promptTokenCount || 0,
|
||||||
totalUsage.candidatesTokenCount || 0,
|
totalUsage.candidatesTokenCount || 0,
|
||||||
@@ -2063,19 +2071,21 @@ async function handleStreamGenerateContent(req, res) {
|
|||||||
model,
|
model,
|
||||||
account.id,
|
account.id,
|
||||||
'gemini'
|
'gemini'
|
||||||
),
|
|
||||||
applyRateLimitTracking(
|
|
||||||
req,
|
|
||||||
{
|
|
||||||
inputTokens: totalUsage.promptTokenCount || 0,
|
|
||||||
outputTokens: totalUsage.candidatesTokenCount || 0,
|
|
||||||
cacheCreateTokens: 0,
|
|
||||||
cacheReadTokens: 0
|
|
||||||
},
|
|
||||||
model,
|
|
||||||
'gemini-stream'
|
|
||||||
)
|
)
|
||||||
])
|
.then((costs) =>
|
||||||
|
applyRateLimitTracking(
|
||||||
|
req,
|
||||||
|
{
|
||||||
|
inputTokens: totalUsage.promptTokenCount || 0,
|
||||||
|
outputTokens: totalUsage.candidatesTokenCount || 0,
|
||||||
|
cacheCreateTokens: 0,
|
||||||
|
cacheReadTokens: 0
|
||||||
|
},
|
||||||
|
model,
|
||||||
|
'gemini-stream',
|
||||||
|
costs
|
||||||
|
)
|
||||||
|
)
|
||||||
.then(() => {
|
.then(() => {
|
||||||
logger.info(
|
logger.info(
|
||||||
`📊 Recorded Gemini stream usage - Input: ${totalUsage.promptTokenCount}, Output: ${totalUsage.candidatesTokenCount}, Total: ${totalUsage.totalTokenCount}`
|
`📊 Recorded Gemini stream usage - Input: ${totalUsage.promptTokenCount}, Output: ${totalUsage.candidatesTokenCount}, Total: ${totalUsage.totalTokenCount}`
|
||||||
|
|||||||
@@ -1093,9 +1093,8 @@ async function calculateKeyStats(keyId, timeRange, startDate, endDate) {
|
|||||||
const currentMonth = `${tzDate.getUTCFullYear()}-${String(tzDate.getUTCMonth() + 1).padStart(2, '0')}`
|
const currentMonth = `${tzDate.getUTCFullYear()}-${String(tzDate.getUTCMonth() + 1).padStart(2, '0')}`
|
||||||
searchPatterns.push(`usage:${keyId}:model:monthly:*:${currentMonth}`)
|
searchPatterns.push(`usage:${keyId}:model:monthly:*:${currentMonth}`)
|
||||||
} else {
|
} else {
|
||||||
// all - 获取所有数据(日和月数据都查)
|
// all - 使用 alltime key(无 TTL,数据完整),避免 daily/monthly 键过期导致数据丢失
|
||||||
searchPatterns.push(`usage:${keyId}:model:daily:*`)
|
searchPatterns.push(`usage:${keyId}:model:alltime:*`)
|
||||||
searchPatterns.push(`usage:${keyId}:model:monthly:*`)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// 使用 SCAN 收集所有匹配的 keys
|
// 使用 SCAN 收集所有匹配的 keys
|
||||||
@@ -1109,7 +1108,7 @@ async function calculateKeyStats(keyId, timeRange, startDate, endDate) {
|
|||||||
} while (cursor !== '0')
|
} while (cursor !== '0')
|
||||||
}
|
}
|
||||||
|
|
||||||
// 去重(避免日数据和月数据重复计算)
|
// 去重
|
||||||
const uniqueKeys = [...new Set(allKeys)]
|
const uniqueKeys = [...new Set(allKeys)]
|
||||||
|
|
||||||
// 获取实时限制数据(窗口数据不受时间范围筛选影响,始终获取当前窗口状态)
|
// 获取实时限制数据(窗口数据不受时间范围筛选影响,始终获取当前窗口状态)
|
||||||
@@ -1128,7 +1127,6 @@ async function calculateKeyStats(keyId, timeRange, startDate, endDate) {
|
|||||||
const apiKey = await redis.getApiKey(keyId)
|
const apiKey = await redis.getApiKey(keyId)
|
||||||
const rateLimitWindow = parseInt(apiKey?.rateLimitWindow) || 0
|
const rateLimitWindow = parseInt(apiKey?.rateLimitWindow) || 0
|
||||||
const dailyCostLimit = parseFloat(apiKey?.dailyCostLimit) || 0
|
const dailyCostLimit = parseFloat(apiKey?.dailyCostLimit) || 0
|
||||||
const totalCostLimit = parseFloat(apiKey?.totalCostLimit) || 0
|
|
||||||
const weeklyOpusCostLimit = parseFloat(apiKey?.weeklyOpusCostLimit) || 0
|
const weeklyOpusCostLimit = parseFloat(apiKey?.weeklyOpusCostLimit) || 0
|
||||||
|
|
||||||
// 只在启用了每日费用限制时查询
|
// 只在启用了每日费用限制时查询
|
||||||
@@ -1136,11 +1134,9 @@ async function calculateKeyStats(keyId, timeRange, startDate, endDate) {
|
|||||||
dailyCost = await redis.getDailyCost(keyId)
|
dailyCost = await redis.getDailyCost(keyId)
|
||||||
}
|
}
|
||||||
|
|
||||||
// 只在启用了总费用限制时查询
|
// 始终查询 allTimeCost(用于展示和限额校验)
|
||||||
if (totalCostLimit > 0) {
|
const totalCostKey = `usage:cost:total:${keyId}`
|
||||||
const totalCostKey = `usage:cost:total:${keyId}`
|
allTimeCost = parseFloat((await client.get(totalCostKey)) || '0')
|
||||||
allTimeCost = parseFloat((await client.get(totalCostKey)) || '0')
|
|
||||||
}
|
|
||||||
|
|
||||||
// 只在启用了 Claude 周费用限制时查询(字段名沿用 weeklyOpusCostLimit)
|
// 只在启用了 Claude 周费用限制时查询(字段名沿用 weeklyOpusCostLimit)
|
||||||
if (weeklyOpusCostLimit > 0) {
|
if (weeklyOpusCostLimit > 0) {
|
||||||
@@ -1149,7 +1145,7 @@ async function calculateKeyStats(keyId, timeRange, startDate, endDate) {
|
|||||||
weeklyOpusCost = await redis.getWeeklyOpusCost(keyId, resetDay, resetHour)
|
weeklyOpusCost = await redis.getWeeklyOpusCost(keyId, resetDay, resetHour)
|
||||||
}
|
}
|
||||||
|
|
||||||
// 只在启用了窗口限制时查询窗口数据(移到早期返回之前,确保窗口数据始终被获取)
|
// 只在启用了窗口限制时查询窗口数据
|
||||||
if (rateLimitWindow > 0) {
|
if (rateLimitWindow > 0) {
|
||||||
const requestCountKey = `rate_limit:requests:${keyId}`
|
const requestCountKey = `rate_limit:requests:${keyId}`
|
||||||
const tokenCountKey = `rate_limit:tokens:${keyId}`
|
const tokenCountKey = `rate_limit:tokens:${keyId}`
|
||||||
@@ -1180,37 +1176,23 @@ async function calculateKeyStats(keyId, timeRange, startDate, endDate) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// 🔧 FIX: 对于 "全部时间" 时间范围,直接使用 allTimeCost
|
|
||||||
// 因为 usage:*:model:daily:* 键有 30 天 TTL,旧数据已经过期
|
|
||||||
if (timeRange === 'all' && allTimeCost > 0) {
|
|
||||||
logger.debug(`📊 使用 allTimeCost 计算 timeRange='all': ${allTimeCost}`)
|
|
||||||
|
|
||||||
return {
|
|
||||||
requests: 0, // 旧数据详情不可用
|
|
||||||
tokens: 0,
|
|
||||||
inputTokens: 0,
|
|
||||||
outputTokens: 0,
|
|
||||||
cacheCreateTokens: 0,
|
|
||||||
cacheReadTokens: 0,
|
|
||||||
cost: allTimeCost,
|
|
||||||
formattedCost: CostCalculator.formatCost(allTimeCost),
|
|
||||||
// 实时限制数据(始终返回,不受时间范围影响)
|
|
||||||
dailyCost,
|
|
||||||
weeklyOpusCost,
|
|
||||||
currentWindowCost,
|
|
||||||
currentWindowRequests,
|
|
||||||
currentWindowTokens,
|
|
||||||
windowRemainingSeconds,
|
|
||||||
windowStartTime,
|
|
||||||
windowEndTime,
|
|
||||||
allTimeCost
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
logger.warn(`⚠️ 获取实时限制数据失败 (key: ${keyId}):`, error.message)
|
logger.warn(`⚠️ 获取实时限制数据失败 (key: ${keyId}):`, error.message)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 构建实时限制数据对象(各分支复用)
|
||||||
|
const limitData = {
|
||||||
|
dailyCost,
|
||||||
|
weeklyOpusCost,
|
||||||
|
currentWindowCost,
|
||||||
|
currentWindowRequests,
|
||||||
|
currentWindowTokens,
|
||||||
|
windowRemainingSeconds,
|
||||||
|
windowStartTime,
|
||||||
|
windowEndTime,
|
||||||
|
allTimeCost
|
||||||
|
}
|
||||||
|
|
||||||
// 如果没有使用数据,返回零值但包含窗口数据
|
// 如果没有使用数据,返回零值但包含窗口数据
|
||||||
if (uniqueKeys.length === 0) {
|
if (uniqueKeys.length === 0) {
|
||||||
return {
|
return {
|
||||||
@@ -1221,17 +1203,9 @@ async function calculateKeyStats(keyId, timeRange, startDate, endDate) {
|
|||||||
cacheCreateTokens: 0,
|
cacheCreateTokens: 0,
|
||||||
cacheReadTokens: 0,
|
cacheReadTokens: 0,
|
||||||
cost: 0,
|
cost: 0,
|
||||||
|
realCost: 0,
|
||||||
formattedCost: '$0.00',
|
formattedCost: '$0.00',
|
||||||
// 实时限制数据(始终返回,不受时间范围影响)
|
...limitData
|
||||||
dailyCost,
|
|
||||||
weeklyOpusCost,
|
|
||||||
currentWindowCost,
|
|
||||||
currentWindowRequests,
|
|
||||||
currentWindowTokens,
|
|
||||||
windowRemainingSeconds,
|
|
||||||
windowStartTime,
|
|
||||||
windowEndTime,
|
|
||||||
allTimeCost
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1246,10 +1220,13 @@ async function calculateKeyStats(keyId, timeRange, startDate, endDate) {
|
|||||||
const modelStatsMap = new Map()
|
const modelStatsMap = new Map()
|
||||||
let totalRequests = 0
|
let totalRequests = 0
|
||||||
|
|
||||||
|
// alltime key 的模式:usage:{keyId}:model:alltime:{model}
|
||||||
|
const alltimeKeyPattern = /usage:.+:model:alltime:(.+)$/
|
||||||
// 用于去重:先统计月数据,避免与日数据重复
|
// 用于去重:先统计月数据,避免与日数据重复
|
||||||
const dailyKeyPattern = /usage:.+:model:daily:(.+):\d{4}-\d{2}-\d{2}$/
|
const dailyKeyPattern = /usage:.+:model:daily:(.+):\d{4}-\d{2}-\d{2}$/
|
||||||
const monthlyKeyPattern = /usage:.+:model:monthly:(.+):\d{4}-\d{2}$/
|
const monthlyKeyPattern = /usage:.+:model:monthly:(.+):\d{4}-\d{2}$/
|
||||||
const currentMonth = `${tzDate.getUTCFullYear()}-${String(tzDate.getUTCMonth() + 1).padStart(2, '0')}`
|
const currentMonth = `${tzDate.getUTCFullYear()}-${String(tzDate.getUTCMonth() + 1).padStart(2, '0')}`
|
||||||
|
const isAlltimeQuery = timeRange === 'all'
|
||||||
|
|
||||||
for (let i = 0; i < results.length; i++) {
|
for (let i = 0; i < results.length; i++) {
|
||||||
const [err, data] = results[i]
|
const [err, data] = results[i]
|
||||||
@@ -1262,27 +1239,37 @@ async function calculateKeyStats(keyId, timeRange, startDate, endDate) {
|
|||||||
let isMonthly = false
|
let isMonthly = false
|
||||||
|
|
||||||
// 提取模型名称
|
// 提取模型名称
|
||||||
const dailyMatch = key.match(dailyKeyPattern)
|
if (isAlltimeQuery) {
|
||||||
const monthlyMatch = key.match(monthlyKeyPattern)
|
const alltimeMatch = key.match(alltimeKeyPattern)
|
||||||
|
if (alltimeMatch) {
|
||||||
|
model = alltimeMatch[1]
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
const dailyMatch = key.match(dailyKeyPattern)
|
||||||
|
const monthlyMatch = key.match(monthlyKeyPattern)
|
||||||
|
|
||||||
if (dailyMatch) {
|
if (dailyMatch) {
|
||||||
model = dailyMatch[1]
|
model = dailyMatch[1]
|
||||||
} else if (monthlyMatch) {
|
} else if (monthlyMatch) {
|
||||||
model = monthlyMatch[1]
|
model = monthlyMatch[1]
|
||||||
isMonthly = true
|
isMonthly = true
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!model) {
|
if (!model) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
// 跳过当前月的月数据
|
// 日/月去重逻辑(alltime 不需要去重)
|
||||||
if (isMonthly && key.includes(`:${currentMonth}`)) {
|
if (!isAlltimeQuery) {
|
||||||
continue
|
// 跳过当前月的月数据(当前月用日数据更精确)
|
||||||
}
|
if (isMonthly && key.includes(`:${currentMonth}`)) {
|
||||||
// 跳过非当前月的日数据
|
continue
|
||||||
if (!isMonthly && !key.includes(`:${currentMonth}-`)) {
|
}
|
||||||
continue
|
// 跳过非当前月的日数据(非当前月用月数据)
|
||||||
|
if (!isMonthly && !key.includes(`:${currentMonth}-`)) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!modelStatsMap.has(model)) {
|
if (!modelStatsMap.has(model)) {
|
||||||
@@ -1293,7 +1280,10 @@ async function calculateKeyStats(keyId, timeRange, startDate, endDate) {
|
|||||||
cacheReadTokens: 0,
|
cacheReadTokens: 0,
|
||||||
ephemeral5mTokens: 0,
|
ephemeral5mTokens: 0,
|
||||||
ephemeral1hTokens: 0,
|
ephemeral1hTokens: 0,
|
||||||
requests: 0
|
requests: 0,
|
||||||
|
realCostMicro: 0,
|
||||||
|
ratedCostMicro: 0,
|
||||||
|
hasStoredCost: false
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1310,11 +1300,19 @@ async function calculateKeyStats(keyId, timeRange, startDate, endDate) {
|
|||||||
parseInt(data.totalEphemeral1hTokens) || parseInt(data.ephemeral1hTokens) || 0
|
parseInt(data.totalEphemeral1hTokens) || parseInt(data.ephemeral1hTokens) || 0
|
||||||
stats.requests += parseInt(data.totalRequests) || parseInt(data.requests) || 0
|
stats.requests += parseInt(data.totalRequests) || parseInt(data.requests) || 0
|
||||||
|
|
||||||
|
// 累加已存储的费用(微美元)
|
||||||
|
if ('realCostMicro' in data || 'ratedCostMicro' in data) {
|
||||||
|
stats.realCostMicro += parseInt(data.realCostMicro) || 0
|
||||||
|
stats.ratedCostMicro += parseInt(data.ratedCostMicro) || 0
|
||||||
|
stats.hasStoredCost = true
|
||||||
|
}
|
||||||
|
|
||||||
totalRequests += parseInt(data.totalRequests) || parseInt(data.requests) || 0
|
totalRequests += parseInt(data.totalRequests) || parseInt(data.requests) || 0
|
||||||
}
|
}
|
||||||
|
|
||||||
// 计算费用
|
// 汇总费用:优先使用已存储的费用,仅对无存储费用的旧数据 fallback 到 token 重算
|
||||||
let totalCost = 0
|
let totalRatedCost = 0
|
||||||
|
let totalRealCost = 0
|
||||||
let inputTokens = 0
|
let inputTokens = 0
|
||||||
let outputTokens = 0
|
let outputTokens = 0
|
||||||
let cacheCreateTokens = 0
|
let cacheCreateTokens = 0
|
||||||
@@ -1326,23 +1324,30 @@ async function calculateKeyStats(keyId, timeRange, startDate, endDate) {
|
|||||||
cacheCreateTokens += stats.cacheCreateTokens
|
cacheCreateTokens += stats.cacheCreateTokens
|
||||||
cacheReadTokens += stats.cacheReadTokens
|
cacheReadTokens += stats.cacheReadTokens
|
||||||
|
|
||||||
const costUsage = {
|
if (stats.hasStoredCost) {
|
||||||
input_tokens: stats.inputTokens,
|
// 使用请求时已计算并存储的费用(精确,包含 1M 上下文、特殊计费等)
|
||||||
output_tokens: stats.outputTokens,
|
totalRatedCost += stats.ratedCostMicro / 1000000
|
||||||
cache_creation_input_tokens: stats.cacheCreateTokens,
|
totalRealCost += stats.realCostMicro / 1000000
|
||||||
cache_read_input_tokens: stats.cacheReadTokens
|
} else {
|
||||||
}
|
// Legacy fallback:旧数据没有存储费用,从 token 重算(不精确但聊胜于无)
|
||||||
|
const costUsage = {
|
||||||
// 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费
|
input_tokens: stats.inputTokens,
|
||||||
if (stats.ephemeral5mTokens > 0 || stats.ephemeral1hTokens > 0) {
|
output_tokens: stats.outputTokens,
|
||||||
costUsage.cache_creation = {
|
cache_creation_input_tokens: stats.cacheCreateTokens,
|
||||||
ephemeral_5m_input_tokens: stats.ephemeral5mTokens,
|
cache_read_input_tokens: stats.cacheReadTokens
|
||||||
ephemeral_1h_input_tokens: stats.ephemeral1hTokens
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
const costResult = CostCalculator.calculateCost(costUsage, model)
|
if (stats.ephemeral5mTokens > 0 || stats.ephemeral1hTokens > 0) {
|
||||||
totalCost += costResult.costs.total
|
costUsage.cache_creation = {
|
||||||
|
ephemeral_5m_input_tokens: stats.ephemeral5mTokens,
|
||||||
|
ephemeral_1h_input_tokens: stats.ephemeral1hTokens
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const costResult = CostCalculator.calculateCost(costUsage, model)
|
||||||
|
totalRatedCost += costResult.costs.total
|
||||||
|
totalRealCost += costResult.costs.total
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const tokens = inputTokens + outputTokens + cacheCreateTokens + cacheReadTokens
|
const tokens = inputTokens + outputTokens + cacheCreateTokens + cacheReadTokens
|
||||||
@@ -1354,18 +1359,10 @@ async function calculateKeyStats(keyId, timeRange, startDate, endDate) {
|
|||||||
outputTokens,
|
outputTokens,
|
||||||
cacheCreateTokens,
|
cacheCreateTokens,
|
||||||
cacheReadTokens,
|
cacheReadTokens,
|
||||||
cost: totalCost,
|
cost: totalRatedCost,
|
||||||
formattedCost: CostCalculator.formatCost(totalCost),
|
realCost: totalRealCost,
|
||||||
// 实时限制数据
|
formattedCost: CostCalculator.formatCost(totalRatedCost),
|
||||||
dailyCost,
|
...limitData
|
||||||
weeklyOpusCost,
|
|
||||||
currentWindowCost,
|
|
||||||
currentWindowRequests,
|
|
||||||
currentWindowTokens,
|
|
||||||
windowRemainingSeconds,
|
|
||||||
windowStartTime,
|
|
||||||
windowEndTime,
|
|
||||||
allTimeCost // 历史总费用(用于总费用限制)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1011,7 +1011,10 @@ router.get('/api-keys/:keyId/model-stats', authenticateAdmin, async (req, res) =
|
|||||||
cacheReadTokens: 0,
|
cacheReadTokens: 0,
|
||||||
ephemeral5mTokens: 0,
|
ephemeral5mTokens: 0,
|
||||||
ephemeral1hTokens: 0,
|
ephemeral1hTokens: 0,
|
||||||
allTokens: 0
|
allTokens: 0,
|
||||||
|
realCostMicro: 0,
|
||||||
|
ratedCostMicro: 0,
|
||||||
|
hasStoredCost: false
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
const stats = modelStatsMap.get(model)
|
const stats = modelStatsMap.get(model)
|
||||||
@@ -1023,6 +1026,11 @@ router.get('/api-keys/:keyId/model-stats', authenticateAdmin, async (req, res) =
|
|||||||
stats.ephemeral5mTokens += parseInt(data.ephemeral5mTokens) || 0
|
stats.ephemeral5mTokens += parseInt(data.ephemeral5mTokens) || 0
|
||||||
stats.ephemeral1hTokens += parseInt(data.ephemeral1hTokens) || 0
|
stats.ephemeral1hTokens += parseInt(data.ephemeral1hTokens) || 0
|
||||||
stats.allTokens += parseInt(data.allTokens) || 0
|
stats.allTokens += parseInt(data.allTokens) || 0
|
||||||
|
if ('realCostMicro' in data || 'ratedCostMicro' in data) {
|
||||||
|
stats.realCostMicro += parseInt(data.realCostMicro) || 0
|
||||||
|
stats.ratedCostMicro += parseInt(data.ratedCostMicro) || 0
|
||||||
|
stats.hasStoredCost = true
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@@ -1059,7 +1067,10 @@ router.get('/api-keys/:keyId/model-stats', authenticateAdmin, async (req, res) =
|
|||||||
cacheReadTokens: 0,
|
cacheReadTokens: 0,
|
||||||
ephemeral5mTokens: 0,
|
ephemeral5mTokens: 0,
|
||||||
ephemeral1hTokens: 0,
|
ephemeral1hTokens: 0,
|
||||||
allTokens: 0
|
allTokens: 0,
|
||||||
|
realCostMicro: 0,
|
||||||
|
ratedCostMicro: 0,
|
||||||
|
hasStoredCost: false
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
const stats = modelStatsMap.get(model)
|
const stats = modelStatsMap.get(model)
|
||||||
@@ -1071,6 +1082,11 @@ router.get('/api-keys/:keyId/model-stats', authenticateAdmin, async (req, res) =
|
|||||||
stats.ephemeral5mTokens += parseInt(data.ephemeral5mTokens) || 0
|
stats.ephemeral5mTokens += parseInt(data.ephemeral5mTokens) || 0
|
||||||
stats.ephemeral1hTokens += parseInt(data.ephemeral1hTokens) || 0
|
stats.ephemeral1hTokens += parseInt(data.ephemeral1hTokens) || 0
|
||||||
stats.allTokens += parseInt(data.allTokens) || 0
|
stats.allTokens += parseInt(data.allTokens) || 0
|
||||||
|
if ('realCostMicro' in data || 'ratedCostMicro' in data) {
|
||||||
|
stats.realCostMicro += parseInt(data.realCostMicro) || 0
|
||||||
|
stats.ratedCostMicro += parseInt(data.ratedCostMicro) || 0
|
||||||
|
stats.hasStoredCost = true
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1078,23 +1094,36 @@ router.get('/api-keys/:keyId/model-stats', authenticateAdmin, async (req, res) =
|
|||||||
for (const [model, stats] of modelStatsMap) {
|
for (const [model, stats] of modelStatsMap) {
|
||||||
logger.info(`📊 Model ${model} aggregated data:`, stats)
|
logger.info(`📊 Model ${model} aggregated data:`, stats)
|
||||||
|
|
||||||
const usage = {
|
let costData
|
||||||
input_tokens: stats.inputTokens,
|
if (stats.hasStoredCost) {
|
||||||
output_tokens: stats.outputTokens,
|
// 使用请求时已计算并存储的费用(精确,包含 1M 上下文、Fast Mode 等特殊计费)
|
||||||
cache_creation_input_tokens: stats.cacheCreateTokens,
|
const ratedCost = stats.ratedCostMicro / 1000000
|
||||||
cache_read_input_tokens: stats.cacheReadTokens
|
const realCost = stats.realCostMicro / 1000000
|
||||||
}
|
costData = {
|
||||||
|
costs: { total: ratedCost, real: realCost },
|
||||||
// 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费
|
formatted: { total: CostCalculator.formatCost(ratedCost) },
|
||||||
if (stats.ephemeral5mTokens > 0 || stats.ephemeral1hTokens > 0) {
|
pricing: null,
|
||||||
usage.cache_creation = {
|
usingDynamicPricing: false,
|
||||||
ephemeral_5m_input_tokens: stats.ephemeral5mTokens,
|
usingStoredCost: true
|
||||||
ephemeral_1h_input_tokens: stats.ephemeral1hTokens
|
}
|
||||||
|
} else {
|
||||||
|
// Legacy fallback:旧数据没有存储费用,从 token 重算
|
||||||
|
const usage = {
|
||||||
|
input_tokens: stats.inputTokens,
|
||||||
|
output_tokens: stats.outputTokens,
|
||||||
|
cache_creation_input_tokens: stats.cacheCreateTokens,
|
||||||
|
cache_read_input_tokens: stats.cacheReadTokens
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// 使用CostCalculator计算费用
|
if (stats.ephemeral5mTokens > 0 || stats.ephemeral1hTokens > 0) {
|
||||||
const costData = CostCalculator.calculateCost(usage, model)
|
usage.cache_creation = {
|
||||||
|
ephemeral_5m_input_tokens: stats.ephemeral5mTokens,
|
||||||
|
ephemeral_1h_input_tokens: stats.ephemeral1hTokens
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
costData = CostCalculator.calculateCost(usage, model)
|
||||||
|
}
|
||||||
|
|
||||||
modelStats.push({
|
modelStats.push({
|
||||||
model,
|
model,
|
||||||
@@ -1933,26 +1962,37 @@ router.get('/api-keys-usage-trend', authenticateAdmin, async (req, res) => {
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
const usage = {
|
// 优先使用已存储的费用
|
||||||
input_tokens: parseInt(modelData.inputTokens) || 0,
|
const hasStoredCost = 'realCostMicro' in modelData || 'ratedCostMicro' in modelData
|
||||||
output_tokens: parseInt(modelData.outputTokens) || 0,
|
let modelCost = 0
|
||||||
cache_creation_input_tokens: parseInt(modelData.cacheCreateTokens) || 0,
|
|
||||||
cache_read_input_tokens: parseInt(modelData.cacheReadTokens) || 0
|
|
||||||
}
|
|
||||||
|
|
||||||
// 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费
|
if (hasStoredCost) {
|
||||||
const eph5m = parseInt(modelData.ephemeral5mTokens) || 0
|
modelCost = (parseInt(modelData.ratedCostMicro) || 0) / 1000000
|
||||||
const eph1h = parseInt(modelData.ephemeral1hTokens) || 0
|
} else {
|
||||||
if (eph5m > 0 || eph1h > 0) {
|
// Legacy fallback:旧数据没有存储费用,从 token 重算
|
||||||
usage.cache_creation = {
|
const usage = {
|
||||||
ephemeral_5m_input_tokens: eph5m,
|
input_tokens: parseInt(modelData.inputTokens) || 0,
|
||||||
ephemeral_1h_input_tokens: eph1h
|
output_tokens: parseInt(modelData.outputTokens) || 0,
|
||||||
|
cache_creation_input_tokens: parseInt(modelData.cacheCreateTokens) || 0,
|
||||||
|
cache_read_input_tokens: parseInt(modelData.cacheReadTokens) || 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费
|
||||||
|
const eph5m = parseInt(modelData.ephemeral5mTokens) || 0
|
||||||
|
const eph1h = parseInt(modelData.ephemeral1hTokens) || 0
|
||||||
|
if (eph5m > 0 || eph1h > 0) {
|
||||||
|
usage.cache_creation = {
|
||||||
|
ephemeral_5m_input_tokens: eph5m,
|
||||||
|
ephemeral_1h_input_tokens: eph1h
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const costResult = CostCalculator.calculateCost(usage, model)
|
||||||
|
modelCost = costResult.costs.total
|
||||||
}
|
}
|
||||||
|
|
||||||
const costResult = CostCalculator.calculateCost(usage, model)
|
|
||||||
const currentCost = apiKeyCostMap.get(apiKeyId) || 0
|
const currentCost = apiKeyCostMap.get(apiKeyId) || 0
|
||||||
apiKeyCostMap.set(apiKeyId, currentCost + costResult.costs.total)
|
apiKeyCostMap.set(apiKeyId, currentCost + modelCost)
|
||||||
}
|
}
|
||||||
|
|
||||||
// 组合数据
|
// 组合数据
|
||||||
@@ -2111,26 +2151,37 @@ router.get('/api-keys-usage-trend', authenticateAdmin, async (req, res) => {
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
const usage = {
|
// 优先使用已存储的费用
|
||||||
input_tokens: parseInt(modelData.inputTokens) || 0,
|
const hasStoredCost = 'realCostMicro' in modelData || 'ratedCostMicro' in modelData
|
||||||
output_tokens: parseInt(modelData.outputTokens) || 0,
|
let modelCost = 0
|
||||||
cache_creation_input_tokens: parseInt(modelData.cacheCreateTokens) || 0,
|
|
||||||
cache_read_input_tokens: parseInt(modelData.cacheReadTokens) || 0
|
|
||||||
}
|
|
||||||
|
|
||||||
// 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费
|
if (hasStoredCost) {
|
||||||
const eph5m = parseInt(modelData.ephemeral5mTokens) || 0
|
modelCost = (parseInt(modelData.ratedCostMicro) || 0) / 1000000
|
||||||
const eph1h = parseInt(modelData.ephemeral1hTokens) || 0
|
} else {
|
||||||
if (eph5m > 0 || eph1h > 0) {
|
// Legacy fallback:旧数据没有存储费用,从 token 重算
|
||||||
usage.cache_creation = {
|
const usage = {
|
||||||
ephemeral_5m_input_tokens: eph5m,
|
input_tokens: parseInt(modelData.inputTokens) || 0,
|
||||||
ephemeral_1h_input_tokens: eph1h
|
output_tokens: parseInt(modelData.outputTokens) || 0,
|
||||||
|
cache_creation_input_tokens: parseInt(modelData.cacheCreateTokens) || 0,
|
||||||
|
cache_read_input_tokens: parseInt(modelData.cacheReadTokens) || 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费
|
||||||
|
const eph5m = parseInt(modelData.ephemeral5mTokens) || 0
|
||||||
|
const eph1h = parseInt(modelData.ephemeral1hTokens) || 0
|
||||||
|
if (eph5m > 0 || eph1h > 0) {
|
||||||
|
usage.cache_creation = {
|
||||||
|
ephemeral_5m_input_tokens: eph5m,
|
||||||
|
ephemeral_1h_input_tokens: eph1h
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const costResult = CostCalculator.calculateCost(usage, model)
|
||||||
|
modelCost = costResult.costs.total
|
||||||
}
|
}
|
||||||
|
|
||||||
const costResult = CostCalculator.calculateCost(usage, model)
|
|
||||||
const currentCost = apiKeyCostMap.get(apiKeyId) || 0
|
const currentCost = apiKeyCostMap.get(apiKeyId) || 0
|
||||||
apiKeyCostMap.set(apiKeyId, currentCost + costResult.costs.total)
|
apiKeyCostMap.set(apiKeyId, currentCost + modelCost)
|
||||||
}
|
}
|
||||||
|
|
||||||
// 组合数据
|
// 组合数据
|
||||||
@@ -2628,7 +2679,7 @@ router.get('/usage-costs', authenticateAdmin, async (req, res) => {
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
// 获取 API Key 的请求记录时间线
|
// 获取 API Key 的请求记录时间线
|
||||||
router.get('/api-keys/:keyId/usage-records', authenticateAdmin, async (req, res) => {
|
router.get('/api-keys/:keyId/usage-records', authenticateAdmin, async (req, res) => {
|
||||||
try {
|
try {
|
||||||
const { keyId } = req.params
|
const { keyId } = req.params
|
||||||
|
|||||||
@@ -33,7 +33,8 @@ function queueRateLimitUpdate(
|
|||||||
model,
|
model,
|
||||||
context = '',
|
context = '',
|
||||||
keyId = null,
|
keyId = null,
|
||||||
accountType = null
|
accountType = null,
|
||||||
|
preCalculatedCost = null
|
||||||
) {
|
) {
|
||||||
if (!rateLimitInfo) {
|
if (!rateLimitInfo) {
|
||||||
return Promise.resolve({ totalTokens: 0, totalCost: 0 })
|
return Promise.resolve({ totalTokens: 0, totalCost: 0 })
|
||||||
@@ -41,7 +42,14 @@ function queueRateLimitUpdate(
|
|||||||
|
|
||||||
const label = context ? ` (${context})` : ''
|
const label = context ? ` (${context})` : ''
|
||||||
|
|
||||||
return updateRateLimitCounters(rateLimitInfo, usageSummary, model, keyId, accountType)
|
return updateRateLimitCounters(
|
||||||
|
rateLimitInfo,
|
||||||
|
usageSummary,
|
||||||
|
model,
|
||||||
|
keyId,
|
||||||
|
accountType,
|
||||||
|
preCalculatedCost
|
||||||
|
)
|
||||||
.then(({ totalTokens, totalCost }) => {
|
.then(({ totalTokens, totalCost }) => {
|
||||||
if (totalTokens > 0) {
|
if (totalTokens > 0) {
|
||||||
logger.api(`📊 Updated rate limit token count${label}: +${totalTokens} tokens`)
|
logger.api(`📊 Updated rate limit token count${label}: +${totalTokens} tokens`)
|
||||||
@@ -492,24 +500,40 @@ async function handleMessagesRequest(req, res) {
|
|||||||
|
|
||||||
apiKeyService
|
apiKeyService
|
||||||
.recordUsageWithDetails(_apiKeyId, usageObject, model, usageAccountId, accountType)
|
.recordUsageWithDetails(_apiKeyId, usageObject, model, usageAccountId, accountType)
|
||||||
|
.then((costs) => {
|
||||||
|
queueRateLimitUpdate(
|
||||||
|
_rateLimitInfo,
|
||||||
|
{
|
||||||
|
inputTokens,
|
||||||
|
outputTokens,
|
||||||
|
cacheCreateTokens,
|
||||||
|
cacheReadTokens
|
||||||
|
},
|
||||||
|
model,
|
||||||
|
'claude-stream',
|
||||||
|
_apiKeyId,
|
||||||
|
accountType,
|
||||||
|
costs
|
||||||
|
)
|
||||||
|
})
|
||||||
.catch((error) => {
|
.catch((error) => {
|
||||||
logger.error('❌ Failed to record stream usage:', error)
|
logger.error('❌ Failed to record stream usage:', error)
|
||||||
|
// Fallback: 仍然更新限流计数(使用 legacy 计算)
|
||||||
|
queueRateLimitUpdate(
|
||||||
|
_rateLimitInfo,
|
||||||
|
{
|
||||||
|
inputTokens,
|
||||||
|
outputTokens,
|
||||||
|
cacheCreateTokens,
|
||||||
|
cacheReadTokens
|
||||||
|
},
|
||||||
|
model,
|
||||||
|
'claude-stream',
|
||||||
|
_apiKeyId,
|
||||||
|
accountType
|
||||||
|
)
|
||||||
})
|
})
|
||||||
|
|
||||||
queueRateLimitUpdate(
|
|
||||||
_rateLimitInfo,
|
|
||||||
{
|
|
||||||
inputTokens,
|
|
||||||
outputTokens,
|
|
||||||
cacheCreateTokens,
|
|
||||||
cacheReadTokens
|
|
||||||
},
|
|
||||||
model,
|
|
||||||
'claude-stream',
|
|
||||||
_apiKeyId,
|
|
||||||
accountType
|
|
||||||
)
|
|
||||||
|
|
||||||
usageDataCaptured = true
|
usageDataCaptured = true
|
||||||
logger.api(
|
logger.api(
|
||||||
`📊 Stream usage recorded (real) - Model: ${model}, Input: ${inputTokens}, Output: ${outputTokens}, Cache Create: ${cacheCreateTokens}, Cache Read: ${cacheReadTokens}, Total: ${inputTokens + outputTokens + cacheCreateTokens + cacheReadTokens} tokens`
|
`📊 Stream usage recorded (real) - Model: ${model}, Input: ${inputTokens}, Output: ${outputTokens}, Cache Create: ${cacheCreateTokens}, Cache Read: ${cacheReadTokens}, Total: ${inputTokens + outputTokens + cacheCreateTokens + cacheReadTokens} tokens`
|
||||||
@@ -608,24 +632,39 @@ async function handleMessagesRequest(req, res) {
|
|||||||
usageAccountId,
|
usageAccountId,
|
||||||
'claude-console'
|
'claude-console'
|
||||||
)
|
)
|
||||||
|
.then((costs) => {
|
||||||
|
queueRateLimitUpdate(
|
||||||
|
_rateLimitInfoConsole,
|
||||||
|
{
|
||||||
|
inputTokens,
|
||||||
|
outputTokens,
|
||||||
|
cacheCreateTokens,
|
||||||
|
cacheReadTokens
|
||||||
|
},
|
||||||
|
model,
|
||||||
|
'claude-console-stream',
|
||||||
|
_apiKeyIdConsole,
|
||||||
|
accountType,
|
||||||
|
costs
|
||||||
|
)
|
||||||
|
})
|
||||||
.catch((error) => {
|
.catch((error) => {
|
||||||
logger.error('❌ Failed to record stream usage:', error)
|
logger.error('❌ Failed to record stream usage:', error)
|
||||||
|
queueRateLimitUpdate(
|
||||||
|
_rateLimitInfoConsole,
|
||||||
|
{
|
||||||
|
inputTokens,
|
||||||
|
outputTokens,
|
||||||
|
cacheCreateTokens,
|
||||||
|
cacheReadTokens
|
||||||
|
},
|
||||||
|
model,
|
||||||
|
'claude-console-stream',
|
||||||
|
_apiKeyIdConsole,
|
||||||
|
accountType
|
||||||
|
)
|
||||||
})
|
})
|
||||||
|
|
||||||
queueRateLimitUpdate(
|
|
||||||
_rateLimitInfoConsole,
|
|
||||||
{
|
|
||||||
inputTokens,
|
|
||||||
outputTokens,
|
|
||||||
cacheCreateTokens,
|
|
||||||
cacheReadTokens
|
|
||||||
},
|
|
||||||
model,
|
|
||||||
'claude-console-stream',
|
|
||||||
_apiKeyIdConsole,
|
|
||||||
accountType
|
|
||||||
)
|
|
||||||
|
|
||||||
usageDataCaptured = true
|
usageDataCaptured = true
|
||||||
logger.api(
|
logger.api(
|
||||||
`📊 Stream usage recorded (real) - Model: ${model}, Input: ${inputTokens}, Output: ${outputTokens}, Cache Create: ${cacheCreateTokens}, Cache Read: ${cacheReadTokens}, Total: ${inputTokens + outputTokens + cacheCreateTokens + cacheReadTokens} tokens`
|
`📊 Stream usage recorded (real) - Model: ${model}, Input: ${inputTokens}, Output: ${outputTokens}, Cache Create: ${cacheCreateTokens}, Cache Read: ${cacheReadTokens}, Total: ${inputTokens + outputTokens + cacheCreateTokens + cacheReadTokens} tokens`
|
||||||
@@ -674,24 +713,39 @@ async function handleMessagesRequest(req, res) {
|
|||||||
accountId,
|
accountId,
|
||||||
'bedrock'
|
'bedrock'
|
||||||
)
|
)
|
||||||
|
.then((costs) => {
|
||||||
|
queueRateLimitUpdate(
|
||||||
|
_rateLimitInfoBedrock,
|
||||||
|
{
|
||||||
|
inputTokens,
|
||||||
|
outputTokens,
|
||||||
|
cacheCreateTokens: 0,
|
||||||
|
cacheReadTokens: 0
|
||||||
|
},
|
||||||
|
result.model,
|
||||||
|
'bedrock-stream',
|
||||||
|
_apiKeyIdBedrock,
|
||||||
|
'bedrock',
|
||||||
|
costs
|
||||||
|
)
|
||||||
|
})
|
||||||
.catch((error) => {
|
.catch((error) => {
|
||||||
logger.error('❌ Failed to record Bedrock stream usage:', error)
|
logger.error('❌ Failed to record Bedrock stream usage:', error)
|
||||||
|
queueRateLimitUpdate(
|
||||||
|
_rateLimitInfoBedrock,
|
||||||
|
{
|
||||||
|
inputTokens,
|
||||||
|
outputTokens,
|
||||||
|
cacheCreateTokens: 0,
|
||||||
|
cacheReadTokens: 0
|
||||||
|
},
|
||||||
|
result.model,
|
||||||
|
'bedrock-stream',
|
||||||
|
_apiKeyIdBedrock,
|
||||||
|
'bedrock'
|
||||||
|
)
|
||||||
})
|
})
|
||||||
|
|
||||||
queueRateLimitUpdate(
|
|
||||||
_rateLimitInfoBedrock,
|
|
||||||
{
|
|
||||||
inputTokens,
|
|
||||||
outputTokens,
|
|
||||||
cacheCreateTokens: 0,
|
|
||||||
cacheReadTokens: 0
|
|
||||||
},
|
|
||||||
result.model,
|
|
||||||
'bedrock-stream',
|
|
||||||
_apiKeyIdBedrock,
|
|
||||||
'bedrock'
|
|
||||||
)
|
|
||||||
|
|
||||||
usageDataCaptured = true
|
usageDataCaptured = true
|
||||||
logger.api(
|
logger.api(
|
||||||
`📊 Bedrock stream usage recorded - Model: ${result.model}, Input: ${inputTokens}, Output: ${outputTokens}, Total: ${inputTokens + outputTokens} tokens`
|
`📊 Bedrock stream usage recorded - Model: ${result.model}, Input: ${inputTokens}, Output: ${outputTokens}, Total: ${inputTokens + outputTokens} tokens`
|
||||||
@@ -781,24 +835,39 @@ async function handleMessagesRequest(req, res) {
|
|||||||
|
|
||||||
apiKeyService
|
apiKeyService
|
||||||
.recordUsageWithDetails(_apiKeyIdCcr, usageObject, model, usageAccountId, 'ccr')
|
.recordUsageWithDetails(_apiKeyIdCcr, usageObject, model, usageAccountId, 'ccr')
|
||||||
|
.then((costs) => {
|
||||||
|
queueRateLimitUpdate(
|
||||||
|
_rateLimitInfoCcr,
|
||||||
|
{
|
||||||
|
inputTokens,
|
||||||
|
outputTokens,
|
||||||
|
cacheCreateTokens,
|
||||||
|
cacheReadTokens
|
||||||
|
},
|
||||||
|
model,
|
||||||
|
'ccr-stream',
|
||||||
|
_apiKeyIdCcr,
|
||||||
|
'ccr',
|
||||||
|
costs
|
||||||
|
)
|
||||||
|
})
|
||||||
.catch((error) => {
|
.catch((error) => {
|
||||||
logger.error('❌ Failed to record CCR stream usage:', error)
|
logger.error('❌ Failed to record CCR stream usage:', error)
|
||||||
|
queueRateLimitUpdate(
|
||||||
|
_rateLimitInfoCcr,
|
||||||
|
{
|
||||||
|
inputTokens,
|
||||||
|
outputTokens,
|
||||||
|
cacheCreateTokens,
|
||||||
|
cacheReadTokens
|
||||||
|
},
|
||||||
|
model,
|
||||||
|
'ccr-stream',
|
||||||
|
_apiKeyIdCcr,
|
||||||
|
'ccr'
|
||||||
|
)
|
||||||
})
|
})
|
||||||
|
|
||||||
queueRateLimitUpdate(
|
|
||||||
_rateLimitInfoCcr,
|
|
||||||
{
|
|
||||||
inputTokens,
|
|
||||||
outputTokens,
|
|
||||||
cacheCreateTokens,
|
|
||||||
cacheReadTokens
|
|
||||||
},
|
|
||||||
model,
|
|
||||||
'ccr-stream',
|
|
||||||
_apiKeyIdCcr,
|
|
||||||
'ccr'
|
|
||||||
)
|
|
||||||
|
|
||||||
usageDataCaptured = true
|
usageDataCaptured = true
|
||||||
logger.api(
|
logger.api(
|
||||||
`📊 CCR stream usage recorded (real) - Model: ${model}, Input: ${inputTokens}, Output: ${outputTokens}, Cache Create: ${cacheCreateTokens}, Cache Read: ${cacheReadTokens}, Total: ${inputTokens + outputTokens + cacheCreateTokens + cacheReadTokens} tokens`
|
`📊 CCR stream usage recorded (real) - Model: ${model}, Input: ${inputTokens}, Output: ${outputTokens}, Cache Create: ${cacheCreateTokens}, Cache Read: ${cacheReadTokens}, Total: ${inputTokens + outputTokens + cacheCreateTokens + cacheReadTokens} tokens`
|
||||||
@@ -1143,7 +1212,7 @@ async function handleMessagesRequest(req, res) {
|
|||||||
|
|
||||||
// 记录真实的token使用量(包含模型信息和所有4种token以及账户ID)
|
// 记录真实的token使用量(包含模型信息和所有4种token以及账户ID)
|
||||||
const { accountId: responseAccountId } = response
|
const { accountId: responseAccountId } = response
|
||||||
await apiKeyService.recordUsage(
|
const nonStreamCosts = await apiKeyService.recordUsage(
|
||||||
_apiKeyIdNonStream,
|
_apiKeyIdNonStream,
|
||||||
inputTokens,
|
inputTokens,
|
||||||
outputTokens,
|
outputTokens,
|
||||||
@@ -1165,7 +1234,8 @@ async function handleMessagesRequest(req, res) {
|
|||||||
model,
|
model,
|
||||||
'claude-non-stream',
|
'claude-non-stream',
|
||||||
_apiKeyIdNonStream,
|
_apiKeyIdNonStream,
|
||||||
accountType
|
accountType,
|
||||||
|
nonStreamCosts
|
||||||
)
|
)
|
||||||
|
|
||||||
usageRecorded = true
|
usageRecorded = true
|
||||||
|
|||||||
@@ -277,7 +277,10 @@ router.post('/api/user-stats', async (req, res) => {
|
|||||||
cacheCreateTokens: 0,
|
cacheCreateTokens: 0,
|
||||||
cacheReadTokens: 0,
|
cacheReadTokens: 0,
|
||||||
ephemeral5mTokens: 0,
|
ephemeral5mTokens: 0,
|
||||||
ephemeral1hTokens: 0
|
ephemeral1hTokens: 0,
|
||||||
|
realCostMicro: 0,
|
||||||
|
ratedCostMicro: 0,
|
||||||
|
hasStoredCost: false
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -288,28 +291,39 @@ router.post('/api/user-stats', async (req, res) => {
|
|||||||
modelUsage.cacheReadTokens += parseInt(data.cacheReadTokens) || 0
|
modelUsage.cacheReadTokens += parseInt(data.cacheReadTokens) || 0
|
||||||
modelUsage.ephemeral5mTokens += parseInt(data.ephemeral5mTokens) || 0
|
modelUsage.ephemeral5mTokens += parseInt(data.ephemeral5mTokens) || 0
|
||||||
modelUsage.ephemeral1hTokens += parseInt(data.ephemeral1hTokens) || 0
|
modelUsage.ephemeral1hTokens += parseInt(data.ephemeral1hTokens) || 0
|
||||||
|
if ('realCostMicro' in data || 'ratedCostMicro' in data) {
|
||||||
|
modelUsage.realCostMicro += parseInt(data.realCostMicro) || 0
|
||||||
|
modelUsage.ratedCostMicro += parseInt(data.ratedCostMicro) || 0
|
||||||
|
modelUsage.hasStoredCost = true
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// 按模型计算费用并汇总
|
// 按模型计算费用并汇总
|
||||||
for (const [model, usage] of modelUsageMap) {
|
for (const [model, usage] of modelUsageMap) {
|
||||||
const usageData = {
|
if (usage.hasStoredCost) {
|
||||||
input_tokens: usage.inputTokens,
|
// 使用请求时已存储的费用(精确)
|
||||||
output_tokens: usage.outputTokens,
|
totalCost += usage.ratedCostMicro / 1000000
|
||||||
cache_creation_input_tokens: usage.cacheCreateTokens,
|
} else {
|
||||||
cache_read_input_tokens: usage.cacheReadTokens
|
// Legacy fallback:旧数据没有存储费用,从 token 重算
|
||||||
}
|
const usageData = {
|
||||||
|
input_tokens: usage.inputTokens,
|
||||||
// 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费
|
output_tokens: usage.outputTokens,
|
||||||
if (usage.ephemeral5mTokens > 0 || usage.ephemeral1hTokens > 0) {
|
cache_creation_input_tokens: usage.cacheCreateTokens,
|
||||||
usageData.cache_creation = {
|
cache_read_input_tokens: usage.cacheReadTokens
|
||||||
ephemeral_5m_input_tokens: usage.ephemeral5mTokens,
|
|
||||||
ephemeral_1h_input_tokens: usage.ephemeral1hTokens
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
const costResult = CostCalculator.calculateCost(usageData, model)
|
// 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费
|
||||||
totalCost += costResult.costs.total
|
if (usage.ephemeral5mTokens > 0 || usage.ephemeral1hTokens > 0) {
|
||||||
|
usageData.cache_creation = {
|
||||||
|
ephemeral_5m_input_tokens: usage.ephemeral5mTokens,
|
||||||
|
ephemeral_1h_input_tokens: usage.ephemeral1hTokens
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const costResult = CostCalculator.calculateCost(usageData, model)
|
||||||
|
totalCost += costResult.costs.total
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// 如果没有模型级别的详细数据,回退到总体数据计算
|
// 如果没有模型级别的详细数据,回退到总体数据计算
|
||||||
|
|||||||
@@ -30,7 +30,8 @@ function queueRateLimitUpdate(
|
|||||||
model,
|
model,
|
||||||
context = '',
|
context = '',
|
||||||
keyId = null,
|
keyId = null,
|
||||||
accountType = null
|
accountType = null,
|
||||||
|
preCalculatedCost = null
|
||||||
) {
|
) {
|
||||||
if (!rateLimitInfo) {
|
if (!rateLimitInfo) {
|
||||||
return
|
return
|
||||||
@@ -38,7 +39,7 @@ function queueRateLimitUpdate(
|
|||||||
|
|
||||||
const label = context ? ` (${context})` : ''
|
const label = context ? ` (${context})` : ''
|
||||||
|
|
||||||
updateRateLimitCounters(rateLimitInfo, usageSummary, model, keyId, accountType)
|
updateRateLimitCounters(rateLimitInfo, usageSummary, model, keyId, accountType, preCalculatedCost)
|
||||||
.then(({ totalTokens, totalCost }) => {
|
.then(({ totalTokens, totalCost }) => {
|
||||||
if (totalTokens > 0) {
|
if (totalTokens > 0) {
|
||||||
logger.api(`📊 Updated rate limit token count${label}: +${totalTokens} tokens`)
|
logger.api(`📊 Updated rate limit token count${label}: +${totalTokens} tokens`)
|
||||||
@@ -306,23 +307,38 @@ async function handleChatCompletion(req, res, apiKeyData) {
|
|||||||
accountId,
|
accountId,
|
||||||
accountType
|
accountType
|
||||||
)
|
)
|
||||||
|
.then((costs) => {
|
||||||
|
queueRateLimitUpdate(
|
||||||
|
req.rateLimitInfo,
|
||||||
|
{
|
||||||
|
inputTokens: usage.input_tokens || 0,
|
||||||
|
outputTokens: usage.output_tokens || 0,
|
||||||
|
cacheCreateTokens,
|
||||||
|
cacheReadTokens
|
||||||
|
},
|
||||||
|
model,
|
||||||
|
`openai-${accountType}-stream`,
|
||||||
|
req.apiKey?.id,
|
||||||
|
accountType,
|
||||||
|
costs
|
||||||
|
)
|
||||||
|
})
|
||||||
.catch((error) => {
|
.catch((error) => {
|
||||||
logger.error('❌ Failed to record usage:', error)
|
logger.error('❌ Failed to record usage:', error)
|
||||||
|
queueRateLimitUpdate(
|
||||||
|
req.rateLimitInfo,
|
||||||
|
{
|
||||||
|
inputTokens: usage.input_tokens || 0,
|
||||||
|
outputTokens: usage.output_tokens || 0,
|
||||||
|
cacheCreateTokens,
|
||||||
|
cacheReadTokens
|
||||||
|
},
|
||||||
|
model,
|
||||||
|
`openai-${accountType}-stream`,
|
||||||
|
req.apiKey?.id,
|
||||||
|
accountType
|
||||||
|
)
|
||||||
})
|
})
|
||||||
|
|
||||||
queueRateLimitUpdate(
|
|
||||||
req.rateLimitInfo,
|
|
||||||
{
|
|
||||||
inputTokens: usage.input_tokens || 0,
|
|
||||||
outputTokens: usage.output_tokens || 0,
|
|
||||||
cacheCreateTokens,
|
|
||||||
cacheReadTokens
|
|
||||||
},
|
|
||||||
model,
|
|
||||||
`openai-${accountType}-stream`,
|
|
||||||
req.apiKey?.id,
|
|
||||||
accountType
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -444,23 +460,38 @@ async function handleChatCompletion(req, res, apiKeyData) {
|
|||||||
accountId,
|
accountId,
|
||||||
accountType
|
accountType
|
||||||
)
|
)
|
||||||
|
.then((costs) => {
|
||||||
|
queueRateLimitUpdate(
|
||||||
|
req.rateLimitInfo,
|
||||||
|
{
|
||||||
|
inputTokens: usage.input_tokens || 0,
|
||||||
|
outputTokens: usage.output_tokens || 0,
|
||||||
|
cacheCreateTokens,
|
||||||
|
cacheReadTokens
|
||||||
|
},
|
||||||
|
claudeRequest.model,
|
||||||
|
`openai-${accountType}-non-stream`,
|
||||||
|
req.apiKey?.id,
|
||||||
|
accountType,
|
||||||
|
costs
|
||||||
|
)
|
||||||
|
})
|
||||||
.catch((error) => {
|
.catch((error) => {
|
||||||
logger.error('❌ Failed to record usage:', error)
|
logger.error('❌ Failed to record usage:', error)
|
||||||
|
queueRateLimitUpdate(
|
||||||
|
req.rateLimitInfo,
|
||||||
|
{
|
||||||
|
inputTokens: usage.input_tokens || 0,
|
||||||
|
outputTokens: usage.output_tokens || 0,
|
||||||
|
cacheCreateTokens,
|
||||||
|
cacheReadTokens
|
||||||
|
},
|
||||||
|
claudeRequest.model,
|
||||||
|
`openai-${accountType}-non-stream`,
|
||||||
|
req.apiKey?.id,
|
||||||
|
accountType
|
||||||
|
)
|
||||||
})
|
})
|
||||||
|
|
||||||
queueRateLimitUpdate(
|
|
||||||
req.rateLimitInfo,
|
|
||||||
{
|
|
||||||
inputTokens: usage.input_tokens || 0,
|
|
||||||
outputTokens: usage.output_tokens || 0,
|
|
||||||
cacheCreateTokens,
|
|
||||||
cacheReadTokens
|
|
||||||
},
|
|
||||||
claudeRequest.model,
|
|
||||||
`openai-${accountType}-non-stream`,
|
|
||||||
req.apiKey?.id,
|
|
||||||
accountType
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// 返回 OpenAI 格式响应
|
// 返回 OpenAI 格式响应
|
||||||
|
|||||||
@@ -70,7 +70,14 @@ function extractCodexUsageHeaders(headers) {
|
|||||||
return hasData ? snapshot : null
|
return hasData ? snapshot : null
|
||||||
}
|
}
|
||||||
|
|
||||||
async function applyRateLimitTracking(req, usageSummary, model, context = '', accountType = null) {
|
async function applyRateLimitTracking(
|
||||||
|
req,
|
||||||
|
usageSummary,
|
||||||
|
model,
|
||||||
|
context = '',
|
||||||
|
accountType = null,
|
||||||
|
preCalculatedCost = null
|
||||||
|
) {
|
||||||
if (!req.rateLimitInfo) {
|
if (!req.rateLimitInfo) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@@ -83,7 +90,8 @@ async function applyRateLimitTracking(req, usageSummary, model, context = '', ac
|
|||||||
usageSummary,
|
usageSummary,
|
||||||
model,
|
model,
|
||||||
req.apiKey?.id,
|
req.apiKey?.id,
|
||||||
accountType
|
accountType,
|
||||||
|
preCalculatedCost
|
||||||
)
|
)
|
||||||
|
|
||||||
if (totalTokens > 0) {
|
if (totalTokens > 0) {
|
||||||
@@ -613,7 +621,7 @@ const handleResponses = async (req, res) => {
|
|||||||
// 计算实际输入token(总输入减去缓存部分)
|
// 计算实际输入token(总输入减去缓存部分)
|
||||||
const actualInputTokens = Math.max(0, totalInputTokens - cacheReadTokens)
|
const actualInputTokens = Math.max(0, totalInputTokens - cacheReadTokens)
|
||||||
|
|
||||||
await apiKeyService.recordUsage(
|
const nonStreamCosts = await apiKeyService.recordUsage(
|
||||||
apiKeyData.id,
|
apiKeyData.id,
|
||||||
actualInputTokens, // 传递实际输入(不含缓存)
|
actualInputTokens, // 传递实际输入(不含缓存)
|
||||||
outputTokens,
|
outputTokens,
|
||||||
@@ -638,7 +646,8 @@ const handleResponses = async (req, res) => {
|
|||||||
},
|
},
|
||||||
actualModel,
|
actualModel,
|
||||||
'openai-non-stream',
|
'openai-non-stream',
|
||||||
'openai'
|
'openai',
|
||||||
|
nonStreamCosts
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -729,7 +738,7 @@ const handleResponses = async (req, res) => {
|
|||||||
// 使用响应中的真实 model,如果没有则使用请求中的 model,最后回退到默认值
|
// 使用响应中的真实 model,如果没有则使用请求中的 model,最后回退到默认值
|
||||||
const modelToRecord = actualModel || requestedModel || 'gpt-4'
|
const modelToRecord = actualModel || requestedModel || 'gpt-4'
|
||||||
|
|
||||||
await apiKeyService.recordUsage(
|
const streamCosts = await apiKeyService.recordUsage(
|
||||||
apiKeyData.id,
|
apiKeyData.id,
|
||||||
actualInputTokens, // 传递实际输入(不含缓存)
|
actualInputTokens, // 传递实际输入(不含缓存)
|
||||||
outputTokens,
|
outputTokens,
|
||||||
@@ -755,7 +764,8 @@ const handleResponses = async (req, res) => {
|
|||||||
},
|
},
|
||||||
modelToRecord,
|
modelToRecord,
|
||||||
'openai-stream',
|
'openai-stream',
|
||||||
'openai'
|
'openai',
|
||||||
|
streamCosts
|
||||||
)
|
)
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
logger.error('Failed to record OpenAI usage:', error)
|
logger.error('Failed to record OpenAI usage:', error)
|
||||||
|
|||||||
@@ -1805,7 +1805,8 @@ async function applyRateLimitTracking(
|
|||||||
usageSummary,
|
usageSummary,
|
||||||
model,
|
model,
|
||||||
context = '',
|
context = '',
|
||||||
keyId = null
|
keyId = null,
|
||||||
|
preCalculatedCost = null
|
||||||
) {
|
) {
|
||||||
if (!rateLimitInfo) {
|
if (!rateLimitInfo) {
|
||||||
return
|
return
|
||||||
@@ -1819,7 +1820,8 @@ async function applyRateLimitTracking(
|
|||||||
usageSummary,
|
usageSummary,
|
||||||
model,
|
model,
|
||||||
keyId,
|
keyId,
|
||||||
'gemini'
|
'gemini',
|
||||||
|
preCalculatedCost
|
||||||
)
|
)
|
||||||
if (totalTokens > 0) {
|
if (totalTokens > 0) {
|
||||||
logger.api(`📊 Updated rate limit token count${label}: +${totalTokens} tokens`)
|
logger.api(`📊 Updated rate limit token count${label}: +${totalTokens} tokens`)
|
||||||
@@ -2135,7 +2137,7 @@ async function handleAnthropicMessagesToGemini(req, res, { vendor, baseModel })
|
|||||||
: mapGeminiFinishReasonToAnthropicStopReason(finishReason)
|
: mapGeminiFinishReasonToAnthropicStopReason(finishReason)
|
||||||
|
|
||||||
if (req.apiKey?.id && (inputTokens > 0 || outputTokens > 0)) {
|
if (req.apiKey?.id && (inputTokens > 0 || outputTokens > 0)) {
|
||||||
await apiKeyService.recordUsage(
|
const bridgeCosts = await apiKeyService.recordUsage(
|
||||||
req.apiKey.id,
|
req.apiKey.id,
|
||||||
inputTokens,
|
inputTokens,
|
||||||
outputTokens,
|
outputTokens,
|
||||||
@@ -2150,7 +2152,8 @@ async function handleAnthropicMessagesToGemini(req, res, { vendor, baseModel })
|
|||||||
{ inputTokens, outputTokens, cacheCreateTokens: 0, cacheReadTokens: 0 },
|
{ inputTokens, outputTokens, cacheCreateTokens: 0, cacheReadTokens: 0 },
|
||||||
effectiveModel,
|
effectiveModel,
|
||||||
'anthropic-messages',
|
'anthropic-messages',
|
||||||
req.apiKey?.id
|
req.apiKey?.id,
|
||||||
|
bridgeCosts
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2675,7 +2678,7 @@ async function handleAnthropicMessagesToGemini(req, res, { vendor, baseModel })
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (req.apiKey?.id && (inputTokens > 0 || outputTokens > 0)) {
|
if (req.apiKey?.id && (inputTokens > 0 || outputTokens > 0)) {
|
||||||
await apiKeyService.recordUsage(
|
const bridgeStreamCosts = await apiKeyService.recordUsage(
|
||||||
req.apiKey.id,
|
req.apiKey.id,
|
||||||
inputTokens,
|
inputTokens,
|
||||||
outputTokens,
|
outputTokens,
|
||||||
@@ -2689,7 +2692,9 @@ async function handleAnthropicMessagesToGemini(req, res, { vendor, baseModel })
|
|||||||
req.rateLimitInfo,
|
req.rateLimitInfo,
|
||||||
{ inputTokens, outputTokens, cacheCreateTokens: 0, cacheReadTokens: 0 },
|
{ inputTokens, outputTokens, cacheCreateTokens: 0, cacheReadTokens: 0 },
|
||||||
effectiveModel,
|
effectiveModel,
|
||||||
'anthropic-messages-stream'
|
'anthropic-messages-stream',
|
||||||
|
req.apiKey?.id,
|
||||||
|
bridgeStreamCosts
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1662,8 +1662,11 @@ class ApiKeyService {
|
|||||||
logParts.push(`Total: ${totalTokens} tokens`)
|
logParts.push(`Total: ${totalTokens} tokens`)
|
||||||
|
|
||||||
logger.database(`📊 Recorded usage: ${keyId} - ${logParts.join(', ')}`)
|
logger.database(`📊 Recorded usage: ${keyId} - ${logParts.join(', ')}`)
|
||||||
|
|
||||||
|
return { realCost, ratedCost }
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
logger.error('❌ Failed to record usage:', error)
|
logger.error('❌ Failed to record usage:', error)
|
||||||
|
return { realCost: 0, ratedCost: 0 }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1958,8 +1961,11 @@ class ApiKeyService {
|
|||||||
// 发布失败不影响主流程,只记录错误
|
// 发布失败不影响主流程,只记录错误
|
||||||
logger.warn('⚠️ Failed to publish billing event:', err.message)
|
logger.warn('⚠️ Failed to publish billing event:', err.message)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
return { realCost: realCostWithDetails, ratedCost: ratedCostWithDetails }
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
logger.error('❌ Failed to record usage:', error)
|
logger.error('❌ Failed to record usage:', error)
|
||||||
|
return { realCost: 0, ratedCost: 0 }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -528,11 +528,6 @@ class PricingService {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Claude Fast Mode 目前仅适用于 Opus 4.6 系列
|
|
||||||
isFastModeEligibleClaudeModel(modelName) {
|
|
||||||
return typeof modelName === 'string' && modelName.toLowerCase().includes('opus-4-6')
|
|
||||||
}
|
|
||||||
|
|
||||||
// 去掉模型名中的 [1m] 后缀,便于价格查找
|
// 去掉模型名中的 [1m] 后缀,便于价格查找
|
||||||
stripLongContextSuffix(modelName) {
|
stripLongContextSuffix(modelName) {
|
||||||
if (typeof modelName !== 'string') {
|
if (typeof modelName !== 'string') {
|
||||||
@@ -541,45 +536,6 @@ class PricingService {
|
|||||||
return modelName.replace(/\[1m\]/gi, '').trim()
|
return modelName.replace(/\[1m\]/gi, '').trim()
|
||||||
}
|
}
|
||||||
|
|
||||||
// 获取 Fast Mode 对应的价格条目(仅匹配 fast/ 前缀)
|
|
||||||
getFastModePricing(modelName) {
|
|
||||||
if (!this.pricingData || !modelName) {
|
|
||||||
return null
|
|
||||||
}
|
|
||||||
|
|
||||||
const cleanedModelName = this.stripLongContextSuffix(modelName)
|
|
||||||
const exactCandidates = new Set([`fast/${cleanedModelName}`])
|
|
||||||
|
|
||||||
if (cleanedModelName.startsWith('fast/')) {
|
|
||||||
exactCandidates.add(cleanedModelName)
|
|
||||||
}
|
|
||||||
|
|
||||||
for (const candidate of exactCandidates) {
|
|
||||||
if (this.pricingData[candidate]) {
|
|
||||||
logger.debug(`💰 Found exact fast pricing for ${modelName}: ${candidate}`)
|
|
||||||
return this.pricingData[candidate]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const normalizedModel = cleanedModelName.toLowerCase().replace(/[_-]/g, '')
|
|
||||||
for (const [key, value] of Object.entries(this.pricingData)) {
|
|
||||||
if (!key.startsWith('fast/')) {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
const normalizedFastKey = key.slice('fast/'.length).toLowerCase().replace(/[_-]/g, '')
|
|
||||||
if (
|
|
||||||
normalizedFastKey.includes(normalizedModel) ||
|
|
||||||
normalizedModel.includes(normalizedFastKey)
|
|
||||||
) {
|
|
||||||
logger.debug(`💰 Found fuzzy fast pricing for ${modelName}: ${key}`)
|
|
||||||
return value
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
logger.debug(`💰 No fast pricing found for model: ${modelName}`)
|
|
||||||
return null
|
|
||||||
}
|
|
||||||
|
|
||||||
// 获取 1 小时缓存价格(优先使用 model_pricing.json 中的模型字段)
|
// 获取 1 小时缓存价格(优先使用 model_pricing.json 中的模型字段)
|
||||||
getEphemeral1hPricing(modelName, pricing = null) {
|
getEphemeral1hPricing(modelName, pricing = null) {
|
||||||
if (
|
if (
|
||||||
@@ -606,7 +562,7 @@ class PricingService {
|
|||||||
|
|
||||||
// 检查是否是 Opus 系列
|
// 检查是否是 Opus 系列
|
||||||
if (modelLower.includes('opus')) {
|
if (modelLower.includes('opus')) {
|
||||||
return 0.00003 // $30/MTok
|
return 0.00001 // $10/MTok
|
||||||
}
|
}
|
||||||
|
|
||||||
// 检查是否是 Sonnet 系列
|
// 检查是否是 Sonnet 系列
|
||||||
@@ -616,7 +572,7 @@ class PricingService {
|
|||||||
|
|
||||||
// 检查是否是 Haiku 系列
|
// 检查是否是 Haiku 系列
|
||||||
if (modelLower.includes('haiku')) {
|
if (modelLower.includes('haiku')) {
|
||||||
return 0.0000016 // $1.6/MTok
|
return 0.000002 // $2/MTok
|
||||||
}
|
}
|
||||||
|
|
||||||
// 默认返回 0(未知模型)
|
// 默认返回 0(未知模型)
|
||||||
@@ -647,15 +603,14 @@ class PricingService {
|
|||||||
const hasFastSpeedSignal =
|
const hasFastSpeedSignal =
|
||||||
responseSpeed === this.claudeFeatureFlags.fastModeSpeed ||
|
responseSpeed === this.claudeFeatureFlags.fastModeSpeed ||
|
||||||
requestSpeed === this.claudeFeatureFlags.fastModeSpeed
|
requestSpeed === this.claudeFeatureFlags.fastModeSpeed
|
||||||
const isFastModeRequest =
|
const isFastModeRequest = hasFastModeBeta && hasFastSpeedSignal
|
||||||
hasFastModeBeta &&
|
|
||||||
hasFastSpeedSignal &&
|
|
||||||
this.isFastModeEligibleClaudeModel(normalizedModelName)
|
|
||||||
const standardPricing = this.getModelPricing(modelName)
|
const standardPricing = this.getModelPricing(modelName)
|
||||||
const fastPricing = isFastModeRequest ? this.getFastModePricing(normalizedModelName) : null
|
const pricing = standardPricing
|
||||||
const pricing = fastPricing || standardPricing
|
|
||||||
const isLongContextModeEnabled = isLongContextModel || hasContext1mBeta
|
const isLongContextModeEnabled = isLongContextModel || hasContext1mBeta
|
||||||
|
|
||||||
|
// Fast Mode 倍率:优先从 provider_specific_entry.fast 读取,默认 6 倍
|
||||||
|
const fastMultiplier = isFastModeRequest ? pricing?.provider_specific_entry?.fast || 6 : 1
|
||||||
|
|
||||||
// 当 [1m] 模型总输入超过 200K 时,进入 200K+ 计费逻辑
|
// 当 [1m] 模型总输入超过 200K 时,进入 200K+ 计费逻辑
|
||||||
// 根据 Anthropic 官方文档:当总输入超过 200K 时,整个请求所有 token 类型都使用高档价格
|
// 根据 Anthropic 官方文档:当总输入超过 200K 时,整个请求所有 token 类型都使用高档价格
|
||||||
if (isLongContextModeEnabled && totalInputTokens > 200000) {
|
if (isLongContextModeEnabled && totalInputTokens > 200000) {
|
||||||
@@ -685,11 +640,13 @@ class PricingService {
|
|||||||
(typeof pricing?.litellm_provider === 'string' &&
|
(typeof pricing?.litellm_provider === 'string' &&
|
||||||
pricing.litellm_provider.toLowerCase().includes('anthropic'))
|
pricing.litellm_provider.toLowerCase().includes('anthropic'))
|
||||||
|
|
||||||
if (isFastModeRequest && fastPricing) {
|
if (isFastModeRequest && fastMultiplier > 1) {
|
||||||
logger.info(`🚀 Fast mode pricing profile selected: fast/${normalizedModelName}`)
|
logger.info(
|
||||||
} else if (isFastModeRequest && !fastPricing) {
|
`🚀 Fast mode ${fastMultiplier}x multiplier applied for ${normalizedModelName} (from provider_specific_entry)`
|
||||||
|
)
|
||||||
|
} else if (isFastModeRequest) {
|
||||||
logger.warn(
|
logger.warn(
|
||||||
`⚠️ Fast mode request detected but no fast pricing profile found for ${normalizedModelName}; fallback to standard profile`
|
`⚠️ Fast mode request detected but no fast pricing found for ${normalizedModelName}; fallback to standard profile`
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -700,7 +657,7 @@ class PricingService {
|
|||||||
|
|
||||||
// 确定实际使用的输入价格(普通或 200K+ 高档价格)
|
// 确定实际使用的输入价格(普通或 200K+ 高档价格)
|
||||||
// Claude 模型在 200K+ 场景下如果缺少官方字段,按 2 倍输入价兜底
|
// Claude 模型在 200K+ 场景下如果缺少官方字段,按 2 倍输入价兜底
|
||||||
const actualInputPrice = useLongContextPricing
|
let actualInputPrice = useLongContextPricing
|
||||||
? hasInput200kPrice
|
? hasInput200kPrice
|
||||||
? pricing.input_cost_per_token_above_200k_tokens
|
? pricing.input_cost_per_token_above_200k_tokens
|
||||||
: isClaudeModel
|
: isClaudeModel
|
||||||
@@ -712,12 +669,18 @@ class PricingService {
|
|||||||
const hasOutput200kPrice =
|
const hasOutput200kPrice =
|
||||||
pricing.output_cost_per_token_above_200k_tokens !== null &&
|
pricing.output_cost_per_token_above_200k_tokens !== null &&
|
||||||
pricing.output_cost_per_token_above_200k_tokens !== undefined
|
pricing.output_cost_per_token_above_200k_tokens !== undefined
|
||||||
const actualOutputPrice = useLongContextPricing
|
let actualOutputPrice = useLongContextPricing
|
||||||
? hasOutput200kPrice
|
? hasOutput200kPrice
|
||||||
? pricing.output_cost_per_token_above_200k_tokens
|
? pricing.output_cost_per_token_above_200k_tokens
|
||||||
: baseOutputPrice
|
: baseOutputPrice
|
||||||
: baseOutputPrice
|
: baseOutputPrice
|
||||||
|
|
||||||
|
// 应用 Fast Mode 倍率(在 200K+ 价格之上叠加)
|
||||||
|
if (fastMultiplier > 1) {
|
||||||
|
actualInputPrice *= fastMultiplier
|
||||||
|
actualOutputPrice *= fastMultiplier
|
||||||
|
}
|
||||||
|
|
||||||
let actualCacheCreatePrice = 0
|
let actualCacheCreatePrice = 0
|
||||||
let actualCacheReadPrice = 0
|
let actualCacheReadPrice = 0
|
||||||
let actualEphemeral1hPrice = 0
|
let actualEphemeral1hPrice = 0
|
||||||
|
|||||||
@@ -91,7 +91,14 @@ class DroidRelayService {
|
|||||||
return normalizedBody
|
return normalizedBody
|
||||||
}
|
}
|
||||||
|
|
||||||
async _applyRateLimitTracking(rateLimitInfo, usageSummary, model, context = '', keyId = null) {
|
async _applyRateLimitTracking(
|
||||||
|
rateLimitInfo,
|
||||||
|
usageSummary,
|
||||||
|
model,
|
||||||
|
context = '',
|
||||||
|
keyId = null,
|
||||||
|
preCalculatedCost = null
|
||||||
|
) {
|
||||||
if (!rateLimitInfo) {
|
if (!rateLimitInfo) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@@ -102,7 +109,8 @@ class DroidRelayService {
|
|||||||
usageSummary,
|
usageSummary,
|
||||||
model,
|
model,
|
||||||
keyId,
|
keyId,
|
||||||
'droid'
|
'droid',
|
||||||
|
preCalculatedCost
|
||||||
)
|
)
|
||||||
|
|
||||||
if (totalTokens > 0) {
|
if (totalTokens > 0) {
|
||||||
@@ -616,7 +624,7 @@ class DroidRelayService {
|
|||||||
|
|
||||||
// 记录 usage 数据
|
// 记录 usage 数据
|
||||||
if (!skipUsageRecord) {
|
if (!skipUsageRecord) {
|
||||||
const normalizedUsage = await this._recordUsageFromStreamData(
|
const { normalizedUsage, costs: streamCosts } = await this._recordUsageFromStreamData(
|
||||||
currentUsageData,
|
currentUsageData,
|
||||||
apiKeyData,
|
apiKeyData,
|
||||||
account,
|
account,
|
||||||
@@ -635,7 +643,8 @@ class DroidRelayService {
|
|||||||
usageSummary,
|
usageSummary,
|
||||||
model,
|
model,
|
||||||
' [stream]',
|
' [stream]',
|
||||||
keyId
|
keyId,
|
||||||
|
streamCosts
|
||||||
)
|
)
|
||||||
|
|
||||||
logger.success(`Droid stream completed - Account: ${account.name}`)
|
logger.success(`Droid stream completed - Account: ${account.name}`)
|
||||||
@@ -871,8 +880,8 @@ class DroidRelayService {
|
|||||||
*/
|
*/
|
||||||
async _recordUsageFromStreamData(usageData, apiKeyData, account, model) {
|
async _recordUsageFromStreamData(usageData, apiKeyData, account, model) {
|
||||||
const normalizedUsage = this._normalizeUsageSnapshot(usageData)
|
const normalizedUsage = this._normalizeUsageSnapshot(usageData)
|
||||||
await this._recordUsage(apiKeyData, account, model, normalizedUsage)
|
const costs = await this._recordUsage(apiKeyData, account, model, normalizedUsage)
|
||||||
return normalizedUsage
|
return { normalizedUsage, costs }
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -1234,7 +1243,7 @@ class DroidRelayService {
|
|||||||
const normalizedUsage = this._normalizeUsageSnapshot(usage)
|
const normalizedUsage = this._normalizeUsageSnapshot(usage)
|
||||||
|
|
||||||
if (!skipUsageRecord) {
|
if (!skipUsageRecord) {
|
||||||
await this._recordUsage(apiKeyData, account, model, normalizedUsage)
|
const droidCosts = await this._recordUsage(apiKeyData, account, model, normalizedUsage)
|
||||||
|
|
||||||
const totalTokens = this._getTotalTokens(normalizedUsage)
|
const totalTokens = this._getTotalTokens(normalizedUsage)
|
||||||
|
|
||||||
@@ -1256,7 +1265,8 @@ class DroidRelayService {
|
|||||||
usageSummary,
|
usageSummary,
|
||||||
model,
|
model,
|
||||||
endpointLabel,
|
endpointLabel,
|
||||||
keyId
|
keyId,
|
||||||
|
droidCosts
|
||||||
)
|
)
|
||||||
|
|
||||||
logger.success(
|
logger.success(
|
||||||
@@ -1283,15 +1293,22 @@ class DroidRelayService {
|
|||||||
|
|
||||||
if (totalTokens <= 0) {
|
if (totalTokens <= 0) {
|
||||||
logger.debug('🪙 Droid usage 数据为空,跳过记录')
|
logger.debug('🪙 Droid usage 数据为空,跳过记录')
|
||||||
return
|
return { realCost: 0, ratedCost: 0 }
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const keyId = apiKeyData?.id
|
const keyId = apiKeyData?.id
|
||||||
const accountId = this._extractAccountId(account)
|
const accountId = this._extractAccountId(account)
|
||||||
|
let costs = { realCost: 0, ratedCost: 0 }
|
||||||
|
|
||||||
if (keyId) {
|
if (keyId) {
|
||||||
await apiKeyService.recordUsageWithDetails(keyId, usageObject, model, accountId, 'droid')
|
costs = await apiKeyService.recordUsageWithDetails(
|
||||||
|
keyId,
|
||||||
|
usageObject,
|
||||||
|
model,
|
||||||
|
accountId,
|
||||||
|
'droid'
|
||||||
|
)
|
||||||
} else if (accountId) {
|
} else if (accountId) {
|
||||||
await redis.incrementAccountUsage(
|
await redis.incrementAccountUsage(
|
||||||
accountId,
|
accountId,
|
||||||
@@ -1307,14 +1324,17 @@ class DroidRelayService {
|
|||||||
)
|
)
|
||||||
} else {
|
} else {
|
||||||
logger.warn('⚠️ 无法记录 Droid usage:缺少 API Key 和账户标识')
|
logger.warn('⚠️ 无法记录 Droid usage:缺少 API Key 和账户标识')
|
||||||
return
|
return { realCost: 0, ratedCost: 0 }
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.debug(
|
logger.debug(
|
||||||
`📊 Droid usage recorded - Key: ${keyId || 'unknown'}, Account: ${accountId || 'unknown'}, Model: ${model}, Input: ${usageObject.input_tokens || 0}, Output: ${usageObject.output_tokens || 0}, Cache Create: ${usageObject.cache_creation_input_tokens || 0}, Cache Read: ${usageObject.cache_read_input_tokens || 0}, Total: ${totalTokens}`
|
`📊 Droid usage recorded - Key: ${keyId || 'unknown'}, Account: ${accountId || 'unknown'}, Model: ${model}, Input: ${usageObject.input_tokens || 0}, Output: ${usageObject.output_tokens || 0}, Cache Create: ${usageObject.cache_creation_input_tokens || 0}, Cache Read: ${usageObject.cache_read_input_tokens || 0}, Total: ${totalTokens}`
|
||||||
)
|
)
|
||||||
|
|
||||||
|
return costs
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
logger.error('❌ Failed to record Droid usage:', error)
|
logger.error('❌ Failed to record Droid usage:', error)
|
||||||
|
return { realCost: 0, ratedCost: 0 }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -8,12 +8,14 @@ function toNumber(value) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// keyId 和 accountType 用于计算倍率成本
|
// keyId 和 accountType 用于计算倍率成本
|
||||||
|
// preCalculatedCost: 可选的 { realCost, ratedCost },由调用方提供以避免重复计算
|
||||||
async function updateRateLimitCounters(
|
async function updateRateLimitCounters(
|
||||||
rateLimitInfo,
|
rateLimitInfo,
|
||||||
usageSummary,
|
usageSummary,
|
||||||
model,
|
model,
|
||||||
keyId = null,
|
keyId = null,
|
||||||
accountType = null
|
accountType = null,
|
||||||
|
preCalculatedCost = null
|
||||||
) {
|
) {
|
||||||
if (!rateLimitInfo) {
|
if (!rateLimitInfo) {
|
||||||
return { totalTokens: 0, totalCost: 0, ratedCost: 0 }
|
return { totalTokens: 0, totalCost: 0, ratedCost: 0 }
|
||||||
@@ -36,47 +38,68 @@ async function updateRateLimitCounters(
|
|||||||
}
|
}
|
||||||
|
|
||||||
let totalCost = 0
|
let totalCost = 0
|
||||||
const usagePayload = {
|
let ratedCost = 0
|
||||||
input_tokens: inputTokens,
|
|
||||||
output_tokens: outputTokens,
|
|
||||||
cache_creation_input_tokens: cacheCreateTokens,
|
|
||||||
cache_read_input_tokens: cacheReadTokens
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
if (
|
||||||
const costInfo = pricingService.calculateCost(usagePayload, model)
|
preCalculatedCost &&
|
||||||
const { totalCost: calculatedCost } = costInfo || {}
|
typeof preCalculatedCost.ratedCost === 'number' &&
|
||||||
if (typeof calculatedCost === 'number') {
|
preCalculatedCost.ratedCost > 0
|
||||||
totalCost = calculatedCost
|
) {
|
||||||
|
// 使用调用方已计算好的费用(避免重复计算,且能正确处理 1h 缓存、Fast Mode 等特殊计费)
|
||||||
|
// eslint-disable-next-line prefer-destructuring
|
||||||
|
ratedCost = preCalculatedCost.ratedCost
|
||||||
|
totalCost = preCalculatedCost.realCost || 0
|
||||||
|
} else if (
|
||||||
|
preCalculatedCost &&
|
||||||
|
typeof preCalculatedCost.realCost === 'number' &&
|
||||||
|
preCalculatedCost.realCost > 0
|
||||||
|
) {
|
||||||
|
// 有 realCost 但 ratedCost 为 0 或缺失,使用 realCost
|
||||||
|
totalCost = preCalculatedCost.realCost
|
||||||
|
ratedCost = preCalculatedCost.realCost
|
||||||
|
} else {
|
||||||
|
// Legacy fallback:调用方未提供费用时自行计算(不支持 1h 缓存等特殊计费)
|
||||||
|
const usagePayload = {
|
||||||
|
input_tokens: inputTokens,
|
||||||
|
output_tokens: outputTokens,
|
||||||
|
cache_creation_input_tokens: cacheCreateTokens,
|
||||||
|
cache_read_input_tokens: cacheReadTokens
|
||||||
}
|
}
|
||||||
} catch (error) {
|
|
||||||
// 忽略此处错误,后续使用备用计算
|
|
||||||
totalCost = 0
|
|
||||||
}
|
|
||||||
|
|
||||||
if (totalCost === 0) {
|
|
||||||
try {
|
try {
|
||||||
const fallback = CostCalculator.calculateCost(usagePayload, model)
|
const costInfo = pricingService.calculateCost(usagePayload, model)
|
||||||
const { costs } = fallback || {}
|
const { totalCost: calculatedCost } = costInfo || {}
|
||||||
if (costs && typeof costs.total === 'number') {
|
if (typeof calculatedCost === 'number') {
|
||||||
totalCost = costs.total
|
totalCost = calculatedCost
|
||||||
}
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
|
// 忽略此处错误,后续使用备用计算
|
||||||
totalCost = 0
|
totalCost = 0
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// 计算倍率成本(用于限流计数)
|
if (totalCost === 0) {
|
||||||
let ratedCost = totalCost
|
try {
|
||||||
if (totalCost > 0 && keyId) {
|
const fallback = CostCalculator.calculateCost(usagePayload, model)
|
||||||
try {
|
const { costs } = fallback || {}
|
||||||
const apiKeyService = require('../services/apiKeyService')
|
if (costs && typeof costs.total === 'number') {
|
||||||
const serviceRatesService = require('../services/serviceRatesService')
|
totalCost = costs.total
|
||||||
const service = serviceRatesService.getService(accountType, model)
|
}
|
||||||
ratedCost = await apiKeyService.calculateRatedCost(keyId, service, totalCost)
|
} catch (error) {
|
||||||
} catch (error) {
|
totalCost = 0
|
||||||
// 倍率计算失败时使用真实成本
|
}
|
||||||
ratedCost = totalCost
|
}
|
||||||
|
|
||||||
|
// 计算倍率成本(用于限流计数)
|
||||||
|
ratedCost = totalCost
|
||||||
|
if (totalCost > 0 && keyId) {
|
||||||
|
try {
|
||||||
|
const apiKeyService = require('../services/apiKeyService')
|
||||||
|
const serviceRatesService = require('../services/serviceRatesService')
|
||||||
|
const service = serviceRatesService.getService(accountType, model)
|
||||||
|
ratedCost = await apiKeyService.calculateRatedCost(keyId, service, totalCost)
|
||||||
|
} catch (error) {
|
||||||
|
ratedCost = totalCost
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -39,49 +39,27 @@ jest.mock('fs', () => {
|
|||||||
describe('PricingService - 200K+ Long Context Pricing', () => {
|
describe('PricingService - 200K+ Long Context Pricing', () => {
|
||||||
let pricingService
|
let pricingService
|
||||||
const fs = require('fs')
|
const fs = require('fs')
|
||||||
|
const path = require('path')
|
||||||
|
|
||||||
// 模拟 claude-sonnet-4-20250514 的完整价格数据(来自 model_pricing.json)
|
// 使用真实的 model_pricing.json 数据(优先 data/,fallback 到 resources/)
|
||||||
const mockPricingData = {
|
const realFs = jest.requireActual('fs')
|
||||||
'claude-sonnet-4-20250514': {
|
const primaryPath = path.join(process.cwd(), 'data', 'model_pricing.json')
|
||||||
input_cost_per_token: 0.000003, // $3/MTok
|
const fallbackPath = path.join(
|
||||||
output_cost_per_token: 0.000015, // $15/MTok
|
process.cwd(),
|
||||||
cache_creation_input_token_cost: 0.00000375, // $3.75/MTok
|
'resources',
|
||||||
cache_read_input_token_cost: 0.0000003, // $0.30/MTok
|
'model-pricing',
|
||||||
max_input_tokens: 1000000,
|
'model_prices_and_context_window.json'
|
||||||
// 200K+ 高档价格
|
)
|
||||||
input_cost_per_token_above_200k_tokens: 0.000006, // $6/MTok (2x)
|
const pricingFilePath = realFs.existsSync(primaryPath) ? primaryPath : fallbackPath
|
||||||
output_cost_per_token_above_200k_tokens: 0.0000225, // $22.50/MTok (1.5x)
|
const pricingData = JSON.parse(realFs.readFileSync(pricingFilePath, 'utf8'))
|
||||||
cache_creation_input_token_cost_above_200k_tokens: 0.0000075, // $7.50/MTok (2x)
|
|
||||||
cache_read_input_token_cost_above_200k_tokens: 0.0000006, // $0.60/MTok (2x)
|
|
||||||
// 1小时缓存价格
|
|
||||||
cache_creation_input_token_cost_above_1hr: 0.0000075,
|
|
||||||
cache_creation_input_token_cost_above_1hr_above_200k_tokens: 0.000015
|
|
||||||
},
|
|
||||||
// 没有 above_200k 字段的模型
|
|
||||||
'claude-3-haiku-20240307': {
|
|
||||||
input_cost_per_token: 0.00000025,
|
|
||||||
output_cost_per_token: 0.00000125,
|
|
||||||
cache_creation_input_token_cost: 0.0000003,
|
|
||||||
cache_read_input_token_cost: 0.00000003
|
|
||||||
},
|
|
||||||
// Fast Mode 适配测试模型(Opus 4.6)
|
|
||||||
'claude-opus-4-6': {
|
|
||||||
input_cost_per_token: 0.000005,
|
|
||||||
output_cost_per_token: 0.000025,
|
|
||||||
cache_creation_input_token_cost: 0.00000625,
|
|
||||||
cache_read_input_token_cost: 0.0000005,
|
|
||||||
input_cost_per_token_above_200k_tokens: 0.00001,
|
|
||||||
output_cost_per_token_above_200k_tokens: 0.0000375
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
beforeEach(() => {
|
beforeEach(() => {
|
||||||
// 清除缓存的模块
|
// 清除缓存的模块
|
||||||
jest.resetModules()
|
jest.resetModules()
|
||||||
|
|
||||||
// 配置 fs mock
|
// 配置 fs mock(防止 pricingService 初始化时的文件副作用)
|
||||||
fs.existsSync.mockReturnValue(true)
|
fs.existsSync.mockReturnValue(true)
|
||||||
fs.readFileSync.mockReturnValue(JSON.stringify(mockPricingData))
|
fs.readFileSync.mockReturnValue(JSON.stringify(pricingData))
|
||||||
fs.statSync.mockReturnValue({ mtime: new Date(), mtimeMs: Date.now() })
|
fs.statSync.mockReturnValue({ mtime: new Date(), mtimeMs: Date.now() })
|
||||||
fs.watchFile.mockImplementation(() => {})
|
fs.watchFile.mockImplementation(() => {})
|
||||||
fs.unwatchFile.mockImplementation(() => {})
|
fs.unwatchFile.mockImplementation(() => {})
|
||||||
@@ -89,8 +67,8 @@ describe('PricingService - 200K+ Long Context Pricing', () => {
|
|||||||
// 重新加载 pricingService
|
// 重新加载 pricingService
|
||||||
pricingService = require('../src/services/pricingService')
|
pricingService = require('../src/services/pricingService')
|
||||||
|
|
||||||
// 直接设置价格数据(绕过初始化)
|
// 直接设置真实价格数据(绕过网络初始化)
|
||||||
pricingService.pricingData = mockPricingData
|
pricingService.pricingData = pricingData
|
||||||
pricingService.lastUpdated = new Date()
|
pricingService.lastUpdated = new Date()
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user