mirror of
https://github.com/Wei-Shaw/claude-relay-service.git
synced 2026-03-30 00:33:35 +00:00
Merge pull request #952 from sczheng189/feature/claude_price_count [skip ci]
feat: 1m上下文计费,1h计费
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -247,3 +247,5 @@ web/apiStats/
|
||||
|
||||
# Admin SPA build files
|
||||
web/admin-spa/dist/
|
||||
|
||||
.serena/
|
||||
|
||||
@@ -9,7 +9,7 @@ const ClientValidator = require('../validators/clientValidator')
|
||||
const ClaudeCodeValidator = require('../validators/clients/claudeCodeValidator')
|
||||
const claudeRelayConfigService = require('../services/claudeRelayConfigService')
|
||||
const { calculateWaitTimeStats } = require('../utils/statsHelper')
|
||||
const { isOpusModel } = require('../utils/modelHelper')
|
||||
const { isClaudeFamilyModel } = require('../utils/modelHelper')
|
||||
|
||||
// 工具函数
|
||||
function sleep(ms) {
|
||||
@@ -1256,7 +1256,7 @@ const authenticateApiKey = async (req, res, next) => {
|
||||
const model = requestBody.model || ''
|
||||
|
||||
// 判断是否为 Claude 模型
|
||||
if (isOpusModel(model)) {
|
||||
if (isClaudeFamilyModel(model)) {
|
||||
const weeklyOpusCost = validation.keyData.weeklyOpusCost || 0
|
||||
|
||||
if (weeklyOpusCost >= weeklyOpusCostLimit) {
|
||||
|
||||
@@ -1084,6 +1084,9 @@ class RedisClient {
|
||||
pipeline.hincrby(modelDaily, 'cacheReadTokens', finalCacheReadTokens)
|
||||
pipeline.hincrby(modelDaily, 'allTokens', totalTokens)
|
||||
pipeline.hincrby(modelDaily, 'requests', 1)
|
||||
// 详细缓存类型统计
|
||||
pipeline.hincrby(modelDaily, 'ephemeral5mTokens', ephemeral5mTokens)
|
||||
pipeline.hincrby(modelDaily, 'ephemeral1hTokens', ephemeral1hTokens)
|
||||
|
||||
// 按模型统计 - 每月
|
||||
pipeline.hincrby(modelMonthly, 'inputTokens', finalInputTokens)
|
||||
@@ -1092,6 +1095,9 @@ class RedisClient {
|
||||
pipeline.hincrby(modelMonthly, 'cacheReadTokens', finalCacheReadTokens)
|
||||
pipeline.hincrby(modelMonthly, 'allTokens', totalTokens)
|
||||
pipeline.hincrby(modelMonthly, 'requests', 1)
|
||||
// 详细缓存类型统计
|
||||
pipeline.hincrby(modelMonthly, 'ephemeral5mTokens', ephemeral5mTokens)
|
||||
pipeline.hincrby(modelMonthly, 'ephemeral1hTokens', ephemeral1hTokens)
|
||||
|
||||
// API Key级别的模型统计 - 每日
|
||||
pipeline.hincrby(keyModelDaily, 'inputTokens', finalInputTokens)
|
||||
@@ -1136,6 +1142,9 @@ class RedisClient {
|
||||
pipeline.hincrby(keyModelAlltime, 'cacheCreateTokens', finalCacheCreateTokens)
|
||||
pipeline.hincrby(keyModelAlltime, 'cacheReadTokens', finalCacheReadTokens)
|
||||
pipeline.hincrby(keyModelAlltime, 'requests', 1)
|
||||
// 详细缓存类型统计
|
||||
pipeline.hincrby(keyModelAlltime, 'ephemeral5mTokens', ephemeral5mTokens)
|
||||
pipeline.hincrby(keyModelAlltime, 'ephemeral1hTokens', ephemeral1hTokens)
|
||||
// 费用统计
|
||||
if (realCost > 0) {
|
||||
pipeline.hincrby(keyModelAlltime, 'realCostMicro', Math.round(realCost * 1000000))
|
||||
@@ -1152,6 +1161,9 @@ class RedisClient {
|
||||
pipeline.hincrby(hourly, 'cacheReadTokens', finalCacheReadTokens)
|
||||
pipeline.hincrby(hourly, 'allTokens', totalTokens)
|
||||
pipeline.hincrby(hourly, 'requests', 1)
|
||||
// 详细缓存类型统计
|
||||
pipeline.hincrby(hourly, 'ephemeral5mTokens', ephemeral5mTokens)
|
||||
pipeline.hincrby(hourly, 'ephemeral1hTokens', ephemeral1hTokens)
|
||||
|
||||
// 按模型统计 - 每小时
|
||||
pipeline.hincrby(modelHourly, 'inputTokens', finalInputTokens)
|
||||
@@ -1160,6 +1172,9 @@ class RedisClient {
|
||||
pipeline.hincrby(modelHourly, 'cacheReadTokens', finalCacheReadTokens)
|
||||
pipeline.hincrby(modelHourly, 'allTokens', totalTokens)
|
||||
pipeline.hincrby(modelHourly, 'requests', 1)
|
||||
// 详细缓存类型统计
|
||||
pipeline.hincrby(modelHourly, 'ephemeral5mTokens', ephemeral5mTokens)
|
||||
pipeline.hincrby(modelHourly, 'ephemeral1hTokens', ephemeral1hTokens)
|
||||
|
||||
// API Key级别的模型统计 - 每小时
|
||||
pipeline.hincrby(keyModelHourly, 'inputTokens', finalInputTokens)
|
||||
@@ -1168,6 +1183,9 @@ class RedisClient {
|
||||
pipeline.hincrby(keyModelHourly, 'cacheReadTokens', finalCacheReadTokens)
|
||||
pipeline.hincrby(keyModelHourly, 'allTokens', totalTokens)
|
||||
pipeline.hincrby(keyModelHourly, 'requests', 1)
|
||||
// 详细缓存类型统计
|
||||
pipeline.hincrby(keyModelHourly, 'ephemeral5mTokens', ephemeral5mTokens)
|
||||
pipeline.hincrby(keyModelHourly, 'ephemeral1hTokens', ephemeral1hTokens)
|
||||
// 费用统计
|
||||
if (realCost > 0) {
|
||||
pipeline.hincrby(keyModelHourly, 'realCostMicro', Math.round(realCost * 1000000))
|
||||
@@ -1235,18 +1253,24 @@ class RedisClient {
|
||||
pipeline.hincrby('usage:global:total', 'cacheCreateTokens', finalCacheCreateTokens)
|
||||
pipeline.hincrby('usage:global:total', 'cacheReadTokens', finalCacheReadTokens)
|
||||
pipeline.hincrby('usage:global:total', 'allTokens', totalTokens)
|
||||
pipeline.hincrby('usage:global:total', 'ephemeral5mTokens', ephemeral5mTokens)
|
||||
pipeline.hincrby('usage:global:total', 'ephemeral1hTokens', ephemeral1hTokens)
|
||||
pipeline.hincrby(globalDaily, 'requests', 1)
|
||||
pipeline.hincrby(globalDaily, 'inputTokens', finalInputTokens)
|
||||
pipeline.hincrby(globalDaily, 'outputTokens', finalOutputTokens)
|
||||
pipeline.hincrby(globalDaily, 'cacheCreateTokens', finalCacheCreateTokens)
|
||||
pipeline.hincrby(globalDaily, 'cacheReadTokens', finalCacheReadTokens)
|
||||
pipeline.hincrby(globalDaily, 'allTokens', totalTokens)
|
||||
pipeline.hincrby(globalDaily, 'ephemeral5mTokens', ephemeral5mTokens)
|
||||
pipeline.hincrby(globalDaily, 'ephemeral1hTokens', ephemeral1hTokens)
|
||||
pipeline.hincrby(globalMonthly, 'requests', 1)
|
||||
pipeline.hincrby(globalMonthly, 'inputTokens', finalInputTokens)
|
||||
pipeline.hincrby(globalMonthly, 'outputTokens', finalOutputTokens)
|
||||
pipeline.hincrby(globalMonthly, 'cacheCreateTokens', finalCacheCreateTokens)
|
||||
pipeline.hincrby(globalMonthly, 'cacheReadTokens', finalCacheReadTokens)
|
||||
pipeline.hincrby(globalMonthly, 'allTokens', totalTokens)
|
||||
pipeline.hincrby(globalMonthly, 'ephemeral5mTokens', ephemeral5mTokens)
|
||||
pipeline.hincrby(globalMonthly, 'ephemeral1hTokens', ephemeral1hTokens)
|
||||
pipeline.expire(globalDaily, 86400 * 32)
|
||||
pipeline.expire(globalMonthly, 86400 * 365)
|
||||
|
||||
@@ -1262,6 +1286,8 @@ class RedisClient {
|
||||
outputTokens = 0,
|
||||
cacheCreateTokens = 0,
|
||||
cacheReadTokens = 0,
|
||||
ephemeral5mTokens = 0,
|
||||
ephemeral1hTokens = 0,
|
||||
model = 'unknown',
|
||||
isLongContextRequest = false
|
||||
) {
|
||||
@@ -1293,6 +1319,8 @@ class RedisClient {
|
||||
const finalOutputTokens = outputTokens || 0
|
||||
const finalCacheCreateTokens = cacheCreateTokens || 0
|
||||
const finalCacheReadTokens = cacheReadTokens || 0
|
||||
const finalEphemeral5mTokens = ephemeral5mTokens || 0
|
||||
const finalEphemeral1hTokens = ephemeral1hTokens || 0
|
||||
const actualTotalTokens =
|
||||
finalInputTokens + finalOutputTokens + finalCacheCreateTokens + finalCacheReadTokens
|
||||
const coreTokens = finalInputTokens + finalOutputTokens
|
||||
@@ -1305,6 +1333,8 @@ class RedisClient {
|
||||
this.client.hincrby(accountKey, 'totalOutputTokens', finalOutputTokens),
|
||||
this.client.hincrby(accountKey, 'totalCacheCreateTokens', finalCacheCreateTokens),
|
||||
this.client.hincrby(accountKey, 'totalCacheReadTokens', finalCacheReadTokens),
|
||||
this.client.hincrby(accountKey, 'totalEphemeral5mTokens', finalEphemeral5mTokens),
|
||||
this.client.hincrby(accountKey, 'totalEphemeral1hTokens', finalEphemeral1hTokens),
|
||||
this.client.hincrby(accountKey, 'totalAllTokens', actualTotalTokens),
|
||||
this.client.hincrby(accountKey, 'totalRequests', 1),
|
||||
|
||||
@@ -1314,6 +1344,8 @@ class RedisClient {
|
||||
this.client.hincrby(accountDaily, 'outputTokens', finalOutputTokens),
|
||||
this.client.hincrby(accountDaily, 'cacheCreateTokens', finalCacheCreateTokens),
|
||||
this.client.hincrby(accountDaily, 'cacheReadTokens', finalCacheReadTokens),
|
||||
this.client.hincrby(accountDaily, 'ephemeral5mTokens', finalEphemeral5mTokens),
|
||||
this.client.hincrby(accountDaily, 'ephemeral1hTokens', finalEphemeral1hTokens),
|
||||
this.client.hincrby(accountDaily, 'allTokens', actualTotalTokens),
|
||||
this.client.hincrby(accountDaily, 'requests', 1),
|
||||
|
||||
@@ -1323,6 +1355,8 @@ class RedisClient {
|
||||
this.client.hincrby(accountMonthly, 'outputTokens', finalOutputTokens),
|
||||
this.client.hincrby(accountMonthly, 'cacheCreateTokens', finalCacheCreateTokens),
|
||||
this.client.hincrby(accountMonthly, 'cacheReadTokens', finalCacheReadTokens),
|
||||
this.client.hincrby(accountMonthly, 'ephemeral5mTokens', finalEphemeral5mTokens),
|
||||
this.client.hincrby(accountMonthly, 'ephemeral1hTokens', finalEphemeral1hTokens),
|
||||
this.client.hincrby(accountMonthly, 'allTokens', actualTotalTokens),
|
||||
this.client.hincrby(accountMonthly, 'requests', 1),
|
||||
|
||||
@@ -1332,6 +1366,8 @@ class RedisClient {
|
||||
this.client.hincrby(accountHourly, 'outputTokens', finalOutputTokens),
|
||||
this.client.hincrby(accountHourly, 'cacheCreateTokens', finalCacheCreateTokens),
|
||||
this.client.hincrby(accountHourly, 'cacheReadTokens', finalCacheReadTokens),
|
||||
this.client.hincrby(accountHourly, 'ephemeral5mTokens', finalEphemeral5mTokens),
|
||||
this.client.hincrby(accountHourly, 'ephemeral1hTokens', finalEphemeral1hTokens),
|
||||
this.client.hincrby(accountHourly, 'allTokens', actualTotalTokens),
|
||||
this.client.hincrby(accountHourly, 'requests', 1),
|
||||
|
||||
@@ -1352,6 +1388,16 @@ class RedisClient {
|
||||
`model:${normalizedModel}:cacheReadTokens`,
|
||||
finalCacheReadTokens
|
||||
),
|
||||
this.client.hincrby(
|
||||
accountHourly,
|
||||
`model:${normalizedModel}:ephemeral5mTokens`,
|
||||
finalEphemeral5mTokens
|
||||
),
|
||||
this.client.hincrby(
|
||||
accountHourly,
|
||||
`model:${normalizedModel}:ephemeral1hTokens`,
|
||||
finalEphemeral1hTokens
|
||||
),
|
||||
this.client.hincrby(accountHourly, `model:${normalizedModel}:allTokens`, actualTotalTokens),
|
||||
this.client.hincrby(accountHourly, `model:${normalizedModel}:requests`, 1),
|
||||
|
||||
@@ -1360,6 +1406,8 @@ class RedisClient {
|
||||
this.client.hincrby(accountModelDaily, 'outputTokens', finalOutputTokens),
|
||||
this.client.hincrby(accountModelDaily, 'cacheCreateTokens', finalCacheCreateTokens),
|
||||
this.client.hincrby(accountModelDaily, 'cacheReadTokens', finalCacheReadTokens),
|
||||
this.client.hincrby(accountModelDaily, 'ephemeral5mTokens', finalEphemeral5mTokens),
|
||||
this.client.hincrby(accountModelDaily, 'ephemeral1hTokens', finalEphemeral1hTokens),
|
||||
this.client.hincrby(accountModelDaily, 'allTokens', actualTotalTokens),
|
||||
this.client.hincrby(accountModelDaily, 'requests', 1),
|
||||
|
||||
@@ -1368,6 +1416,8 @@ class RedisClient {
|
||||
this.client.hincrby(accountModelMonthly, 'outputTokens', finalOutputTokens),
|
||||
this.client.hincrby(accountModelMonthly, 'cacheCreateTokens', finalCacheCreateTokens),
|
||||
this.client.hincrby(accountModelMonthly, 'cacheReadTokens', finalCacheReadTokens),
|
||||
this.client.hincrby(accountModelMonthly, 'ephemeral5mTokens', finalEphemeral5mTokens),
|
||||
this.client.hincrby(accountModelMonthly, 'ephemeral1hTokens', finalEphemeral1hTokens),
|
||||
this.client.hincrby(accountModelMonthly, 'allTokens', actualTotalTokens),
|
||||
this.client.hincrby(accountModelMonthly, 'requests', 1),
|
||||
|
||||
@@ -1376,6 +1426,8 @@ class RedisClient {
|
||||
this.client.hincrby(accountModelHourly, 'outputTokens', finalOutputTokens),
|
||||
this.client.hincrby(accountModelHourly, 'cacheCreateTokens', finalCacheCreateTokens),
|
||||
this.client.hincrby(accountModelHourly, 'cacheReadTokens', finalCacheReadTokens),
|
||||
this.client.hincrby(accountModelHourly, 'ephemeral5mTokens', finalEphemeral5mTokens),
|
||||
this.client.hincrby(accountModelHourly, 'ephemeral1hTokens', finalEphemeral1hTokens),
|
||||
this.client.hincrby(accountModelHourly, 'allTokens', actualTotalTokens),
|
||||
this.client.hincrby(accountModelHourly, 'requests', 1),
|
||||
|
||||
@@ -1843,6 +1895,16 @@ class RedisClient {
|
||||
cache_read_input_tokens: parseInt(modelUsage.cacheReadTokens || 0)
|
||||
}
|
||||
|
||||
// 添加 cache_creation 子对象以支持精确 ephemeral 定价
|
||||
const eph5m = parseInt(modelUsage.ephemeral5mTokens) || 0
|
||||
const eph1h = parseInt(modelUsage.ephemeral1hTokens) || 0
|
||||
if (eph5m > 0 || eph1h > 0) {
|
||||
usage.cache_creation = {
|
||||
ephemeral_5m_input_tokens: eph5m,
|
||||
ephemeral_1h_input_tokens: eph1h
|
||||
}
|
||||
}
|
||||
|
||||
const costResult = CostCalculator.calculateCost(usage, model)
|
||||
totalCost += costResult.costs.total
|
||||
|
||||
@@ -1931,6 +1993,16 @@ class RedisClient {
|
||||
cache_read_input_tokens: parseInt(modelUsage.cacheReadTokens || 0)
|
||||
}
|
||||
|
||||
// 添加 cache_creation 子对象以支持精确 ephemeral 定价
|
||||
const eph5m = parseInt(modelUsage.ephemeral5mTokens) || 0
|
||||
const eph1h = parseInt(modelUsage.ephemeral1hTokens) || 0
|
||||
if (eph5m > 0 || eph1h > 0) {
|
||||
usage.cache_creation = {
|
||||
ephemeral_5m_input_tokens: eph5m,
|
||||
ephemeral_1h_input_tokens: eph1h
|
||||
}
|
||||
}
|
||||
|
||||
const costResult = CostCalculator.calculateCost(usage, model)
|
||||
costMap.set(accountId, costMap.get(accountId) + costResult.costs.total)
|
||||
}
|
||||
@@ -1972,6 +2044,17 @@ class RedisClient {
|
||||
cache_creation_input_tokens: parseInt(modelUsage.cacheCreateTokens || 0),
|
||||
cache_read_input_tokens: parseInt(modelUsage.cacheReadTokens || 0)
|
||||
}
|
||||
|
||||
// 添加 cache_creation 子对象以支持精确 ephemeral 定价
|
||||
const eph5m = parseInt(modelUsage.ephemeral5mTokens) || 0
|
||||
const eph1h = parseInt(modelUsage.ephemeral1hTokens) || 0
|
||||
if (eph5m > 0 || eph1h > 0) {
|
||||
usage.cache_creation = {
|
||||
ephemeral_5m_input_tokens: eph5m,
|
||||
ephemeral_1h_input_tokens: eph1h
|
||||
}
|
||||
}
|
||||
|
||||
const costResult = CostCalculator.calculateCost(usage, model)
|
||||
totalCost += costResult.costs.total
|
||||
}
|
||||
@@ -3622,6 +3705,8 @@ class RedisClient {
|
||||
outputTokens: 0,
|
||||
cacheCreateTokens: 0,
|
||||
cacheReadTokens: 0,
|
||||
ephemeral5mTokens: 0,
|
||||
ephemeral1hTokens: 0,
|
||||
allTokens: 0,
|
||||
requests: 0
|
||||
}
|
||||
@@ -3635,6 +3720,10 @@ class RedisClient {
|
||||
modelUsage[modelName].cacheCreateTokens += parseInt(value || 0)
|
||||
} else if (metric === 'cacheReadTokens') {
|
||||
modelUsage[modelName].cacheReadTokens += parseInt(value || 0)
|
||||
} else if (metric === 'ephemeral5mTokens') {
|
||||
modelUsage[modelName].ephemeral5mTokens += parseInt(value || 0)
|
||||
} else if (metric === 'ephemeral1hTokens') {
|
||||
modelUsage[modelName].ephemeral1hTokens += parseInt(value || 0)
|
||||
} else if (metric === 'allTokens') {
|
||||
modelUsage[modelName].allTokens += parseInt(value || 0)
|
||||
} else if (metric === 'requests') {
|
||||
|
||||
@@ -1289,6 +1289,8 @@ async function calculateKeyStats(keyId, timeRange, startDate, endDate) {
|
||||
outputTokens: 0,
|
||||
cacheCreateTokens: 0,
|
||||
cacheReadTokens: 0,
|
||||
ephemeral5mTokens: 0,
|
||||
ephemeral1hTokens: 0,
|
||||
requests: 0
|
||||
})
|
||||
}
|
||||
@@ -1300,6 +1302,10 @@ async function calculateKeyStats(keyId, timeRange, startDate, endDate) {
|
||||
parseInt(data.totalCacheCreateTokens) || parseInt(data.cacheCreateTokens) || 0
|
||||
stats.cacheReadTokens +=
|
||||
parseInt(data.totalCacheReadTokens) || parseInt(data.cacheReadTokens) || 0
|
||||
stats.ephemeral5mTokens +=
|
||||
parseInt(data.totalEphemeral5mTokens) || parseInt(data.ephemeral5mTokens) || 0
|
||||
stats.ephemeral1hTokens +=
|
||||
parseInt(data.totalEphemeral1hTokens) || parseInt(data.ephemeral1hTokens) || 0
|
||||
stats.requests += parseInt(data.totalRequests) || parseInt(data.requests) || 0
|
||||
|
||||
totalRequests += parseInt(data.totalRequests) || parseInt(data.requests) || 0
|
||||
@@ -1318,15 +1324,22 @@ async function calculateKeyStats(keyId, timeRange, startDate, endDate) {
|
||||
cacheCreateTokens += stats.cacheCreateTokens
|
||||
cacheReadTokens += stats.cacheReadTokens
|
||||
|
||||
const costResult = CostCalculator.calculateCost(
|
||||
{
|
||||
input_tokens: stats.inputTokens,
|
||||
output_tokens: stats.outputTokens,
|
||||
cache_creation_input_tokens: stats.cacheCreateTokens,
|
||||
cache_read_input_tokens: stats.cacheReadTokens
|
||||
},
|
||||
model
|
||||
)
|
||||
const costUsage = {
|
||||
input_tokens: stats.inputTokens,
|
||||
output_tokens: stats.outputTokens,
|
||||
cache_creation_input_tokens: stats.cacheCreateTokens,
|
||||
cache_read_input_tokens: stats.cacheReadTokens
|
||||
}
|
||||
|
||||
// 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费
|
||||
if (stats.ephemeral5mTokens > 0 || stats.ephemeral1hTokens > 0) {
|
||||
costUsage.cache_creation = {
|
||||
ephemeral_5m_input_tokens: stats.ephemeral5mTokens,
|
||||
ephemeral_1h_input_tokens: stats.ephemeral1hTokens
|
||||
}
|
||||
}
|
||||
|
||||
const costResult = CostCalculator.calculateCost(costUsage, model)
|
||||
totalCost += costResult.costs.total
|
||||
}
|
||||
|
||||
|
||||
@@ -417,6 +417,14 @@ router.get('/claude-accounts', authenticateAdmin, async (req, res) => {
|
||||
cache_read_input_tokens: usage.cacheReadTokens
|
||||
}
|
||||
|
||||
// 添加 cache_creation 子对象以支持精确 ephemeral 定价
|
||||
if (usage.ephemeral5mTokens > 0 || usage.ephemeral1hTokens > 0) {
|
||||
usageData.cache_creation = {
|
||||
ephemeral_5m_input_tokens: usage.ephemeral5mTokens,
|
||||
ephemeral_1h_input_tokens: usage.ephemeral1hTokens
|
||||
}
|
||||
}
|
||||
|
||||
logger.debug(`💰 Calculating cost for model ${modelName}:`, JSON.stringify(usageData))
|
||||
const costResult = CostCalculator.calculateCost(usageData, modelName)
|
||||
logger.debug(`💰 Cost result for ${modelName}: total=${costResult.costs.total}`)
|
||||
|
||||
@@ -484,7 +484,9 @@ router.get('/model-stats', authenticateAdmin, async (req, res) => {
|
||||
outputTokens: 0,
|
||||
cacheCreateTokens: 0,
|
||||
cacheReadTokens: 0,
|
||||
allTokens: 0
|
||||
allTokens: 0,
|
||||
ephemeral5mTokens: 0,
|
||||
ephemeral1hTokens: 0
|
||||
}
|
||||
|
||||
stats.requests += parseInt(data.requests) || 0
|
||||
@@ -493,6 +495,8 @@ router.get('/model-stats', authenticateAdmin, async (req, res) => {
|
||||
stats.cacheCreateTokens += parseInt(data.cacheCreateTokens) || 0
|
||||
stats.cacheReadTokens += parseInt(data.cacheReadTokens) || 0
|
||||
stats.allTokens += parseInt(data.allTokens) || 0
|
||||
stats.ephemeral5mTokens += parseInt(data.ephemeral5mTokens) || 0
|
||||
stats.ephemeral1hTokens += parseInt(data.ephemeral1hTokens) || 0
|
||||
|
||||
modelStatsMap.set(normalizedModel, stats)
|
||||
}
|
||||
@@ -509,6 +513,14 @@ router.get('/model-stats', authenticateAdmin, async (req, res) => {
|
||||
cache_read_input_tokens: stats.cacheReadTokens
|
||||
}
|
||||
|
||||
// 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费
|
||||
if (stats.ephemeral5mTokens > 0 || stats.ephemeral1hTokens > 0) {
|
||||
usage.cache_creation = {
|
||||
ephemeral_5m_input_tokens: stats.ephemeral5mTokens,
|
||||
ephemeral_1h_input_tokens: stats.ephemeral1hTokens
|
||||
}
|
||||
}
|
||||
|
||||
// 计算费用
|
||||
const costData = CostCalculator.calculateCost(usage, model)
|
||||
|
||||
|
||||
@@ -362,6 +362,16 @@ router.get('/accounts/:accountId/usage-history', authenticateAdmin, async (req,
|
||||
cache_read_input_tokens: parseInt(modelData.cacheReadTokens) || 0
|
||||
}
|
||||
|
||||
// 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费
|
||||
const eph5m = parseInt(modelData.ephemeral5mTokens) || 0
|
||||
const eph1h = parseInt(modelData.ephemeral1hTokens) || 0
|
||||
if (eph5m > 0 || eph1h > 0) {
|
||||
usage.cache_creation = {
|
||||
ephemeral_5m_input_tokens: eph5m,
|
||||
ephemeral_1h_input_tokens: eph1h
|
||||
}
|
||||
}
|
||||
|
||||
const costResult = CostCalculator.calculateCost(usage, modelName)
|
||||
summedCost += costResult.costs.total
|
||||
}
|
||||
@@ -403,6 +413,15 @@ router.get('/accounts/:accountId/usage-history', authenticateAdmin, async (req,
|
||||
cache_creation_input_tokens: cacheCreateTokens,
|
||||
cache_read_input_tokens: cacheReadTokens
|
||||
}
|
||||
// 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费
|
||||
const fbEph5m = parseInt(dailyData?.ephemeral5mTokens) || 0
|
||||
const fbEph1h = parseInt(dailyData?.ephemeral1hTokens) || 0
|
||||
if (fbEph5m > 0 || fbEph1h > 0) {
|
||||
fallbackUsage.cache_creation = {
|
||||
ephemeral_5m_input_tokens: fbEph5m,
|
||||
ephemeral_1h_input_tokens: fbEph1h
|
||||
}
|
||||
}
|
||||
const fallbackResult = CostCalculator.calculateCost(fallbackUsage, fallbackModel)
|
||||
cost = fallbackResult.costs.total
|
||||
}
|
||||
@@ -653,12 +672,23 @@ router.get('/usage-trend', authenticateAdmin, async (req, res) => {
|
||||
cache_creation_input_tokens: modelCacheCreateTokens,
|
||||
cache_read_input_tokens: modelCacheReadTokens
|
||||
}
|
||||
// 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费
|
||||
const mEph5m = parseInt(data.ephemeral5mTokens) || 0
|
||||
const mEph1h = parseInt(data.ephemeral1hTokens) || 0
|
||||
if (mEph5m > 0 || mEph1h > 0) {
|
||||
modelUsage.cache_creation = {
|
||||
ephemeral_5m_input_tokens: mEph5m,
|
||||
ephemeral_1h_input_tokens: mEph1h
|
||||
}
|
||||
}
|
||||
const modelCostResult = CostCalculator.calculateCost(modelUsage, model)
|
||||
hourCost += modelCostResult.costs.total
|
||||
}
|
||||
|
||||
// 如果没有模型级别的数据,尝试API Key级别的数据
|
||||
if (modelKeys.length === 0) {
|
||||
let hourEph5m = 0
|
||||
let hourEph1h = 0
|
||||
for (const key of usageKeys) {
|
||||
const data = usageDataMap.get(key)
|
||||
if (data) {
|
||||
@@ -667,6 +697,8 @@ router.get('/usage-trend', authenticateAdmin, async (req, res) => {
|
||||
hourRequests += parseInt(data.requests) || 0
|
||||
hourCacheCreateTokens += parseInt(data.cacheCreateTokens) || 0
|
||||
hourCacheReadTokens += parseInt(data.cacheReadTokens) || 0
|
||||
hourEph5m += parseInt(data.ephemeral5mTokens) || 0
|
||||
hourEph1h += parseInt(data.ephemeral1hTokens) || 0
|
||||
}
|
||||
}
|
||||
|
||||
@@ -676,6 +708,13 @@ router.get('/usage-trend', authenticateAdmin, async (req, res) => {
|
||||
cache_creation_input_tokens: hourCacheCreateTokens,
|
||||
cache_read_input_tokens: hourCacheReadTokens
|
||||
}
|
||||
// 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费
|
||||
if (hourEph5m > 0 || hourEph1h > 0) {
|
||||
usage.cache_creation = {
|
||||
ephemeral_5m_input_tokens: hourEph5m,
|
||||
ephemeral_1h_input_tokens: hourEph1h
|
||||
}
|
||||
}
|
||||
const costResult = CostCalculator.calculateCost(usage, 'unknown')
|
||||
hourCost = costResult.costs.total
|
||||
}
|
||||
@@ -786,6 +825,8 @@ router.get('/usage-trend', authenticateAdmin, async (req, res) => {
|
||||
const modelOutputTokens = parseInt(data.outputTokens) || 0
|
||||
const modelCacheCreateTokens = parseInt(data.cacheCreateTokens) || 0
|
||||
const modelCacheReadTokens = parseInt(data.cacheReadTokens) || 0
|
||||
const modelEphemeral5mTokens = parseInt(data.ephemeral5mTokens) || 0
|
||||
const modelEphemeral1hTokens = parseInt(data.ephemeral1hTokens) || 0
|
||||
const modelRequests = parseInt(data.requests) || 0
|
||||
|
||||
dayInputTokens += modelInputTokens
|
||||
@@ -800,12 +841,23 @@ router.get('/usage-trend', authenticateAdmin, async (req, res) => {
|
||||
cache_creation_input_tokens: modelCacheCreateTokens,
|
||||
cache_read_input_tokens: modelCacheReadTokens
|
||||
}
|
||||
|
||||
// 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费
|
||||
if (modelEphemeral5mTokens > 0 || modelEphemeral1hTokens > 0) {
|
||||
modelUsage.cache_creation = {
|
||||
ephemeral_5m_input_tokens: modelEphemeral5mTokens,
|
||||
ephemeral_1h_input_tokens: modelEphemeral1hTokens
|
||||
}
|
||||
}
|
||||
|
||||
const modelCostResult = CostCalculator.calculateCost(modelUsage, model)
|
||||
dayCost += modelCostResult.costs.total
|
||||
}
|
||||
|
||||
// 如果没有模型级别的数据,回退到原始方法
|
||||
if (modelKeys.length === 0 && usageKeys.length > 0) {
|
||||
let dayEph5m = 0
|
||||
let dayEph1h = 0
|
||||
for (const key of usageKeys) {
|
||||
const data = usageDataMap.get(key)
|
||||
if (data) {
|
||||
@@ -814,6 +866,8 @@ router.get('/usage-trend', authenticateAdmin, async (req, res) => {
|
||||
dayRequests += parseInt(data.requests) || 0
|
||||
dayCacheCreateTokens += parseInt(data.cacheCreateTokens) || 0
|
||||
dayCacheReadTokens += parseInt(data.cacheReadTokens) || 0
|
||||
dayEph5m += parseInt(data.ephemeral5mTokens) || 0
|
||||
dayEph1h += parseInt(data.ephemeral1hTokens) || 0
|
||||
}
|
||||
}
|
||||
|
||||
@@ -823,6 +877,13 @@ router.get('/usage-trend', authenticateAdmin, async (req, res) => {
|
||||
cache_creation_input_tokens: dayCacheCreateTokens,
|
||||
cache_read_input_tokens: dayCacheReadTokens
|
||||
}
|
||||
// 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费
|
||||
if (dayEph5m > 0 || dayEph1h > 0) {
|
||||
usage.cache_creation = {
|
||||
ephemeral_5m_input_tokens: dayEph5m,
|
||||
ephemeral_1h_input_tokens: dayEph1h
|
||||
}
|
||||
}
|
||||
const costResult = CostCalculator.calculateCost(usage, 'unknown')
|
||||
dayCost = costResult.costs.total
|
||||
}
|
||||
@@ -948,6 +1009,8 @@ router.get('/api-keys/:keyId/model-stats', authenticateAdmin, async (req, res) =
|
||||
outputTokens: 0,
|
||||
cacheCreateTokens: 0,
|
||||
cacheReadTokens: 0,
|
||||
ephemeral5mTokens: 0,
|
||||
ephemeral1hTokens: 0,
|
||||
allTokens: 0
|
||||
})
|
||||
}
|
||||
@@ -957,6 +1020,8 @@ router.get('/api-keys/:keyId/model-stats', authenticateAdmin, async (req, res) =
|
||||
stats.outputTokens += parseInt(data.outputTokens) || 0
|
||||
stats.cacheCreateTokens += parseInt(data.cacheCreateTokens) || 0
|
||||
stats.cacheReadTokens += parseInt(data.cacheReadTokens) || 0
|
||||
stats.ephemeral5mTokens += parseInt(data.ephemeral5mTokens) || 0
|
||||
stats.ephemeral1hTokens += parseInt(data.ephemeral1hTokens) || 0
|
||||
stats.allTokens += parseInt(data.allTokens) || 0
|
||||
}
|
||||
}
|
||||
@@ -992,6 +1057,8 @@ router.get('/api-keys/:keyId/model-stats', authenticateAdmin, async (req, res) =
|
||||
outputTokens: 0,
|
||||
cacheCreateTokens: 0,
|
||||
cacheReadTokens: 0,
|
||||
ephemeral5mTokens: 0,
|
||||
ephemeral1hTokens: 0,
|
||||
allTokens: 0
|
||||
})
|
||||
}
|
||||
@@ -1001,6 +1068,8 @@ router.get('/api-keys/:keyId/model-stats', authenticateAdmin, async (req, res) =
|
||||
stats.outputTokens += parseInt(data.outputTokens) || 0
|
||||
stats.cacheCreateTokens += parseInt(data.cacheCreateTokens) || 0
|
||||
stats.cacheReadTokens += parseInt(data.cacheReadTokens) || 0
|
||||
stats.ephemeral5mTokens += parseInt(data.ephemeral5mTokens) || 0
|
||||
stats.ephemeral1hTokens += parseInt(data.ephemeral1hTokens) || 0
|
||||
stats.allTokens += parseInt(data.allTokens) || 0
|
||||
}
|
||||
}
|
||||
@@ -1016,6 +1085,14 @@ router.get('/api-keys/:keyId/model-stats', authenticateAdmin, async (req, res) =
|
||||
cache_read_input_tokens: stats.cacheReadTokens
|
||||
}
|
||||
|
||||
// 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费
|
||||
if (stats.ephemeral5mTokens > 0 || stats.ephemeral1hTokens > 0) {
|
||||
usage.cache_creation = {
|
||||
ephemeral_5m_input_tokens: stats.ephemeral5mTokens,
|
||||
ephemeral_1h_input_tokens: stats.ephemeral1hTokens
|
||||
}
|
||||
}
|
||||
|
||||
// 使用CostCalculator计算费用
|
||||
const costData = CostCalculator.calculateCost(usage, model)
|
||||
|
||||
@@ -1070,6 +1147,16 @@ router.get('/api-keys/:keyId/model-stats', authenticateAdmin, async (req, res) =
|
||||
cache_read_input_tokens: usageData.cacheReadTokens || 0
|
||||
}
|
||||
|
||||
// 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费
|
||||
const histEph5m = usageData.ephemeral5mTokens || 0
|
||||
const histEph1h = usageData.ephemeral1hTokens || 0
|
||||
if (histEph5m > 0 || histEph1h > 0) {
|
||||
usage.cache_creation = {
|
||||
ephemeral_5m_input_tokens: histEph5m,
|
||||
ephemeral_1h_input_tokens: histEph1h
|
||||
}
|
||||
}
|
||||
|
||||
// 对于汇总数据,使用默认模型计算费用
|
||||
const costData = CostCalculator.calculateCost(usage, 'claude-3-5-sonnet-20241022')
|
||||
|
||||
@@ -1424,6 +1511,16 @@ router.get('/account-usage-trend', authenticateAdmin, async (req, res) => {
|
||||
cache_read_input_tokens: parseInt(modelData.cacheReadTokens) || 0
|
||||
}
|
||||
|
||||
// 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费
|
||||
const eph5m = parseInt(modelData.ephemeral5mTokens) || 0
|
||||
const eph1h = parseInt(modelData.ephemeral1hTokens) || 0
|
||||
if (eph5m > 0 || eph1h > 0) {
|
||||
usage.cache_creation = {
|
||||
ephemeral_5m_input_tokens: eph5m,
|
||||
ephemeral_1h_input_tokens: eph1h
|
||||
}
|
||||
}
|
||||
|
||||
const costResult = CostCalculator.calculateCost(usage, modelName)
|
||||
cost += costResult.costs.total
|
||||
}
|
||||
@@ -1435,6 +1532,15 @@ router.get('/account-usage-trend', authenticateAdmin, async (req, res) => {
|
||||
cache_creation_input_tokens: cacheCreateTokens,
|
||||
cache_read_input_tokens: cacheReadTokens
|
||||
}
|
||||
// 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费
|
||||
const fbEph5m = parseInt(data.ephemeral5mTokens) || 0
|
||||
const fbEph1h = parseInt(data.ephemeral1hTokens) || 0
|
||||
if (fbEph5m > 0 || fbEph1h > 0) {
|
||||
fallbackUsage.cache_creation = {
|
||||
ephemeral_5m_input_tokens: fbEph5m,
|
||||
ephemeral_1h_input_tokens: fbEph1h
|
||||
}
|
||||
}
|
||||
const fallbackResult = CostCalculator.calculateCost(fallbackUsage, fallbackModel)
|
||||
cost = fallbackResult.costs.total
|
||||
}
|
||||
@@ -1582,6 +1688,16 @@ router.get('/account-usage-trend', authenticateAdmin, async (req, res) => {
|
||||
cache_read_input_tokens: parseInt(modelData.cacheReadTokens) || 0
|
||||
}
|
||||
|
||||
// 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费
|
||||
const eph5m = parseInt(modelData.ephemeral5mTokens) || 0
|
||||
const eph1h = parseInt(modelData.ephemeral1hTokens) || 0
|
||||
if (eph5m > 0 || eph1h > 0) {
|
||||
usage.cache_creation = {
|
||||
ephemeral_5m_input_tokens: eph5m,
|
||||
ephemeral_1h_input_tokens: eph1h
|
||||
}
|
||||
}
|
||||
|
||||
const costResult = CostCalculator.calculateCost(usage, modelName)
|
||||
cost += costResult.costs.total
|
||||
}
|
||||
@@ -1593,6 +1709,15 @@ router.get('/account-usage-trend', authenticateAdmin, async (req, res) => {
|
||||
cache_creation_input_tokens: cacheCreateTokens,
|
||||
cache_read_input_tokens: cacheReadTokens
|
||||
}
|
||||
// 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费
|
||||
const fbEph5m = parseInt(data.ephemeral5mTokens) || 0
|
||||
const fbEph1h = parseInt(data.ephemeral1hTokens) || 0
|
||||
if (fbEph5m > 0 || fbEph1h > 0) {
|
||||
fallbackUsage.cache_creation = {
|
||||
ephemeral_5m_input_tokens: fbEph5m,
|
||||
ephemeral_1h_input_tokens: fbEph1h
|
||||
}
|
||||
}
|
||||
const fallbackResult = CostCalculator.calculateCost(fallbackUsage, fallbackModel)
|
||||
cost = fallbackResult.costs.total
|
||||
}
|
||||
@@ -1787,7 +1912,9 @@ router.get('/api-keys-usage-trend', authenticateAdmin, async (req, res) => {
|
||||
inputTokens,
|
||||
outputTokens,
|
||||
cacheCreateTokens,
|
||||
cacheReadTokens
|
||||
cacheReadTokens,
|
||||
ephemeral5mTokens: parseInt(data.ephemeral5mTokens) || 0,
|
||||
ephemeral1hTokens: parseInt(data.ephemeral1hTokens) || 0
|
||||
})
|
||||
}
|
||||
|
||||
@@ -1813,6 +1940,16 @@ router.get('/api-keys-usage-trend', authenticateAdmin, async (req, res) => {
|
||||
cache_read_input_tokens: parseInt(modelData.cacheReadTokens) || 0
|
||||
}
|
||||
|
||||
// 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费
|
||||
const eph5m = parseInt(modelData.ephemeral5mTokens) || 0
|
||||
const eph1h = parseInt(modelData.ephemeral1hTokens) || 0
|
||||
if (eph5m > 0 || eph1h > 0) {
|
||||
usage.cache_creation = {
|
||||
ephemeral_5m_input_tokens: eph5m,
|
||||
ephemeral_1h_input_tokens: eph1h
|
||||
}
|
||||
}
|
||||
|
||||
const costResult = CostCalculator.calculateCost(usage, model)
|
||||
const currentCost = apiKeyCostMap.get(apiKeyId) || 0
|
||||
apiKeyCostMap.set(apiKeyId, currentCost + costResult.costs.total)
|
||||
@@ -1831,6 +1968,12 @@ router.get('/api-keys-usage-trend', authenticateAdmin, async (req, res) => {
|
||||
cache_creation_input_tokens: data.cacheCreateTokens,
|
||||
cache_read_input_tokens: data.cacheReadTokens
|
||||
}
|
||||
if (data.ephemeral5mTokens > 0 || data.ephemeral1hTokens > 0) {
|
||||
usage.cache_creation = {
|
||||
ephemeral_5m_input_tokens: data.ephemeral5mTokens,
|
||||
ephemeral_1h_input_tokens: data.ephemeral1hTokens
|
||||
}
|
||||
}
|
||||
const fallbackResult = CostCalculator.calculateCost(usage, 'claude-3-5-sonnet-20241022')
|
||||
cost = fallbackResult.costs.total
|
||||
formattedCost = fallbackResult.formatted.total
|
||||
@@ -1947,7 +2090,9 @@ router.get('/api-keys-usage-trend', authenticateAdmin, async (req, res) => {
|
||||
inputTokens,
|
||||
outputTokens,
|
||||
cacheCreateTokens,
|
||||
cacheReadTokens
|
||||
cacheReadTokens,
|
||||
ephemeral5mTokens: parseInt(data.ephemeral5mTokens) || 0,
|
||||
ephemeral1hTokens: parseInt(data.ephemeral1hTokens) || 0
|
||||
})
|
||||
}
|
||||
|
||||
@@ -1973,6 +2118,16 @@ router.get('/api-keys-usage-trend', authenticateAdmin, async (req, res) => {
|
||||
cache_read_input_tokens: parseInt(modelData.cacheReadTokens) || 0
|
||||
}
|
||||
|
||||
// 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费
|
||||
const eph5m = parseInt(modelData.ephemeral5mTokens) || 0
|
||||
const eph1h = parseInt(modelData.ephemeral1hTokens) || 0
|
||||
if (eph5m > 0 || eph1h > 0) {
|
||||
usage.cache_creation = {
|
||||
ephemeral_5m_input_tokens: eph5m,
|
||||
ephemeral_1h_input_tokens: eph1h
|
||||
}
|
||||
}
|
||||
|
||||
const costResult = CostCalculator.calculateCost(usage, model)
|
||||
const currentCost = apiKeyCostMap.get(apiKeyId) || 0
|
||||
apiKeyCostMap.set(apiKeyId, currentCost + costResult.costs.total)
|
||||
@@ -1991,6 +2146,12 @@ router.get('/api-keys-usage-trend', authenticateAdmin, async (req, res) => {
|
||||
cache_creation_input_tokens: data.cacheCreateTokens,
|
||||
cache_read_input_tokens: data.cacheReadTokens
|
||||
}
|
||||
if (data.ephemeral5mTokens > 0 || data.ephemeral1hTokens > 0) {
|
||||
usage.cache_creation = {
|
||||
ephemeral_5m_input_tokens: data.ephemeral5mTokens,
|
||||
ephemeral_1h_input_tokens: data.ephemeral1hTokens
|
||||
}
|
||||
}
|
||||
const fallbackResult = CostCalculator.calculateCost(usage, 'claude-3-5-sonnet-20241022')
|
||||
cost = fallbackResult.costs.total
|
||||
formattedCost = fallbackResult.formatted.total
|
||||
@@ -2142,7 +2303,9 @@ router.get('/usage-costs', authenticateAdmin, async (req, res) => {
|
||||
inputTokens: 0,
|
||||
outputTokens: 0,
|
||||
cacheCreateTokens: 0,
|
||||
cacheReadTokens: 0
|
||||
cacheReadTokens: 0,
|
||||
ephemeral5mTokens: 0,
|
||||
ephemeral1hTokens: 0
|
||||
})
|
||||
}
|
||||
|
||||
@@ -2151,6 +2314,8 @@ router.get('/usage-costs', authenticateAdmin, async (req, res) => {
|
||||
modelUsage.outputTokens += parseInt(data.outputTokens) || 0
|
||||
modelUsage.cacheCreateTokens += parseInt(data.cacheCreateTokens) || 0
|
||||
modelUsage.cacheReadTokens += parseInt(data.cacheReadTokens) || 0
|
||||
modelUsage.ephemeral5mTokens += parseInt(data.ephemeral5mTokens) || 0
|
||||
modelUsage.ephemeral1hTokens += parseInt(data.ephemeral1hTokens) || 0
|
||||
}
|
||||
|
||||
// 计算7天统计的费用
|
||||
@@ -2164,6 +2329,14 @@ router.get('/usage-costs', authenticateAdmin, async (req, res) => {
|
||||
cache_read_input_tokens: usage.cacheReadTokens
|
||||
}
|
||||
|
||||
// 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费
|
||||
if (usage.ephemeral5mTokens > 0 || usage.ephemeral1hTokens > 0) {
|
||||
usageData.cache_creation = {
|
||||
ephemeral_5m_input_tokens: usage.ephemeral5mTokens,
|
||||
ephemeral_1h_input_tokens: usage.ephemeral1hTokens
|
||||
}
|
||||
}
|
||||
|
||||
const costResult = CostCalculator.calculateCost(usageData, model)
|
||||
totalCosts.inputCost += costResult.costs.input
|
||||
totalCosts.outputCost += costResult.costs.output
|
||||
@@ -2243,7 +2416,9 @@ router.get('/usage-costs', authenticateAdmin, async (req, res) => {
|
||||
inputTokens: 0,
|
||||
outputTokens: 0,
|
||||
cacheCreateTokens: 0,
|
||||
cacheReadTokens: 0
|
||||
cacheReadTokens: 0,
|
||||
ephemeral5mTokens: 0,
|
||||
ephemeral1hTokens: 0
|
||||
})
|
||||
}
|
||||
|
||||
@@ -2252,6 +2427,8 @@ router.get('/usage-costs', authenticateAdmin, async (req, res) => {
|
||||
modelUsage.outputTokens += parseInt(data.outputTokens) || 0
|
||||
modelUsage.cacheCreateTokens += parseInt(data.cacheCreateTokens) || 0
|
||||
modelUsage.cacheReadTokens += parseInt(data.cacheReadTokens) || 0
|
||||
modelUsage.ephemeral5mTokens += parseInt(data.ephemeral5mTokens) || 0
|
||||
modelUsage.ephemeral1hTokens += parseInt(data.ephemeral1hTokens) || 0
|
||||
}
|
||||
|
||||
// 使用模型级别的数据计算费用
|
||||
@@ -2265,6 +2442,14 @@ router.get('/usage-costs', authenticateAdmin, async (req, res) => {
|
||||
cache_read_input_tokens: usage.cacheReadTokens
|
||||
}
|
||||
|
||||
// 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费
|
||||
if (usage.ephemeral5mTokens > 0 || usage.ephemeral1hTokens > 0) {
|
||||
usageData.cache_creation = {
|
||||
ephemeral_5m_input_tokens: usage.ephemeral5mTokens,
|
||||
ephemeral_1h_input_tokens: usage.ephemeral1hTokens
|
||||
}
|
||||
}
|
||||
|
||||
const costResult = CostCalculator.calculateCost(usageData, model)
|
||||
totalCosts.inputCost += costResult.costs.input
|
||||
totalCosts.outputCost += costResult.costs.output
|
||||
@@ -2305,6 +2490,16 @@ router.get('/usage-costs', authenticateAdmin, async (req, res) => {
|
||||
cache_read_input_tokens: apiKey.usage.total.cacheReadTokens || 0
|
||||
}
|
||||
|
||||
// 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费
|
||||
const totalEph5m = apiKey.usage.total.ephemeral5mTokens || 0
|
||||
const totalEph1h = apiKey.usage.total.ephemeral1hTokens || 0
|
||||
if (totalEph5m > 0 || totalEph1h > 0) {
|
||||
usage.cache_creation = {
|
||||
ephemeral_5m_input_tokens: totalEph5m,
|
||||
ephemeral_1h_input_tokens: totalEph1h
|
||||
}
|
||||
}
|
||||
|
||||
// 使用加权平均价格计算(基于当前活跃模型的价格分布)
|
||||
const costResult = CostCalculator.calculateCost(usage, 'claude-3-5-haiku-20241022')
|
||||
totalCosts.inputCost += costResult.costs.input
|
||||
@@ -2377,6 +2572,16 @@ router.get('/usage-costs', authenticateAdmin, async (req, res) => {
|
||||
cache_read_input_tokens: parseInt(data.cacheReadTokens) || 0
|
||||
}
|
||||
|
||||
// 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费
|
||||
const eph5m = parseInt(data.ephemeral5mTokens) || 0
|
||||
const eph1h = parseInt(data.ephemeral1hTokens) || 0
|
||||
if (eph5m > 0 || eph1h > 0) {
|
||||
usage.cache_creation = {
|
||||
ephemeral_5m_input_tokens: eph5m,
|
||||
ephemeral_1h_input_tokens: eph1h
|
||||
}
|
||||
}
|
||||
|
||||
const costResult = CostCalculator.calculateCost(usage, model)
|
||||
|
||||
// 累加总费用
|
||||
@@ -2517,13 +2722,27 @@ router.get('/api-keys/:keyId/usage-records', authenticateAdmin, async (req, res)
|
||||
return null
|
||||
}
|
||||
|
||||
const toUsageObject = (record) => ({
|
||||
input_tokens: record.inputTokens || 0,
|
||||
output_tokens: record.outputTokens || 0,
|
||||
cache_creation_input_tokens: record.cacheCreateTokens || 0,
|
||||
cache_read_input_tokens: record.cacheReadTokens || 0,
|
||||
cache_creation: record.cacheCreation || record.cache_creation || null
|
||||
})
|
||||
// Map a stored usage record onto the snake_case usage shape passed to the cost calculator.
// A breakdown already stored on the record (cacheCreation / cache_creation) is kept as-is;
// otherwise one is rebuilt from the separately stored ephemeral 5m / 1h token counters.
const toUsageObject = (record) => {
  const result = {
    input_tokens: record.inputTokens || 0,
    output_tokens: record.outputTokens || 0,
    cache_creation_input_tokens: record.cacheCreateTokens || 0,
    cache_read_input_tokens: record.cacheReadTokens || 0,
    cache_creation: record.cacheCreation || record.cache_creation || null
  }

  // A breakdown stored on the record always wins over the standalone counters.
  if (result.cache_creation) {
    return result
  }

  const fiveMinuteTokens = parseInt(record.ephemeral5mTokens) || 0
  const oneHourTokens = parseInt(record.ephemeral1hTokens) || 0

  // Only attach the sub-object when at least one counter is positive.
  if (fiveMinuteTokens <= 0 && oneHourTokens <= 0) {
    return result
  }

  result.cache_creation = {
    ephemeral_5m_input_tokens: fiveMinuteTokens,
    ephemeral_1h_input_tokens: oneHourTokens
  }
  return result
}
|
||||
|
||||
const withinRange = (record) => {
|
||||
if (!record.timestamp) {
|
||||
@@ -2816,13 +3035,27 @@ router.get('/accounts/:accountId/usage-records', authenticateAdmin, async (req,
|
||||
keysToUse = [{ id: apiKeyId }]
|
||||
}
|
||||
|
||||
const toUsageObject = (record) => ({
|
||||
input_tokens: record.inputTokens || 0,
|
||||
output_tokens: record.outputTokens || 0,
|
||||
cache_creation_input_tokens: record.cacheCreateTokens || 0,
|
||||
cache_read_input_tokens: record.cacheReadTokens || 0,
|
||||
cache_creation: record.cacheCreation || record.cache_creation || null
|
||||
})
|
||||
// Build the snake_case usage object for cost calculation from a stored usage record.
// Prefers a breakdown stored on the record; falls back to the standalone
// ephemeral5mTokens / ephemeral1hTokens counters when no breakdown is present.
const toUsageObject = (record) => {
  const usageForCost = {
    input_tokens: record.inputTokens || 0,
    output_tokens: record.outputTokens || 0,
    cache_creation_input_tokens: record.cacheCreateTokens || 0,
    cache_read_input_tokens: record.cacheReadTokens || 0,
    cache_creation: record.cacheCreation || record.cache_creation || null
  }

  if (usageForCost.cache_creation) {
    return usageForCost
  }

  // No stored breakdown: rebuild it from the separately stored counters, if any are positive.
  const [shortLived, longLived] = [record.ephemeral5mTokens, record.ephemeral1hTokens].map(
    (raw) => parseInt(raw) || 0
  )
  const hasSplit = shortLived > 0 || longLived > 0
  if (hasSplit) {
    usageForCost.cache_creation = {
      ephemeral_5m_input_tokens: shortLived,
      ephemeral_1h_input_tokens: longLived
    }
  }

  return usageForCost
}
|
||||
|
||||
const withinRange = (record) => {
|
||||
if (!record.timestamp) {
|
||||
|
||||
@@ -468,6 +468,19 @@ async function handleMessagesRequest(req, res) {
|
||||
cache_creation_input_tokens: cacheCreateTokens,
|
||||
cache_read_input_tokens: cacheReadTokens
|
||||
}
|
||||
const requestBetaHeader =
|
||||
_headers['anthropic-beta'] ||
|
||||
_headers['Anthropic-Beta'] ||
|
||||
_headers['ANTHROPIC-BETA']
|
||||
if (requestBetaHeader) {
|
||||
usageObject.request_anthropic_beta = requestBetaHeader
|
||||
}
|
||||
if (typeof _requestBody?.speed === 'string' && _requestBody.speed.trim()) {
|
||||
usageObject.request_speed = _requestBody.speed.trim().toLowerCase()
|
||||
}
|
||||
if (typeof usageData.speed === 'string' && usageData.speed.trim()) {
|
||||
usageObject.speed = usageData.speed.trim().toLowerCase()
|
||||
}
|
||||
|
||||
// 如果有详细的缓存创建数据,添加到 usage 对象中
|
||||
if (ephemeral5mTokens > 0 || ephemeral1hTokens > 0) {
|
||||
@@ -562,6 +575,22 @@ async function handleMessagesRequest(req, res) {
|
||||
cache_creation_input_tokens: cacheCreateTokens,
|
||||
cache_read_input_tokens: cacheReadTokens
|
||||
}
|
||||
const requestBetaHeader =
|
||||
_headersConsole['anthropic-beta'] ||
|
||||
_headersConsole['Anthropic-Beta'] ||
|
||||
_headersConsole['ANTHROPIC-BETA']
|
||||
if (requestBetaHeader) {
|
||||
usageObject.request_anthropic_beta = requestBetaHeader
|
||||
}
|
||||
if (
|
||||
typeof _requestBodyConsole?.speed === 'string' &&
|
||||
_requestBodyConsole.speed.trim()
|
||||
) {
|
||||
usageObject.request_speed = _requestBodyConsole.speed.trim().toLowerCase()
|
||||
}
|
||||
if (typeof usageData.speed === 'string' && usageData.speed.trim()) {
|
||||
usageObject.speed = usageData.speed.trim().toLowerCase()
|
||||
}
|
||||
|
||||
// 如果有详细的缓存创建数据,添加到 usage 对象中
|
||||
if (ephemeral5mTokens > 0 || ephemeral1hTokens > 0) {
|
||||
@@ -728,6 +757,19 @@ async function handleMessagesRequest(req, res) {
|
||||
cache_creation_input_tokens: cacheCreateTokens,
|
||||
cache_read_input_tokens: cacheReadTokens
|
||||
}
|
||||
const requestBetaHeader =
|
||||
_headersCcr['anthropic-beta'] ||
|
||||
_headersCcr['Anthropic-Beta'] ||
|
||||
_headersCcr['ANTHROPIC-BETA']
|
||||
if (requestBetaHeader) {
|
||||
usageObject.request_anthropic_beta = requestBetaHeader
|
||||
}
|
||||
if (typeof _requestBodyCcr?.speed === 'string' && _requestBodyCcr.speed.trim()) {
|
||||
usageObject.request_speed = _requestBodyCcr.speed.trim().toLowerCase()
|
||||
}
|
||||
if (typeof usageData.speed === 'string' && usageData.speed.trim()) {
|
||||
usageObject.speed = usageData.speed.trim().toLowerCase()
|
||||
}
|
||||
|
||||
// 如果有详细的缓存创建数据,添加到 usage 对象中
|
||||
if (ephemeral5mTokens > 0 || ephemeral1hTokens > 0) {
|
||||
|
||||
@@ -275,7 +275,9 @@ router.post('/api/user-stats', async (req, res) => {
|
||||
inputTokens: 0,
|
||||
outputTokens: 0,
|
||||
cacheCreateTokens: 0,
|
||||
cacheReadTokens: 0
|
||||
cacheReadTokens: 0,
|
||||
ephemeral5mTokens: 0,
|
||||
ephemeral1hTokens: 0
|
||||
})
|
||||
}
|
||||
|
||||
@@ -284,6 +286,8 @@ router.post('/api/user-stats', async (req, res) => {
|
||||
modelUsage.outputTokens += parseInt(data.outputTokens) || 0
|
||||
modelUsage.cacheCreateTokens += parseInt(data.cacheCreateTokens) || 0
|
||||
modelUsage.cacheReadTokens += parseInt(data.cacheReadTokens) || 0
|
||||
modelUsage.ephemeral5mTokens += parseInt(data.ephemeral5mTokens) || 0
|
||||
modelUsage.ephemeral1hTokens += parseInt(data.ephemeral1hTokens) || 0
|
||||
}
|
||||
}
|
||||
|
||||
@@ -296,6 +300,14 @@ router.post('/api/user-stats', async (req, res) => {
|
||||
cache_read_input_tokens: usage.cacheReadTokens
|
||||
}
|
||||
|
||||
// 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费
|
||||
if (usage.ephemeral5mTokens > 0 || usage.ephemeral1hTokens > 0) {
|
||||
usageData.cache_creation = {
|
||||
ephemeral_5m_input_tokens: usage.ephemeral5mTokens,
|
||||
ephemeral_1h_input_tokens: usage.ephemeral1hTokens
|
||||
}
|
||||
}
|
||||
|
||||
const costResult = CostCalculator.calculateCost(usageData, model)
|
||||
totalCost += costResult.costs.total
|
||||
}
|
||||
@@ -310,6 +322,14 @@ router.post('/api/user-stats', async (req, res) => {
|
||||
cache_read_input_tokens: usage.cacheReadTokens || 0
|
||||
}
|
||||
|
||||
// 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费
|
||||
if (usage.ephemeral5mTokens > 0 || usage.ephemeral1hTokens > 0) {
|
||||
costUsage.cache_creation = {
|
||||
ephemeral_5m_input_tokens: usage.ephemeral5mTokens,
|
||||
ephemeral_1h_input_tokens: usage.ephemeral1hTokens
|
||||
}
|
||||
}
|
||||
|
||||
const costResult = CostCalculator.calculateCost(costUsage, 'claude-3-5-sonnet-20241022')
|
||||
totalCost = costResult.costs.total
|
||||
}
|
||||
@@ -328,6 +348,14 @@ router.post('/api/user-stats', async (req, res) => {
|
||||
cache_read_input_tokens: usage.cacheReadTokens || 0
|
||||
}
|
||||
|
||||
// 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费
|
||||
if (usage.ephemeral5mTokens > 0 || usage.ephemeral1hTokens > 0) {
|
||||
costUsage.cache_creation = {
|
||||
ephemeral_5m_input_tokens: usage.ephemeral5mTokens,
|
||||
ephemeral_1h_input_tokens: usage.ephemeral1hTokens
|
||||
}
|
||||
}
|
||||
|
||||
const costResult = CostCalculator.calculateCost(costUsage, 'claude-3-5-sonnet-20241022')
|
||||
totalCost = costResult.costs.total
|
||||
formattedCost = costResult.formatted.total
|
||||
@@ -797,6 +825,8 @@ router.post('/api/batch-model-stats', async (req, res) => {
|
||||
outputTokens: 0,
|
||||
cacheCreateTokens: 0,
|
||||
cacheReadTokens: 0,
|
||||
ephemeral5mTokens: 0,
|
||||
ephemeral1hTokens: 0,
|
||||
allTokens: 0,
|
||||
realCostMicro: 0,
|
||||
ratedCostMicro: 0,
|
||||
@@ -810,6 +840,8 @@ router.post('/api/batch-model-stats', async (req, res) => {
|
||||
modelUsage.outputTokens += parseInt(data.outputTokens) || 0
|
||||
modelUsage.cacheCreateTokens += parseInt(data.cacheCreateTokens) || 0
|
||||
modelUsage.cacheReadTokens += parseInt(data.cacheReadTokens) || 0
|
||||
modelUsage.ephemeral5mTokens += parseInt(data.ephemeral5mTokens) || 0
|
||||
modelUsage.ephemeral1hTokens += parseInt(data.ephemeral1hTokens) || 0
|
||||
modelUsage.allTokens += parseInt(data.allTokens) || 0
|
||||
modelUsage.realCostMicro += parseInt(data.realCostMicro) || 0
|
||||
modelUsage.ratedCostMicro += parseInt(data.ratedCostMicro) || 0
|
||||
@@ -832,6 +864,14 @@ router.post('/api/batch-model-stats', async (req, res) => {
|
||||
cache_read_input_tokens: usage.cacheReadTokens
|
||||
}
|
||||
|
||||
// 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费
|
||||
if (usage.ephemeral5mTokens > 0 || usage.ephemeral1hTokens > 0) {
|
||||
usageData.cache_creation = {
|
||||
ephemeral_5m_input_tokens: usage.ephemeral5mTokens,
|
||||
ephemeral_1h_input_tokens: usage.ephemeral1hTokens
|
||||
}
|
||||
}
|
||||
|
||||
// 优先使用存储的费用,否则回退到重新计算
|
||||
const { hasStoredCost } = usage
|
||||
const costData = CostCalculator.calculateCost(usageData, model)
|
||||
@@ -1362,6 +1402,8 @@ router.post('/api/user-model-stats', async (req, res) => {
|
||||
const model = match[1]
|
||||
|
||||
if (data && Object.keys(data).length > 0) {
|
||||
const ephemeral5m = parseInt(data.ephemeral5mTokens) || 0
|
||||
const ephemeral1h = parseInt(data.ephemeral1hTokens) || 0
|
||||
const usage = {
|
||||
input_tokens: parseInt(data.inputTokens) || 0,
|
||||
output_tokens: parseInt(data.outputTokens) || 0,
|
||||
@@ -1369,6 +1411,14 @@ router.post('/api/user-model-stats', async (req, res) => {
|
||||
cache_read_input_tokens: parseInt(data.cacheReadTokens) || 0
|
||||
}
|
||||
|
||||
// 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费
|
||||
if (ephemeral5m > 0 || ephemeral1h > 0) {
|
||||
usage.cache_creation = {
|
||||
ephemeral_5m_input_tokens: ephemeral5m,
|
||||
ephemeral_1h_input_tokens: ephemeral1h
|
||||
}
|
||||
}
|
||||
|
||||
// 优先使用存储的费用,否则回退到重新计算
|
||||
// 检查字段是否存在(而非 > 0),以支持真正的零成本场景
|
||||
const realCostMicro = parseInt(data.realCostMicro) || 0
|
||||
|
||||
@@ -285,12 +285,23 @@ async function handleChatCompletion(req, res, apiKeyData) {
|
||||
(usage.cache_creation.ephemeral_1h_input_tokens || 0)
|
||||
: usage.cache_creation_input_tokens || 0) || 0
|
||||
const cacheReadTokens = usage.cache_read_input_tokens || 0
|
||||
const usageWithRequestMeta = { ...usage }
|
||||
const requestBetaHeader =
|
||||
req.headers['anthropic-beta'] ||
|
||||
req.headers['Anthropic-Beta'] ||
|
||||
req.headers['ANTHROPIC-BETA']
|
||||
if (requestBetaHeader) {
|
||||
usageWithRequestMeta.request_anthropic_beta = requestBetaHeader
|
||||
}
|
||||
if (typeof claudeRequest?.speed === 'string' && claudeRequest.speed.trim()) {
|
||||
usageWithRequestMeta.request_speed = claudeRequest.speed.trim().toLowerCase()
|
||||
}
|
||||
|
||||
// 使用新的 recordUsageWithDetails 方法来支持详细的缓存数据
|
||||
apiKeyService
|
||||
.recordUsageWithDetails(
|
||||
apiKeyData.id,
|
||||
usage, // 直接传递整个 usage 对象,包含可能的 cache_creation 详细数据
|
||||
usageWithRequestMeta, // 传递 usage + 请求模式元信息(beta/speed)
|
||||
model,
|
||||
accountId,
|
||||
accountType
|
||||
@@ -413,11 +424,22 @@ async function handleChatCompletion(req, res, apiKeyData) {
|
||||
(usage.cache_creation.ephemeral_1h_input_tokens || 0)
|
||||
: usage.cache_creation_input_tokens || 0) || 0
|
||||
const cacheReadTokens = usage.cache_read_input_tokens || 0
|
||||
const usageWithRequestMeta = { ...usage }
|
||||
const requestBetaHeader =
|
||||
req.headers['anthropic-beta'] ||
|
||||
req.headers['Anthropic-Beta'] ||
|
||||
req.headers['ANTHROPIC-BETA']
|
||||
if (requestBetaHeader) {
|
||||
usageWithRequestMeta.request_anthropic_beta = requestBetaHeader
|
||||
}
|
||||
if (typeof claudeRequest?.speed === 'string' && claudeRequest.speed.trim()) {
|
||||
usageWithRequestMeta.request_speed = claudeRequest.speed.trim().toLowerCase()
|
||||
}
|
||||
// 使用新的 recordUsageWithDetails 方法来支持详细的缓存数据
|
||||
apiKeyService
|
||||
.recordUsageWithDetails(
|
||||
apiKeyData.id,
|
||||
usage, // 直接传递整个 usage 对象,包含可能的 cache_creation 详细数据
|
||||
usageWithRequestMeta, // 传递 usage + 请求模式元信息(beta/speed)
|
||||
claudeRequest.model,
|
||||
accountId,
|
||||
accountType
|
||||
|
||||
@@ -607,6 +607,16 @@ class AccountBalanceService {
|
||||
cache_read_input_tokens: parseInt(data.cacheReadTokens || 0)
|
||||
}
|
||||
|
||||
// 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费
|
||||
const eph5m = parseInt(data.ephemeral5mTokens || 0)
|
||||
const eph1h = parseInt(data.ephemeral1hTokens || 0)
|
||||
if (eph5m > 0 || eph1h > 0) {
|
||||
usage.cache_creation = {
|
||||
ephemeral_5m_input_tokens: eph5m,
|
||||
ephemeral_1h_input_tokens: eph1h
|
||||
}
|
||||
}
|
||||
|
||||
const costResult = CostCalculator.calculateCost(usage, model)
|
||||
totalCost += costResult.costs.total || 0
|
||||
}
|
||||
|
||||
@@ -4,7 +4,7 @@ const config = require('../../config/config')
|
||||
const redis = require('../models/redis')
|
||||
const logger = require('../utils/logger')
|
||||
const serviceRatesService = require('./serviceRatesService')
|
||||
const { isOpusModel } = require('../utils/modelHelper')
|
||||
const { isClaudeFamilyModel } = require('../utils/modelHelper')
|
||||
|
||||
const ACCOUNT_TYPE_CONFIG = {
|
||||
claude: { prefix: 'claude:account:' },
|
||||
@@ -1599,6 +1599,8 @@ class ApiKeyService {
|
||||
outputTokens,
|
||||
cacheCreateTokens,
|
||||
cacheReadTokens,
|
||||
0, // ephemeral5mTokens - recordUsage 不含详细缓存数据
|
||||
0, // ephemeral1hTokens - recordUsage 不含详细缓存数据
|
||||
model,
|
||||
isLongContextRequest
|
||||
)
|
||||
@@ -1649,7 +1651,7 @@ class ApiKeyService {
|
||||
async recordOpusCost(keyId, ratedCost, realCost, model, accountType) {
|
||||
try {
|
||||
// 判断是否为 Claude 系列模型(包含 Bedrock 格式等)
|
||||
if (!isOpusModel(model)) {
|
||||
if (!isClaudeFamilyModel(model)) {
|
||||
return
|
||||
}
|
||||
|
||||
@@ -1834,6 +1836,8 @@ class ApiKeyService {
|
||||
outputTokens,
|
||||
cacheCreateTokens,
|
||||
cacheReadTokens,
|
||||
ephemeral5mTokens,
|
||||
ephemeral1hTokens,
|
||||
model,
|
||||
costInfo.isLongContextRequest || false
|
||||
)
|
||||
|
||||
@@ -201,6 +201,16 @@ class CostInitService {
|
||||
parseInt(data.totalCacheReadTokens) || parseInt(data.cacheReadTokens) || 0
|
||||
}
|
||||
|
||||
// 添加 cache_creation 子对象以支持精确 ephemeral 定价
|
||||
const eph5m = parseInt(data.ephemeral5mTokens) || 0
|
||||
const eph1h = parseInt(data.ephemeral1hTokens) || 0
|
||||
if (eph5m > 0 || eph1h > 0) {
|
||||
usage.cache_creation = {
|
||||
ephemeral_5m_input_tokens: eph5m,
|
||||
ephemeral_1h_input_tokens: eph1h
|
||||
}
|
||||
}
|
||||
|
||||
const costResult = CostCalculator.calculateCost(usage, model)
|
||||
const cost = costResult.costs.total
|
||||
|
||||
|
||||
@@ -64,15 +64,18 @@ class PricingService {
|
||||
'claude-haiku-3-5': 0.0000016
|
||||
}
|
||||
|
||||
// 硬编码的 1M 上下文模型价格(美元/token)
|
||||
// 当总输入 tokens 超过 200k 时使用这些价格
|
||||
this.longContextPricing = {
|
||||
// claude-sonnet-4-20250514[1m] 模型的 1M 上下文价格
|
||||
'claude-sonnet-4-20250514[1m]': {
|
||||
input: 0.000006, // $6/MTok
|
||||
output: 0.0000225 // $22.50/MTok
|
||||
}
|
||||
// 未来可以添加更多 1M 模型的价格
|
||||
// Claude Prompt Caching 官方倍率(基于输入价格)
|
||||
this.claudeCacheMultipliers = {
|
||||
write5m: 1.25,
|
||||
write1h: 2,
|
||||
read: 0.1
|
||||
}
|
||||
|
||||
// Claude 扩展计费特性
|
||||
this.claudeFeatureFlags = {
|
||||
context1mBeta: 'context-1m-2025-08-07',
|
||||
fastModeBeta: 'fast-mode-2026-02-01',
|
||||
fastModeSpeed: 'fast'
|
||||
}
|
||||
}
|
||||
|
||||
@@ -462,14 +465,139 @@ class PricingService {
|
||||
return pricing
|
||||
}
|
||||
|
||||
// 获取 1 小时缓存价格
|
||||
getEphemeral1hPricing(modelName) {
|
||||
// 从 usage 对象中提取 beta 特性列表(小写)
|
||||
extractBetaFeatures(usage) {
|
||||
const features = new Set()
|
||||
if (!usage || typeof usage !== 'object') {
|
||||
return features
|
||||
}
|
||||
|
||||
const requestHeaders = usage.request_headers || usage.requestHeaders || null
|
||||
const headerBeta =
|
||||
requestHeaders && typeof requestHeaders === 'object'
|
||||
? requestHeaders['anthropic-beta'] ||
|
||||
requestHeaders['Anthropic-Beta'] ||
|
||||
requestHeaders['ANTHROPIC-BETA']
|
||||
: null
|
||||
|
||||
const candidates = [
|
||||
usage.anthropic_beta,
|
||||
usage.anthropicBeta,
|
||||
usage.request_anthropic_beta,
|
||||
usage.requestAnthropicBeta,
|
||||
usage.beta_header,
|
||||
usage.betaHeader,
|
||||
usage.beta_features,
|
||||
headerBeta
|
||||
]
|
||||
|
||||
const addFeature = (value) => {
|
||||
if (!value || typeof value !== 'string') {
|
||||
return
|
||||
}
|
||||
value
|
||||
.split(',')
|
||||
.map((item) => item.trim().toLowerCase())
|
||||
.filter(Boolean)
|
||||
.forEach((item) => features.add(item))
|
||||
}
|
||||
|
||||
for (const candidate of candidates) {
|
||||
if (Array.isArray(candidate)) {
|
||||
candidate.forEach(addFeature)
|
||||
} else {
|
||||
addFeature(candidate)
|
||||
}
|
||||
}
|
||||
|
||||
return features
|
||||
}
|
||||
|
||||
// 提取请求/响应中的 speed 字段(小写)
|
||||
extractSpeedSignal(usage) {
|
||||
if (!usage || typeof usage !== 'object') {
|
||||
return { responseSpeed: '', requestSpeed: '' }
|
||||
}
|
||||
|
||||
const normalize = (value) =>
|
||||
typeof value === 'string' && value.trim() ? value.trim().toLowerCase() : ''
|
||||
|
||||
return {
|
||||
responseSpeed: normalize(usage.speed),
|
||||
requestSpeed: normalize(usage.request_speed || usage.requestSpeed)
|
||||
}
|
||||
}
|
||||
|
||||
// Claude Fast Mode 目前仅适用于 Opus 4.6 系列
|
||||
isFastModeEligibleClaudeModel(modelName) {
|
||||
return typeof modelName === 'string' && modelName.toLowerCase().includes('opus-4-6')
|
||||
}
|
||||
|
||||
// 去掉模型名中的 [1m] 后缀,便于价格查找
|
||||
stripLongContextSuffix(modelName) {
|
||||
if (typeof modelName !== 'string') {
|
||||
return modelName
|
||||
}
|
||||
return modelName.replace(/\[1m\]/gi, '').trim()
|
||||
}
|
||||
|
||||
// 获取 Fast Mode 对应的价格条目(仅匹配 fast/ 前缀)
|
||||
getFastModePricing(modelName) {
|
||||
if (!this.pricingData || !modelName) {
|
||||
return null
|
||||
}
|
||||
|
||||
const cleanedModelName = this.stripLongContextSuffix(modelName)
|
||||
const exactCandidates = new Set([`fast/${cleanedModelName}`])
|
||||
|
||||
if (cleanedModelName.startsWith('fast/')) {
|
||||
exactCandidates.add(cleanedModelName)
|
||||
}
|
||||
|
||||
for (const candidate of exactCandidates) {
|
||||
if (this.pricingData[candidate]) {
|
||||
logger.debug(`💰 Found exact fast pricing for ${modelName}: ${candidate}`)
|
||||
return this.pricingData[candidate]
|
||||
}
|
||||
}
|
||||
|
||||
const normalizedModel = cleanedModelName.toLowerCase().replace(/[_-]/g, '')
|
||||
for (const [key, value] of Object.entries(this.pricingData)) {
|
||||
if (!key.startsWith('fast/')) {
|
||||
continue
|
||||
}
|
||||
const normalizedFastKey = key.slice('fast/'.length).toLowerCase().replace(/[_-]/g, '')
|
||||
if (
|
||||
normalizedFastKey.includes(normalizedModel) ||
|
||||
normalizedModel.includes(normalizedFastKey)
|
||||
) {
|
||||
logger.debug(`💰 Found fuzzy fast pricing for ${modelName}: ${key}`)
|
||||
return value
|
||||
}
|
||||
}
|
||||
|
||||
logger.debug(`💰 No fast pricing found for model: ${modelName}`)
|
||||
return null
|
||||
}
|
||||
|
||||
// 获取 1 小时缓存价格(优先使用 model_pricing.json 中的模型字段)
|
||||
getEphemeral1hPricing(modelName, pricing = null) {
|
||||
if (
|
||||
pricing?.cache_creation_input_token_cost_above_1hr !== null &&
|
||||
pricing?.cache_creation_input_token_cost_above_1hr !== undefined
|
||||
) {
|
||||
return pricing.cache_creation_input_token_cost_above_1hr
|
||||
}
|
||||
|
||||
if (!modelName) {
|
||||
return 0
|
||||
}
|
||||
|
||||
// 尝试直接匹配
|
||||
if (this.ephemeral1hPricing[modelName]) {
|
||||
if (
|
||||
this.ephemeral1hPricing[modelName] !== null &&
|
||||
this.ephemeral1hPricing[modelName] !== undefined
|
||||
) {
|
||||
return this.ephemeral1hPricing[modelName]
|
||||
}
|
||||
|
||||
@@ -498,40 +626,47 @@ class PricingService {
|
||||
|
||||
// 计算使用费用
|
||||
calculateCost(usage, modelName) {
|
||||
// 检查是否为 1M 上下文模型
|
||||
const isLongContextModel = modelName && modelName.includes('[1m]')
|
||||
const normalizedModelName = this.stripLongContextSuffix(modelName)
|
||||
|
||||
// 检查是否为 1M 上下文模型(用户通过 [1m] 后缀主动选择长上下文模式)
|
||||
const isLongContextModel = typeof modelName === 'string' && modelName.includes('[1m]')
|
||||
let isLongContextRequest = false
|
||||
let useLongContextPricing = false
|
||||
|
||||
if (isLongContextModel) {
|
||||
// 计算总输入 tokens
|
||||
const inputTokens = usage.input_tokens || 0
|
||||
const cacheCreationTokens = usage.cache_creation_input_tokens || 0
|
||||
const cacheReadTokens = usage.cache_read_input_tokens || 0
|
||||
const totalInputTokens = inputTokens + cacheCreationTokens + cacheReadTokens
|
||||
// 计算总输入 tokens(用于判断是否超过 200K 阈值)
|
||||
const inputTokens = usage.input_tokens || 0
|
||||
const cacheCreationTokens = usage.cache_creation_input_tokens || 0
|
||||
const cacheReadTokens = usage.cache_read_input_tokens || 0
|
||||
const totalInputTokens = inputTokens + cacheCreationTokens + cacheReadTokens
|
||||
|
||||
// 如果总输入超过 200k,使用 1M 上下文价格
|
||||
if (totalInputTokens > 200000) {
|
||||
isLongContextRequest = true
|
||||
// 检查是否有硬编码的 1M 价格
|
||||
if (this.longContextPricing[modelName]) {
|
||||
useLongContextPricing = true
|
||||
} else {
|
||||
// 如果没有找到硬编码价格,使用第一个 1M 模型的价格作为默认
|
||||
const defaultLongContextModel = Object.keys(this.longContextPricing)[0]
|
||||
if (defaultLongContextModel) {
|
||||
useLongContextPricing = true
|
||||
logger.warn(
|
||||
`⚠️ No specific 1M pricing for ${modelName}, using default from ${defaultLongContextModel}`
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
// 识别 Claude 特性标识
|
||||
const betaFeatures = this.extractBetaFeatures(usage)
|
||||
const hasContext1mBeta = betaFeatures.has(this.claudeFeatureFlags.context1mBeta)
|
||||
const hasFastModeBeta = betaFeatures.has(this.claudeFeatureFlags.fastModeBeta)
|
||||
const { responseSpeed, requestSpeed } = this.extractSpeedSignal(usage)
|
||||
const hasFastSpeedSignal =
|
||||
responseSpeed === this.claudeFeatureFlags.fastModeSpeed ||
|
||||
requestSpeed === this.claudeFeatureFlags.fastModeSpeed
|
||||
const isFastModeRequest =
|
||||
hasFastModeBeta &&
|
||||
hasFastSpeedSignal &&
|
||||
this.isFastModeEligibleClaudeModel(normalizedModelName)
|
||||
const standardPricing = this.getModelPricing(modelName)
|
||||
const fastPricing = isFastModeRequest ? this.getFastModePricing(normalizedModelName) : null
|
||||
const pricing = fastPricing || standardPricing
|
||||
const isLongContextModeEnabled = isLongContextModel || hasContext1mBeta
|
||||
|
||||
// 当 [1m] 模型总输入超过 200K 时,进入 200K+ 计费逻辑
|
||||
// 根据 Anthropic 官方文档:当总输入超过 200K 时,整个请求所有 token 类型都使用高档价格
|
||||
if (isLongContextModeEnabled && totalInputTokens > 200000) {
|
||||
isLongContextRequest = true
|
||||
useLongContextPricing = true
|
||||
logger.info(
|
||||
`💰 Using 200K+ pricing for ${modelName}: total input tokens = ${totalInputTokens.toLocaleString()}`
|
||||
)
|
||||
}
|
||||
|
||||
const pricing = this.getModelPricing(modelName)
|
||||
|
||||
if (!pricing && !useLongContextPricing) {
|
||||
if (!pricing) {
|
||||
return {
|
||||
inputCost: 0,
|
||||
outputCost: 0,
|
||||
@@ -545,59 +680,109 @@ class PricingService {
|
||||
}
|
||||
}
|
||||
|
||||
let inputCost = 0
|
||||
let outputCost = 0
|
||||
const isClaudeModel =
|
||||
(modelName && modelName.toLowerCase().includes('claude')) ||
|
||||
(typeof pricing?.litellm_provider === 'string' &&
|
||||
pricing.litellm_provider.toLowerCase().includes('anthropic'))
|
||||
|
||||
if (useLongContextPricing) {
|
||||
// 使用 1M 上下文特殊价格(仅输入和输出价格改变)
|
||||
const longContextPrices =
|
||||
this.longContextPricing[modelName] ||
|
||||
this.longContextPricing[Object.keys(this.longContextPricing)[0]]
|
||||
|
||||
inputCost = (usage.input_tokens || 0) * longContextPrices.input
|
||||
outputCost = (usage.output_tokens || 0) * longContextPrices.output
|
||||
|
||||
logger.info(
|
||||
`💰 Using 1M context pricing for ${modelName}: input=$${longContextPrices.input}/token, output=$${longContextPrices.output}/token`
|
||||
if (isFastModeRequest && fastPricing) {
|
||||
logger.info(`🚀 Fast mode pricing profile selected: fast/${normalizedModelName}`)
|
||||
} else if (isFastModeRequest && !fastPricing) {
|
||||
logger.warn(
|
||||
`⚠️ Fast mode request detected but no fast pricing profile found for ${normalizedModelName}; fallback to standard profile`
|
||||
)
|
||||
} else {
|
||||
// 使用正常价格
|
||||
inputCost = (usage.input_tokens || 0) * (pricing?.input_cost_per_token || 0)
|
||||
outputCost = (usage.output_tokens || 0) * (pricing?.output_cost_per_token || 0)
|
||||
}
|
||||
|
||||
// 缓存价格保持不变(即使对于 1M 模型)
|
||||
const cacheReadCost =
|
||||
(usage.cache_read_input_tokens || 0) * (pricing?.cache_read_input_token_cost || 0)
|
||||
const baseInputPrice = pricing.input_cost_per_token || 0
|
||||
const hasInput200kPrice =
|
||||
pricing.input_cost_per_token_above_200k_tokens !== null &&
|
||||
pricing.input_cost_per_token_above_200k_tokens !== undefined
|
||||
|
||||
// 处理缓存创建费用:
|
||||
// 1. 如果有详细的 cache_creation 对象,使用它
|
||||
// 2. 否则使用总的 cache_creation_input_tokens(向后兼容)
|
||||
// 确定实际使用的输入价格(普通或 200K+ 高档价格)
|
||||
// Claude 模型在 200K+ 场景下如果缺少官方字段,按 2 倍输入价兜底
|
||||
const actualInputPrice = useLongContextPricing
|
||||
? hasInput200kPrice
|
||||
? pricing.input_cost_per_token_above_200k_tokens
|
||||
: isClaudeModel
|
||||
? baseInputPrice * 2
|
||||
: baseInputPrice
|
||||
: baseInputPrice
|
||||
|
||||
const baseOutputPrice = pricing.output_cost_per_token || 0
|
||||
const hasOutput200kPrice =
|
||||
pricing.output_cost_per_token_above_200k_tokens !== null &&
|
||||
pricing.output_cost_per_token_above_200k_tokens !== undefined
|
||||
const actualOutputPrice = useLongContextPricing
|
||||
? hasOutput200kPrice
|
||||
? pricing.output_cost_per_token_above_200k_tokens
|
||||
: baseOutputPrice
|
||||
: baseOutputPrice
|
||||
|
||||
let actualCacheCreatePrice = 0
|
||||
let actualCacheReadPrice = 0
|
||||
let actualEphemeral1hPrice = 0
|
||||
|
||||
if (isClaudeModel) {
|
||||
// Claude 模型缓存价格统一按输入价格倍率推导,避免来源字段不一致导致计费偏差
|
||||
actualCacheCreatePrice = actualInputPrice * this.claudeCacheMultipliers.write5m
|
||||
actualCacheReadPrice = actualInputPrice * this.claudeCacheMultipliers.read
|
||||
actualEphemeral1hPrice = actualInputPrice * this.claudeCacheMultipliers.write1h
|
||||
} else {
|
||||
actualCacheCreatePrice = useLongContextPricing
|
||||
? pricing.cache_creation_input_token_cost_above_200k_tokens ||
|
||||
pricing.cache_creation_input_token_cost ||
|
||||
0
|
||||
: pricing.cache_creation_input_token_cost || 0
|
||||
|
||||
actualCacheReadPrice = useLongContextPricing
|
||||
? pricing.cache_read_input_token_cost_above_200k_tokens ||
|
||||
pricing.cache_read_input_token_cost ||
|
||||
0
|
||||
: pricing.cache_read_input_token_cost || 0
|
||||
|
||||
const defaultEphemeral1hPrice = this.getEphemeral1hPricing(modelName, pricing)
|
||||
|
||||
// 非 Claude 模型维持原有字段优先级
|
||||
actualEphemeral1hPrice = useLongContextPricing
|
||||
? pricing.cache_creation_input_token_cost_above_1hr_above_200k_tokens !== null &&
|
||||
pricing.cache_creation_input_token_cost_above_1hr_above_200k_tokens !== undefined
|
||||
? pricing.cache_creation_input_token_cost_above_1hr_above_200k_tokens
|
||||
: defaultEphemeral1hPrice
|
||||
: defaultEphemeral1hPrice
|
||||
}
|
||||
|
||||
// 计算各项费用
|
||||
const inputCost = inputTokens * actualInputPrice
|
||||
const outputCost = (usage.output_tokens || 0) * actualOutputPrice
|
||||
|
||||
// 处理缓存费用
|
||||
let ephemeral5mCost = 0
|
||||
let ephemeral1hCost = 0
|
||||
let cacheCreateCost = 0
|
||||
let cacheReadCost = 0
|
||||
|
||||
if (usage.cache_creation && typeof usage.cache_creation === 'object') {
|
||||
// 有详细的缓存创建数据
|
||||
const ephemeral5mTokens = usage.cache_creation.ephemeral_5m_input_tokens || 0
|
||||
const ephemeral1hTokens = usage.cache_creation.ephemeral_1h_input_tokens || 0
|
||||
|
||||
// 5分钟缓存使用标准的 cache_creation_input_token_cost
|
||||
ephemeral5mCost = ephemeral5mTokens * (pricing?.cache_creation_input_token_cost || 0)
|
||||
// 5分钟缓存使用 cache_creation 价格
|
||||
ephemeral5mCost = ephemeral5mTokens * actualCacheCreatePrice
|
||||
|
||||
// 1小时缓存使用硬编码的价格
|
||||
const ephemeral1hPrice = this.getEphemeral1hPricing(modelName)
|
||||
ephemeral1hCost = ephemeral1hTokens * ephemeral1hPrice
|
||||
// 1小时缓存使用 ephemeral_1h 价格
|
||||
ephemeral1hCost = ephemeral1hTokens * actualEphemeral1hPrice
|
||||
|
||||
// 总的缓存创建费用
|
||||
cacheCreateCost = ephemeral5mCost + ephemeral1hCost
|
||||
} else if (usage.cache_creation_input_tokens) {
|
||||
} else if (cacheCreationTokens) {
|
||||
// 旧格式,所有缓存创建 tokens 都按 5 分钟价格计算(向后兼容)
|
||||
cacheCreateCost =
|
||||
(usage.cache_creation_input_tokens || 0) * (pricing?.cache_creation_input_token_cost || 0)
|
||||
cacheCreateCost = cacheCreationTokens * actualCacheCreatePrice
|
||||
ephemeral5mCost = cacheCreateCost
|
||||
}
|
||||
|
||||
// 缓存读取费用
|
||||
cacheReadCost = cacheReadTokens * actualCacheReadPrice
|
||||
|
||||
return {
|
||||
inputCost,
|
||||
outputCost,
|
||||
@@ -609,21 +794,11 @@ class PricingService {
|
||||
hasPricing: true,
|
||||
isLongContextRequest,
|
||||
pricing: {
|
||||
input: useLongContextPricing
|
||||
? (
|
||||
this.longContextPricing[modelName] ||
|
||||
this.longContextPricing[Object.keys(this.longContextPricing)[0]]
|
||||
)?.input || 0
|
||||
: pricing?.input_cost_per_token || 0,
|
||||
output: useLongContextPricing
|
||||
? (
|
||||
this.longContextPricing[modelName] ||
|
||||
this.longContextPricing[Object.keys(this.longContextPricing)[0]]
|
||||
)?.output || 0
|
||||
: pricing?.output_cost_per_token || 0,
|
||||
cacheCreate: pricing?.cache_creation_input_token_cost || 0,
|
||||
cacheRead: pricing?.cache_read_input_token_cost || 0,
|
||||
ephemeral1h: this.getEphemeral1hPricing(modelName)
|
||||
input: actualInputPrice,
|
||||
output: actualOutputPrice,
|
||||
cacheCreate: actualCacheCreatePrice,
|
||||
cacheRead: actualCacheReadPrice,
|
||||
ephemeral1h: actualEphemeral1hPrice
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1300,6 +1300,8 @@ class DroidRelayService {
|
||||
usageObject.output_tokens || 0,
|
||||
usageObject.cache_creation_input_tokens || 0,
|
||||
usageObject.cache_read_input_tokens || 0,
|
||||
0, // ephemeral5mTokens - Droid 不含详细缓存数据
|
||||
0, // ephemeral1hTokens - Droid 不含详细缓存数据
|
||||
model,
|
||||
false
|
||||
)
|
||||
|
||||
@@ -2,7 +2,7 @@ const redis = require('../models/redis')
|
||||
const logger = require('../utils/logger')
|
||||
const pricingService = require('./pricingService')
|
||||
const serviceRatesService = require('./serviceRatesService')
|
||||
const { isOpusModel } = require('../utils/modelHelper')
|
||||
const { isClaudeFamilyModel } = require('../utils/modelHelper')
|
||||
|
||||
function pad2(n) {
|
||||
return String(n).padStart(2, '0')
|
||||
@@ -151,7 +151,7 @@ class WeeklyClaudeCostInitService {
|
||||
}
|
||||
const keyId = match[1]
|
||||
const model = match[2]
|
||||
if (!isOpusModel(model)) {
|
||||
if (!isClaudeFamilyModel(model)) {
|
||||
continue
|
||||
}
|
||||
matchedClaudeKeys++
|
||||
|
||||
@@ -188,22 +188,6 @@ function isOpus45OrNewer(modelName) {
|
||||
return false
|
||||
}
|
||||
|
||||
/**
|
||||
* Report whether a model name refers to an Opus model (any version).
|
||||
* Returns false for empty or non-string input. Otherwise the name is passed through
* parseVendorPrefixedModel and the resulting baseModel is trimmed and lower-cased; the
* result is true when that string contains "opus". Only the "opus" substring is checked
* here - the function does not itself verify that the name is a Claude model.
|
||||
*/
|
||||
function isOpusModel(modelName) {
|
||||
if (!modelName || typeof modelName !== 'string') {
|
||||
return false
|
||||
}
|
||||
const { baseModel } = parseVendorPrefixedModel(modelName)
|
||||
const m = (baseModel || '').trim().toLowerCase()
|
||||
if (!m) {
|
||||
return false
|
||||
}
|
||||
return m.includes('opus')
|
||||
}
|
||||
|
||||
/**
|
||||
* 判断某个 model 名称是否属于 Anthropic Claude 系列模型。
|
||||
*
|
||||
@@ -253,6 +237,5 @@ module.exports = {
|
||||
getEffectiveModel,
|
||||
getVendorType,
|
||||
isOpus45OrNewer,
|
||||
isOpusModel,
|
||||
isClaudeFamilyModel
|
||||
}
|
||||
|
||||
397
tests/pricingService.test.js
Normal file
397
tests/pricingService.test.js
Normal file
@@ -0,0 +1,397 @@
|
||||
/**
|
||||
* PricingService long-context (200K+) tiered billing tests
|
||||
*
|
||||
* Covers the tiered billing logic applied when a [1m] model's total input exceeds 200K tokens:
|
||||
* - Input/output prefer the *_above_200k_tokens fields from model_pricing.json
|
||||
* - Claude cache prices are derived from the input price by multipliers:
|
||||
* - 5m cache write = input * 1.25
|
||||
* - 1h cache write = input * 2
|
||||
* - cache read = input * 0.1
|
||||
*/
|
||||
|
||||
// Mock logger to avoid console output during tests.
// Each listed logger method is replaced by a bare jest.fn() stub.
|
||||
jest.mock('../src/utils/logger', () => ({
|
||||
api: jest.fn(),
|
||||
warn: jest.fn(),
|
||||
error: jest.fn(),
|
||||
info: jest.fn(),
|
||||
debug: jest.fn(),
|
||||
success: jest.fn(),
|
||||
database: jest.fn(),
|
||||
security: jest.fn()
|
||||
}))
|
||||
|
||||
// Mock fs to control pricing data.
// This is a partial mock: the real fs exports are spread in first, then the listed
// functions are overridden with jest.fn() stubs so each test decides what they return.
|
||||
jest.mock('fs', () => {
|
||||
const actual = jest.requireActual('fs')
|
||||
return {
|
||||
...actual,
|
||||
existsSync: jest.fn(),
|
||||
readFileSync: jest.fn(),
|
||||
writeFileSync: jest.fn(),
|
||||
mkdirSync: jest.fn(),
|
||||
statSync: jest.fn(),
|
||||
watchFile: jest.fn(),
|
||||
unwatchFile: jest.fn()
|
||||
}
|
||||
})
|
||||
|
||||
describe('PricingService - 200K+ Long Context Pricing', () => {
|
||||
let pricingService
|
||||
const fs = require('fs')
|
||||
|
||||
// Mock pricing table used by every test in this suite. The claude-sonnet-4-20250514 entry
// mimics that model's full price data (taken from model_pricing.json, per the original author).
|
||||
const mockPricingData = {
|
||||
'claude-sonnet-4-20250514': {
|
||||
input_cost_per_token: 0.000003, // $3/MTok
|
||||
output_cost_per_token: 0.000015, // $15/MTok
|
||||
cache_creation_input_token_cost: 0.00000375, // $3.75/MTok
|
||||
cache_read_input_token_cost: 0.0000003, // $0.30/MTok
|
||||
max_input_tokens: 1000000,
|
||||
// 200K+ higher-tier prices
|
||||
input_cost_per_token_above_200k_tokens: 0.000006, // $6/MTok (2x)
|
||||
output_cost_per_token_above_200k_tokens: 0.0000225, // $22.50/MTok (1.5x)
|
||||
cache_creation_input_token_cost_above_200k_tokens: 0.0000075, // $7.50/MTok (2x)
|
||||
cache_read_input_token_cost_above_200k_tokens: 0.0000006, // $0.60/MTok (2x)
|
||||
// 1-hour cache prices
|
||||
cache_creation_input_token_cost_above_1hr: 0.0000075,
|
||||
cache_creation_input_token_cost_above_1hr_above_200k_tokens: 0.000015
|
||||
},
|
||||
// A model with no above_200k fields
|
||||
'claude-3-haiku-20240307': {
|
||||
input_cost_per_token: 0.00000025,
|
||||
output_cost_per_token: 0.00000125,
|
||||
cache_creation_input_token_cost: 0.0000003,
|
||||
cache_read_input_token_cost: 0.00000003
|
||||
},
|
||||
// Model used by the Fast Mode tests (Opus 4.6)
|
||||
'claude-opus-4-6': {
|
||||
input_cost_per_token: 0.000005,
|
||||
output_cost_per_token: 0.000025,
|
||||
cache_creation_input_token_cost: 0.00000625,
|
||||
cache_read_input_token_cost: 0.0000005,
|
||||
input_cost_per_token_above_200k_tokens: 0.00001,
|
||||
output_cost_per_token_above_200k_tokens: 0.0000375
|
||||
}
|
||||
}
|
||||
|
||||
// Per-test setup: reset the module registry, prime the fs stubs, then load a pricingService
// and point it at the mock pricing table.
beforeEach(() => {
|
||||
// Clear cached modules so the require() below loads pricingService afresh
|
||||
jest.resetModules()
|
||||
|
||||
// Configure the fs mock
|
||||
fs.existsSync.mockReturnValue(true)
|
||||
fs.readFileSync.mockReturnValue(JSON.stringify(mockPricingData))
|
||||
fs.statSync.mockReturnValue({ mtime: new Date(), mtimeMs: Date.now() })
|
||||
fs.watchFile.mockImplementation(() => {})
|
||||
fs.unwatchFile.mockImplementation(() => {})
|
||||
|
||||
// Reload pricingService
|
||||
pricingService = require('../src/services/pricingService')
|
||||
|
||||
// Set the pricing data directly (bypassing initialization)
|
||||
pricingService.pricingData = mockPricingData
|
||||
pricingService.lastUpdated = new Date()
|
||||
})
|
||||
|
||||
// Per-test teardown: call the service's optional cleanup hook and reset all jest mocks.
afterEach(() => {
|
||||
// Clean up timers (per the original author; cleanup() itself is not visible here - confirm)
|
||||
if (pricingService.cleanup) {
|
||||
pricingService.cleanup()
|
||||
}
|
||||
jest.clearAllMocks()
|
||||
})
|
||||
|
||||
// Threshold boundary tests: 199999 and 200000 input tokens must stay on base prices,
// 200001 must switch a [1m] model to the 200K+ tier.
describe('阈值边界测试', () => {
|
||||
it('199999 tokens - 应使用基础价格', () => {
|
||||
const usage = {
|
||||
input_tokens: 199999,
|
||||
output_tokens: 1000,
|
||||
cache_creation_input_tokens: 0,
|
||||
cache_read_input_tokens: 0
|
||||
}
|
||||
|
||||
const result = pricingService.calculateCost(usage, 'claude-sonnet-4-20250514[1m]')
|
||||
|
||||
expect(result.isLongContextRequest).toBe(false)
|
||||
expect(result.pricing.input).toBe(0.000003) // base price
|
||||
expect(result.pricing.output).toBe(0.000015) // base price
|
||||
})
|
||||
|
||||
it('200000 tokens - 应使用基础价格(边界不触发)', () => {
|
||||
const usage = {
|
||||
input_tokens: 200000,
|
||||
output_tokens: 1000,
|
||||
cache_creation_input_tokens: 0,
|
||||
cache_read_input_tokens: 0
|
||||
}
|
||||
|
||||
const result = pricingService.calculateCost(usage, 'claude-sonnet-4-20250514[1m]')
|
||||
|
||||
// 200000 is not greater than 200000, so the higher tier is not triggered
|
||||
expect(result.isLongContextRequest).toBe(false)
|
||||
expect(result.pricing.input).toBe(0.000003) // base price
|
||||
})
|
||||
|
||||
it('200001 tokens - 应使用 200K+ 高档价格', () => {
|
||||
const usage = {
|
||||
input_tokens: 200001,
|
||||
output_tokens: 1000,
|
||||
cache_creation_input_tokens: 0,
|
||||
cache_read_input_tokens: 0
|
||||
}
|
||||
|
||||
const result = pricingService.calculateCost(usage, 'claude-sonnet-4-20250514[1m]')
|
||||
|
||||
expect(result.isLongContextRequest).toBe(true)
|
||||
expect(result.pricing.input).toBe(0.000006) // 200K+ tier price
|
||||
expect(result.pricing.output).toBe(0.0000225) // 200K+ tier price
|
||||
})
|
||||
})
|
||||
|
||||
// Total-input tests: the 200K threshold is expected to apply to the sum
// input_tokens + cache_creation_input_tokens + cache_read_input_tokens.
describe('总输入计算(input + cache_creation + cache_read)', () => {
|
||||
it('分散在各类 token 中总计超过 200K 应触发高档价格', () => {
|
||||
const usage = {
|
||||
input_tokens: 150000,
|
||||
output_tokens: 10000,
|
||||
cache_creation_input_tokens: 40000,
|
||||
cache_read_input_tokens: 20000
|
||||
}
|
||||
// Total: 150000 + 40000 + 20000 = 210000 > 200000
|
||||
|
||||
const result = pricingService.calculateCost(usage, 'claude-sonnet-4-20250514[1m]')
|
||||
|
||||
expect(result.isLongContextRequest).toBe(true)
|
||||
expect(result.pricing.input).toBe(0.000006)
|
||||
expect(result.pricing.output).toBe(0.0000225)
|
||||
expect(result.pricing.cacheCreate).toBe(0.0000075)
|
||||
expect(result.pricing.cacheRead).toBeCloseTo(0.0000006, 12)
|
||||
})
|
||||
|
||||
it('仅 cache_creation + cache_read 超过 200K 也应触发', () => {
|
||||
const usage = {
|
||||
input_tokens: 50000,
|
||||
output_tokens: 5000,
|
||||
cache_creation_input_tokens: 100000,
|
||||
cache_read_input_tokens: 60000
|
||||
}
|
||||
// Total: 50000 + 100000 + 60000 = 210000 > 200000
|
||||
|
||||
const result = pricingService.calculateCost(usage, 'claude-sonnet-4-20250514[1m]')
|
||||
|
||||
expect(result.isLongContextRequest).toBe(true)
|
||||
})
|
||||
})
|
||||
|
||||
// Cache higher-tier price tests: cache write and cache read prices once total input exceeds 200K.
// NOTE(review): this file's header says Claude cache prices are derived from the input price
// (5m write = input * 1.25, read = input * 0.1). With the 200K+ input price of 0.000006 those
// products equal the mock's *_above_200k_tokens cache fields, so these assertions cannot tell
// the two sources apart - confirm which one calculateCost actually uses.
describe('Cache 高档价格测试', () => {
|
||||
it('cache_creation 应使用 cache_creation_input_token_cost_above_200k_tokens', () => {
|
||||
const usage = {
|
||||
input_tokens: 150000,
|
||||
output_tokens: 1000,
|
||||
cache_creation_input_tokens: 60000, // 60K cache creation
|
||||
cache_read_input_tokens: 0
|
||||
}
|
||||
// Total: 210000 > 200000
|
||||
|
||||
const result = pricingService.calculateCost(usage, 'claude-sonnet-4-20250514[1m]')
|
||||
|
||||
// cache_creation_input_token_cost_above_200k_tokens = 0.0000075
|
||||
expect(result.pricing.cacheCreate).toBe(0.0000075)
|
||||
expect(result.cacheCreateCost).toBeCloseTo(60000 * 0.0000075, 10)
|
||||
})
|
||||
|
||||
it('cache_read 应使用 cache_read_input_token_cost_above_200k_tokens', () => {
|
||||
const usage = {
|
||||
input_tokens: 150000,
|
||||
output_tokens: 1000,
|
||||
cache_creation_input_tokens: 0,
|
||||
cache_read_input_tokens: 60000 // 60K cache read
|
||||
}
|
||||
// Total: 210000 > 200000
|
||||
|
||||
const result = pricingService.calculateCost(usage, 'claude-sonnet-4-20250514[1m]')
|
||||
|
||||
// cache_read_input_token_cost_above_200k_tokens = 0.0000006
|
||||
expect(result.pricing.cacheRead).toBeCloseTo(0.0000006, 12)
|
||||
expect(result.cacheReadCost).toBeCloseTo(60000 * 0.0000006, 10)
|
||||
})
|
||||
})
|
||||
|
||||
// Detailed cache-creation tests: usage carries a cache_creation object that splits the
// cache-write tokens into ephemeral_5m and ephemeral_1h buckets.
describe('详细缓存创建数据(ephemeral_5m / ephemeral_1h)', () => {
|
||||
it('200K+ 时 Claude ephemeral_1h 应按 input * 2 计算', () => {
|
||||
const usage = {
|
||||
input_tokens: 200001,
|
||||
output_tokens: 1000,
|
||||
cache_creation_input_tokens: 10000, // backward-compatible field
|
||||
cache_read_input_tokens: 0,
|
||||
cache_creation: {
|
||||
ephemeral_5m_input_tokens: 5000,
|
||||
ephemeral_1h_input_tokens: 5000
|
||||
}
|
||||
}
|
||||
|
||||
const result = pricingService.calculateCost(usage, 'claude-sonnet-4-20250514[1m]')
|
||||
|
||||
expect(result.isLongContextRequest).toBe(true)
|
||||
// ephemeral_5m: 5000 * 0.0000075 = 0.0375
|
||||
expect(result.ephemeral5mCost).toBeCloseTo(5000 * 0.0000075, 10)
|
||||
// 200K+ input = 0.000006, ephemeral_1h = input * 2 = 0.000012
|
||||
expect(result.pricing.ephemeral1h).toBeCloseTo(0.000012, 10)
|
||||
expect(result.ephemeral1hCost).toBeCloseTo(5000 * 0.000012, 10)
|
||||
})
|
||||
})
|
||||
|
||||
// Fallback tests: behavior for a Claude model whose pricing entry has no *_above_200k fields.
describe('回退测试', () => {
|
||||
it('Claude 模型无 above_200k 字段时,200K+ 输入价格按 2 倍并推导缓存价格', () => {
|
||||
const usage = {
|
||||
input_tokens: 250000,
|
||||
output_tokens: 1000,
|
||||
cache_creation_input_tokens: 10000,
|
||||
cache_read_input_tokens: 10000
|
||||
}
|
||||
|
||||
const result = pricingService.calculateCost(usage, 'claude-3-haiku-20240307[1m]')
|
||||
|
||||
// The model has no above_200k fields; Claude 200K+ input falls back to 2x the base price
|
||||
expect(result.isLongContextRequest).toBe(true)
|
||||
expect(result.pricing.input).toBe(0.0000005) // 0.00000025 * 2
|
||||
// Cache prices are derived from the input price
|
||||
expect(result.pricing.cacheCreate).toBeCloseTo(0.000000625, 12) // input * 1.25
|
||||
expect(result.pricing.cacheRead).toBeCloseTo(0.00000005, 12) // input * 0.1
|
||||
})
|
||||
})
|
||||
|
||||
// Header and Fast Mode tests: long-context billing triggered via the request_anthropic_beta
// value instead of the [1m] suffix, and Fast Mode multipliers on Opus 4.6.
describe('Header 与 Fast Mode 适配', () => {
|
||||
it('无 [1m] 后缀但带 context-1m beta,超过 200K 时应触发长上下文计费', () => {
|
||||
const usage = {
|
||||
input_tokens: 210000,
|
||||
output_tokens: 1000,
|
||||
cache_creation_input_tokens: 0,
|
||||
cache_read_input_tokens: 0,
|
||||
request_anthropic_beta: 'context-1m-2025-08-07'
|
||||
}
|
||||
|
||||
const result = pricingService.calculateCost(usage, 'claude-sonnet-4-20250514')
|
||||
|
||||
expect(result.isLongContextRequest).toBe(true)
|
||||
expect(result.pricing.input).toBe(0.000006)
|
||||
expect(result.pricing.output).toBe(0.0000225)
|
||||
})
|
||||
|
||||
it('Opus 4.6 在 fast-mode beta + speed=fast 时应用 Fast Mode 6x', () => {
|
||||
const usage = {
|
||||
input_tokens: 100000,
|
||||
output_tokens: 20000,
|
||||
cache_creation_input_tokens: 10000,
|
||||
cache_read_input_tokens: 5000,
|
||||
request_anthropic_beta: 'fast-mode-2026-02-01',
|
||||
speed: 'fast'
|
||||
}
|
||||
|
||||
const result = pricingService.calculateCost(usage, 'claude-opus-4-6')
|
||||
|
||||
// input: 0.000005 * 6 = 0.00003
|
||||
expect(result.pricing.input).toBeCloseTo(0.00003, 12)
|
||||
// output: 0.000025 * 6 = 0.00015
|
||||
expect(result.pricing.output).toBeCloseTo(0.00015, 12)
|
||||
// cache create/read are derived from the post-fast-mode input price
|
||||
expect(result.pricing.cacheCreate).toBeCloseTo(0.0000375, 12) // 0.00003 * 1.25
|
||||
expect(result.pricing.cacheRead).toBeCloseTo(0.000003, 12) // 0.00003 * 0.1
|
||||
expect(result.pricing.ephemeral1h).toBeCloseTo(0.00006, 12) // 0.00003 * 2
|
||||
})
|
||||
|
||||
it('Opus 4.6 在 fast-mode + [1m] 且超过 200K 时应叠加计费(12x input)', () => {
|
||||
const usage = {
|
||||
input_tokens: 210000,
|
||||
output_tokens: 1000,
|
||||
cache_creation_input_tokens: 10000,
|
||||
cache_read_input_tokens: 10000,
|
||||
request_anthropic_beta: 'fast-mode-2026-02-01,context-1m-2025-08-07',
|
||||
speed: 'fast'
|
||||
}
|
||||
|
||||
const result = pricingService.calculateCost(usage, 'claude-opus-4-6[1m]')
|
||||
|
||||
expect(result.isLongContextRequest).toBe(true)
|
||||
// input: 0.000005 -> long context 0.00001 -> fast 6x => 0.00006 (i.e. 12x the standard price)
|
||||
expect(result.pricing.input).toBeCloseTo(0.00006, 12)
|
||||
// output: 0.000025 -> long context 0.0000375 -> fast 6x => 0.000225 (i.e. 9x the standard price)
|
||||
expect(result.pricing.output).toBeCloseTo(0.000225, 12)
|
||||
})
|
||||
})
|
||||
|
||||
// Compatibility tests: cases that must keep base pricing, plus the no-pricing-data result.
describe('兼容性测试', () => {
|
||||
it('非 [1m] 模型不受影响,始终使用基础价格', () => {
|
||||
const usage = {
|
||||
input_tokens: 250000,
|
||||
output_tokens: 1000,
|
||||
cache_creation_input_tokens: 0,
|
||||
cache_read_input_tokens: 0
|
||||
}
|
||||
|
||||
// Model name without the [1m] suffix
|
||||
const result = pricingService.calculateCost(usage, 'claude-sonnet-4-20250514')
|
||||
|
||||
expect(result.isLongContextRequest).toBe(false)
|
||||
expect(result.pricing.input).toBe(0.000003) // base price
|
||||
expect(result.pricing.output).toBe(0.000015) // base price
|
||||
expect(result.pricing.cacheCreate).toBe(0.00000375) // base price
|
||||
expect(result.pricing.cacheRead).toBeCloseTo(0.0000003, 12) // base price
|
||||
})
|
||||
|
||||
it('[1m] 模型未超过 200K 时使用基础价格', () => {
|
||||
const usage = {
|
||||
input_tokens: 100000,
|
||||
output_tokens: 1000,
|
||||
cache_creation_input_tokens: 50000,
|
||||
cache_read_input_tokens: 49000
|
||||
}
|
||||
// Total: 199000 < 200000
|
||||
|
||||
const result = pricingService.calculateCost(usage, 'claude-sonnet-4-20250514[1m]')
|
||||
|
||||
expect(result.isLongContextRequest).toBe(false)
|
||||
expect(result.pricing.input).toBe(0.000003) // base price
|
||||
})
|
||||
|
||||
it('无定价数据时返回 hasPricing=false', () => {
|
||||
const usage = {
|
||||
input_tokens: 250000,
|
||||
output_tokens: 1000
|
||||
}
|
||||
|
||||
const result = pricingService.calculateCost(usage, 'unknown-model[1m]')
|
||||
|
||||
expect(result.hasPricing).toBe(false)
|
||||
expect(result.totalCost).toBe(0)
|
||||
})
|
||||
})
|
||||
|
||||
// Cost accuracy test: checks each cost component and the total against hand-computed
// values for a 200K+ request.
describe('成本计算准确性', () => {
|
||||
it('应正确计算 200K+ 场景下的总成本', () => {
|
||||
const usage = {
|
||||
input_tokens: 150000,
|
||||
output_tokens: 10000,
|
||||
cache_creation_input_tokens: 40000,
|
||||
cache_read_input_tokens: 20000
|
||||
}
|
||||
// Total input: 210000 > 200000 -> use 200K+ prices
|
||||
|
||||
const result = pricingService.calculateCost(usage, 'claude-sonnet-4-20250514[1m]')
|
||||
|
||||
// Manually compute the expected costs
|
||||
const expectedInputCost = 150000 * 0.000006 // $0.9
|
||||
const expectedOutputCost = 10000 * 0.0000225 // $0.225
|
||||
const expectedCacheCreateCost = 40000 * 0.0000075 // $0.3
|
||||
const expectedCacheReadCost = 20000 * 0.0000006 // $0.012
|
||||
const expectedTotal =
|
||||
expectedInputCost + expectedOutputCost + expectedCacheCreateCost + expectedCacheReadCost
|
||||
|
||||
expect(result.inputCost).toBeCloseTo(expectedInputCost, 10)
|
||||
expect(result.outputCost).toBeCloseTo(expectedOutputCost, 10)
|
||||
expect(result.cacheCreateCost).toBeCloseTo(expectedCacheCreateCost, 10)
|
||||
expect(result.cacheReadCost).toBeCloseTo(expectedCacheReadCost, 10)
|
||||
expect(result.totalCost).toBeCloseTo(expectedTotal, 10)
|
||||
})
|
||||
})
|
||||
})
|
||||
Reference in New Issue
Block a user