fix: 修复1h缓存定价与sub2api项目对齐

This commit is contained in:
shaw
2026-03-03 11:08:13 +08:00
parent d7b4a54a4c
commit 99dd19b32d
5 changed files with 6376 additions and 22539 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -1222,21 +1222,61 @@ async function handleMessagesRequest(req, res) {
) {
const inputTokens = jsonData.usage.input_tokens || 0
const outputTokens = jsonData.usage.output_tokens || 0
const cacheCreateTokens = jsonData.usage.cache_creation_input_tokens || 0
let cacheCreateTokens = jsonData.usage.cache_creation_input_tokens || 0
let ephemeral5mTokens = 0
let ephemeral1hTokens = 0
if (jsonData.usage.cache_creation && typeof jsonData.usage.cache_creation === 'object') {
ephemeral5mTokens = jsonData.usage.cache_creation.ephemeral_5m_input_tokens || 0
ephemeral1hTokens = jsonData.usage.cache_creation.ephemeral_1h_input_tokens || 0
cacheCreateTokens = ephemeral5mTokens + ephemeral1hTokens
}
const cacheReadTokens = jsonData.usage.cache_read_input_tokens || 0
// Parse the model to remove vendor prefix if present (e.g., "ccr,gemini-2.5-pro" -> "gemini-2.5-pro")
const rawModel = jsonData.model || _requestBodyNonStream.model || 'unknown'
const { baseModel: usageBaseModel } = parseVendorPrefixedModel(rawModel)
const model = usageBaseModel || rawModel
// 构建 usage 对象以传递给 recordUsageWithDetails
const usageObject = {
input_tokens: inputTokens,
output_tokens: outputTokens,
cache_creation_input_tokens: cacheCreateTokens,
cache_read_input_tokens: cacheReadTokens
}
// 添加请求元信息
const requestBetaHeader =
_headersNonStream['anthropic-beta'] ||
_headersNonStream['Anthropic-Beta'] ||
_headersNonStream['ANTHROPIC-BETA']
if (requestBetaHeader) {
usageObject.request_anthropic_beta = requestBetaHeader
}
if (
typeof _requestBodyNonStream?.speed === 'string' &&
_requestBodyNonStream.speed.trim()
) {
usageObject.request_speed = _requestBodyNonStream.speed.trim().toLowerCase()
}
if (typeof jsonData.usage.speed === 'string' && jsonData.usage.speed.trim()) {
usageObject.speed = jsonData.usage.speed.trim().toLowerCase()
}
// 添加 cache_creation 子对象
if (ephemeral5mTokens > 0 || ephemeral1hTokens > 0) {
usageObject.cache_creation = {
ephemeral_5m_input_tokens: ephemeral5mTokens,
ephemeral_1h_input_tokens: ephemeral1hTokens
}
}
// 记录真实的token使用量(包含模型信息和所有4种token以及账户ID)
const { accountId: responseAccountId } = response
const nonStreamCosts = await apiKeyService.recordUsage(
const nonStreamCosts = await apiKeyService.recordUsageWithDetails(
_apiKeyIdNonStream,
inputTokens,
outputTokens,
cacheCreateTokens,
cacheReadTokens,
usageObject,
model,
responseAccountId,
accountType
@@ -1259,7 +1299,7 @@ async function handleMessagesRequest(req, res) {
usageRecorded = true
logger.api(
`📊 Non-stream usage recorded (real) - Model: ${model}, Input: ${inputTokens}, Output: ${outputTokens}, Cache Create: ${cacheCreateTokens}, Cache Read: ${cacheReadTokens}, Total: ${inputTokens + outputTokens + cacheCreateTokens + cacheReadTokens} tokens`
`📊 Non-stream usage recorded (real) - Model: ${model}, Input: ${inputTokens}, Output: ${outputTokens}, Cache Create: ${cacheCreateTokens} (5m: ${ephemeral5mTokens}, 1h: ${ephemeral1hTokens}), Cache Read: ${cacheReadTokens}, Total: ${inputTokens + outputTokens + cacheCreateTokens + cacheReadTokens} tokens`
)
} else {
logger.warn('⚠️ No usage data found in Claude API JSON response')

View File

@@ -11,7 +11,11 @@ const USAGE_FIELDS = [
'totalCacheCreateTokens',
'cacheCreateTokens',
'totalCacheReadTokens',
'cacheReadTokens'
'cacheReadTokens',
'ephemeral5mTokens',
'ephemeral1hTokens',
'totalEphemeral5mTokens',
'totalEphemeral1hTokens'
]
class CostInitService {
@@ -202,8 +206,8 @@ class CostInitService {
}
// 添加 cache_creation 子对象以支持精确 ephemeral 定价
const eph5m = parseInt(data.ephemeral5mTokens) || 0
const eph1h = parseInt(data.ephemeral1hTokens) || 0
const eph5m = parseInt(data.totalEphemeral5mTokens) || parseInt(data.ephemeral5mTokens) || 0
const eph1h = parseInt(data.totalEphemeral1hTokens) || parseInt(data.ephemeral1hTokens) || 0
if (eph5m > 0 || eph1h > 0) {
usage.cache_creation = {
ephemeral_5m_input_tokens: eph5m,

View File

@@ -28,43 +28,7 @@ class PricingService {
this.updateTimer = null // 定时更新任务句柄
this.hashSyncInProgress = false // 哈希同步状态
// 硬编码的 1 小时缓存价格(数值单位为美元/token,行内注释标注的是对应的美元/百万 token)
// ephemeral_5m 的价格使用 model_pricing.json 中的 cache_creation_input_token_cost
// ephemeral_1h 的价格需要硬编码
this.ephemeral1hPricing = {
// Opus 系列: $30/MTok
'claude-opus-4-1': 0.00003,
'claude-opus-4-1-20250805': 0.00003,
'claude-opus-4': 0.00003,
'claude-opus-4-20250514': 0.00003,
'claude-3-opus': 0.00003,
'claude-3-opus-latest': 0.00003,
'claude-3-opus-20240229': 0.00003,
// Sonnet 系列: $6/MTok
'claude-3-5-sonnet': 0.000006,
'claude-3-5-sonnet-latest': 0.000006,
'claude-3-5-sonnet-20241022': 0.000006,
'claude-3-5-sonnet-20240620': 0.000006,
'claude-3-sonnet': 0.000006,
'claude-3-sonnet-20240307': 0.000006,
'claude-sonnet-3': 0.000006,
'claude-sonnet-3-5': 0.000006,
'claude-sonnet-3-7': 0.000006,
'claude-sonnet-4': 0.000006,
'claude-sonnet-4-20250514': 0.000006,
// Haiku 系列: $1.6/MTok
'claude-3-5-haiku': 0.0000016,
'claude-3-5-haiku-latest': 0.0000016,
'claude-3-5-haiku-20241022': 0.0000016,
'claude-3-haiku': 0.0000016,
'claude-3-haiku-20240307': 0.0000016,
'claude-haiku-3': 0.0000016,
'claude-haiku-3-5': 0.0000016
}
// Claude Prompt Caching 官方倍率(基于输入价格)
// Claude Prompt Caching 官方倍率(基于输入价格)— 仅作为 model_pricing.json 缺失字段时的兜底
this.claudeCacheMultipliers = {
write5m: 1.25,
write1h: 2,
@@ -536,50 +500,6 @@ class PricingService {
return modelName.replace(/\[1m\]/gi, '').trim()
}
// Resolve the 1-hour cache price for a model.
//
// Lookup order (first hit wins):
//   1. pricing.cache_creation_input_token_cost_above_1hr, when it is neither null nor undefined
//   2. an exact key match in this.ephemeral1hPricing
//   3. a case-insensitive substring match on "opus" / "sonnet" / "haiku"
//   4. 0 (unknown model)
//
// @param {string} modelName - Model identifier; a falsy value returns 0 unless step 1 matched.
// @param {object|null} [pricing=null] - Pricing entry for the model, if the caller has one.
// @returns {number} The resolved price (presumably USD per token — confirm), or 0 when nothing matches.
//
// NOTE(review): the substring fallbacks for Opus ($10/MTok) and Haiku ($2/MTok) differ from the
// exact-match values listed for those families in this.ephemeral1hPricing ($30/MTok and $1.6/MTok)
// — confirm which figures are intended.
getEphemeral1hPricing(modelName, pricing = null) {
// Prefer the explicit per-model field; the null/undefined check lets a price of 0 through.
if (
pricing?.cache_creation_input_token_cost_above_1hr !== null &&
pricing?.cache_creation_input_token_cost_above_1hr !== undefined
) {
return pricing.cache_creation_input_token_cost_above_1hr
}
// Without a model name there is nothing to look up in the hard-coded table.
if (!modelName) {
return 0
}
// Try an exact match first
if (
this.ephemeral1hPricing[modelName] !== null &&
this.ephemeral1hPricing[modelName] !== undefined
) {
return this.ephemeral1hPricing[modelName]
}
// Handle model-name variants via case-insensitive substring matching
const modelLower = modelName.toLowerCase()
// Opus family
if (modelLower.includes('opus')) {
return 0.00001 // $10/MTok
}
// Sonnet family
if (modelLower.includes('sonnet')) {
return 0.000006 // $6/MTok
}
// Haiku family
if (modelLower.includes('haiku')) {
return 0.000002 // $2/MTok
}
// Default: return 0 for unknown models
logger.debug(`💰 No 1h cache pricing found for model: ${modelName}`)
return 0
}
// 计算使用费用
calculateCost(usage, modelName) {
const normalizedModelName = this.stripLongContextSuffix(modelName)
@@ -675,43 +595,58 @@ class PricingService {
: baseOutputPrice
: baseOutputPrice
// 应用 Fast Mode 倍率(在 200K+ 价格之上叠加)
if (fastMultiplier > 1) {
actualInputPrice *= fastMultiplier
actualOutputPrice *= fastMultiplier
}
// 缓存价格:优先从 model_pricing.json 取;Claude 缺失时用倍率兜底
let actualCacheCreatePrice = 0
let actualCacheReadPrice = 0
let actualEphemeral1hPrice = 0
if (isClaudeModel) {
// Claude 模型缓存价格统一按输入价格倍率推导,避免来源字段不一致导致计费偏差
actualCacheCreatePrice = actualInputPrice * this.claudeCacheMultipliers.write5m
actualCacheReadPrice = actualInputPrice * this.claudeCacheMultipliers.read
actualEphemeral1hPrice = actualInputPrice * this.claudeCacheMultipliers.write1h
} else {
actualCacheCreatePrice = useLongContextPricing
? pricing.cache_creation_input_token_cost_above_200k_tokens ||
if (useLongContextPricing) {
// 200K+:Claude 仅用 above_200k 专用字段,缺失留 0 让下方兜底从 actualInputPrice 推导
actualCacheCreatePrice = isClaudeModel
? pricing.cache_creation_input_token_cost_above_200k_tokens || 0
: pricing.cache_creation_input_token_cost_above_200k_tokens ||
pricing.cache_creation_input_token_cost ||
0
: pricing.cache_creation_input_token_cost || 0
actualCacheReadPrice = useLongContextPricing
? pricing.cache_read_input_token_cost_above_200k_tokens ||
actualCacheReadPrice = isClaudeModel
? pricing.cache_read_input_token_cost_above_200k_tokens || 0
: pricing.cache_read_input_token_cost_above_200k_tokens ||
pricing.cache_read_input_token_cost ||
0
: pricing.cache_read_input_token_cost || 0
const has1h200k =
pricing.cache_creation_input_token_cost_above_1hr_above_200k_tokens !== null &&
pricing.cache_creation_input_token_cost_above_1hr_above_200k_tokens !== undefined
actualEphemeral1hPrice = has1h200k
? pricing.cache_creation_input_token_cost_above_1hr_above_200k_tokens
: isClaudeModel
? 0
: pricing.cache_creation_input_token_cost_above_1hr || 0
} else {
actualCacheCreatePrice = pricing.cache_creation_input_token_cost || 0
actualCacheReadPrice = pricing.cache_read_input_token_cost || 0
actualEphemeral1hPrice = pricing.cache_creation_input_token_cost_above_1hr || 0
}
const defaultEphemeral1hPrice = this.getEphemeral1hPricing(modelName, pricing)
// Claude 兜底:pricing 字段缺失时用倍率从 actualInputPrice 推导
// (此时 actualInputPrice 尚未含 fastMultiplier,下方统一应用)
if (isClaudeModel) {
if (!actualCacheCreatePrice) {
actualCacheCreatePrice = actualInputPrice * this.claudeCacheMultipliers.write5m
}
if (!actualCacheReadPrice) {
actualCacheReadPrice = actualInputPrice * this.claudeCacheMultipliers.read
}
if (!actualEphemeral1hPrice) {
actualEphemeral1hPrice = actualInputPrice * this.claudeCacheMultipliers.write1h
}
}
// 非 Claude 模型维持原有字段优先级
actualEphemeral1hPrice = useLongContextPricing
? pricing.cache_creation_input_token_cost_above_1hr_above_200k_tokens !== null &&
pricing.cache_creation_input_token_cost_above_1hr_above_200k_tokens !== undefined
? pricing.cache_creation_input_token_cost_above_1hr_above_200k_tokens
: defaultEphemeral1hPrice
: defaultEphemeral1hPrice
// Fast Mode 倍率:统一一次性应用于所有价格
if (fastMultiplier > 1) {
actualInputPrice *= fastMultiplier
actualOutputPrice *= fastMultiplier
actualCacheCreatePrice *= fastMultiplier
actualCacheReadPrice *= fastMultiplier
actualEphemeral1hPrice *= fastMultiplier
}
// 计算各项费用

View File

@@ -239,6 +239,16 @@ class CostCalculator {
aggregatedUsage.cacheReadTokens || aggregatedUsage.totalCacheReadTokens || 0
}
// 如果有 ephemeral 拆分数据,构建 cache_creation 子对象
const eph5m = aggregatedUsage.ephemeral5mTokens || aggregatedUsage.totalEphemeral5mTokens || 0
const eph1h = aggregatedUsage.ephemeral1hTokens || aggregatedUsage.totalEphemeral1hTokens || 0
if (eph5m > 0 || eph1h > 0) {
usage.cache_creation = {
ephemeral_5m_input_tokens: eph5m,
ephemeral_1h_input_tokens: eph1h
}
}
return this.calculateCost(usage, model)
}