mirror of
https://github.com/Wei-Shaw/claude-relay-service.git
synced 2026-03-29 21:56:18 +00:00
fix: 修复1h缓存定价与sub2api项目对齐
This commit is contained in:
File diff suppressed because it is too large
Load Diff
@@ -1222,21 +1222,61 @@ async function handleMessagesRequest(req, res) {
|
||||
) {
|
||||
const inputTokens = jsonData.usage.input_tokens || 0
|
||||
const outputTokens = jsonData.usage.output_tokens || 0
|
||||
const cacheCreateTokens = jsonData.usage.cache_creation_input_tokens || 0
|
||||
let cacheCreateTokens = jsonData.usage.cache_creation_input_tokens || 0
|
||||
let ephemeral5mTokens = 0
|
||||
let ephemeral1hTokens = 0
|
||||
|
||||
if (jsonData.usage.cache_creation && typeof jsonData.usage.cache_creation === 'object') {
|
||||
ephemeral5mTokens = jsonData.usage.cache_creation.ephemeral_5m_input_tokens || 0
|
||||
ephemeral1hTokens = jsonData.usage.cache_creation.ephemeral_1h_input_tokens || 0
|
||||
cacheCreateTokens = ephemeral5mTokens + ephemeral1hTokens
|
||||
}
|
||||
|
||||
const cacheReadTokens = jsonData.usage.cache_read_input_tokens || 0
|
||||
// Parse the model to remove vendor prefix if present (e.g., "ccr,gemini-2.5-pro" -> "gemini-2.5-pro")
|
||||
const rawModel = jsonData.model || _requestBodyNonStream.model || 'unknown'
|
||||
const { baseModel: usageBaseModel } = parseVendorPrefixedModel(rawModel)
|
||||
const model = usageBaseModel || rawModel
|
||||
|
||||
// 构建 usage 对象以传递给 recordUsageWithDetails
|
||||
const usageObject = {
|
||||
input_tokens: inputTokens,
|
||||
output_tokens: outputTokens,
|
||||
cache_creation_input_tokens: cacheCreateTokens,
|
||||
cache_read_input_tokens: cacheReadTokens
|
||||
}
|
||||
|
||||
// 添加请求元信息
|
||||
const requestBetaHeader =
|
||||
_headersNonStream['anthropic-beta'] ||
|
||||
_headersNonStream['Anthropic-Beta'] ||
|
||||
_headersNonStream['ANTHROPIC-BETA']
|
||||
if (requestBetaHeader) {
|
||||
usageObject.request_anthropic_beta = requestBetaHeader
|
||||
}
|
||||
if (
|
||||
typeof _requestBodyNonStream?.speed === 'string' &&
|
||||
_requestBodyNonStream.speed.trim()
|
||||
) {
|
||||
usageObject.request_speed = _requestBodyNonStream.speed.trim().toLowerCase()
|
||||
}
|
||||
if (typeof jsonData.usage.speed === 'string' && jsonData.usage.speed.trim()) {
|
||||
usageObject.speed = jsonData.usage.speed.trim().toLowerCase()
|
||||
}
|
||||
|
||||
// 添加 cache_creation 子对象
|
||||
if (ephemeral5mTokens > 0 || ephemeral1hTokens > 0) {
|
||||
usageObject.cache_creation = {
|
||||
ephemeral_5m_input_tokens: ephemeral5mTokens,
|
||||
ephemeral_1h_input_tokens: ephemeral1hTokens
|
||||
}
|
||||
}
|
||||
|
||||
// 记录真实的token使用量(包含模型信息和所有4种token以及账户ID)
|
||||
const { accountId: responseAccountId } = response
|
||||
const nonStreamCosts = await apiKeyService.recordUsage(
|
||||
const nonStreamCosts = await apiKeyService.recordUsageWithDetails(
|
||||
_apiKeyIdNonStream,
|
||||
inputTokens,
|
||||
outputTokens,
|
||||
cacheCreateTokens,
|
||||
cacheReadTokens,
|
||||
usageObject,
|
||||
model,
|
||||
responseAccountId,
|
||||
accountType
|
||||
@@ -1259,7 +1299,7 @@ async function handleMessagesRequest(req, res) {
|
||||
|
||||
usageRecorded = true
|
||||
logger.api(
|
||||
`📊 Non-stream usage recorded (real) - Model: ${model}, Input: ${inputTokens}, Output: ${outputTokens}, Cache Create: ${cacheCreateTokens}, Cache Read: ${cacheReadTokens}, Total: ${inputTokens + outputTokens + cacheCreateTokens + cacheReadTokens} tokens`
|
||||
`📊 Non-stream usage recorded (real) - Model: ${model}, Input: ${inputTokens}, Output: ${outputTokens}, Cache Create: ${cacheCreateTokens} (5m: ${ephemeral5mTokens}, 1h: ${ephemeral1hTokens}), Cache Read: ${cacheReadTokens}, Total: ${inputTokens + outputTokens + cacheCreateTokens + cacheReadTokens} tokens`
|
||||
)
|
||||
} else {
|
||||
logger.warn('⚠️ No usage data found in Claude API JSON response')
|
||||
|
||||
@@ -11,7 +11,11 @@ const USAGE_FIELDS = [
|
||||
'totalCacheCreateTokens',
|
||||
'cacheCreateTokens',
|
||||
'totalCacheReadTokens',
|
||||
'cacheReadTokens'
|
||||
'cacheReadTokens',
|
||||
'ephemeral5mTokens',
|
||||
'ephemeral1hTokens',
|
||||
'totalEphemeral5mTokens',
|
||||
'totalEphemeral1hTokens'
|
||||
]
|
||||
|
||||
class CostInitService {
|
||||
@@ -202,8 +206,8 @@ class CostInitService {
|
||||
}
|
||||
|
||||
// 添加 cache_creation 子对象以支持精确 ephemeral 定价
|
||||
const eph5m = parseInt(data.ephemeral5mTokens) || 0
|
||||
const eph1h = parseInt(data.ephemeral1hTokens) || 0
|
||||
const eph5m = parseInt(data.totalEphemeral5mTokens) || parseInt(data.ephemeral5mTokens) || 0
|
||||
const eph1h = parseInt(data.totalEphemeral1hTokens) || parseInt(data.ephemeral1hTokens) || 0
|
||||
if (eph5m > 0 || eph1h > 0) {
|
||||
usage.cache_creation = {
|
||||
ephemeral_5m_input_tokens: eph5m,
|
||||
|
||||
@@ -28,43 +28,7 @@ class PricingService {
|
||||
this.updateTimer = null // 定时更新任务句柄
|
||||
this.hashSyncInProgress = false // 哈希同步状态
|
||||
|
||||
// 硬编码的 1 小时缓存价格(美元/百万 token)
|
||||
// ephemeral_5m 的价格使用 model_pricing.json 中的 cache_creation_input_token_cost
|
||||
// ephemeral_1h 的价格需要硬编码
|
||||
this.ephemeral1hPricing = {
|
||||
// Opus 系列: $30/MTok
|
||||
'claude-opus-4-1': 0.00003,
|
||||
'claude-opus-4-1-20250805': 0.00003,
|
||||
'claude-opus-4': 0.00003,
|
||||
'claude-opus-4-20250514': 0.00003,
|
||||
'claude-3-opus': 0.00003,
|
||||
'claude-3-opus-latest': 0.00003,
|
||||
'claude-3-opus-20240229': 0.00003,
|
||||
|
||||
// Sonnet 系列: $6/MTok
|
||||
'claude-3-5-sonnet': 0.000006,
|
||||
'claude-3-5-sonnet-latest': 0.000006,
|
||||
'claude-3-5-sonnet-20241022': 0.000006,
|
||||
'claude-3-5-sonnet-20240620': 0.000006,
|
||||
'claude-3-sonnet': 0.000006,
|
||||
'claude-3-sonnet-20240307': 0.000006,
|
||||
'claude-sonnet-3': 0.000006,
|
||||
'claude-sonnet-3-5': 0.000006,
|
||||
'claude-sonnet-3-7': 0.000006,
|
||||
'claude-sonnet-4': 0.000006,
|
||||
'claude-sonnet-4-20250514': 0.000006,
|
||||
|
||||
// Haiku 系列: $1.6/MTok
|
||||
'claude-3-5-haiku': 0.0000016,
|
||||
'claude-3-5-haiku-latest': 0.0000016,
|
||||
'claude-3-5-haiku-20241022': 0.0000016,
|
||||
'claude-3-haiku': 0.0000016,
|
||||
'claude-3-haiku-20240307': 0.0000016,
|
||||
'claude-haiku-3': 0.0000016,
|
||||
'claude-haiku-3-5': 0.0000016
|
||||
}
|
||||
|
||||
// Claude Prompt Caching 官方倍率(基于输入价格)
|
||||
// Claude Prompt Caching 官方倍率(基于输入价格)— 仅作为 model_pricing.json 缺失字段时的兜底
|
||||
this.claudeCacheMultipliers = {
|
||||
write5m: 1.25,
|
||||
write1h: 2,
|
||||
@@ -536,50 +500,6 @@ class PricingService {
|
||||
return modelName.replace(/\[1m\]/gi, '').trim()
|
||||
}
|
||||
|
||||
// 获取 1 小时缓存价格(优先使用 model_pricing.json 中的模型字段)
|
||||
getEphemeral1hPricing(modelName, pricing = null) {
|
||||
if (
|
||||
pricing?.cache_creation_input_token_cost_above_1hr !== null &&
|
||||
pricing?.cache_creation_input_token_cost_above_1hr !== undefined
|
||||
) {
|
||||
return pricing.cache_creation_input_token_cost_above_1hr
|
||||
}
|
||||
|
||||
if (!modelName) {
|
||||
return 0
|
||||
}
|
||||
|
||||
// 尝试直接匹配
|
||||
if (
|
||||
this.ephemeral1hPricing[modelName] !== null &&
|
||||
this.ephemeral1hPricing[modelName] !== undefined
|
||||
) {
|
||||
return this.ephemeral1hPricing[modelName]
|
||||
}
|
||||
|
||||
// 处理各种模型名称变体
|
||||
const modelLower = modelName.toLowerCase()
|
||||
|
||||
// 检查是否是 Opus 系列
|
||||
if (modelLower.includes('opus')) {
|
||||
return 0.00001 // $10/MTok
|
||||
}
|
||||
|
||||
// 检查是否是 Sonnet 系列
|
||||
if (modelLower.includes('sonnet')) {
|
||||
return 0.000006 // $6/MTok
|
||||
}
|
||||
|
||||
// 检查是否是 Haiku 系列
|
||||
if (modelLower.includes('haiku')) {
|
||||
return 0.000002 // $2/MTok
|
||||
}
|
||||
|
||||
// 默认返回 0(未知模型)
|
||||
logger.debug(`💰 No 1h cache pricing found for model: ${modelName}`)
|
||||
return 0
|
||||
}
|
||||
|
||||
// 计算使用费用
|
||||
calculateCost(usage, modelName) {
|
||||
const normalizedModelName = this.stripLongContextSuffix(modelName)
|
||||
@@ -675,43 +595,58 @@ class PricingService {
|
||||
: baseOutputPrice
|
||||
: baseOutputPrice
|
||||
|
||||
// 应用 Fast Mode 倍率(在 200K+ 价格之上叠加)
|
||||
if (fastMultiplier > 1) {
|
||||
actualInputPrice *= fastMultiplier
|
||||
actualOutputPrice *= fastMultiplier
|
||||
}
|
||||
|
||||
// 缓存价格:优先从 model_pricing.json 取,Claude 缺失时用倍率兜底
|
||||
let actualCacheCreatePrice = 0
|
||||
let actualCacheReadPrice = 0
|
||||
let actualEphemeral1hPrice = 0
|
||||
|
||||
if (isClaudeModel) {
|
||||
// Claude 模型缓存价格统一按输入价格倍率推导,避免来源字段不一致导致计费偏差
|
||||
actualCacheCreatePrice = actualInputPrice * this.claudeCacheMultipliers.write5m
|
||||
actualCacheReadPrice = actualInputPrice * this.claudeCacheMultipliers.read
|
||||
actualEphemeral1hPrice = actualInputPrice * this.claudeCacheMultipliers.write1h
|
||||
} else {
|
||||
actualCacheCreatePrice = useLongContextPricing
|
||||
? pricing.cache_creation_input_token_cost_above_200k_tokens ||
|
||||
if (useLongContextPricing) {
|
||||
// 200K+:Claude 仅用 above_200k 专用字段,缺失留 0 让下方兜底从 actualInputPrice 推导
|
||||
actualCacheCreatePrice = isClaudeModel
|
||||
? pricing.cache_creation_input_token_cost_above_200k_tokens || 0
|
||||
: pricing.cache_creation_input_token_cost_above_200k_tokens ||
|
||||
pricing.cache_creation_input_token_cost ||
|
||||
0
|
||||
: pricing.cache_creation_input_token_cost || 0
|
||||
|
||||
actualCacheReadPrice = useLongContextPricing
|
||||
? pricing.cache_read_input_token_cost_above_200k_tokens ||
|
||||
actualCacheReadPrice = isClaudeModel
|
||||
? pricing.cache_read_input_token_cost_above_200k_tokens || 0
|
||||
: pricing.cache_read_input_token_cost_above_200k_tokens ||
|
||||
pricing.cache_read_input_token_cost ||
|
||||
0
|
||||
: pricing.cache_read_input_token_cost || 0
|
||||
const has1h200k =
|
||||
pricing.cache_creation_input_token_cost_above_1hr_above_200k_tokens !== null &&
|
||||
pricing.cache_creation_input_token_cost_above_1hr_above_200k_tokens !== undefined
|
||||
actualEphemeral1hPrice = has1h200k
|
||||
? pricing.cache_creation_input_token_cost_above_1hr_above_200k_tokens
|
||||
: isClaudeModel
|
||||
? 0
|
||||
: pricing.cache_creation_input_token_cost_above_1hr || 0
|
||||
} else {
|
||||
actualCacheCreatePrice = pricing.cache_creation_input_token_cost || 0
|
||||
actualCacheReadPrice = pricing.cache_read_input_token_cost || 0
|
||||
actualEphemeral1hPrice = pricing.cache_creation_input_token_cost_above_1hr || 0
|
||||
}
|
||||
|
||||
const defaultEphemeral1hPrice = this.getEphemeral1hPricing(modelName, pricing)
|
||||
// Claude 兜底:pricing 字段缺失时用倍率从 actualInputPrice 推导
|
||||
// 此时 actualInputPrice 尚未含 fastMultiplier,下方统一应用
|
||||
if (isClaudeModel) {
|
||||
if (!actualCacheCreatePrice) {
|
||||
actualCacheCreatePrice = actualInputPrice * this.claudeCacheMultipliers.write5m
|
||||
}
|
||||
if (!actualCacheReadPrice) {
|
||||
actualCacheReadPrice = actualInputPrice * this.claudeCacheMultipliers.read
|
||||
}
|
||||
if (!actualEphemeral1hPrice) {
|
||||
actualEphemeral1hPrice = actualInputPrice * this.claudeCacheMultipliers.write1h
|
||||
}
|
||||
}
|
||||
|
||||
// 非 Claude 模型维持原有字段优先级
|
||||
actualEphemeral1hPrice = useLongContextPricing
|
||||
? pricing.cache_creation_input_token_cost_above_1hr_above_200k_tokens !== null &&
|
||||
pricing.cache_creation_input_token_cost_above_1hr_above_200k_tokens !== undefined
|
||||
? pricing.cache_creation_input_token_cost_above_1hr_above_200k_tokens
|
||||
: defaultEphemeral1hPrice
|
||||
: defaultEphemeral1hPrice
|
||||
// Fast Mode 倍率:统一一次性应用于所有价格
|
||||
if (fastMultiplier > 1) {
|
||||
actualInputPrice *= fastMultiplier
|
||||
actualOutputPrice *= fastMultiplier
|
||||
actualCacheCreatePrice *= fastMultiplier
|
||||
actualCacheReadPrice *= fastMultiplier
|
||||
actualEphemeral1hPrice *= fastMultiplier
|
||||
}
|
||||
|
||||
// 计算各项费用
|
||||
|
||||
@@ -239,6 +239,16 @@ class CostCalculator {
|
||||
aggregatedUsage.cacheReadTokens || aggregatedUsage.totalCacheReadTokens || 0
|
||||
}
|
||||
|
||||
// 如果有 ephemeral 拆分数据,构建 cache_creation 子对象
|
||||
const eph5m = aggregatedUsage.ephemeral5mTokens || aggregatedUsage.totalEphemeral5mTokens || 0
|
||||
const eph1h = aggregatedUsage.ephemeral1hTokens || aggregatedUsage.totalEphemeral1hTokens || 0
|
||||
if (eph5m > 0 || eph1h > 0) {
|
||||
usage.cache_creation = {
|
||||
ephemeral_5m_input_tokens: eph5m,
|
||||
ephemeral_1h_input_tokens: eph1h
|
||||
}
|
||||
}
|
||||
|
||||
return this.calculateCost(usage, model)
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user