mirror of
https://github.com/Wei-Shaw/claude-relay-service.git
synced 2026-03-30 00:51:04 +00:00
fix: 修复1h缓存定价与sub2api项目对齐
This commit is contained in:
File diff suppressed because it is too large
Load Diff
@@ -1222,21 +1222,61 @@ async function handleMessagesRequest(req, res) {
|
|||||||
) {
|
) {
|
||||||
const inputTokens = jsonData.usage.input_tokens || 0
|
const inputTokens = jsonData.usage.input_tokens || 0
|
||||||
const outputTokens = jsonData.usage.output_tokens || 0
|
const outputTokens = jsonData.usage.output_tokens || 0
|
||||||
const cacheCreateTokens = jsonData.usage.cache_creation_input_tokens || 0
|
let cacheCreateTokens = jsonData.usage.cache_creation_input_tokens || 0
|
||||||
|
let ephemeral5mTokens = 0
|
||||||
|
let ephemeral1hTokens = 0
|
||||||
|
|
||||||
|
if (jsonData.usage.cache_creation && typeof jsonData.usage.cache_creation === 'object') {
|
||||||
|
ephemeral5mTokens = jsonData.usage.cache_creation.ephemeral_5m_input_tokens || 0
|
||||||
|
ephemeral1hTokens = jsonData.usage.cache_creation.ephemeral_1h_input_tokens || 0
|
||||||
|
cacheCreateTokens = ephemeral5mTokens + ephemeral1hTokens
|
||||||
|
}
|
||||||
|
|
||||||
const cacheReadTokens = jsonData.usage.cache_read_input_tokens || 0
|
const cacheReadTokens = jsonData.usage.cache_read_input_tokens || 0
|
||||||
// Parse the model to remove vendor prefix if present (e.g., "ccr,gemini-2.5-pro" -> "gemini-2.5-pro")
|
// Parse the model to remove vendor prefix if present (e.g., "ccr,gemini-2.5-pro" -> "gemini-2.5-pro")
|
||||||
const rawModel = jsonData.model || _requestBodyNonStream.model || 'unknown'
|
const rawModel = jsonData.model || _requestBodyNonStream.model || 'unknown'
|
||||||
const { baseModel: usageBaseModel } = parseVendorPrefixedModel(rawModel)
|
const { baseModel: usageBaseModel } = parseVendorPrefixedModel(rawModel)
|
||||||
const model = usageBaseModel || rawModel
|
const model = usageBaseModel || rawModel
|
||||||
|
|
||||||
|
// 构建 usage 对象以传递给 recordUsageWithDetails
|
||||||
|
const usageObject = {
|
||||||
|
input_tokens: inputTokens,
|
||||||
|
output_tokens: outputTokens,
|
||||||
|
cache_creation_input_tokens: cacheCreateTokens,
|
||||||
|
cache_read_input_tokens: cacheReadTokens
|
||||||
|
}
|
||||||
|
|
||||||
|
// 添加请求元信息
|
||||||
|
const requestBetaHeader =
|
||||||
|
_headersNonStream['anthropic-beta'] ||
|
||||||
|
_headersNonStream['Anthropic-Beta'] ||
|
||||||
|
_headersNonStream['ANTHROPIC-BETA']
|
||||||
|
if (requestBetaHeader) {
|
||||||
|
usageObject.request_anthropic_beta = requestBetaHeader
|
||||||
|
}
|
||||||
|
if (
|
||||||
|
typeof _requestBodyNonStream?.speed === 'string' &&
|
||||||
|
_requestBodyNonStream.speed.trim()
|
||||||
|
) {
|
||||||
|
usageObject.request_speed = _requestBodyNonStream.speed.trim().toLowerCase()
|
||||||
|
}
|
||||||
|
if (typeof jsonData.usage.speed === 'string' && jsonData.usage.speed.trim()) {
|
||||||
|
usageObject.speed = jsonData.usage.speed.trim().toLowerCase()
|
||||||
|
}
|
||||||
|
|
||||||
|
// 添加 cache_creation 子对象
|
||||||
|
if (ephemeral5mTokens > 0 || ephemeral1hTokens > 0) {
|
||||||
|
usageObject.cache_creation = {
|
||||||
|
ephemeral_5m_input_tokens: ephemeral5mTokens,
|
||||||
|
ephemeral_1h_input_tokens: ephemeral1hTokens
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// 记录真实的token使用量(包含模型信息和所有4种token以及账户ID)
|
// 记录真实的token使用量(包含模型信息和所有4种token以及账户ID)
|
||||||
const { accountId: responseAccountId } = response
|
const { accountId: responseAccountId } = response
|
||||||
const nonStreamCosts = await apiKeyService.recordUsage(
|
const nonStreamCosts = await apiKeyService.recordUsageWithDetails(
|
||||||
_apiKeyIdNonStream,
|
_apiKeyIdNonStream,
|
||||||
inputTokens,
|
usageObject,
|
||||||
outputTokens,
|
|
||||||
cacheCreateTokens,
|
|
||||||
cacheReadTokens,
|
|
||||||
model,
|
model,
|
||||||
responseAccountId,
|
responseAccountId,
|
||||||
accountType
|
accountType
|
||||||
@@ -1259,7 +1299,7 @@ async function handleMessagesRequest(req, res) {
|
|||||||
|
|
||||||
usageRecorded = true
|
usageRecorded = true
|
||||||
logger.api(
|
logger.api(
|
||||||
`📊 Non-stream usage recorded (real) - Model: ${model}, Input: ${inputTokens}, Output: ${outputTokens}, Cache Create: ${cacheCreateTokens}, Cache Read: ${cacheReadTokens}, Total: ${inputTokens + outputTokens + cacheCreateTokens + cacheReadTokens} tokens`
|
`📊 Non-stream usage recorded (real) - Model: ${model}, Input: ${inputTokens}, Output: ${outputTokens}, Cache Create: ${cacheCreateTokens} (5m: ${ephemeral5mTokens}, 1h: ${ephemeral1hTokens}), Cache Read: ${cacheReadTokens}, Total: ${inputTokens + outputTokens + cacheCreateTokens + cacheReadTokens} tokens`
|
||||||
)
|
)
|
||||||
} else {
|
} else {
|
||||||
logger.warn('⚠️ No usage data found in Claude API JSON response')
|
logger.warn('⚠️ No usage data found in Claude API JSON response')
|
||||||
|
|||||||
@@ -11,7 +11,11 @@ const USAGE_FIELDS = [
|
|||||||
'totalCacheCreateTokens',
|
'totalCacheCreateTokens',
|
||||||
'cacheCreateTokens',
|
'cacheCreateTokens',
|
||||||
'totalCacheReadTokens',
|
'totalCacheReadTokens',
|
||||||
'cacheReadTokens'
|
'cacheReadTokens',
|
||||||
|
'ephemeral5mTokens',
|
||||||
|
'ephemeral1hTokens',
|
||||||
|
'totalEphemeral5mTokens',
|
||||||
|
'totalEphemeral1hTokens'
|
||||||
]
|
]
|
||||||
|
|
||||||
class CostInitService {
|
class CostInitService {
|
||||||
@@ -202,8 +206,8 @@ class CostInitService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// 添加 cache_creation 子对象以支持精确 ephemeral 定价
|
// 添加 cache_creation 子对象以支持精确 ephemeral 定价
|
||||||
const eph5m = parseInt(data.ephemeral5mTokens) || 0
|
const eph5m = parseInt(data.totalEphemeral5mTokens) || parseInt(data.ephemeral5mTokens) || 0
|
||||||
const eph1h = parseInt(data.ephemeral1hTokens) || 0
|
const eph1h = parseInt(data.totalEphemeral1hTokens) || parseInt(data.ephemeral1hTokens) || 0
|
||||||
if (eph5m > 0 || eph1h > 0) {
|
if (eph5m > 0 || eph1h > 0) {
|
||||||
usage.cache_creation = {
|
usage.cache_creation = {
|
||||||
ephemeral_5m_input_tokens: eph5m,
|
ephemeral_5m_input_tokens: eph5m,
|
||||||
|
|||||||
@@ -28,43 +28,7 @@ class PricingService {
|
|||||||
this.updateTimer = null // 定时更新任务句柄
|
this.updateTimer = null // 定时更新任务句柄
|
||||||
this.hashSyncInProgress = false // 哈希同步状态
|
this.hashSyncInProgress = false // 哈希同步状态
|
||||||
|
|
||||||
// 硬编码的 1 小时缓存价格(美元/百万 token)
|
// Claude Prompt Caching 官方倍率(基于输入价格)— 仅作为 model_pricing.json 缺失字段时的兜底
|
||||||
// ephemeral_5m 的价格使用 model_pricing.json 中的 cache_creation_input_token_cost
|
|
||||||
// ephemeral_1h 的价格需要硬编码
|
|
||||||
this.ephemeral1hPricing = {
|
|
||||||
// Opus 系列: $30/MTok
|
|
||||||
'claude-opus-4-1': 0.00003,
|
|
||||||
'claude-opus-4-1-20250805': 0.00003,
|
|
||||||
'claude-opus-4': 0.00003,
|
|
||||||
'claude-opus-4-20250514': 0.00003,
|
|
||||||
'claude-3-opus': 0.00003,
|
|
||||||
'claude-3-opus-latest': 0.00003,
|
|
||||||
'claude-3-opus-20240229': 0.00003,
|
|
||||||
|
|
||||||
// Sonnet 系列: $6/MTok
|
|
||||||
'claude-3-5-sonnet': 0.000006,
|
|
||||||
'claude-3-5-sonnet-latest': 0.000006,
|
|
||||||
'claude-3-5-sonnet-20241022': 0.000006,
|
|
||||||
'claude-3-5-sonnet-20240620': 0.000006,
|
|
||||||
'claude-3-sonnet': 0.000006,
|
|
||||||
'claude-3-sonnet-20240307': 0.000006,
|
|
||||||
'claude-sonnet-3': 0.000006,
|
|
||||||
'claude-sonnet-3-5': 0.000006,
|
|
||||||
'claude-sonnet-3-7': 0.000006,
|
|
||||||
'claude-sonnet-4': 0.000006,
|
|
||||||
'claude-sonnet-4-20250514': 0.000006,
|
|
||||||
|
|
||||||
// Haiku 系列: $1.6/MTok
|
|
||||||
'claude-3-5-haiku': 0.0000016,
|
|
||||||
'claude-3-5-haiku-latest': 0.0000016,
|
|
||||||
'claude-3-5-haiku-20241022': 0.0000016,
|
|
||||||
'claude-3-haiku': 0.0000016,
|
|
||||||
'claude-3-haiku-20240307': 0.0000016,
|
|
||||||
'claude-haiku-3': 0.0000016,
|
|
||||||
'claude-haiku-3-5': 0.0000016
|
|
||||||
}
|
|
||||||
|
|
||||||
// Claude Prompt Caching 官方倍率(基于输入价格)
|
|
||||||
this.claudeCacheMultipliers = {
|
this.claudeCacheMultipliers = {
|
||||||
write5m: 1.25,
|
write5m: 1.25,
|
||||||
write1h: 2,
|
write1h: 2,
|
||||||
@@ -536,50 +500,6 @@ class PricingService {
|
|||||||
return modelName.replace(/\[1m\]/gi, '').trim()
|
return modelName.replace(/\[1m\]/gi, '').trim()
|
||||||
}
|
}
|
||||||
|
|
||||||
// 获取 1 小时缓存价格(优先使用 model_pricing.json 中的模型字段)
|
|
||||||
getEphemeral1hPricing(modelName, pricing = null) {
|
|
||||||
if (
|
|
||||||
pricing?.cache_creation_input_token_cost_above_1hr !== null &&
|
|
||||||
pricing?.cache_creation_input_token_cost_above_1hr !== undefined
|
|
||||||
) {
|
|
||||||
return pricing.cache_creation_input_token_cost_above_1hr
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!modelName) {
|
|
||||||
return 0
|
|
||||||
}
|
|
||||||
|
|
||||||
// 尝试直接匹配
|
|
||||||
if (
|
|
||||||
this.ephemeral1hPricing[modelName] !== null &&
|
|
||||||
this.ephemeral1hPricing[modelName] !== undefined
|
|
||||||
) {
|
|
||||||
return this.ephemeral1hPricing[modelName]
|
|
||||||
}
|
|
||||||
|
|
||||||
// 处理各种模型名称变体
|
|
||||||
const modelLower = modelName.toLowerCase()
|
|
||||||
|
|
||||||
// 检查是否是 Opus 系列
|
|
||||||
if (modelLower.includes('opus')) {
|
|
||||||
return 0.00001 // $10/MTok
|
|
||||||
}
|
|
||||||
|
|
||||||
// 检查是否是 Sonnet 系列
|
|
||||||
if (modelLower.includes('sonnet')) {
|
|
||||||
return 0.000006 // $6/MTok
|
|
||||||
}
|
|
||||||
|
|
||||||
// 检查是否是 Haiku 系列
|
|
||||||
if (modelLower.includes('haiku')) {
|
|
||||||
return 0.000002 // $2/MTok
|
|
||||||
}
|
|
||||||
|
|
||||||
// 默认返回 0(未知模型)
|
|
||||||
logger.debug(`💰 No 1h cache pricing found for model: ${modelName}`)
|
|
||||||
return 0
|
|
||||||
}
|
|
||||||
|
|
||||||
// 计算使用费用
|
// 计算使用费用
|
||||||
calculateCost(usage, modelName) {
|
calculateCost(usage, modelName) {
|
||||||
const normalizedModelName = this.stripLongContextSuffix(modelName)
|
const normalizedModelName = this.stripLongContextSuffix(modelName)
|
||||||
@@ -675,43 +595,58 @@ class PricingService {
|
|||||||
: baseOutputPrice
|
: baseOutputPrice
|
||||||
: baseOutputPrice
|
: baseOutputPrice
|
||||||
|
|
||||||
// 应用 Fast Mode 倍率(在 200K+ 价格之上叠加)
|
// 缓存价格:优先从 model_pricing.json 取,Claude 缺失时用倍率兜底
|
||||||
if (fastMultiplier > 1) {
|
|
||||||
actualInputPrice *= fastMultiplier
|
|
||||||
actualOutputPrice *= fastMultiplier
|
|
||||||
}
|
|
||||||
|
|
||||||
let actualCacheCreatePrice = 0
|
let actualCacheCreatePrice = 0
|
||||||
let actualCacheReadPrice = 0
|
let actualCacheReadPrice = 0
|
||||||
let actualEphemeral1hPrice = 0
|
let actualEphemeral1hPrice = 0
|
||||||
|
|
||||||
if (isClaudeModel) {
|
if (useLongContextPricing) {
|
||||||
// Claude 模型缓存价格统一按输入价格倍率推导,避免来源字段不一致导致计费偏差
|
// 200K+:Claude 仅用 above_200k 专用字段,缺失留 0 让下方兜底从 actualInputPrice 推导
|
||||||
actualCacheCreatePrice = actualInputPrice * this.claudeCacheMultipliers.write5m
|
actualCacheCreatePrice = isClaudeModel
|
||||||
actualCacheReadPrice = actualInputPrice * this.claudeCacheMultipliers.read
|
? pricing.cache_creation_input_token_cost_above_200k_tokens || 0
|
||||||
actualEphemeral1hPrice = actualInputPrice * this.claudeCacheMultipliers.write1h
|
: pricing.cache_creation_input_token_cost_above_200k_tokens ||
|
||||||
} else {
|
|
||||||
actualCacheCreatePrice = useLongContextPricing
|
|
||||||
? pricing.cache_creation_input_token_cost_above_200k_tokens ||
|
|
||||||
pricing.cache_creation_input_token_cost ||
|
pricing.cache_creation_input_token_cost ||
|
||||||
0
|
0
|
||||||
: pricing.cache_creation_input_token_cost || 0
|
actualCacheReadPrice = isClaudeModel
|
||||||
|
? pricing.cache_read_input_token_cost_above_200k_tokens || 0
|
||||||
actualCacheReadPrice = useLongContextPricing
|
: pricing.cache_read_input_token_cost_above_200k_tokens ||
|
||||||
? pricing.cache_read_input_token_cost_above_200k_tokens ||
|
|
||||||
pricing.cache_read_input_token_cost ||
|
pricing.cache_read_input_token_cost ||
|
||||||
0
|
0
|
||||||
: pricing.cache_read_input_token_cost || 0
|
const has1h200k =
|
||||||
|
pricing.cache_creation_input_token_cost_above_1hr_above_200k_tokens !== null &&
|
||||||
|
pricing.cache_creation_input_token_cost_above_1hr_above_200k_tokens !== undefined
|
||||||
|
actualEphemeral1hPrice = has1h200k
|
||||||
|
? pricing.cache_creation_input_token_cost_above_1hr_above_200k_tokens
|
||||||
|
: isClaudeModel
|
||||||
|
? 0
|
||||||
|
: pricing.cache_creation_input_token_cost_above_1hr || 0
|
||||||
|
} else {
|
||||||
|
actualCacheCreatePrice = pricing.cache_creation_input_token_cost || 0
|
||||||
|
actualCacheReadPrice = pricing.cache_read_input_token_cost || 0
|
||||||
|
actualEphemeral1hPrice = pricing.cache_creation_input_token_cost_above_1hr || 0
|
||||||
|
}
|
||||||
|
|
||||||
const defaultEphemeral1hPrice = this.getEphemeral1hPricing(modelName, pricing)
|
// Claude 兜底:pricing 字段缺失时用倍率从 actualInputPrice 推导
|
||||||
|
// 此时 actualInputPrice 尚未含 fastMultiplier,下方统一应用
|
||||||
|
if (isClaudeModel) {
|
||||||
|
if (!actualCacheCreatePrice) {
|
||||||
|
actualCacheCreatePrice = actualInputPrice * this.claudeCacheMultipliers.write5m
|
||||||
|
}
|
||||||
|
if (!actualCacheReadPrice) {
|
||||||
|
actualCacheReadPrice = actualInputPrice * this.claudeCacheMultipliers.read
|
||||||
|
}
|
||||||
|
if (!actualEphemeral1hPrice) {
|
||||||
|
actualEphemeral1hPrice = actualInputPrice * this.claudeCacheMultipliers.write1h
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// 非 Claude 模型维持原有字段优先级
|
// Fast Mode 倍率:统一一次性应用于所有价格
|
||||||
actualEphemeral1hPrice = useLongContextPricing
|
if (fastMultiplier > 1) {
|
||||||
? pricing.cache_creation_input_token_cost_above_1hr_above_200k_tokens !== null &&
|
actualInputPrice *= fastMultiplier
|
||||||
pricing.cache_creation_input_token_cost_above_1hr_above_200k_tokens !== undefined
|
actualOutputPrice *= fastMultiplier
|
||||||
? pricing.cache_creation_input_token_cost_above_1hr_above_200k_tokens
|
actualCacheCreatePrice *= fastMultiplier
|
||||||
: defaultEphemeral1hPrice
|
actualCacheReadPrice *= fastMultiplier
|
||||||
: defaultEphemeral1hPrice
|
actualEphemeral1hPrice *= fastMultiplier
|
||||||
}
|
}
|
||||||
|
|
||||||
// 计算各项费用
|
// 计算各项费用
|
||||||
|
|||||||
@@ -239,6 +239,16 @@ class CostCalculator {
|
|||||||
aggregatedUsage.cacheReadTokens || aggregatedUsage.totalCacheReadTokens || 0
|
aggregatedUsage.cacheReadTokens || aggregatedUsage.totalCacheReadTokens || 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 如果有 ephemeral 拆分数据,构建 cache_creation 子对象
|
||||||
|
const eph5m = aggregatedUsage.ephemeral5mTokens || aggregatedUsage.totalEphemeral5mTokens || 0
|
||||||
|
const eph1h = aggregatedUsage.ephemeral1hTokens || aggregatedUsage.totalEphemeral1hTokens || 0
|
||||||
|
if (eph5m > 0 || eph1h > 0) {
|
||||||
|
usage.cache_creation = {
|
||||||
|
ephemeral_5m_input_tokens: eph5m,
|
||||||
|
ephemeral_1h_input_tokens: eph1h
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return this.calculateCost(usage, model)
|
return this.calculateCost(usage, model)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user