mirror of
https://github.com/Wei-Shaw/claude-relay-service.git
synced 2026-01-23 20:12:11 +00:00
feat: 实现基于费用的速率限制功能
- 新增 rateLimitCost 字段,支持按费用进行速率限制
- 新增 weeklyOpusCostLimit 字段,支持 Opus 模型周费用限制
- 优化速率限制逻辑,支持费用、请求数、token多维度控制
- 更新前端界面,添加费用限制配置选项
- 增强账户管理功能,支持费用统计和限制
- 改进 Redis 数据模型,支持费用计数器
- 优化价格计算服务,支持更精确的成本核算

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -55,6 +55,17 @@ class PricingService {
|
||||
'claude-haiku-3': 0.0000016,
|
||||
'claude-haiku-3-5': 0.0000016
|
||||
}
|
||||
|
||||
// 硬编码的 1M 上下文模型价格(美元/token)
|
||||
// 当总输入 tokens 超过 200k 时使用这些价格
|
||||
this.longContextPricing = {
|
||||
// claude-sonnet-4-20250514[1m] 模型的 1M 上下文价格
|
||||
'claude-sonnet-4-20250514[1m]': {
|
||||
input: 0.000006, // $6/MTok
|
||||
output: 0.0000225 // $22.50/MTok
|
||||
}
|
||||
// 未来可以添加更多 1M 模型的价格
|
||||
}
|
||||
}
|
||||
|
||||
// 初始化价格服务
|
||||
@@ -329,9 +340,40 @@ class PricingService {
|
||||
|
||||
// 计算使用费用
|
||||
calculateCost(usage, modelName) {
|
||||
// 检查是否为 1M 上下文模型
|
||||
const isLongContextModel = modelName && modelName.includes('[1m]')
|
||||
let isLongContextRequest = false
|
||||
let useLongContextPricing = false
|
||||
|
||||
if (isLongContextModel) {
|
||||
// 计算总输入 tokens
|
||||
const inputTokens = usage.input_tokens || 0
|
||||
const cacheCreationTokens = usage.cache_creation_input_tokens || 0
|
||||
const cacheReadTokens = usage.cache_read_input_tokens || 0
|
||||
const totalInputTokens = inputTokens + cacheCreationTokens + cacheReadTokens
|
||||
|
||||
// 如果总输入超过 200k,使用 1M 上下文价格
|
||||
if (totalInputTokens > 200000) {
|
||||
isLongContextRequest = true
|
||||
// 检查是否有硬编码的 1M 价格
|
||||
if (this.longContextPricing[modelName]) {
|
||||
useLongContextPricing = true
|
||||
} else {
|
||||
// 如果没有找到硬编码价格,使用第一个 1M 模型的价格作为默认
|
||||
const defaultLongContextModel = Object.keys(this.longContextPricing)[0]
|
||||
if (defaultLongContextModel) {
|
||||
useLongContextPricing = true
|
||||
logger.warn(
|
||||
`⚠️ No specific 1M pricing for ${modelName}, using default from ${defaultLongContextModel}`
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const pricing = this.getModelPricing(modelName)
|
||||
|
||||
if (!pricing) {
|
||||
if (!pricing && !useLongContextPricing) {
|
||||
return {
|
||||
inputCost: 0,
|
||||
outputCost: 0,
|
||||
@@ -340,14 +382,35 @@ class PricingService {
|
||||
ephemeral5mCost: 0,
|
||||
ephemeral1hCost: 0,
|
||||
totalCost: 0,
|
||||
hasPricing: false
|
||||
hasPricing: false,
|
||||
isLongContextRequest: false
|
||||
}
|
||||
}
|
||||
|
||||
const inputCost = (usage.input_tokens || 0) * (pricing.input_cost_per_token || 0)
|
||||
const outputCost = (usage.output_tokens || 0) * (pricing.output_cost_per_token || 0)
|
||||
let inputCost = 0
|
||||
let outputCost = 0
|
||||
|
||||
if (useLongContextPricing) {
|
||||
// 使用 1M 上下文特殊价格(仅输入和输出价格改变)
|
||||
const longContextPrices =
|
||||
this.longContextPricing[modelName] ||
|
||||
this.longContextPricing[Object.keys(this.longContextPricing)[0]]
|
||||
|
||||
inputCost = (usage.input_tokens || 0) * longContextPrices.input
|
||||
outputCost = (usage.output_tokens || 0) * longContextPrices.output
|
||||
|
||||
logger.info(
|
||||
`💰 Using 1M context pricing for ${modelName}: input=$${longContextPrices.input}/token, output=$${longContextPrices.output}/token`
|
||||
)
|
||||
} else {
|
||||
// 使用正常价格
|
||||
inputCost = (usage.input_tokens || 0) * (pricing?.input_cost_per_token || 0)
|
||||
outputCost = (usage.output_tokens || 0) * (pricing?.output_cost_per_token || 0)
|
||||
}
|
||||
|
||||
// 缓存价格保持不变(即使对于 1M 模型)
|
||||
const cacheReadCost =
|
||||
(usage.cache_read_input_tokens || 0) * (pricing.cache_read_input_token_cost || 0)
|
||||
(usage.cache_read_input_tokens || 0) * (pricing?.cache_read_input_token_cost || 0)
|
||||
|
||||
// 处理缓存创建费用:
|
||||
// 1. 如果有详细的 cache_creation 对象,使用它
|
||||
@@ -362,7 +425,7 @@ class PricingService {
|
||||
const ephemeral1hTokens = usage.cache_creation.ephemeral_1h_input_tokens || 0
|
||||
|
||||
// 5分钟缓存使用标准的 cache_creation_input_token_cost
|
||||
ephemeral5mCost = ephemeral5mTokens * (pricing.cache_creation_input_token_cost || 0)
|
||||
ephemeral5mCost = ephemeral5mTokens * (pricing?.cache_creation_input_token_cost || 0)
|
||||
|
||||
// 1小时缓存使用硬编码的价格
|
||||
const ephemeral1hPrice = this.getEphemeral1hPricing(modelName)
|
||||
@@ -373,7 +436,7 @@ class PricingService {
|
||||
} else if (usage.cache_creation_input_tokens) {
|
||||
// 旧格式,所有缓存创建 tokens 都按 5 分钟价格计算(向后兼容)
|
||||
cacheCreateCost =
|
||||
(usage.cache_creation_input_tokens || 0) * (pricing.cache_creation_input_token_cost || 0)
|
||||
(usage.cache_creation_input_tokens || 0) * (pricing?.cache_creation_input_token_cost || 0)
|
||||
ephemeral5mCost = cacheCreateCost
|
||||
}
|
||||
|
||||
@@ -386,11 +449,22 @@ class PricingService {
|
||||
ephemeral1hCost,
|
||||
totalCost: inputCost + outputCost + cacheCreateCost + cacheReadCost,
|
||||
hasPricing: true,
|
||||
isLongContextRequest,
|
||||
pricing: {
|
||||
input: pricing.input_cost_per_token || 0,
|
||||
output: pricing.output_cost_per_token || 0,
|
||||
cacheCreate: pricing.cache_creation_input_token_cost || 0,
|
||||
cacheRead: pricing.cache_read_input_token_cost || 0,
|
||||
input: useLongContextPricing
|
||||
? (
|
||||
this.longContextPricing[modelName] ||
|
||||
this.longContextPricing[Object.keys(this.longContextPricing)[0]]
|
||||
)?.input || 0
|
||||
: pricing?.input_cost_per_token || 0,
|
||||
output: useLongContextPricing
|
||||
? (
|
||||
this.longContextPricing[modelName] ||
|
||||
this.longContextPricing[Object.keys(this.longContextPricing)[0]]
|
||||
)?.output || 0
|
||||
: pricing?.output_cost_per_token || 0,
|
||||
cacheCreate: pricing?.cache_creation_input_token_cost || 0,
|
||||
cacheRead: pricing?.cache_read_input_token_cost || 0,
|
||||
ephemeral1h: this.getEphemeral1hPricing(modelName)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user