From 1d90500de91610cf703c87dac43b11bd38110551 Mon Sep 17 00:00:00 2001 From: sczheng Date: Fri, 6 Feb 2026 11:58:54 +0800 Subject: [PATCH 1/6] =?UTF-8?q?mod:=20=E4=BF=AE=E6=94=B91m=E4=B8=8A?= =?UTF-8?q?=E4=B8=8B=E6=96=87=E7=9A=84=E7=A1=AC=E7=BC=96=E7=A0=81=E8=B4=B9?= =?UTF-8?q?=E7=94=A8=E8=AE=A1=E7=AE=97?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/services/pricingService.js | 152 +++++++--------- tests/pricingService.test.js | 322 +++++++++++++++++++++++++++++++++ 2 files changed, 391 insertions(+), 83 deletions(-) create mode 100644 tests/pricingService.test.js diff --git a/src/services/pricingService.js b/src/services/pricingService.js index a8f37667..6f513f13 100644 --- a/src/services/pricingService.js +++ b/src/services/pricingService.js @@ -63,17 +63,6 @@ class PricingService { 'claude-haiku-3': 0.0000016, 'claude-haiku-3-5': 0.0000016 } - - // 硬编码的 1M 上下文模型价格(美元/token) - // 当总输入 tokens 超过 200k 时使用这些价格 - this.longContextPricing = { - // claude-sonnet-4-20250514[1m] 模型的 1M 上下文价格 - 'claude-sonnet-4-20250514[1m]': { - input: 0.000006, // $6/MTok - output: 0.0000225 // $22.50/MTok - } - // 未来可以添加更多 1M 模型的价格 - } } // 初始化价格服务 @@ -498,40 +487,41 @@ class PricingService { // 计算使用费用 calculateCost(usage, modelName) { - // 检查是否为 1M 上下文模型 + // 检查是否为 1M 上下文模型(用户通过 [1m] 后缀主动选择长上下文模式) const isLongContextModel = modelName && modelName.includes('[1m]') let isLongContextRequest = false let useLongContextPricing = false - if (isLongContextModel) { - // 计算总输入 tokens - const inputTokens = usage.input_tokens || 0 - const cacheCreationTokens = usage.cache_creation_input_tokens || 0 - const cacheReadTokens = usage.cache_read_input_tokens || 0 - const totalInputTokens = inputTokens + cacheCreationTokens + cacheReadTokens + // 计算总输入 tokens(用于判断是否超过 200K 阈值) + const inputTokens = usage.input_tokens || 0 + const cacheCreationTokens = usage.cache_creation_input_tokens || 0 + const cacheReadTokens = usage.cache_read_input_tokens || 0 + const totalInputTokens = inputTokens + cacheCreationTokens + cacheReadTokens - // 如果总输入超过 200k,使用 1M 上下文价格 - if (totalInputTokens > 200000) { - isLongContextRequest = true - // 检查是否有硬编码的 1M 价格 - if (this.longContextPricing[modelName]) { - useLongContextPricing = true - } else { - // 如果没有找到硬编码价格,使用第一个 1M 模型的价格作为默认 - const defaultLongContextModel = Object.keys(this.longContextPricing)[0] - if (defaultLongContextModel) { - useLongContextPricing = true - logger.warn( - `⚠️ No specific 1M pricing for ${modelName}, using default from ${defaultLongContextModel}` - ) - } - } + // 获取模型定价信息 + const pricing = this.getModelPricing(modelName) + + // 当 [1m] 模型总输入超过 200K 且 model_pricing.json 有 above_200k 字段时,使用高档价格 + // 根据 Anthropic 官方文档:当总输入超过 200K 时,整个请求所有 token 类型都使用高档价格 + if (isLongContextModel && totalInputTokens > 200000) { + isLongContextRequest = true + // 检查 model_pricing.json 是否有 above_200k 字段 + if ( + pricing?.input_cost_per_token_above_200k_tokens !== null && + pricing?.input_cost_per_token_above_200k_tokens !== undefined + ) { + useLongContextPricing = true + logger.info( + `💰 Using 200K+ pricing for ${modelName}: total input tokens = ${totalInputTokens.toLocaleString()}` + ) + } else { + logger.warn( + `⚠️ Model ${modelName} exceeds 200K tokens but no above_200k pricing found in model_pricing.json` + ) } } - const pricing = this.getModelPricing(modelName) - - if (!pricing && !useLongContextPricing) { + if (!pricing) { return { inputCost: 0, outputCost: 0, @@ -545,59 +535,65 @@ class PricingService { } } - let inputCost = 0 - let outputCost = 0 + // 确定实际使用的价格(普通或 200K+ 高档价格) + const actualInputPrice = useLongContextPricing + ? pricing.input_cost_per_token_above_200k_tokens + : pricing.input_cost_per_token || 0 - if (useLongContextPricing) { - // 使用 1M 上下文特殊价格(仅输入和输出价格改变) - const longContextPrices = - this.longContextPricing[modelName] || - this.longContextPricing[Object.keys(this.longContextPricing)[0]] + const actualOutputPrice = useLongContextPricing + ? pricing.output_cost_per_token_above_200k_tokens + : pricing.output_cost_per_token || 0 - inputCost = (usage.input_tokens || 0) * longContextPrices.input - outputCost = (usage.output_tokens || 0) * longContextPrices.output + const actualCacheCreatePrice = useLongContextPricing + ? pricing.cache_creation_input_token_cost_above_200k_tokens || + pricing.cache_creation_input_token_cost || + 0 + : pricing.cache_creation_input_token_cost || 0 - logger.info( - `💰 Using 1M context pricing for ${modelName}: input=$${longContextPrices.input}/token, output=$${longContextPrices.output}/token` - ) - } else { - // 使用正常价格 - inputCost = (usage.input_tokens || 0) * (pricing?.input_cost_per_token || 0) - outputCost = (usage.output_tokens || 0) * (pricing?.output_cost_per_token || 0) - } + const actualCacheReadPrice = useLongContextPricing + ? pricing.cache_read_input_token_cost_above_200k_tokens || + pricing.cache_read_input_token_cost || + 0 + : pricing.cache_read_input_token_cost || 0 - // 缓存价格保持不变(即使对于 1M 模型) - const cacheReadCost = - (usage.cache_read_input_tokens || 0) * (pricing?.cache_read_input_token_cost || 0) + // 1小时缓存的 200K+ 价格 + const actualEphemeral1hPrice = useLongContextPricing + ? pricing.cache_creation_input_token_cost_above_1hr_above_200k_tokens || + this.getEphemeral1hPricing(modelName) + : this.getEphemeral1hPricing(modelName) - // 处理缓存创建费用: - // 1. 如果有详细的 cache_creation 对象,使用它 - // 2. 否则使用总的 cache_creation_input_tokens(向后兼容) + // 计算各项费用 + const inputCost = inputTokens * actualInputPrice + const outputCost = (usage.output_tokens || 0) * actualOutputPrice + + // 处理缓存费用 let ephemeral5mCost = 0 let ephemeral1hCost = 0 let cacheCreateCost = 0 + let cacheReadCost = 0 if (usage.cache_creation && typeof usage.cache_creation === 'object') { // 有详细的缓存创建数据 const ephemeral5mTokens = usage.cache_creation.ephemeral_5m_input_tokens || 0 const ephemeral1hTokens = usage.cache_creation.ephemeral_1h_input_tokens || 0 - // 5分钟缓存使用标准的 cache_creation_input_token_cost - ephemeral5mCost = ephemeral5mTokens * (pricing?.cache_creation_input_token_cost || 0) + // 5分钟缓存使用 cache_creation 价格 + ephemeral5mCost = ephemeral5mTokens * actualCacheCreatePrice - // 1小时缓存使用硬编码的价格 - const ephemeral1hPrice = this.getEphemeral1hPricing(modelName) - ephemeral1hCost = ephemeral1hTokens * ephemeral1hPrice + // 1小时缓存使用 ephemeral_1h 价格 + ephemeral1hCost = ephemeral1hTokens * actualEphemeral1hPrice // 总的缓存创建费用 cacheCreateCost = ephemeral5mCost + ephemeral1hCost - } else if (usage.cache_creation_input_tokens) { + } else if (cacheCreationTokens) { // 旧格式,所有缓存创建 tokens 都按 5 分钟价格计算(向后兼容) - cacheCreateCost = - (usage.cache_creation_input_tokens || 0) * (pricing?.cache_creation_input_token_cost || 0) + cacheCreateCost = cacheCreationTokens * actualCacheCreatePrice ephemeral5mCost = cacheCreateCost } + // 缓存读取费用 + cacheReadCost = cacheReadTokens * actualCacheReadPrice + return { inputCost, outputCost, @@ -609,21 +605,11 @@ class PricingService { hasPricing: true, isLongContextRequest, pricing: { - input: useLongContextPricing - ? ( - this.longContextPricing[modelName] || - this.longContextPricing[Object.keys(this.longContextPricing)[0]] - )?.input || 0 - : pricing?.input_cost_per_token || 0, - output: useLongContextPricing - ? ( - this.longContextPricing[modelName] || - this.longContextPricing[Object.keys(this.longContextPricing)[0]] - )?.output || 0 - : pricing?.output_cost_per_token || 0, - cacheCreate: pricing?.cache_creation_input_token_cost || 0, - cacheRead: pricing?.cache_read_input_token_cost || 0, - ephemeral1h: this.getEphemeral1hPricing(modelName) + input: actualInputPrice, + output: actualOutputPrice, + cacheCreate: actualCacheCreatePrice, + cacheRead: actualCacheReadPrice, + ephemeral1h: actualEphemeral1hPrice } } } diff --git a/tests/pricingService.test.js b/tests/pricingService.test.js new file mode 100644 index 00000000..60e71281 --- /dev/null +++ b/tests/pricingService.test.js @@ -0,0 +1,322 @@ +/** + * PricingService 长上下文(200K+)分层计费测试 + * + * 测试当 [1m] 模型总输入超过 200K tokens 时的分层计费逻辑: + * - 使用 model_pricing.json 中的 *_above_200k_tokens 字段 + * - 所有 token 类型(input/output/cache_create/cache_read)都切换到高档价格 + */ + +// Mock logger to avoid console output during tests +jest.mock('../src/utils/logger', () => ({ + api: jest.fn(), + warn: jest.fn(), + error: jest.fn(), + info: jest.fn(), + debug: jest.fn(), + success: jest.fn(), + database: jest.fn(), + security: jest.fn() +})) + +// Mock fs to control pricing data +jest.mock('fs', () => { + const actual = jest.requireActual('fs') + return { + ...actual, + existsSync: jest.fn(), + readFileSync: jest.fn(), + writeFileSync: jest.fn(), + mkdirSync: jest.fn(), + statSync: jest.fn(), + watchFile: jest.fn(), + unwatchFile: jest.fn() + } +}) + +describe('PricingService - 200K+ Long Context Pricing', () => { + let pricingService + const fs = require('fs') + + // 模拟 claude-sonnet-4-20250514 的完整价格数据(来自 model_pricing.json) + const mockPricingData = { + 'claude-sonnet-4-20250514': { + input_cost_per_token: 0.000003, // $3/MTok + output_cost_per_token: 0.000015, // $15/MTok + cache_creation_input_token_cost: 0.00000375, // $3.75/MTok + cache_read_input_token_cost: 0.0000003, // $0.30/MTok + // 200K+ 高档价格 + input_cost_per_token_above_200k_tokens: 0.000006, // $6/MTok (2x) + output_cost_per_token_above_200k_tokens: 0.0000225, // $22.50/MTok (1.5x) + cache_creation_input_token_cost_above_200k_tokens: 0.0000075, // $7.50/MTok (2x) + cache_read_input_token_cost_above_200k_tokens: 0.0000006, // $0.60/MTok (2x) + // 1小时缓存价格 + cache_creation_input_token_cost_above_1hr: 0.0000075, + cache_creation_input_token_cost_above_1hr_above_200k_tokens: 0.000015 + }, + // 没有 above_200k 字段的模型 + 'claude-3-haiku-20240307': { + input_cost_per_token: 0.00000025, + output_cost_per_token: 0.00000125, + cache_creation_input_token_cost: 0.0000003, + cache_read_input_token_cost: 0.00000003 + } + } + + beforeEach(() => { + // 清除缓存的模块 + jest.resetModules() + + // 配置 fs mock + fs.existsSync.mockReturnValue(true) + fs.readFileSync.mockReturnValue(JSON.stringify(mockPricingData)) + fs.statSync.mockReturnValue({ mtime: new Date(), mtimeMs: Date.now() }) + fs.watchFile.mockImplementation(() => {}) + fs.unwatchFile.mockImplementation(() => {}) + + // 重新加载 pricingService + pricingService = require('../src/services/pricingService') + + // 直接设置价格数据(绕过初始化) + pricingService.pricingData = mockPricingData + pricingService.lastUpdated = new Date() + }) + + afterEach(() => { + // 清理定时器 + if (pricingService.cleanup) { + pricingService.cleanup() + } + jest.clearAllMocks() + }) + + describe('阈值边界测试', () => { + it('199999 tokens - 应使用基础价格', () => { + const usage = { + input_tokens: 199999, + output_tokens: 1000, + cache_creation_input_tokens: 0, + cache_read_input_tokens: 0 + } + + const result = pricingService.calculateCost(usage, 'claude-sonnet-4-20250514[1m]') + + expect(result.isLongContextRequest).toBe(false) + expect(result.pricing.input).toBe(0.000003) // 基础价格 + expect(result.pricing.output).toBe(0.000015) // 基础价格 + }) + + it('200000 tokens - 应使用基础价格(边界不触发)', () => { + const usage = { + input_tokens: 200000, + output_tokens: 1000, + cache_creation_input_tokens: 0, + cache_read_input_tokens: 0 + } + + const result = pricingService.calculateCost(usage, 'claude-sonnet-4-20250514[1m]') + + // 200000 不大于 200000,所以不触发高档价格 + expect(result.isLongContextRequest).toBe(false) + expect(result.pricing.input).toBe(0.000003) // 基础价格 + }) + + it('200001 tokens - 应使用 200K+ 高档价格', () => { + const usage = { + input_tokens: 200001, + output_tokens: 1000, + cache_creation_input_tokens: 0, + cache_read_input_tokens: 0 + } + + const result = pricingService.calculateCost(usage, 'claude-sonnet-4-20250514[1m]') + + expect(result.isLongContextRequest).toBe(true) + expect(result.pricing.input).toBe(0.000006) // 200K+ 高档价格 + expect(result.pricing.output).toBe(0.0000225) // 200K+ 高档价格 + }) + }) + + describe('总输入计算(input + cache_creation + cache_read)', () => { + it('分散在各类 token 中总计超过 200K 应触发高档价格', () => { + const usage = { + input_tokens: 150000, + output_tokens: 10000, + cache_creation_input_tokens: 40000, + cache_read_input_tokens: 20000 + } + // Total: 150000 + 40000 + 20000 = 210000 > 200000 + + const result = pricingService.calculateCost(usage, 'claude-sonnet-4-20250514[1m]') + + expect(result.isLongContextRequest).toBe(true) + expect(result.pricing.input).toBe(0.000006) + expect(result.pricing.output).toBe(0.0000225) + expect(result.pricing.cacheCreate).toBe(0.0000075) + expect(result.pricing.cacheRead).toBe(0.0000006) + }) + + it('仅 cache_creation + cache_read 超过 200K 也应触发', () => { + const usage = { + input_tokens: 50000, + output_tokens: 5000, + cache_creation_input_tokens: 100000, + cache_read_input_tokens: 60000 + } + // Total: 50000 + 100000 + 60000 = 210000 > 200000 + + const result = pricingService.calculateCost(usage, 'claude-sonnet-4-20250514[1m]') + + expect(result.isLongContextRequest).toBe(true) + }) + }) + + describe('Cache 高档价格测试', () => { + it('cache_creation 应使用 cache_creation_input_token_cost_above_200k_tokens', () => { + const usage = { + input_tokens: 150000, + output_tokens: 1000, + cache_creation_input_tokens: 60000, // 60K cache creation + cache_read_input_tokens: 0 + } + // Total: 210000 > 200000 + + const result = pricingService.calculateCost(usage, 'claude-sonnet-4-20250514[1m]') + + // cache_creation_input_token_cost_above_200k_tokens = 0.0000075 + expect(result.pricing.cacheCreate).toBe(0.0000075) + expect(result.cacheCreateCost).toBeCloseTo(60000 * 0.0000075, 10) + }) + + it('cache_read 应使用 cache_read_input_token_cost_above_200k_tokens', () => { + const usage = { + input_tokens: 150000, + output_tokens: 1000, + cache_creation_input_tokens: 0, + cache_read_input_tokens: 60000 // 60K cache read + } + // Total: 210000 > 200000 + + const result = pricingService.calculateCost(usage, 'claude-sonnet-4-20250514[1m]') + + // cache_read_input_token_cost_above_200k_tokens = 0.0000006 + expect(result.pricing.cacheRead).toBe(0.0000006) + expect(result.cacheReadCost).toBeCloseTo(60000 * 0.0000006, 10) + }) + }) + + describe('详细缓存创建数据(ephemeral_5m / ephemeral_1h)', () => { + it('200K+ 时 ephemeral_1h 应使用 cache_creation_input_token_cost_above_1hr_above_200k_tokens', () => { + const usage = { + input_tokens: 200001, + output_tokens: 1000, + cache_creation_input_tokens: 10000, // 向后兼容字段 + cache_read_input_tokens: 0, + cache_creation: { + ephemeral_5m_input_tokens: 5000, + ephemeral_1h_input_tokens: 5000 + } + } + + const result = pricingService.calculateCost(usage, 'claude-sonnet-4-20250514[1m]') + + expect(result.isLongContextRequest).toBe(true) + // ephemeral_5m: 5000 * 0.0000075 = 0.0000375 + expect(result.ephemeral5mCost).toBeCloseTo(5000 * 0.0000075, 10) + // ephemeral_1h: 5000 * 0.000015 (above_1hr_above_200k) + expect(result.ephemeral1hCost).toBeCloseTo(5000 * 0.000015, 10) + }) + }) + + describe('回退测试', () => { + it('模型无 above_200k 字段时回退到基础价格', () => { + const usage = { + input_tokens: 250000, + output_tokens: 1000, + cache_creation_input_tokens: 0, + cache_read_input_tokens: 0 + } + + const result = pricingService.calculateCost(usage, 'claude-3-haiku-20240307[1m]') + + // 模型没有 above_200k 字段,使用基础价格 + expect(result.isLongContextRequest).toBe(true) // 超过 200K + expect(result.pricing.input).toBe(0.00000025) // 基础价格(没有 above_200k 字段) + expect(result.pricing.cacheCreate).toBe(0.0000003) // 基础价格 + }) + }) + + describe('兼容性测试', () => { + it('非 [1m] 模型不受影响,始终使用基础价格', () => { + const usage = { + input_tokens: 250000, + output_tokens: 1000, + cache_creation_input_tokens: 0, + cache_read_input_tokens: 0 + } + + // 不带 [1m] 后缀 + const result = pricingService.calculateCost(usage, 'claude-sonnet-4-20250514') + + expect(result.isLongContextRequest).toBe(false) + expect(result.pricing.input).toBe(0.000003) // 基础价格 + expect(result.pricing.output).toBe(0.000015) // 基础价格 + expect(result.pricing.cacheCreate).toBe(0.00000375) // 基础价格 + expect(result.pricing.cacheRead).toBe(0.0000003) // 基础价格 + }) + + it('[1m] 模型未超过 200K 时使用基础价格', () => { + const usage = { + input_tokens: 100000, + output_tokens: 1000, + cache_creation_input_tokens: 50000, + cache_read_input_tokens: 49000 + } + // Total: 199000 < 200000 + + const result = pricingService.calculateCost(usage, 'claude-sonnet-4-20250514[1m]') + + expect(result.isLongContextRequest).toBe(false) + expect(result.pricing.input).toBe(0.000003) // 基础价格 + }) + + it('无定价数据时返回 hasPricing=false', () => { + const usage = { + input_tokens: 250000, + output_tokens: 1000 + } + + const result = pricingService.calculateCost(usage, 'unknown-model[1m]') + + expect(result.hasPricing).toBe(false) + expect(result.totalCost).toBe(0) + }) + }) + + describe('成本计算准确性', () => { + it('应正确计算 200K+ 场景下的总成本', () => { + const usage = { + input_tokens: 150000, + output_tokens: 10000, + cache_creation_input_tokens: 40000, + cache_read_input_tokens: 20000 + } + // Total input: 210000 > 200000 → 使用 200K+ 价格 + + const result = pricingService.calculateCost(usage, 'claude-sonnet-4-20250514[1m]') + + // 手动计算预期成本 + const expectedInputCost = 150000 * 0.000006 // $0.9 + const expectedOutputCost = 10000 * 0.0000225 // $0.225 + const expectedCacheCreateCost = 40000 * 0.0000075 // $0.3 + const expectedCacheReadCost = 20000 * 0.0000006 // $0.012 + const expectedTotal = + expectedInputCost + expectedOutputCost + expectedCacheCreateCost + expectedCacheReadCost + + expect(result.inputCost).toBeCloseTo(expectedInputCost, 10) + expect(result.outputCost).toBeCloseTo(expectedOutputCost, 10) + expect(result.cacheCreateCost).toBeCloseTo(expectedCacheCreateCost, 10) + expect(result.cacheReadCost).toBeCloseTo(expectedCacheReadCost, 10) + expect(result.totalCost).toBeCloseTo(expectedTotal, 10) + }) + }) +}) From 3b25cf01ade509e4d2e1d2a3d9713f894dd860ce Mon Sep 17 00:00:00 2001 From: sczheng189 <724100151@qq.com> Date: Sat, 14 Feb 2026 21:32:09 +0800 Subject: [PATCH 2/6] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0=20Claude=20?= =?UTF-8?q?=E8=AE=A1=E8=B4=B9=E7=89=B9=E6=80=A7=E5=92=8C=E8=AF=B7=E6=B1=82?= =?UTF-8?q?=E5=85=83=E4=BF=A1=E6=81=AF=E6=94=AF=E6=8C=81=EF=BC=8C=E4=BC=98?= =?UTF-8?q?=E5=8C=96=E9=95=BF=E4=B8=8A=E4=B8=8B=E6=96=87=E8=AE=A1=E8=B4=B9?= =?UTF-8?q?=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/routes/api.js | 40 +++++ src/routes/openaiClaudeRoutes.js | 26 ++- src/services/pricingService.js | 277 ++++++++++++++++++++++++++----- tests/pricingService.test.js | 105 ++++++++++-- 4 files changed, 387 insertions(+), 61 deletions(-) diff --git a/src/routes/api.js b/src/routes/api.js index 4d5647e4..337cec5c 100644 --- a/src/routes/api.js +++ b/src/routes/api.js @@ -468,6 +468,17 @@ async function handleMessagesRequest(req, res) { cache_creation_input_tokens: cacheCreateTokens, cache_read_input_tokens: cacheReadTokens } + const requestBetaHeader = + _headers['anthropic-beta'] || _headers['Anthropic-Beta'] || _headers['ANTHROPIC-BETA'] + if (requestBetaHeader) { + usageObject.request_anthropic_beta = requestBetaHeader + } + if (typeof _requestBody?.speed === 'string' && _requestBody.speed.trim()) { + usageObject.request_speed = _requestBody.speed.trim().toLowerCase() + } + if (typeof usageData.speed === 'string' && usageData.speed.trim()) { + usageObject.speed = usageData.speed.trim().toLowerCase() + } // 如果有详细的缓存创建数据,添加到 usage 对象中 if (ephemeral5mTokens > 0 || ephemeral1hTokens > 0) { @@ -562,6 +573,22 @@ async function handleMessagesRequest(req, res) { cache_creation_input_tokens: cacheCreateTokens, cache_read_input_tokens: cacheReadTokens } + const requestBetaHeader = + _headersConsole['anthropic-beta'] || + _headersConsole['Anthropic-Beta'] || + _headersConsole['ANTHROPIC-BETA'] + if (requestBetaHeader) { + usageObject.request_anthropic_beta = requestBetaHeader + } + if ( + typeof _requestBodyConsole?.speed === 'string' && + _requestBodyConsole.speed.trim() + ) { + usageObject.request_speed = _requestBodyConsole.speed.trim().toLowerCase() + } + if (typeof usageData.speed === 'string' && usageData.speed.trim()) { + usageObject.speed = usageData.speed.trim().toLowerCase() + } // 如果有详细的缓存创建数据,添加到 usage 对象中 if (ephemeral5mTokens > 0 || ephemeral1hTokens > 0) { @@ -728,6 +755,19 @@ async function handleMessagesRequest(req, res) { cache_creation_input_tokens: cacheCreateTokens, cache_read_input_tokens: cacheReadTokens } + const requestBetaHeader = + _headersCcr['anthropic-beta'] || + _headersCcr['Anthropic-Beta'] || + _headersCcr['ANTHROPIC-BETA'] + if (requestBetaHeader) { + usageObject.request_anthropic_beta = requestBetaHeader + } + if (typeof _requestBodyCcr?.speed === 'string' && _requestBodyCcr.speed.trim()) { + usageObject.request_speed = _requestBodyCcr.speed.trim().toLowerCase() + } + if (typeof usageData.speed === 'string' && usageData.speed.trim()) { + usageObject.speed = usageData.speed.trim().toLowerCase() + } // 如果有详细的缓存创建数据,添加到 usage 对象中 if (ephemeral5mTokens > 0 || ephemeral1hTokens > 0) { diff --git a/src/routes/openaiClaudeRoutes.js b/src/routes/openaiClaudeRoutes.js index ae791b56..fc910b6a 100644 --- a/src/routes/openaiClaudeRoutes.js +++ b/src/routes/openaiClaudeRoutes.js @@ -285,12 +285,23 @@ async function handleChatCompletion(req, res, apiKeyData) { (usage.cache_creation.ephemeral_1h_input_tokens || 0) : usage.cache_creation_input_tokens || 0) || 0 const cacheReadTokens = usage.cache_read_input_tokens || 0 + const usageWithRequestMeta = { ...usage } + const requestBetaHeader = + req.headers['anthropic-beta'] || + req.headers['Anthropic-Beta'] || + req.headers['ANTHROPIC-BETA'] + if (requestBetaHeader) { + usageWithRequestMeta.request_anthropic_beta = requestBetaHeader + } + if (typeof claudeRequest?.speed === 'string' && claudeRequest.speed.trim()) { + usageWithRequestMeta.request_speed = claudeRequest.speed.trim().toLowerCase() + } // 使用新的 recordUsageWithDetails 方法来支持详细的缓存数据 apiKeyService .recordUsageWithDetails( apiKeyData.id, - usage, // 直接传递整个 usage 对象,包含可能的 cache_creation 详细数据 + usageWithRequestMeta, // 传递 usage + 请求模式元信息(beta/speed) model, accountId, accountType @@ -413,11 +424,22 @@ async function handleChatCompletion(req, res, apiKeyData) { (usage.cache_creation.ephemeral_1h_input_tokens || 0) : usage.cache_creation_input_tokens || 0) || 0 const cacheReadTokens = usage.cache_read_input_tokens || 0 + const usageWithRequestMeta = { ...usage } + const requestBetaHeader = + req.headers['anthropic-beta'] || + req.headers['Anthropic-Beta'] || + req.headers['ANTHROPIC-BETA'] + if (requestBetaHeader) { + usageWithRequestMeta.request_anthropic_beta = requestBetaHeader + } + if (typeof claudeRequest?.speed === 'string' && claudeRequest.speed.trim()) { + usageWithRequestMeta.request_speed = claudeRequest.speed.trim().toLowerCase() + } // 使用新的 recordUsageWithDetails 方法来支持详细的缓存数据 apiKeyService .recordUsageWithDetails( apiKeyData.id, - usage, // 直接传递整个 usage 对象,包含可能的 cache_creation 详细数据 + usageWithRequestMeta, // 传递 usage + 请求模式元信息(beta/speed) claudeRequest.model, accountId, accountType diff --git a/src/services/pricingService.js b/src/services/pricingService.js index 6f513f13..55423361 100644 --- a/src/services/pricingService.js +++ b/src/services/pricingService.js @@ -63,6 +63,20 @@ class PricingService { 'claude-haiku-3': 0.0000016, 'claude-haiku-3-5': 0.0000016 } + + // Claude Prompt Caching 官方倍率(基于输入价格) + this.claudeCacheMultipliers = { + write5m: 1.25, + write1h: 2, + read: 0.1 + } + + // Claude 扩展计费特性 + this.claudeFeatureFlags = { + context1mBeta: 'context-1m-2025-08-07', + fastModeBeta: 'fast-mode-2026-02-01', + fastModeSpeed: 'fast' + } } // 初始化价格服务 @@ -451,14 +465,139 @@ class PricingService { return pricing } - // 获取 1 小时缓存价格 - getEphemeral1hPricing(modelName) { + // 从 usage 对象中提取 beta 特性列表(小写) + extractBetaFeatures(usage) { + const features = new Set() + if (!usage || typeof usage !== 'object') { + return features + } + + const requestHeaders = usage.request_headers || usage.requestHeaders || null + const headerBeta = + requestHeaders && typeof requestHeaders === 'object' + ? requestHeaders['anthropic-beta'] || + requestHeaders['Anthropic-Beta'] || + requestHeaders['ANTHROPIC-BETA'] + : null + + const candidates = [ + usage.anthropic_beta, + usage.anthropicBeta, + usage.request_anthropic_beta, + usage.requestAnthropicBeta, + usage.beta_header, + usage.betaHeader, + usage.beta_features, + headerBeta + ] + + const addFeature = (value) => { + if (!value || typeof value !== 'string') { + return + } + value + .split(',') + .map((item) => item.trim().toLowerCase()) + .filter(Boolean) + .forEach((item) => features.add(item)) + } + + for (const candidate of candidates) { + if (Array.isArray(candidate)) { + candidate.forEach(addFeature) + } else { + addFeature(candidate) + } + } + + return features + } + + // 提取请求/响应中的 speed 字段(小写) + extractSpeedSignal(usage) { + if (!usage || typeof usage !== 'object') { + return { responseSpeed: '', requestSpeed: '' } + } + + const normalize = (value) => + typeof value === 'string' && value.trim() ? value.trim().toLowerCase() : '' + + return { + responseSpeed: normalize(usage.speed), + requestSpeed: normalize(usage.request_speed || usage.requestSpeed) + } + } + + // Claude Fast Mode 目前仅适用于 Opus 4.6 系列 + isFastModeEligibleClaudeModel(modelName) { + return typeof modelName === 'string' && modelName.toLowerCase().includes('opus-4-6') + } + + // 去掉模型名中的 [1m] 后缀,便于价格查找 + stripLongContextSuffix(modelName) { + if (typeof modelName !== 'string') { + return modelName + } + return modelName.replace(/\[1m\]/gi, '').trim() + } + + // 获取 Fast Mode 对应的价格条目(仅匹配 fast/ 前缀) + getFastModePricing(modelName) { + if (!this.pricingData || !modelName) { + return null + } + + const cleanedModelName = this.stripLongContextSuffix(modelName) + const exactCandidates = new Set([`fast/${cleanedModelName}`]) + + if (cleanedModelName.startsWith('fast/')) { + exactCandidates.add(cleanedModelName) + } + + for (const candidate of exactCandidates) { + if (this.pricingData[candidate]) { + logger.debug(`💰 Found exact fast pricing for ${modelName}: ${candidate}`) + return this.pricingData[candidate] + } + } + + const normalizedModel = cleanedModelName.toLowerCase().replace(/[_-]/g, '') + for (const [key, value] of Object.entries(this.pricingData)) { + if (!key.startsWith('fast/')) { + continue + } + const normalizedFastKey = key + .slice('fast/'.length) + .toLowerCase() + .replace(/[_-]/g, '') + if (normalizedFastKey.includes(normalizedModel) || normalizedModel.includes(normalizedFastKey)) { + logger.debug(`💰 Found fuzzy fast pricing for ${modelName}: ${key}`) + return value + } + } + + logger.debug(`💰 No fast pricing found for model: ${modelName}`) + return null + } + + // 获取 1 小时缓存价格(优先使用 model_pricing.json 中的模型字段) + getEphemeral1hPricing(modelName, pricing = null) { + if ( + pricing?.cache_creation_input_token_cost_above_1hr !== null && + pricing?.cache_creation_input_token_cost_above_1hr !== undefined + ) { + return pricing.cache_creation_input_token_cost_above_1hr + } + if (!modelName) { return 0 } // 尝试直接匹配 - if (this.ephemeral1hPricing[modelName]) { + if ( + this.ephemeral1hPricing[modelName] !== null && + this.ephemeral1hPricing[modelName] !== undefined + ) { return this.ephemeral1hPricing[modelName] } @@ -487,8 +626,10 @@ class PricingService { // 计算使用费用 calculateCost(usage, modelName) { + const normalizedModelName = this.stripLongContextSuffix(modelName) + // 检查是否为 1M 上下文模型(用户通过 [1m] 后缀主动选择长上下文模式) - const isLongContextModel = modelName && modelName.includes('[1m]') + const isLongContextModel = typeof modelName === 'string' && modelName.includes('[1m]') let isLongContextRequest = false let useLongContextPricing = false @@ -498,27 +639,31 @@ class PricingService { const cacheReadTokens = usage.cache_read_input_tokens || 0 const totalInputTokens = inputTokens + cacheCreationTokens + cacheReadTokens - // 获取模型定价信息 - const pricing = this.getModelPricing(modelName) + // 识别 Claude 特性标识 + const betaFeatures = this.extractBetaFeatures(usage) + const hasContext1mBeta = betaFeatures.has(this.claudeFeatureFlags.context1mBeta) + const hasFastModeBeta = betaFeatures.has(this.claudeFeatureFlags.fastModeBeta) + const { responseSpeed, requestSpeed } = this.extractSpeedSignal(usage) + const hasFastSpeedSignal = + responseSpeed === this.claudeFeatureFlags.fastModeSpeed || + requestSpeed === this.claudeFeatureFlags.fastModeSpeed + const isFastModeRequest = + hasFastModeBeta && + hasFastSpeedSignal && + this.isFastModeEligibleClaudeModel(normalizedModelName) + const standardPricing = this.getModelPricing(modelName) + const fastPricing = isFastModeRequest ? this.getFastModePricing(normalizedModelName) : null + const pricing = fastPricing || standardPricing + const isLongContextModeEnabled = isLongContextModel || hasContext1mBeta - // 当 [1m] 模型总输入超过 200K 且 model_pricing.json 有 above_200k 字段时,使用高档价格 + // 当 [1m] 模型总输入超过 200K 时,进入 200K+ 计费逻辑 // 根据 Anthropic 官方文档:当总输入超过 200K 时,整个请求所有 token 类型都使用高档价格 - if (isLongContextModel && totalInputTokens > 200000) { + if (isLongContextModeEnabled && totalInputTokens > 200000) { isLongContextRequest = true - // 检查 model_pricing.json 是否有 above_200k 字段 - if ( - pricing?.input_cost_per_token_above_200k_tokens !== null && - pricing?.input_cost_per_token_above_200k_tokens !== undefined - ) { - useLongContextPricing = true - logger.info( - `💰 Using 200K+ pricing for ${modelName}: total input tokens = ${totalInputTokens.toLocaleString()}` - ) - } else { - logger.warn( - `⚠️ Model ${modelName} exceeds 200K tokens but no above_200k pricing found in model_pricing.json` - ) - } + useLongContextPricing = true + logger.info( + `💰 Using 200K+ pricing for ${modelName}: total input tokens = ${totalInputTokens.toLocaleString()}` + ) } if (!pricing) { @@ -535,32 +680,76 @@ class PricingService { } } - // 确定实际使用的价格(普通或 200K+ 高档价格) - const actualInputPrice = useLongContextPricing - ? pricing.input_cost_per_token_above_200k_tokens - : pricing.input_cost_per_token || 0 + const isClaudeModel = + (modelName && modelName.toLowerCase().includes('claude')) || + (typeof pricing?.litellm_provider === 'string' && + pricing.litellm_provider.toLowerCase().includes('anthropic')) - const actualOutputPrice = useLongContextPricing - ? pricing.output_cost_per_token_above_200k_tokens - : pricing.output_cost_per_token || 0 + if (isFastModeRequest && fastPricing) { + logger.info(`🚀 Fast mode pricing profile selected: fast/${normalizedModelName}`) + } else if (isFastModeRequest && !fastPricing) { + logger.warn( + `⚠️ Fast mode request detected but no fast pricing profile found for ${normalizedModelName}; fallback to standard profile` + ) + } - const actualCacheCreatePrice = useLongContextPricing - ? pricing.cache_creation_input_token_cost_above_200k_tokens || - pricing.cache_creation_input_token_cost || - 0 - : pricing.cache_creation_input_token_cost || 0 + const baseInputPrice = pricing.input_cost_per_token || 0 + const hasInput200kPrice = + pricing.input_cost_per_token_above_200k_tokens !== null && + pricing.input_cost_per_token_above_200k_tokens !== undefined - const actualCacheReadPrice = useLongContextPricing - ? pricing.cache_read_input_token_cost_above_200k_tokens || - pricing.cache_read_input_token_cost || - 0 - : pricing.cache_read_input_token_cost || 0 + // 确定实际使用的输入价格(普通或 200K+ 高档价格) + // Claude 模型在 200K+ 场景下如果缺少官方字段,按 2 倍输入价兜底 + let actualInputPrice = useLongContextPricing + ? hasInput200kPrice + ? pricing.input_cost_per_token_above_200k_tokens + : isClaudeModel + ? baseInputPrice * 2 + : baseInputPrice + : baseInputPrice - // 1小时缓存的 200K+ 价格 - const actualEphemeral1hPrice = useLongContextPricing - ? pricing.cache_creation_input_token_cost_above_1hr_above_200k_tokens || - this.getEphemeral1hPricing(modelName) - : this.getEphemeral1hPricing(modelName) + const baseOutputPrice = pricing.output_cost_per_token || 0 + const hasOutput200kPrice = + pricing.output_cost_per_token_above_200k_tokens !== null && + pricing.output_cost_per_token_above_200k_tokens !== undefined + let actualOutputPrice = useLongContextPricing + ? hasOutput200kPrice + ? pricing.output_cost_per_token_above_200k_tokens + : baseOutputPrice + : baseOutputPrice + + let actualCacheCreatePrice = 0 + let actualCacheReadPrice = 0 + let actualEphemeral1hPrice = 0 + + if (isClaudeModel) { + // Claude 模型缓存价格统一按输入价格倍率推导,避免来源字段不一致导致计费偏差 + actualCacheCreatePrice = actualInputPrice * this.claudeCacheMultipliers.write5m + actualCacheReadPrice = actualInputPrice * this.claudeCacheMultipliers.read + actualEphemeral1hPrice = actualInputPrice * this.claudeCacheMultipliers.write1h + } else { + actualCacheCreatePrice = useLongContextPricing + ? pricing.cache_creation_input_token_cost_above_200k_tokens || + pricing.cache_creation_input_token_cost || + 0 + : pricing.cache_creation_input_token_cost || 0 + + actualCacheReadPrice = useLongContextPricing + ? pricing.cache_read_input_token_cost_above_200k_tokens || + pricing.cache_read_input_token_cost || + 0 + : pricing.cache_read_input_token_cost || 0 + + const defaultEphemeral1hPrice = this.getEphemeral1hPricing(modelName, pricing) + + // 非 Claude 模型维持原有字段优先级 + actualEphemeral1hPrice = useLongContextPricing + ? pricing.cache_creation_input_token_cost_above_1hr_above_200k_tokens !== null && + pricing.cache_creation_input_token_cost_above_1hr_above_200k_tokens !== undefined + ? pricing.cache_creation_input_token_cost_above_1hr_above_200k_tokens + : defaultEphemeral1hPrice + : defaultEphemeral1hPrice + } // 计算各项费用 const inputCost = inputTokens * actualInputPrice diff --git a/tests/pricingService.test.js b/tests/pricingService.test.js index 60e71281..40dc0dd7 100644 --- a/tests/pricingService.test.js +++ b/tests/pricingService.test.js @@ -2,8 +2,11 @@ * PricingService 长上下文(200K+)分层计费测试 * * 测试当 [1m] 模型总输入超过 200K tokens 时的分层计费逻辑: - * - 使用 model_pricing.json 中的 *_above_200k_tokens 字段 - * - 所有 token 类型(input/output/cache_create/cache_read)都切换到高档价格 + * - 输入/输出优先使用 model_pricing.json 中的 *_above_200k_tokens 字段 + * - Claude 缓存价格按输入价格倍率推导: + * - 5m cache write = input * 1.25 + * - 1h cache write = input * 2 + * - cache read = input * 0.1 */ // Mock logger to avoid console output during tests @@ -44,6 +47,7 @@ describe('PricingService - 200K+ Long Context Pricing', () => { output_cost_per_token: 0.000015, // $15/MTok cache_creation_input_token_cost: 0.00000375, // $3.75/MTok cache_read_input_token_cost: 0.0000003, // $0.30/MTok + max_input_tokens: 1000000, // 200K+ 高档价格 input_cost_per_token_above_200k_tokens: 0.000006, // $6/MTok (2x) output_cost_per_token_above_200k_tokens: 0.0000225, // $22.50/MTok (1.5x) @@ -59,6 +63,15 @@ describe('PricingService - 200K+ Long Context Pricing', () => { output_cost_per_token: 0.00000125, cache_creation_input_token_cost: 0.0000003, cache_read_input_token_cost: 0.00000003 + }, + // Fast Mode 适配测试模型(Opus 4.6) + 'claude-opus-4-6': { + input_cost_per_token: 0.000005, + output_cost_per_token: 0.000025, + cache_creation_input_token_cost: 0.00000625, + cache_read_input_token_cost: 0.0000005, + input_cost_per_token_above_200k_tokens: 0.00001, + output_cost_per_token_above_200k_tokens: 0.0000375 } } @@ -152,7 +165,7 @@ describe('PricingService - 200K+ Long Context Pricing', () => { expect(result.pricing.input).toBe(0.000006) expect(result.pricing.output).toBe(0.0000225) expect(result.pricing.cacheCreate).toBe(0.0000075) - expect(result.pricing.cacheRead).toBe(0.0000006) + expect(result.pricing.cacheRead).toBeCloseTo(0.0000006, 12) }) it('仅 cache_creation + cache_read 超过 200K 也应触发', () => { @@ -199,13 +212,13 @@ describe('PricingService - 200K+ Long Context Pricing', () => { const result = pricingService.calculateCost(usage, 'claude-sonnet-4-20250514[1m]') // cache_read_input_token_cost_above_200k_tokens = 0.0000006 - expect(result.pricing.cacheRead).toBe(0.0000006) + expect(result.pricing.cacheRead).toBeCloseTo(0.0000006, 12) expect(result.cacheReadCost).toBeCloseTo(60000 * 0.0000006, 10) }) }) describe('详细缓存创建数据(ephemeral_5m / ephemeral_1h)', () => { - it('200K+ 时 ephemeral_1h 应使用 cache_creation_input_token_cost_above_1hr_above_200k_tokens', () => { + it('200K+ 时 Claude ephemeral_1h 应按 input * 2 计算', () => { const usage = { input_tokens: 200001, output_tokens: 1000, @@ -222,26 +235,88 @@ describe('PricingService - 200K+ Long Context Pricing', () => { expect(result.isLongContextRequest).toBe(true) // ephemeral_5m: 5000 * 0.0000075 = 0.0000375 expect(result.ephemeral5mCost).toBeCloseTo(5000 * 0.0000075, 10) - // ephemeral_1h: 5000 * 0.000015 (above_1hr_above_200k) - expect(result.ephemeral1hCost).toBeCloseTo(5000 * 0.000015, 10) + // 200K+ input = 0.000006, ephemeral_1h = input * 2 = 0.000012 + expect(result.pricing.ephemeral1h).toBeCloseTo(0.000012, 10) + expect(result.ephemeral1hCost).toBeCloseTo(5000 * 0.000012, 10) }) }) describe('回退测试', () => { - it('模型无 above_200k 字段时回退到基础价格', () => { + it('Claude 模型无 above_200k 字段时,200K+ 输入价格按 2 倍并推导缓存价格', () => { const usage = { input_tokens: 250000, output_tokens: 1000, - cache_creation_input_tokens: 0, - cache_read_input_tokens: 0 + cache_creation_input_tokens: 10000, + cache_read_input_tokens: 10000 } const result = pricingService.calculateCost(usage, 'claude-3-haiku-20240307[1m]') - // 模型没有 above_200k 字段,使用基础价格 - expect(result.isLongContextRequest).toBe(true) // 超过 200K - expect(result.pricing.input).toBe(0.00000025) // 基础价格(没有 above_200k 字段) - expect(result.pricing.cacheCreate).toBe(0.0000003) // 基础价格 + // 模型没有 above_200k 字段,Claude 200K+ 输入按 2 倍兜底 + expect(result.isLongContextRequest).toBe(true) + expect(result.pricing.input).toBe(0.0000005) // 0.00000025 * 2 + // 缓存价格由输入价格推导 + expect(result.pricing.cacheCreate).toBeCloseTo(0.000000625, 12) // input * 1.25 + expect(result.pricing.cacheRead).toBeCloseTo(0.00000005, 12) // input * 0.1 + }) + }) + + describe('Header 与 Fast Mode 适配', () => { + it('无 [1m] 后缀但带 context-1m beta,超过 200K 时应触发长上下文计费', () => { + const usage = { + input_tokens: 210000, + output_tokens: 1000, + cache_creation_input_tokens: 0, + cache_read_input_tokens: 0, + request_anthropic_beta: 'context-1m-2025-08-07' + } + + const result = pricingService.calculateCost(usage, 'claude-sonnet-4-20250514') + + expect(result.isLongContextRequest).toBe(true) + expect(result.pricing.input).toBe(0.000006) + expect(result.pricing.output).toBe(0.0000225) + }) + + it('Opus 4.6 在 fast-mode beta + speed=fast 时应用 Fast Mode 6x', () => { + const usage = { + input_tokens: 100000, + output_tokens: 20000, + cache_creation_input_tokens: 10000, + cache_read_input_tokens: 5000, + request_anthropic_beta: 'fast-mode-2026-02-01', + speed: 'fast' + } + + const result = pricingService.calculateCost(usage, 'claude-opus-4-6') + + // input: 0.000005 * 6 = 0.00003 + expect(result.pricing.input).toBeCloseTo(0.00003, 12) + // output: 0.000025 * 6 = 0.00015 + expect(result.pricing.output).toBeCloseTo(0.00015, 12) + // cache create/read 由 fast 后 input 推导 + expect(result.pricing.cacheCreate).toBeCloseTo(0.0000375, 12) // 0.00003 * 1.25 + expect(result.pricing.cacheRead).toBeCloseTo(0.000003, 12) // 0.00003 * 0.1 + expect(result.pricing.ephemeral1h).toBeCloseTo(0.00006, 12) // 0.00003 * 2 + }) + + it('Opus 4.6 在 fast-mode + [1m] 且超过 200K 时应叠加计费(12x input)', () => { + const usage = { + input_tokens: 210000, + output_tokens: 1000, + cache_creation_input_tokens: 10000, + cache_read_input_tokens: 10000, + request_anthropic_beta: 'fast-mode-2026-02-01,context-1m-2025-08-07', + speed: 'fast' + } + + const result = pricingService.calculateCost(usage, 'claude-opus-4-6[1m]') + + expect(result.isLongContextRequest).toBe(true) + // input: 0.000005 -> long context 0.00001 -> fast 6x => 0.00006 (即标准 12x) + expect(result.pricing.input).toBeCloseTo(0.00006, 12) + // output: 0.000025 -> long context 0.0000375 -> fast 6x => 0.000225 (即标准 9x) + expect(result.pricing.output).toBeCloseTo(0.000225, 12) }) }) @@ -261,7 +336,7 @@ describe('PricingService - 200K+ Long Context Pricing', () => { expect(result.pricing.input).toBe(0.000003) // 基础价格 expect(result.pricing.output).toBe(0.000015) // 基础价格 expect(result.pricing.cacheCreate).toBe(0.00000375) // 基础价格 - expect(result.pricing.cacheRead).toBe(0.0000003) // 基础价格 + expect(result.pricing.cacheRead).toBeCloseTo(0.0000003, 12) // 基础价格 }) it('[1m] 模型未超过 200K 时使用基础价格', () => { From 5376428dd98e02e77c1625def690416bbabdb31f Mon Sep 17 00:00:00 2001 From: sczheng189 <724100151@qq.com> Date: Mon, 23 Feb 2026 20:12:42 +0800 Subject: [PATCH 3/6] =?UTF-8?q?fix:=20=E6=B7=BB=E5=8A=A0=E5=AF=B9=20epheme?= =?UTF-8?q?ral=205m=20=E5=92=8C=201h=20=E4=BB=A4=E7=89=8C=E7=9A=84?= =?UTF-8?q?=E6=94=AF=E6=8C=81=EF=BC=8C=E4=BC=98=E5=8C=96=E8=B4=B9=E7=94=A8?= =?UTF-8?q?=E8=AE=A1=E7=AE=97=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 2 ++ src/models/redis.js | 24 ++++++++++++++ src/routes/admin/apiKeys.js | 31 +++++++++++++----- src/routes/admin/dashboard.js | 14 +++++++- src/routes/admin/usageStats.js | 47 +++++++++++++++++++++++++++ src/routes/apiStats.js | 14 +++++++- src/services/accountBalanceService.js | 10 ++++++ 7 files changed, 131 insertions(+), 11 deletions(-) diff --git a/.gitignore b/.gitignore index e4c9e9c1..71121e38 100644 --- a/.gitignore +++ b/.gitignore @@ -247,3 +247,5 @@ web/apiStats/ # Admin SPA build files web/admin-spa/dist/ + +.serena/ diff --git a/src/models/redis.js b/src/models/redis.js index 2f139d6a..dbac2a7d 100644 --- a/src/models/redis.js +++ b/src/models/redis.js @@ -1084,6 +1084,9 @@ class RedisClient { pipeline.hincrby(modelDaily, 'cacheReadTokens', finalCacheReadTokens) pipeline.hincrby(modelDaily, 'allTokens', totalTokens) pipeline.hincrby(modelDaily, 'requests', 1) + // 详细缓存类型统计 + pipeline.hincrby(modelDaily, 'ephemeral5mTokens', ephemeral5mTokens) + pipeline.hincrby(modelDaily, 'ephemeral1hTokens', ephemeral1hTokens) // 按模型统计 - 每月 pipeline.hincrby(modelMonthly, 'inputTokens', finalInputTokens) @@ -1092,6 +1095,9 @@ class RedisClient { pipeline.hincrby(modelMonthly, 'cacheReadTokens', finalCacheReadTokens) pipeline.hincrby(modelMonthly, 'allTokens', totalTokens) pipeline.hincrby(modelMonthly, 'requests', 1) + // 详细缓存类型统计 + pipeline.hincrby(modelMonthly, 'ephemeral5mTokens', ephemeral5mTokens) + pipeline.hincrby(modelMonthly, 'ephemeral1hTokens', ephemeral1hTokens) // API Key级别的模型统计 - 每日 pipeline.hincrby(keyModelDaily, 'inputTokens', finalInputTokens) @@ -1136,6 +1142,9 @@ class RedisClient { pipeline.hincrby(keyModelAlltime, 'cacheCreateTokens', finalCacheCreateTokens) pipeline.hincrby(keyModelAlltime, 'cacheReadTokens', finalCacheReadTokens) pipeline.hincrby(keyModelAlltime, 'requests', 1) + // 详细缓存类型统计 + pipeline.hincrby(keyModelAlltime, 'ephemeral5mTokens', ephemeral5mTokens) + pipeline.hincrby(keyModelAlltime, 'ephemeral1hTokens', ephemeral1hTokens) // 费用统计 if (realCost > 0) { pipeline.hincrby(keyModelAlltime, 'realCostMicro', Math.round(realCost * 1000000)) @@ -1152,6 +1161,9 @@ class RedisClient { pipeline.hincrby(hourly, 'cacheReadTokens', finalCacheReadTokens) pipeline.hincrby(hourly, 'allTokens', totalTokens) pipeline.hincrby(hourly, 'requests', 1) + // 详细缓存类型统计 + pipeline.hincrby(hourly, 'ephemeral5mTokens', ephemeral5mTokens) + pipeline.hincrby(hourly, 'ephemeral1hTokens', ephemeral1hTokens) // 按模型统计 - 每小时 pipeline.hincrby(modelHourly, 'inputTokens', finalInputTokens) @@ -1160,6 +1172,9 @@ class RedisClient { pipeline.hincrby(modelHourly, 'cacheReadTokens', finalCacheReadTokens) pipeline.hincrby(modelHourly, 'allTokens', totalTokens) pipeline.hincrby(modelHourly, 'requests', 1) + // 详细缓存类型统计 + pipeline.hincrby(modelHourly, 'ephemeral5mTokens', ephemeral5mTokens) + pipeline.hincrby(modelHourly, 'ephemeral1hTokens', ephemeral1hTokens) // API Key级别的模型统计 - 每小时 pipeline.hincrby(keyModelHourly, 'inputTokens', finalInputTokens) @@ -1168,6 +1183,9 @@ class RedisClient { pipeline.hincrby(keyModelHourly, 'cacheReadTokens', finalCacheReadTokens) pipeline.hincrby(keyModelHourly, 'allTokens', totalTokens) pipeline.hincrby(keyModelHourly, 'requests', 1) + // 详细缓存类型统计 + pipeline.hincrby(keyModelHourly, 'ephemeral5mTokens', ephemeral5mTokens) + pipeline.hincrby(keyModelHourly, 'ephemeral1hTokens', ephemeral1hTokens) // 费用统计 if (realCost > 0) { pipeline.hincrby(keyModelHourly, 'realCostMicro', Math.round(realCost * 1000000)) @@ -1235,18 +1253,24 @@ class RedisClient { pipeline.hincrby('usage:global:total', 'cacheCreateTokens', finalCacheCreateTokens) pipeline.hincrby('usage:global:total', 'cacheReadTokens', finalCacheReadTokens) pipeline.hincrby('usage:global:total', 'allTokens', totalTokens) + pipeline.hincrby('usage:global:total', 'ephemeral5mTokens', ephemeral5mTokens) + pipeline.hincrby('usage:global:total', 'ephemeral1hTokens', ephemeral1hTokens) pipeline.hincrby(globalDaily, 'requests', 1) pipeline.hincrby(globalDaily, 'inputTokens', finalInputTokens) pipeline.hincrby(globalDaily, 'outputTokens', finalOutputTokens) pipeline.hincrby(globalDaily, 'cacheCreateTokens', finalCacheCreateTokens) pipeline.hincrby(globalDaily, 'cacheReadTokens', finalCacheReadTokens) pipeline.hincrby(globalDaily, 'allTokens', totalTokens) + pipeline.hincrby(globalDaily, 'ephemeral5mTokens', ephemeral5mTokens) + pipeline.hincrby(globalDaily, 'ephemeral1hTokens', ephemeral1hTokens) pipeline.hincrby(globalMonthly, 'requests', 1) pipeline.hincrby(globalMonthly, 'inputTokens', finalInputTokens) pipeline.hincrby(globalMonthly, 'outputTokens', finalOutputTokens) pipeline.hincrby(globalMonthly, 'cacheCreateTokens', finalCacheCreateTokens) pipeline.hincrby(globalMonthly, 'cacheReadTokens', finalCacheReadTokens) pipeline.hincrby(globalMonthly, 'allTokens', totalTokens) + pipeline.hincrby(globalMonthly, 'ephemeral5mTokens', ephemeral5mTokens) + pipeline.hincrby(globalMonthly, 'ephemeral1hTokens', ephemeral1hTokens) pipeline.expire(globalDaily, 86400 * 32) pipeline.expire(globalMonthly, 86400 * 365) diff --git a/src/routes/admin/apiKeys.js b/src/routes/admin/apiKeys.js index 7bbc4f8a..d348493c 100644 --- a/src/routes/admin/apiKeys.js +++ b/src/routes/admin/apiKeys.js @@ -1289,6 +1289,8 @@ async function calculateKeyStats(keyId, timeRange, startDate, endDate) { outputTokens: 0, cacheCreateTokens: 0, cacheReadTokens: 0, + ephemeral5mTokens: 0, + ephemeral1hTokens: 0, requests: 0 }) } @@ -1300,6 +1302,10 @@ async function calculateKeyStats(keyId, timeRange, startDate, endDate) { parseInt(data.totalCacheCreateTokens) || parseInt(data.cacheCreateTokens) || 0 stats.cacheReadTokens += parseInt(data.totalCacheReadTokens) || parseInt(data.cacheReadTokens) || 0 + stats.ephemeral5mTokens += + parseInt(data.totalEphemeral5mTokens) || parseInt(data.ephemeral5mTokens) || 0 + stats.ephemeral1hTokens += + parseInt(data.totalEphemeral1hTokens) || parseInt(data.ephemeral1hTokens) || 0 stats.requests += parseInt(data.totalRequests) || parseInt(data.requests) || 0 totalRequests += parseInt(data.totalRequests) || parseInt(data.requests) || 0 @@ -1318,15 +1324,22 @@ async function calculateKeyStats(keyId, timeRange, startDate, endDate) { cacheCreateTokens += stats.cacheCreateTokens cacheReadTokens += stats.cacheReadTokens - const costResult = CostCalculator.calculateCost( - { - input_tokens: stats.inputTokens, - output_tokens: stats.outputTokens, - cache_creation_input_tokens: stats.cacheCreateTokens, - cache_read_input_tokens: stats.cacheReadTokens - }, - model - ) + const costUsage = { + input_tokens: stats.inputTokens, + output_tokens: stats.outputTokens, + cache_creation_input_tokens: stats.cacheCreateTokens, + cache_read_input_tokens: stats.cacheReadTokens + } + + // 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费 + if (stats.ephemeral5mTokens > 0 || stats.ephemeral1hTokens > 0) { + costUsage.cache_creation = { + ephemeral_5m_input_tokens: stats.ephemeral5mTokens, + ephemeral_1h_input_tokens: stats.ephemeral1hTokens + } + } + + const costResult = CostCalculator.calculateCost(costUsage, model) totalCost += costResult.costs.total } diff --git a/src/routes/admin/dashboard.js b/src/routes/admin/dashboard.js index fb47f98e..52f7f09e 100644 --- a/src/routes/admin/dashboard.js +++ b/src/routes/admin/dashboard.js @@ -472,7 +472,9 @@ router.get('/model-stats', authenticateAdmin, async (req, res) => { outputTokens: 0, cacheCreateTokens: 0, cacheReadTokens: 0, - allTokens: 0 + allTokens: 0, + ephemeral5mTokens: 0, + ephemeral1hTokens: 0 } stats.requests += parseInt(data.requests) || 0 @@ -481,6 +483,8 @@ router.get('/model-stats', authenticateAdmin, async (req, res) => { stats.cacheCreateTokens += parseInt(data.cacheCreateTokens) || 0 stats.cacheReadTokens += parseInt(data.cacheReadTokens) || 0 stats.allTokens += parseInt(data.allTokens) || 0 + stats.ephemeral5mTokens += parseInt(data.ephemeral5mTokens) || 0 + stats.ephemeral1hTokens += parseInt(data.ephemeral1hTokens) || 0 modelStatsMap.set(normalizedModel, stats) } @@ -497,6 +501,14 @@ router.get('/model-stats', authenticateAdmin, async (req, res) => { cache_read_input_tokens: stats.cacheReadTokens } + // 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费 + if (stats.ephemeral5mTokens > 0 || stats.ephemeral1hTokens > 0) { + usage.cache_creation = { + ephemeral_5m_input_tokens: stats.ephemeral5mTokens, + ephemeral_1h_input_tokens: stats.ephemeral1hTokens + } + } + // 计算费用 const costData = CostCalculator.calculateCost(usage, model) diff --git a/src/routes/admin/usageStats.js b/src/routes/admin/usageStats.js index cfa61bd4..5bd9ea79 100644 --- a/src/routes/admin/usageStats.js +++ b/src/routes/admin/usageStats.js @@ -786,6 +786,8 @@ router.get('/usage-trend', authenticateAdmin, async (req, res) => { const modelOutputTokens = parseInt(data.outputTokens) || 0 const modelCacheCreateTokens = parseInt(data.cacheCreateTokens) || 0 const modelCacheReadTokens = parseInt(data.cacheReadTokens) || 0 + const modelEphemeral5mTokens = parseInt(data.ephemeral5mTokens) || 0 + const modelEphemeral1hTokens = parseInt(data.ephemeral1hTokens) || 0 const modelRequests = parseInt(data.requests) || 0 dayInputTokens += modelInputTokens @@ -800,6 +802,15 @@ router.get('/usage-trend', authenticateAdmin, async (req, res) => { cache_creation_input_tokens: modelCacheCreateTokens, cache_read_input_tokens: modelCacheReadTokens } + + // 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费 + if (modelEphemeral5mTokens > 0 || modelEphemeral1hTokens > 0) { + modelUsage.cache_creation = { + ephemeral_5m_input_tokens: modelEphemeral5mTokens, + ephemeral_1h_input_tokens: modelEphemeral1hTokens + } + } + const modelCostResult = CostCalculator.calculateCost(modelUsage, model) dayCost += modelCostResult.costs.total } @@ -948,6 +959,8 @@ router.get('/api-keys/:keyId/model-stats', authenticateAdmin, async (req, res) = outputTokens: 0, cacheCreateTokens: 0, cacheReadTokens: 0, + ephemeral5mTokens: 0, + ephemeral1hTokens: 0, allTokens: 0 }) } @@ -957,6 +970,8 @@ router.get('/api-keys/:keyId/model-stats', authenticateAdmin, async (req, res) = stats.outputTokens += parseInt(data.outputTokens) || 0 stats.cacheCreateTokens += parseInt(data.cacheCreateTokens) || 0 stats.cacheReadTokens += parseInt(data.cacheReadTokens) || 0 + stats.ephemeral5mTokens += parseInt(data.ephemeral5mTokens) || 0 + stats.ephemeral1hTokens += parseInt(data.ephemeral1hTokens) || 0 stats.allTokens += parseInt(data.allTokens) || 0 } } @@ -992,6 +1007,8 @@ router.get('/api-keys/:keyId/model-stats', authenticateAdmin, async (req, res) = outputTokens: 0, cacheCreateTokens: 0, cacheReadTokens: 0, + ephemeral5mTokens: 0, + ephemeral1hTokens: 0, allTokens: 0 }) } @@ -1001,6 +1018,8 @@ router.get('/api-keys/:keyId/model-stats', authenticateAdmin, async (req, res) = stats.outputTokens += parseInt(data.outputTokens) || 0 stats.cacheCreateTokens += parseInt(data.cacheCreateTokens) || 0 stats.cacheReadTokens += parseInt(data.cacheReadTokens) || 0 + stats.ephemeral5mTokens += parseInt(data.ephemeral5mTokens) || 0 + stats.ephemeral1hTokens += parseInt(data.ephemeral1hTokens) || 0 stats.allTokens += parseInt(data.allTokens) || 0 } } @@ -1016,6 +1035,14 @@ router.get('/api-keys/:keyId/model-stats', authenticateAdmin, async (req, res) = cache_read_input_tokens: stats.cacheReadTokens } + // 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费 + if (stats.ephemeral5mTokens > 0 || stats.ephemeral1hTokens > 0) { + usage.cache_creation = { + ephemeral_5m_input_tokens: stats.ephemeral5mTokens, + ephemeral_1h_input_tokens: stats.ephemeral1hTokens + } + } + // 使用CostCalculator计算费用 const costData = CostCalculator.calculateCost(usage, model) @@ -1424,6 +1451,16 @@ router.get('/account-usage-trend', authenticateAdmin, async (req, res) => { cache_read_input_tokens: parseInt(modelData.cacheReadTokens) || 0 } + // 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费 + const eph5m = parseInt(modelData.ephemeral5mTokens) || 0 + const eph1h = parseInt(modelData.ephemeral1hTokens) || 0 + if (eph5m > 0 || eph1h > 0) { + usage.cache_creation = { + ephemeral_5m_input_tokens: eph5m, + ephemeral_1h_input_tokens: eph1h + } + } + const costResult = CostCalculator.calculateCost(usage, modelName) cost += costResult.costs.total } @@ -1582,6 +1619,16 @@ router.get('/account-usage-trend', authenticateAdmin, async (req, res) => { cache_read_input_tokens: parseInt(modelData.cacheReadTokens) || 0 } + // 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费 + const eph5m = parseInt(modelData.ephemeral5mTokens) || 0 + const eph1h = parseInt(modelData.ephemeral1hTokens) || 0 + if (eph5m > 0 || eph1h > 0) { + usage.cache_creation = { + ephemeral_5m_input_tokens: eph5m, + ephemeral_1h_input_tokens: eph1h + } + } + const costResult = CostCalculator.calculateCost(usage, modelName) cost += costResult.costs.total } diff --git a/src/routes/apiStats.js b/src/routes/apiStats.js index 9d7b2ca6..a156df65 100644 --- a/src/routes/apiStats.js +++ b/src/routes/apiStats.js @@ -270,7 +270,9 @@ router.post('/api/user-stats', async (req, res) => { inputTokens: 0, outputTokens: 0, cacheCreateTokens: 0, - cacheReadTokens: 0 + cacheReadTokens: 0, + ephemeral5mTokens: 0, + ephemeral1hTokens: 0 }) } @@ -279,6 +281,8 @@ router.post('/api/user-stats', async (req, res) => { modelUsage.outputTokens += parseInt(data.outputTokens) || 0 modelUsage.cacheCreateTokens += parseInt(data.cacheCreateTokens) || 0 modelUsage.cacheReadTokens += parseInt(data.cacheReadTokens) || 0 + modelUsage.ephemeral5mTokens += parseInt(data.ephemeral5mTokens) || 0 + modelUsage.ephemeral1hTokens += parseInt(data.ephemeral1hTokens) || 0 } } @@ -291,6 +295,14 @@ router.post('/api/user-stats', async (req, res) => { cache_read_input_tokens: usage.cacheReadTokens } + // 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费 + if (usage.ephemeral5mTokens > 0 || usage.ephemeral1hTokens > 0) { + usageData.cache_creation = { + ephemeral_5m_input_tokens: usage.ephemeral5mTokens, + ephemeral_1h_input_tokens: usage.ephemeral1hTokens + } + } + const costResult = CostCalculator.calculateCost(usageData, model) totalCost += costResult.costs.total } diff --git a/src/services/accountBalanceService.js b/src/services/accountBalanceService.js index ec25f171..81fd0501 100644 --- a/src/services/accountBalanceService.js +++ b/src/services/accountBalanceService.js @@ -607,6 +607,16 @@ class AccountBalanceService { cache_read_input_tokens: parseInt(data.cacheReadTokens || 0) } + // 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费 + const eph5m = parseInt(data.ephemeral5mTokens || 0) + const eph1h = parseInt(data.ephemeral1hTokens || 0) + if (eph5m > 0 || eph1h > 0) { + usage.cache_creation = { + ephemeral_5m_input_tokens: eph5m, + ephemeral_1h_input_tokens: eph1h + } + } + const costResult = CostCalculator.calculateCost(usage, model) totalCost += costResult.costs.total || 0 } From bfae62bfeb294a519d4d2ebe78cd1ee145899cb9 Mon Sep 17 00:00:00 2001 From: sczheng189 <724100151@qq.com> Date: Mon, 23 Feb 2026 21:20:18 +0800 Subject: [PATCH 4/6] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0=E5=AF=B9=20ephem?= =?UTF-8?q?eral=205m=20=E5=92=8C=201h=20=E4=BB=A4=E7=89=8C=E7=9A=84?= =?UTF-8?q?=E6=94=AF=E6=8C=81=EF=BC=8C=E4=BC=98=E5=8C=96=E8=B4=B9=E7=94=A8?= =?UTF-8?q?=E8=AE=A1=E7=AE=97=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/models/redis.js | 65 +++++++++ src/routes/admin/claudeAccounts.js | 8 ++ src/routes/admin/usageStats.js | 222 ++++++++++++++++++++++++++--- src/routes/apiStats.js | 38 +++++ src/services/apiKeyService.js | 4 + src/services/costInitService.js | 10 ++ src/services/droidRelayService.js | 2 + 7 files changed, 331 insertions(+), 18 deletions(-) diff --git a/src/models/redis.js b/src/models/redis.js index dbac2a7d..bdffc9c6 100644 --- a/src/models/redis.js +++ b/src/models/redis.js @@ -1286,6 +1286,8 @@ class RedisClient { outputTokens = 0, cacheCreateTokens = 0, cacheReadTokens = 0, + ephemeral5mTokens = 0, + ephemeral1hTokens = 0, model = 'unknown', isLongContextRequest = false ) { @@ -1317,6 +1319,8 @@ class RedisClient { const finalOutputTokens = outputTokens || 0 const finalCacheCreateTokens = cacheCreateTokens || 0 const finalCacheReadTokens = cacheReadTokens || 0 + const finalEphemeral5mTokens = ephemeral5mTokens || 0 + const finalEphemeral1hTokens = ephemeral1hTokens || 0 const actualTotalTokens = finalInputTokens + finalOutputTokens + finalCacheCreateTokens + finalCacheReadTokens const coreTokens = finalInputTokens + finalOutputTokens @@ -1329,6 +1333,8 @@ class RedisClient { this.client.hincrby(accountKey, 'totalOutputTokens', finalOutputTokens), this.client.hincrby(accountKey, 'totalCacheCreateTokens', finalCacheCreateTokens), this.client.hincrby(accountKey, 'totalCacheReadTokens', finalCacheReadTokens), + this.client.hincrby(accountKey, 'totalEphemeral5mTokens', finalEphemeral5mTokens), + this.client.hincrby(accountKey, 'totalEphemeral1hTokens', finalEphemeral1hTokens), this.client.hincrby(accountKey, 'totalAllTokens', actualTotalTokens), this.client.hincrby(accountKey, 'totalRequests', 1), @@ -1338,6 +1344,8 @@ class RedisClient { this.client.hincrby(accountDaily, 'outputTokens', finalOutputTokens), this.client.hincrby(accountDaily, 'cacheCreateTokens', finalCacheCreateTokens), this.client.hincrby(accountDaily, 'cacheReadTokens', finalCacheReadTokens), + this.client.hincrby(accountDaily, 'ephemeral5mTokens', finalEphemeral5mTokens), + this.client.hincrby(accountDaily, 'ephemeral1hTokens', finalEphemeral1hTokens), this.client.hincrby(accountDaily, 'allTokens', actualTotalTokens), this.client.hincrby(accountDaily, 'requests', 1), @@ -1347,6 +1355,8 @@ class RedisClient { this.client.hincrby(accountMonthly, 'outputTokens', finalOutputTokens), this.client.hincrby(accountMonthly, 'cacheCreateTokens', finalCacheCreateTokens), this.client.hincrby(accountMonthly, 'cacheReadTokens', finalCacheReadTokens), + this.client.hincrby(accountMonthly, 'ephemeral5mTokens', finalEphemeral5mTokens), + this.client.hincrby(accountMonthly, 'ephemeral1hTokens', finalEphemeral1hTokens), this.client.hincrby(accountMonthly, 'allTokens', actualTotalTokens), this.client.hincrby(accountMonthly, 'requests', 1), @@ -1356,6 +1366,8 @@ class RedisClient { this.client.hincrby(accountHourly, 'outputTokens', finalOutputTokens), this.client.hincrby(accountHourly, 'cacheCreateTokens', finalCacheCreateTokens), this.client.hincrby(accountHourly, 'cacheReadTokens', finalCacheReadTokens), + this.client.hincrby(accountHourly, 'ephemeral5mTokens', finalEphemeral5mTokens), + this.client.hincrby(accountHourly, 'ephemeral1hTokens', finalEphemeral1hTokens), this.client.hincrby(accountHourly, 'allTokens', actualTotalTokens), this.client.hincrby(accountHourly, 'requests', 1), @@ -1376,6 +1388,16 @@ class RedisClient { `model:${normalizedModel}:cacheReadTokens`, finalCacheReadTokens ), + this.client.hincrby( + accountHourly, + `model:${normalizedModel}:ephemeral5mTokens`, + finalEphemeral5mTokens + ), + this.client.hincrby( + accountHourly, + `model:${normalizedModel}:ephemeral1hTokens`, + finalEphemeral1hTokens + ), this.client.hincrby(accountHourly, `model:${normalizedModel}:allTokens`, actualTotalTokens), this.client.hincrby(accountHourly, `model:${normalizedModel}:requests`, 1), @@ -1384,6 +1406,8 @@ class RedisClient { this.client.hincrby(accountModelDaily, 'outputTokens', finalOutputTokens), this.client.hincrby(accountModelDaily, 'cacheCreateTokens', finalCacheCreateTokens), this.client.hincrby(accountModelDaily, 'cacheReadTokens', finalCacheReadTokens), + this.client.hincrby(accountModelDaily, 'ephemeral5mTokens', finalEphemeral5mTokens), + this.client.hincrby(accountModelDaily, 'ephemeral1hTokens', finalEphemeral1hTokens), this.client.hincrby(accountModelDaily, 'allTokens', actualTotalTokens), this.client.hincrby(accountModelDaily, 'requests', 1), @@ -1392,6 +1416,8 @@ class RedisClient { this.client.hincrby(accountModelMonthly, 'outputTokens', finalOutputTokens), this.client.hincrby(accountModelMonthly, 'cacheCreateTokens', finalCacheCreateTokens), this.client.hincrby(accountModelMonthly, 'cacheReadTokens', finalCacheReadTokens), + this.client.hincrby(accountModelMonthly, 'ephemeral5mTokens', finalEphemeral5mTokens), + this.client.hincrby(accountModelMonthly, 'ephemeral1hTokens', finalEphemeral1hTokens), this.client.hincrby(accountModelMonthly, 'allTokens', actualTotalTokens), this.client.hincrby(accountModelMonthly, 'requests', 1), @@ -1400,6 +1426,8 @@ class RedisClient { this.client.hincrby(accountModelHourly, 'outputTokens', finalOutputTokens), this.client.hincrby(accountModelHourly, 'cacheCreateTokens', finalCacheCreateTokens), this.client.hincrby(accountModelHourly, 'cacheReadTokens', finalCacheReadTokens), + this.client.hincrby(accountModelHourly, 'ephemeral5mTokens', finalEphemeral5mTokens), + this.client.hincrby(accountModelHourly, 'ephemeral1hTokens', finalEphemeral1hTokens), this.client.hincrby(accountModelHourly, 'allTokens', actualTotalTokens), this.client.hincrby(accountModelHourly, 'requests', 1), @@ -1867,6 +1895,16 @@ class RedisClient { cache_read_input_tokens: parseInt(modelUsage.cacheReadTokens || 0) } + // 添加 cache_creation 子对象以支持精确 ephemeral 定价 + const eph5m = parseInt(modelUsage.ephemeral5mTokens) || 0 + const eph1h = parseInt(modelUsage.ephemeral1hTokens) || 0 + if (eph5m > 0 || eph1h > 0) { + usage.cache_creation = { + ephemeral_5m_input_tokens: eph5m, + ephemeral_1h_input_tokens: eph1h + } + } + const costResult = CostCalculator.calculateCost(usage, model) totalCost += costResult.costs.total @@ -1955,6 +1993,16 @@ class RedisClient { cache_read_input_tokens: parseInt(modelUsage.cacheReadTokens || 0) } + // 添加 cache_creation 子对象以支持精确 ephemeral 定价 + const eph5m = parseInt(modelUsage.ephemeral5mTokens) || 0 + const eph1h = parseInt(modelUsage.ephemeral1hTokens) || 0 + if (eph5m > 0 || eph1h > 0) { + usage.cache_creation = { + ephemeral_5m_input_tokens: eph5m, + ephemeral_1h_input_tokens: eph1h + } + } + const costResult = CostCalculator.calculateCost(usage, model) costMap.set(accountId, costMap.get(accountId) + costResult.costs.total) } @@ -1996,6 +2044,17 @@ class RedisClient { cache_creation_input_tokens: parseInt(modelUsage.cacheCreateTokens || 0), cache_read_input_tokens: parseInt(modelUsage.cacheReadTokens || 0) } + + // 添加 cache_creation 子对象以支持精确 ephemeral 定价 + const eph5m = parseInt(modelUsage.ephemeral5mTokens) || 0 + const eph1h = parseInt(modelUsage.ephemeral1hTokens) || 0 + if (eph5m > 0 || eph1h > 0) { + usage.cache_creation = { + ephemeral_5m_input_tokens: eph5m, + ephemeral_1h_input_tokens: eph1h + } + } + const costResult = CostCalculator.calculateCost(usage, model) totalCost += costResult.costs.total } @@ -3646,6 +3705,8 @@ class RedisClient { outputTokens: 0, cacheCreateTokens: 0, cacheReadTokens: 0, + ephemeral5mTokens: 0, + ephemeral1hTokens: 0, allTokens: 0, requests: 0 } @@ -3659,6 +3720,10 @@ class RedisClient { modelUsage[modelName].cacheCreateTokens += parseInt(value || 0) } else if (metric === 'cacheReadTokens') { modelUsage[modelName].cacheReadTokens += parseInt(value || 0) + } else if (metric === 'ephemeral5mTokens') { + modelUsage[modelName].ephemeral5mTokens += parseInt(value || 0) + } else if (metric === 'ephemeral1hTokens') { + modelUsage[modelName].ephemeral1hTokens += parseInt(value || 0) } else if (metric === 'allTokens') { modelUsage[modelName].allTokens += parseInt(value || 0) } else if (metric === 'requests') { diff --git a/src/routes/admin/claudeAccounts.js b/src/routes/admin/claudeAccounts.js index 590e919d..699aa3ac 100644 --- a/src/routes/admin/claudeAccounts.js +++ b/src/routes/admin/claudeAccounts.js @@ -417,6 +417,14 @@ router.get('/claude-accounts', authenticateAdmin, async (req, res) => { cache_read_input_tokens: usage.cacheReadTokens } + // 添加 cache_creation 子对象以支持精确 ephemeral 定价 + if (usage.ephemeral5mTokens > 0 || usage.ephemeral1hTokens > 0) { + usageData.cache_creation = { + ephemeral_5m_input_tokens: usage.ephemeral5mTokens, + ephemeral_1h_input_tokens: usage.ephemeral1hTokens + } + } + logger.debug(`💰 Calculating cost for model ${modelName}:`, JSON.stringify(usageData)) const costResult = CostCalculator.calculateCost(usageData, modelName) logger.debug(`💰 Cost result for ${modelName}: total=${costResult.costs.total}`) diff --git a/src/routes/admin/usageStats.js b/src/routes/admin/usageStats.js index 5bd9ea79..849ad54b 100644 --- a/src/routes/admin/usageStats.js +++ b/src/routes/admin/usageStats.js @@ -362,6 +362,16 @@ router.get('/accounts/:accountId/usage-history', authenticateAdmin, async (req, cache_read_input_tokens: parseInt(modelData.cacheReadTokens) || 0 } + // 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费 + const eph5m = parseInt(modelData.ephemeral5mTokens) || 0 + const eph1h = parseInt(modelData.ephemeral1hTokens) || 0 + if (eph5m > 0 || eph1h > 0) { + usage.cache_creation = { + ephemeral_5m_input_tokens: eph5m, + ephemeral_1h_input_tokens: eph1h + } + } + const costResult = CostCalculator.calculateCost(usage, modelName) summedCost += costResult.costs.total } @@ -403,6 +413,15 @@ router.get('/accounts/:accountId/usage-history', authenticateAdmin, async (req, cache_creation_input_tokens: cacheCreateTokens, cache_read_input_tokens: cacheReadTokens } + // 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费 + const fbEph5m = parseInt(dailyData?.ephemeral5mTokens) || 0 + const fbEph1h = parseInt(dailyData?.ephemeral1hTokens) || 0 + if (fbEph5m > 0 || fbEph1h > 0) { + fallbackUsage.cache_creation = { + ephemeral_5m_input_tokens: fbEph5m, + ephemeral_1h_input_tokens: fbEph1h + } + } const fallbackResult = CostCalculator.calculateCost(fallbackUsage, fallbackModel) cost = fallbackResult.costs.total } @@ -653,12 +672,23 @@ router.get('/usage-trend', authenticateAdmin, async (req, res) => { cache_creation_input_tokens: modelCacheCreateTokens, cache_read_input_tokens: modelCacheReadTokens } + // 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费 + const mEph5m = parseInt(data.ephemeral5mTokens) || 0 + const mEph1h = parseInt(data.ephemeral1hTokens) || 0 + if (mEph5m > 0 || mEph1h > 0) { + modelUsage.cache_creation = { + ephemeral_5m_input_tokens: mEph5m, + ephemeral_1h_input_tokens: mEph1h + } + } const modelCostResult = CostCalculator.calculateCost(modelUsage, model) hourCost += modelCostResult.costs.total } // 如果没有模型级别的数据,尝试API Key级别的数据 if (modelKeys.length === 0) { + let hourEph5m = 0 + let hourEph1h = 0 for (const key of usageKeys) { const data = usageDataMap.get(key) if (data) { @@ -667,6 +697,8 @@ router.get('/usage-trend', authenticateAdmin, async (req, res) => { hourRequests += parseInt(data.requests) || 0 hourCacheCreateTokens += parseInt(data.cacheCreateTokens) || 0 hourCacheReadTokens += parseInt(data.cacheReadTokens) || 0 + hourEph5m += parseInt(data.ephemeral5mTokens) || 0 + hourEph1h += parseInt(data.ephemeral1hTokens) || 0 } } @@ -676,6 +708,13 @@ router.get('/usage-trend', authenticateAdmin, async (req, res) => { cache_creation_input_tokens: hourCacheCreateTokens, cache_read_input_tokens: hourCacheReadTokens } + // 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费 + if (hourEph5m > 0 || hourEph1h > 0) { + usage.cache_creation = { + ephemeral_5m_input_tokens: hourEph5m, + ephemeral_1h_input_tokens: hourEph1h + } + } const costResult = CostCalculator.calculateCost(usage, 'unknown') hourCost = costResult.costs.total } @@ -817,6 +856,8 @@ router.get('/usage-trend', authenticateAdmin, async (req, res) => { // 如果没有模型级别的数据,回退到原始方法 if (modelKeys.length === 0 && usageKeys.length > 0) { + let dayEph5m = 0 + let dayEph1h = 0 for (const key of usageKeys) { const data = usageDataMap.get(key) if (data) { @@ -825,6 +866,8 @@ router.get('/usage-trend', authenticateAdmin, async (req, res) => { dayRequests += parseInt(data.requests) || 0 dayCacheCreateTokens += parseInt(data.cacheCreateTokens) || 0 dayCacheReadTokens += parseInt(data.cacheReadTokens) || 0 + dayEph5m += parseInt(data.ephemeral5mTokens) || 0 + dayEph1h += parseInt(data.ephemeral1hTokens) || 0 } } @@ -834,6 +877,13 @@ router.get('/usage-trend', authenticateAdmin, async (req, res) => { cache_creation_input_tokens: dayCacheCreateTokens, cache_read_input_tokens: dayCacheReadTokens } + // 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费 + if (dayEph5m > 0 || dayEph1h > 0) { + usage.cache_creation = { + ephemeral_5m_input_tokens: dayEph5m, + ephemeral_1h_input_tokens: dayEph1h + } + } const costResult = CostCalculator.calculateCost(usage, 'unknown') dayCost = costResult.costs.total } @@ -1097,6 +1147,16 @@ router.get('/api-keys/:keyId/model-stats', authenticateAdmin, async (req, res) = cache_read_input_tokens: usageData.cacheReadTokens || 0 } + // 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费 + const histEph5m = usageData.ephemeral5mTokens || 0 + const histEph1h = usageData.ephemeral1hTokens || 0 + if (histEph5m > 0 || histEph1h > 0) { + usage.cache_creation = { + ephemeral_5m_input_tokens: histEph5m, + ephemeral_1h_input_tokens: histEph1h + } + } + // 对于汇总数据,使用默认模型计算费用 const costData = CostCalculator.calculateCost(usage, 'claude-3-5-sonnet-20241022') @@ -1472,6 +1532,15 @@ router.get('/account-usage-trend', authenticateAdmin, async (req, res) => { cache_creation_input_tokens: cacheCreateTokens, cache_read_input_tokens: cacheReadTokens } + // 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费 + const fbEph5m = parseInt(data.ephemeral5mTokens) || 0 + const fbEph1h = parseInt(data.ephemeral1hTokens) || 0 + if (fbEph5m > 0 || fbEph1h > 0) { + fallbackUsage.cache_creation = { + ephemeral_5m_input_tokens: fbEph5m, + ephemeral_1h_input_tokens: fbEph1h + } + } const fallbackResult = CostCalculator.calculateCost(fallbackUsage, fallbackModel) cost = fallbackResult.costs.total } @@ -1640,6 +1709,15 @@ router.get('/account-usage-trend', authenticateAdmin, async (req, res) => { cache_creation_input_tokens: cacheCreateTokens, cache_read_input_tokens: cacheReadTokens } + // 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费 + const fbEph5m = parseInt(data.ephemeral5mTokens) || 0 + const fbEph1h = parseInt(data.ephemeral1hTokens) || 0 + if (fbEph5m > 0 || fbEph1h > 0) { + fallbackUsage.cache_creation = { + ephemeral_5m_input_tokens: fbEph5m, + ephemeral_1h_input_tokens: fbEph1h + } + } const fallbackResult = CostCalculator.calculateCost(fallbackUsage, fallbackModel) cost = fallbackResult.costs.total } @@ -1834,7 +1912,9 @@ router.get('/api-keys-usage-trend', authenticateAdmin, async (req, res) => { inputTokens, outputTokens, cacheCreateTokens, - cacheReadTokens + cacheReadTokens, + ephemeral5mTokens: parseInt(data.ephemeral5mTokens) || 0, + ephemeral1hTokens: parseInt(data.ephemeral1hTokens) || 0 }) } @@ -1860,6 +1940,16 @@ router.get('/api-keys-usage-trend', authenticateAdmin, async (req, res) => { cache_read_input_tokens: parseInt(modelData.cacheReadTokens) || 0 } + // 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费 + const eph5m = parseInt(modelData.ephemeral5mTokens) || 0 + const eph1h = parseInt(modelData.ephemeral1hTokens) || 0 + if (eph5m > 0 || eph1h > 0) { + usage.cache_creation = { + ephemeral_5m_input_tokens: eph5m, + ephemeral_1h_input_tokens: eph1h + } + } + const costResult = CostCalculator.calculateCost(usage, model) const currentCost = apiKeyCostMap.get(apiKeyId) || 0 apiKeyCostMap.set(apiKeyId, currentCost + costResult.costs.total) @@ -1878,6 +1968,12 @@ router.get('/api-keys-usage-trend', authenticateAdmin, async (req, res) => { cache_creation_input_tokens: data.cacheCreateTokens, cache_read_input_tokens: data.cacheReadTokens } + if (data.ephemeral5mTokens > 0 || data.ephemeral1hTokens > 0) { + usage.cache_creation = { + ephemeral_5m_input_tokens: data.ephemeral5mTokens, + ephemeral_1h_input_tokens: data.ephemeral1hTokens + } + } const fallbackResult = CostCalculator.calculateCost(usage, 'claude-3-5-sonnet-20241022') cost = fallbackResult.costs.total formattedCost = fallbackResult.formatted.total @@ -1994,7 +2090,9 @@ router.get('/api-keys-usage-trend', authenticateAdmin, async (req, res) => { inputTokens, outputTokens, cacheCreateTokens, - cacheReadTokens + cacheReadTokens, + ephemeral5mTokens: parseInt(data.ephemeral5mTokens) || 0, + ephemeral1hTokens: parseInt(data.ephemeral1hTokens) || 0 }) } @@ -2020,6 +2118,16 @@ router.get('/api-keys-usage-trend', authenticateAdmin, async (req, res) => { cache_read_input_tokens: parseInt(modelData.cacheReadTokens) || 0 } + // 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费 + const eph5m = parseInt(modelData.ephemeral5mTokens) || 0 + const eph1h = parseInt(modelData.ephemeral1hTokens) || 0 + if (eph5m > 0 || eph1h > 0) { + usage.cache_creation = { + ephemeral_5m_input_tokens: eph5m, + ephemeral_1h_input_tokens: eph1h + } + } + const costResult = CostCalculator.calculateCost(usage, model) const currentCost = apiKeyCostMap.get(apiKeyId) || 0 apiKeyCostMap.set(apiKeyId, currentCost + costResult.costs.total) @@ -2038,6 +2146,12 @@ router.get('/api-keys-usage-trend', authenticateAdmin, async (req, res) => { cache_creation_input_tokens: data.cacheCreateTokens, cache_read_input_tokens: data.cacheReadTokens } + if (data.ephemeral5mTokens > 0 || data.ephemeral1hTokens > 0) { + usage.cache_creation = { + ephemeral_5m_input_tokens: data.ephemeral5mTokens, + ephemeral_1h_input_tokens: data.ephemeral1hTokens + } + } const fallbackResult = CostCalculator.calculateCost(usage, 'claude-3-5-sonnet-20241022') cost = fallbackResult.costs.total formattedCost = fallbackResult.formatted.total @@ -2189,7 +2303,9 @@ router.get('/usage-costs', authenticateAdmin, async (req, res) => { inputTokens: 0, outputTokens: 0, cacheCreateTokens: 0, - cacheReadTokens: 0 + cacheReadTokens: 0, + ephemeral5mTokens: 0, + ephemeral1hTokens: 0 }) } @@ -2198,6 +2314,8 @@ router.get('/usage-costs', authenticateAdmin, async (req, res) => { modelUsage.outputTokens += parseInt(data.outputTokens) || 0 modelUsage.cacheCreateTokens += parseInt(data.cacheCreateTokens) || 0 modelUsage.cacheReadTokens += parseInt(data.cacheReadTokens) || 0 + modelUsage.ephemeral5mTokens += parseInt(data.ephemeral5mTokens) || 0 + modelUsage.ephemeral1hTokens += parseInt(data.ephemeral1hTokens) || 0 } // 计算7天统计的费用 @@ -2211,6 +2329,14 @@ router.get('/usage-costs', authenticateAdmin, async (req, res) => { cache_read_input_tokens: usage.cacheReadTokens } + // 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费 + if (usage.ephemeral5mTokens > 0 || usage.ephemeral1hTokens > 0) { + usageData.cache_creation = { + ephemeral_5m_input_tokens: usage.ephemeral5mTokens, + ephemeral_1h_input_tokens: usage.ephemeral1hTokens + } + } + const costResult = CostCalculator.calculateCost(usageData, model) totalCosts.inputCost += costResult.costs.input totalCosts.outputCost += costResult.costs.output @@ -2290,7 +2416,9 @@ router.get('/usage-costs', authenticateAdmin, async (req, res) => { inputTokens: 0, outputTokens: 0, cacheCreateTokens: 0, - cacheReadTokens: 0 + cacheReadTokens: 0, + ephemeral5mTokens: 0, + ephemeral1hTokens: 0 }) } @@ -2299,6 +2427,8 @@ router.get('/usage-costs', authenticateAdmin, async (req, res) => { modelUsage.outputTokens += parseInt(data.outputTokens) || 0 modelUsage.cacheCreateTokens += parseInt(data.cacheCreateTokens) || 0 modelUsage.cacheReadTokens += parseInt(data.cacheReadTokens) || 0 + modelUsage.ephemeral5mTokens += parseInt(data.ephemeral5mTokens) || 0 + modelUsage.ephemeral1hTokens += parseInt(data.ephemeral1hTokens) || 0 } // 使用模型级别的数据计算费用 @@ -2312,6 +2442,14 @@ router.get('/usage-costs', authenticateAdmin, async (req, res) => { cache_read_input_tokens: usage.cacheReadTokens } + // 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费 + if (usage.ephemeral5mTokens > 0 || usage.ephemeral1hTokens > 0) { + usageData.cache_creation = { + ephemeral_5m_input_tokens: usage.ephemeral5mTokens, + ephemeral_1h_input_tokens: usage.ephemeral1hTokens + } + } + const costResult = CostCalculator.calculateCost(usageData, model) totalCosts.inputCost += costResult.costs.input totalCosts.outputCost += costResult.costs.output @@ -2352,6 +2490,16 @@ router.get('/usage-costs', authenticateAdmin, async (req, res) => { cache_read_input_tokens: apiKey.usage.total.cacheReadTokens || 0 } + // 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费 + const totalEph5m = apiKey.usage.total.ephemeral5mTokens || 0 + const totalEph1h = apiKey.usage.total.ephemeral1hTokens || 0 + if (totalEph5m > 0 || totalEph1h > 0) { + usage.cache_creation = { + ephemeral_5m_input_tokens: totalEph5m, + ephemeral_1h_input_tokens: totalEph1h + } + } + // 使用加权平均价格计算(基于当前活跃模型的价格分布) const costResult = CostCalculator.calculateCost(usage, 'claude-3-5-haiku-20241022') totalCosts.inputCost += costResult.costs.input @@ -2424,6 +2572,16 @@ router.get('/usage-costs', authenticateAdmin, async (req, res) => { cache_read_input_tokens: parseInt(data.cacheReadTokens) || 0 } + // 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费 + const eph5m = parseInt(data.ephemeral5mTokens) || 0 + const eph1h = parseInt(data.ephemeral1hTokens) || 0 + if (eph5m > 0 || eph1h > 0) { + usage.cache_creation = { + ephemeral_5m_input_tokens: eph5m, + ephemeral_1h_input_tokens: eph1h + } + } + const costResult = CostCalculator.calculateCost(usage, model) // 累加总费用 @@ -2564,13 +2722,27 @@ router.get('/api-keys/:keyId/usage-records', authenticateAdmin, async (req, res) return null } - const toUsageObject = (record) => ({ - input_tokens: record.inputTokens || 0, - output_tokens: record.outputTokens || 0, - cache_creation_input_tokens: record.cacheCreateTokens || 0, - cache_read_input_tokens: record.cacheReadTokens || 0, - cache_creation: record.cacheCreation || record.cache_creation || null - }) + const toUsageObject = (record) => { + const usage = { + input_tokens: record.inputTokens || 0, + output_tokens: record.outputTokens || 0, + cache_creation_input_tokens: record.cacheCreateTokens || 0, + cache_read_input_tokens: record.cacheReadTokens || 0, + cache_creation: record.cacheCreation || record.cache_creation || null + } + // 如果没有 cache_creation 但有独立存储的 ephemeral 字段,构建子对象 + if (!usage.cache_creation) { + const eph5m = parseInt(record.ephemeral5mTokens) || 0 + const eph1h = parseInt(record.ephemeral1hTokens) || 0 + if (eph5m > 0 || eph1h > 0) { + usage.cache_creation = { + ephemeral_5m_input_tokens: eph5m, + ephemeral_1h_input_tokens: eph1h + } + } + } + return usage + } const withinRange = (record) => { if (!record.timestamp) { @@ -2863,13 +3035,27 @@ router.get('/accounts/:accountId/usage-records', authenticateAdmin, async (req, keysToUse = [{ id: apiKeyId }] } - const toUsageObject = (record) => ({ - input_tokens: record.inputTokens || 0, - output_tokens: record.outputTokens || 0, - cache_creation_input_tokens: record.cacheCreateTokens || 0, - cache_read_input_tokens: record.cacheReadTokens || 0, - cache_creation: record.cacheCreation || record.cache_creation || null - }) + const toUsageObject = (record) => { + const usage = { + input_tokens: record.inputTokens || 0, + output_tokens: record.outputTokens || 0, + cache_creation_input_tokens: record.cacheCreateTokens || 0, + cache_read_input_tokens: record.cacheReadTokens || 0, + cache_creation: record.cacheCreation || record.cache_creation || null + } + // 如果没有 cache_creation 但有独立存储的 ephemeral 字段,构建子对象 + if (!usage.cache_creation) { + const eph5m = parseInt(record.ephemeral5mTokens) || 0 + const eph1h = parseInt(record.ephemeral1hTokens) || 0 + if (eph5m > 0 || eph1h > 0) { + usage.cache_creation = { + ephemeral_5m_input_tokens: eph5m, + ephemeral_1h_input_tokens: eph1h + } + } + } + return usage + } const withinRange = (record) => { if (!record.timestamp) { diff --git a/src/routes/apiStats.js b/src/routes/apiStats.js index a156df65..86f1738b 100644 --- a/src/routes/apiStats.js +++ b/src/routes/apiStats.js @@ -317,6 +317,14 @@ router.post('/api/user-stats', async (req, res) => { cache_read_input_tokens: usage.cacheReadTokens || 0 } + // 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费 + if (usage.ephemeral5mTokens > 0 || usage.ephemeral1hTokens > 0) { + costUsage.cache_creation = { + ephemeral_5m_input_tokens: usage.ephemeral5mTokens, + ephemeral_1h_input_tokens: usage.ephemeral1hTokens + } + } + const costResult = CostCalculator.calculateCost(costUsage, 'claude-3-5-sonnet-20241022') totalCost = costResult.costs.total } @@ -335,6 +343,14 @@ router.post('/api/user-stats', async (req, res) => { cache_read_input_tokens: usage.cacheReadTokens || 0 } + // 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费 + if (usage.ephemeral5mTokens > 0 || usage.ephemeral1hTokens > 0) { + costUsage.cache_creation = { + ephemeral_5m_input_tokens: usage.ephemeral5mTokens, + ephemeral_1h_input_tokens: usage.ephemeral1hTokens + } + } + const costResult = CostCalculator.calculateCost(costUsage, 'claude-3-5-sonnet-20241022') totalCost = costResult.costs.total formattedCost = costResult.formatted.total @@ -804,6 +820,8 @@ router.post('/api/batch-model-stats', async (req, res) => { outputTokens: 0, cacheCreateTokens: 0, cacheReadTokens: 0, + ephemeral5mTokens: 0, + ephemeral1hTokens: 0, allTokens: 0, realCostMicro: 0, ratedCostMicro: 0, @@ -817,6 +835,8 @@ router.post('/api/batch-model-stats', async (req, res) => { modelUsage.outputTokens += parseInt(data.outputTokens) || 0 modelUsage.cacheCreateTokens += parseInt(data.cacheCreateTokens) || 0 modelUsage.cacheReadTokens += parseInt(data.cacheReadTokens) || 0 + modelUsage.ephemeral5mTokens += parseInt(data.ephemeral5mTokens) || 0 + modelUsage.ephemeral1hTokens += parseInt(data.ephemeral1hTokens) || 0 modelUsage.allTokens += parseInt(data.allTokens) || 0 modelUsage.realCostMicro += parseInt(data.realCostMicro) || 0 modelUsage.ratedCostMicro += parseInt(data.ratedCostMicro) || 0 @@ -839,6 +859,14 @@ router.post('/api/batch-model-stats', async (req, res) => { cache_read_input_tokens: usage.cacheReadTokens } + // 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费 + if (usage.ephemeral5mTokens > 0 || usage.ephemeral1hTokens > 0) { + usageData.cache_creation = { + ephemeral_5m_input_tokens: usage.ephemeral5mTokens, + ephemeral_1h_input_tokens: usage.ephemeral1hTokens + } + } + // 优先使用存储的费用,否则回退到重新计算 const { hasStoredCost } = usage const costData = CostCalculator.calculateCost(usageData, model) @@ -1368,6 +1396,8 @@ router.post('/api/user-model-stats', async (req, res) => { const model = match[1] if (data && Object.keys(data).length > 0) { + const ephemeral5m = parseInt(data.ephemeral5mTokens) || 0 + const ephemeral1h = parseInt(data.ephemeral1hTokens) || 0 const usage = { input_tokens: parseInt(data.inputTokens) || 0, output_tokens: parseInt(data.outputTokens) || 0, @@ -1375,6 +1405,14 @@ router.post('/api/user-model-stats', async (req, res) => { cache_read_input_tokens: parseInt(data.cacheReadTokens) || 0 } + // 如果有 ephemeral 5m/1h 拆分数据,添加 cache_creation 子对象以实现精确计费 + if (ephemeral5m > 0 || ephemeral1h > 0) { + usage.cache_creation = { + ephemeral_5m_input_tokens: ephemeral5m, + ephemeral_1h_input_tokens: ephemeral1h + } + } + // 优先使用存储的费用,否则回退到重新计算 // 检查字段是否存在(而非 > 0),以支持真正的零成本场景 const realCostMicro = parseInt(data.realCostMicro) || 0 diff --git a/src/services/apiKeyService.js b/src/services/apiKeyService.js index 0ca80b9f..605707aa 100644 --- a/src/services/apiKeyService.js +++ b/src/services/apiKeyService.js @@ -1599,6 +1599,8 @@ class ApiKeyService { outputTokens, cacheCreateTokens, cacheReadTokens, + 0, // ephemeral5mTokens - recordUsage 不含详细缓存数据 + 0, // ephemeral1hTokens - recordUsage 不含详细缓存数据 model, isLongContextRequest ) @@ -1834,6 +1836,8 @@ class ApiKeyService { outputTokens, cacheCreateTokens, cacheReadTokens, + ephemeral5mTokens, + ephemeral1hTokens, model, costInfo.isLongContextRequest || false ) diff --git a/src/services/costInitService.js b/src/services/costInitService.js index 5463871f..3207f996 100644 --- a/src/services/costInitService.js +++ b/src/services/costInitService.js @@ -201,6 +201,16 @@ class CostInitService { parseInt(data.totalCacheReadTokens) || parseInt(data.cacheReadTokens) || 0 } + // 添加 cache_creation 子对象以支持精确 ephemeral 定价 + const eph5m = parseInt(data.ephemeral5mTokens) || 0 + const eph1h = parseInt(data.ephemeral1hTokens) || 0 + if (eph5m > 0 || eph1h > 0) { + usage.cache_creation = { + ephemeral_5m_input_tokens: eph5m, + ephemeral_1h_input_tokens: eph1h + } + } + const costResult = CostCalculator.calculateCost(usage, model) const cost = costResult.costs.total diff --git a/src/services/droidRelayService.js b/src/services/droidRelayService.js index 8e663611..0ed4e37e 100644 --- a/src/services/droidRelayService.js +++ b/src/services/droidRelayService.js @@ -1275,6 +1275,8 @@ class DroidRelayService { usageObject.output_tokens || 0, usageObject.cache_creation_input_tokens || 0, usageObject.cache_read_input_tokens || 0, + 0, // ephemeral5mTokens - Droid 不含详细缓存数据 + 0, // ephemeral1hTokens - Droid 不含详细缓存数据 model, false ) From 93b655d65c20a999af46756a99ff75520f5f92a2 Mon Sep 17 00:00:00 2001 From: sczheng189 <724100151@qq.com> Date: Mon, 23 Feb 2026 21:23:38 +0800 Subject: [PATCH 5/6] Revert "fix: update the limit logic, and if the daily limit is not reached after reaching the opus weekly limit, other claude models can be used" This reverts commit f444af49bfb0159f6aa0835b40a7049c2e4199d6. --- src/middleware/auth.js | 4 ++-- src/services/apiKeyService.js | 4 ++-- src/services/weeklyClaudeCostInitService.js | 4 ++-- src/utils/modelHelper.js | 17 ----------------- 4 files changed, 6 insertions(+), 23 deletions(-) diff --git a/src/middleware/auth.js b/src/middleware/auth.js index 66fa1387..cf76b0b7 100644 --- a/src/middleware/auth.js +++ b/src/middleware/auth.js @@ -9,7 +9,7 @@ const ClientValidator = require('../validators/clientValidator') const ClaudeCodeValidator = require('../validators/clients/claudeCodeValidator') const claudeRelayConfigService = require('../services/claudeRelayConfigService') const { calculateWaitTimeStats } = require('../utils/statsHelper') -const { isOpusModel } = require('../utils/modelHelper') +const { isClaudeFamilyModel } = require('../utils/modelHelper') // 工具函数 function sleep(ms) { @@ -1256,7 +1256,7 @@ const authenticateApiKey = async (req, res, next) => { const model = requestBody.model || '' // 判断是否为 Claude 模型 - if (isOpusModel(model)) { + if (isClaudeFamilyModel(model)) { const weeklyOpusCost = validation.keyData.weeklyOpusCost || 0 if (weeklyOpusCost >= weeklyOpusCostLimit) { diff --git a/src/services/apiKeyService.js b/src/services/apiKeyService.js index b51e6637..605707aa 100644 --- a/src/services/apiKeyService.js +++ b/src/services/apiKeyService.js @@ -4,7 +4,7 @@ const config = require('../../config/config') const redis = require('../models/redis') const logger = require('../utils/logger') const serviceRatesService = require('./serviceRatesService') -const { isOpusModel } = require('../utils/modelHelper') +const { isClaudeFamilyModel } = require('../utils/modelHelper') const ACCOUNT_TYPE_CONFIG = { claude: { prefix: 'claude:account:' }, @@ -1651,7 +1651,7 @@ class ApiKeyService { async recordOpusCost(keyId, ratedCost, realCost, model, accountType) { try { // 判断是否为 Claude 系列模型(包含 Bedrock 格式等) - if (!isOpusModel(model)) { + if (!isClaudeFamilyModel(model)) { return } diff --git a/src/services/weeklyClaudeCostInitService.js b/src/services/weeklyClaudeCostInitService.js index 2dfb1470..1268329f 100644 --- a/src/services/weeklyClaudeCostInitService.js +++ b/src/services/weeklyClaudeCostInitService.js @@ -2,7 +2,7 @@ const redis = require('../models/redis') const logger = require('../utils/logger') const pricingService = require('./pricingService') const serviceRatesService = require('./serviceRatesService') -const { isOpusModel } = require('../utils/modelHelper') +const { isClaudeFamilyModel } = require('../utils/modelHelper') function pad2(n) { return String(n).padStart(2, '0') @@ -151,7 +151,7 @@ class WeeklyClaudeCostInitService { } const keyId = match[1] const model = match[2] - if (!isOpusModel(model)) { + if (!isClaudeFamilyModel(model)) { continue } matchedClaudeKeys++ diff --git a/src/utils/modelHelper.js b/src/utils/modelHelper.js index 91fda718..c3fecc98 100644 --- a/src/utils/modelHelper.js +++ b/src/utils/modelHelper.js @@ -188,22 +188,6 @@ function isOpus45OrNewer(modelName) { return false } -/** - * 判断是否为 Opus 模型(任意版本) - * 匹配所有包含 "opus" 关键词的 Claude 模型 - */ -function isOpusModel(modelName) { - if (!modelName || typeof modelName !== 'string') { - return false - } - const { baseModel } = parseVendorPrefixedModel(modelName) - const m = (baseModel || '').trim().toLowerCase() - if (!m) { - return false - } - return m.includes('opus') -} - /** * 判断某个 model 名称是否属于 Anthropic Claude 系列模型。 * @@ -253,6 +237,5 @@ module.exports = { getEffectiveModel, getVendorType, isOpus45OrNewer, - isOpusModel, isClaudeFamilyModel } From 823693afda2ff8354974568ef28f6d33e0fc687b Mon Sep 17 00:00:00 2001 From: sczheng189 <724100151@qq.com> Date: Mon, 23 Feb 2026 23:27:19 +0800 Subject: [PATCH 6/6] =?UTF-8?q?=E6=A0=BC=E5=BC=8F=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/routes/api.js | 4 +++- src/services/pricingService.js | 14 +++++++------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/routes/api.js b/src/routes/api.js index c469dd2f..85ad01f5 100644 --- a/src/routes/api.js +++ b/src/routes/api.js @@ -469,7 +469,9 @@ async function handleMessagesRequest(req, res) { cache_read_input_tokens: cacheReadTokens } const requestBetaHeader = - _headers['anthropic-beta'] || _headers['Anthropic-Beta'] || _headers['ANTHROPIC-BETA'] + _headers['anthropic-beta'] || + _headers['Anthropic-Beta'] || + _headers['ANTHROPIC-BETA'] if (requestBetaHeader) { usageObject.request_anthropic_beta = requestBetaHeader } diff --git a/src/services/pricingService.js b/src/services/pricingService.js index 55423361..0eb3b2f6 100644 --- a/src/services/pricingService.js +++ b/src/services/pricingService.js @@ -566,11 +566,11 @@ class PricingService { if (!key.startsWith('fast/')) { continue } - const normalizedFastKey = key - .slice('fast/'.length) - .toLowerCase() - .replace(/[_-]/g, '') - if (normalizedFastKey.includes(normalizedModel) || normalizedModel.includes(normalizedFastKey)) { + const normalizedFastKey = key.slice('fast/'.length).toLowerCase().replace(/[_-]/g, '') + if ( + normalizedFastKey.includes(normalizedModel) || + normalizedModel.includes(normalizedFastKey) + ) { logger.debug(`💰 Found fuzzy fast pricing for ${modelName}: ${key}`) return value } @@ -700,7 +700,7 @@ class PricingService { // 确定实际使用的输入价格(普通或 200K+ 高档价格) // Claude 模型在 200K+ 场景下如果缺少官方字段,按 2 倍输入价兜底 - let actualInputPrice = useLongContextPricing + const actualInputPrice = useLongContextPricing ? hasInput200kPrice ? pricing.input_cost_per_token_above_200k_tokens : isClaudeModel @@ -712,7 +712,7 @@ class PricingService { const hasOutput200kPrice = pricing.output_cost_per_token_above_200k_tokens !== null && pricing.output_cost_per_token_above_200k_tokens !== undefined - let actualOutputPrice = useLongContextPricing + const actualOutputPrice = useLongContextPricing ? hasOutput200kPrice ? pricing.output_cost_per_token_above_200k_tokens : baseOutputPrice