diff --git a/src/routes/api.js b/src/routes/api.js index 4d5647e4..337cec5c 100644 --- a/src/routes/api.js +++ b/src/routes/api.js @@ -468,6 +468,17 @@ async function handleMessagesRequest(req, res) { cache_creation_input_tokens: cacheCreateTokens, cache_read_input_tokens: cacheReadTokens } + const requestBetaHeader = + _headers['anthropic-beta'] || _headers['Anthropic-Beta'] || _headers['ANTHROPIC-BETA'] + if (requestBetaHeader) { + usageObject.request_anthropic_beta = requestBetaHeader + } + if (typeof _requestBody?.speed === 'string' && _requestBody.speed.trim()) { + usageObject.request_speed = _requestBody.speed.trim().toLowerCase() + } + if (typeof usageData.speed === 'string' && usageData.speed.trim()) { + usageObject.speed = usageData.speed.trim().toLowerCase() + } // 如果有详细的缓存创建数据,添加到 usage 对象中 if (ephemeral5mTokens > 0 || ephemeral1hTokens > 0) { @@ -562,6 +573,22 @@ async function handleMessagesRequest(req, res) { cache_creation_input_tokens: cacheCreateTokens, cache_read_input_tokens: cacheReadTokens } + const requestBetaHeader = + _headersConsole['anthropic-beta'] || + _headersConsole['Anthropic-Beta'] || + _headersConsole['ANTHROPIC-BETA'] + if (requestBetaHeader) { + usageObject.request_anthropic_beta = requestBetaHeader + } + if ( + typeof _requestBodyConsole?.speed === 'string' && + _requestBodyConsole.speed.trim() + ) { + usageObject.request_speed = _requestBodyConsole.speed.trim().toLowerCase() + } + if (typeof usageData.speed === 'string' && usageData.speed.trim()) { + usageObject.speed = usageData.speed.trim().toLowerCase() + } // 如果有详细的缓存创建数据,添加到 usage 对象中 if (ephemeral5mTokens > 0 || ephemeral1hTokens > 0) { @@ -728,6 +755,19 @@ async function handleMessagesRequest(req, res) { cache_creation_input_tokens: cacheCreateTokens, cache_read_input_tokens: cacheReadTokens } + const requestBetaHeader = + _headersCcr['anthropic-beta'] || + _headersCcr['Anthropic-Beta'] || + _headersCcr['ANTHROPIC-BETA'] + if (requestBetaHeader) { + 
usageObject.request_anthropic_beta = requestBetaHeader + } + if (typeof _requestBodyCcr?.speed === 'string' && _requestBodyCcr.speed.trim()) { + usageObject.request_speed = _requestBodyCcr.speed.trim().toLowerCase() + } + if (typeof usageData.speed === 'string' && usageData.speed.trim()) { + usageObject.speed = usageData.speed.trim().toLowerCase() + } // 如果有详细的缓存创建数据,添加到 usage 对象中 if (ephemeral5mTokens > 0 || ephemeral1hTokens > 0) { diff --git a/src/routes/openaiClaudeRoutes.js b/src/routes/openaiClaudeRoutes.js index ae791b56..fc910b6a 100644 --- a/src/routes/openaiClaudeRoutes.js +++ b/src/routes/openaiClaudeRoutes.js @@ -285,12 +285,23 @@ async function handleChatCompletion(req, res, apiKeyData) { (usage.cache_creation.ephemeral_1h_input_tokens || 0) : usage.cache_creation_input_tokens || 0) || 0 const cacheReadTokens = usage.cache_read_input_tokens || 0 + const usageWithRequestMeta = { ...usage } + const requestBetaHeader = + req.headers['anthropic-beta'] || + req.headers['Anthropic-Beta'] || + req.headers['ANTHROPIC-BETA'] + if (requestBetaHeader) { + usageWithRequestMeta.request_anthropic_beta = requestBetaHeader + } + if (typeof claudeRequest?.speed === 'string' && claudeRequest.speed.trim()) { + usageWithRequestMeta.request_speed = claudeRequest.speed.trim().toLowerCase() + } // 使用新的 recordUsageWithDetails 方法来支持详细的缓存数据 apiKeyService .recordUsageWithDetails( apiKeyData.id, - usage, // 直接传递整个 usage 对象,包含可能的 cache_creation 详细数据 + usageWithRequestMeta, // pass usage plus request-mode metadata (beta/speed) model, accountId, accountType @@ -413,11 +424,22 @@ async function handleChatCompletion(req, res, apiKeyData) { (usage.cache_creation.ephemeral_1h_input_tokens || 0) : usage.cache_creation_input_tokens || 0) || 0 const cacheReadTokens = usage.cache_read_input_tokens || 0 + const usageWithRequestMeta = { ...usage } + const requestBetaHeader = + req.headers['anthropic-beta'] || + req.headers['Anthropic-Beta'] || + req.headers['ANTHROPIC-BETA'] + if (requestBetaHeader) { + 
usageWithRequestMeta.request_anthropic_beta = requestBetaHeader + } + if (typeof claudeRequest?.speed === 'string' && claudeRequest.speed.trim()) { + usageWithRequestMeta.request_speed = claudeRequest.speed.trim().toLowerCase() + } // 使用新的 recordUsageWithDetails 方法来支持详细的缓存数据 apiKeyService .recordUsageWithDetails( apiKeyData.id, - usage, // 直接传递整个 usage 对象,包含可能的 cache_creation 详细数据 + usageWithRequestMeta, // pass usage plus request-mode metadata (beta/speed) claudeRequest.model, accountId, accountType diff --git a/src/services/pricingService.js b/src/services/pricingService.js index 6f513f13..55423361 100644 --- a/src/services/pricingService.js +++ b/src/services/pricingService.js @@ -63,6 +63,20 @@ class PricingService { 'claude-haiku-3': 0.0000016, 'claude-haiku-3-5': 0.0000016 } + + // Official Claude prompt-caching multipliers (relative to the input price) + this.claudeCacheMultipliers = { + write5m: 1.25, + write1h: 2, + read: 0.1 + } + + // Claude extended billing feature identifiers (beta header values and the fast speed value) + this.claudeFeatureFlags = { + context1mBeta: 'context-1m-2025-08-07', + fastModeBeta: 'fast-mode-2026-02-01', + fastModeSpeed: 'fast' + } } // 初始化价格服务 @@ -451,14 +465,139 @@ class PricingService { return pricing } - // 获取 1 小时缓存价格 - getEphemeral1hPricing(modelName) { + // Collect the lower-cased beta feature names found on a usage object (comma-separated strings or arrays); returns a Set + extractBetaFeatures(usage) { + const features = new Set() + if (!usage || typeof usage !== 'object') { + return features + } + + const requestHeaders = usage.request_headers || usage.requestHeaders || null + const headerBeta = + requestHeaders && typeof requestHeaders === 'object' + ? 
requestHeaders['anthropic-beta'] || + requestHeaders['Anthropic-Beta'] || + requestHeaders['ANTHROPIC-BETA'] + : null + + const candidates = [ + usage.anthropic_beta, + usage.anthropicBeta, + usage.request_anthropic_beta, + usage.requestAnthropicBeta, + usage.beta_header, + usage.betaHeader, + usage.beta_features, + headerBeta + ] + + const addFeature = (value) => { + if (!value || typeof value !== 'string') { + return + } + value + .split(',') + .map((item) => item.trim().toLowerCase()) + .filter(Boolean) + .forEach((item) => features.add(item)) + } + + for (const candidate of candidates) { + if (Array.isArray(candidate)) { + candidate.forEach(addFeature) + } else { + addFeature(candidate) + } + } + + return features + } + + // Read the response speed (usage.speed) and the request speed (usage.request_speed or usage.requestSpeed), trimmed and lower-cased; non-strings become '' + extractSpeedSignal(usage) { + if (!usage || typeof usage !== 'object') { + return { responseSpeed: '', requestSpeed: '' } + } + + const normalize = (value) => + typeof value === 'string' && value.trim() ? value.trim().toLowerCase() : '' + + return { + responseSpeed: normalize(usage.speed), + requestSpeed: normalize(usage.request_speed || usage.requestSpeed) + } + } + + // Claude Fast Mode currently applies only to the Opus 4.6 family (model names containing 'opus-4-6', case-insensitive) + isFastModeEligibleClaudeModel(modelName) { + return typeof modelName === 'string' && modelName.toLowerCase().includes('opus-4-6') + } + + // Strip every [1m] marker (case-insensitive) from a model name so price lookups use the base name. 
NOTE(review): calculateCost detects [1m] case-sensitively; confirm that mismatch is intended + stripLongContextSuffix(modelName) { + if (typeof modelName !== 'string') { + return modelName + } + return modelName.replace(/\[1m\]/gi, '').trim() + } + + // Look up the Fast Mode price entry: exact fast/<model> key first, then a fuzzy match; only pricingData keys with the fast/ prefix are considered, null when none match + getFastModePricing(modelName) { + if (!this.pricingData || !modelName) { + return null + } + + const cleanedModelName = this.stripLongContextSuffix(modelName) + const exactCandidates = new Set([`fast/${cleanedModelName}`]) + + if (cleanedModelName.startsWith('fast/')) { + exactCandidates.add(cleanedModelName) + } + + for (const candidate of exactCandidates) { + if (this.pricingData[candidate]) { + logger.debug(`💰 Found exact fast pricing for ${modelName}: 
${candidate}`) + return this.pricingData[candidate] + } + } + + const normalizedModel = cleanedModelName.toLowerCase().replace(/[_-]/g, '') + for (const [key, value] of Object.entries(this.pricingData)) { + if (!key.startsWith('fast/')) { + continue + } + const normalizedFastKey = key + .slice('fast/'.length) + .toLowerCase() + .replace(/[_-]/g, '') + if (normalizedFastKey.includes(normalizedModel) || normalizedModel.includes(normalizedFastKey)) { + logger.debug(`💰 Found fuzzy fast pricing for ${modelName}: ${key}`) + return value + } + } + + logger.debug(`💰 No fast pricing found for model: ${modelName}`) + return null + } + + // Get the 1-hour cache write price, preferring the cache_creation_input_token_cost_above_1hr field of the supplied pricing entry over the built-in table + getEphemeral1hPricing(modelName, pricing = null) { + if ( + pricing?.cache_creation_input_token_cost_above_1hr !== null && + pricing?.cache_creation_input_token_cost_above_1hr !== undefined + ) { + return pricing.cache_creation_input_token_cost_above_1hr + } + if (!modelName) { return 0 } // 尝试直接匹配 - if (this.ephemeral1hPricing[modelName]) { + if ( + this.ephemeral1hPricing[modelName] !== null && + this.ephemeral1hPricing[modelName] !== undefined + ) { return this.ephemeral1hPricing[modelName] } @@ -487,8 +626,10 @@ class PricingService { // 计算使用费用 calculateCost(usage, modelName) { + const normalizedModelName = this.stripLongContextSuffix(modelName) + // 检查是否为 1M 上下文模型(用户通过 [1m] 后缀主动选择长上下文模式) - const isLongContextModel = modelName && modelName.includes('[1m]') + const isLongContextModel = typeof modelName === 'string' && modelName.includes('[1m]') let isLongContextRequest = false let useLongContextPricing = false @@ -498,27 +639,31 @@ class PricingService { const cacheReadTokens = usage.cache_read_input_tokens || 0 const totalInputTokens = inputTokens + cacheCreationTokens + cacheReadTokens - // 获取模型定价信息 - const pricing = this.getModelPricing(modelName) + // Detect Claude feature flags (beta header features and speed signals) carried on the usage object + const betaFeatures = this.extractBetaFeatures(usage) + const hasContext1mBeta = 
betaFeatures.has(this.claudeFeatureFlags.context1mBeta) + const hasFastModeBeta = betaFeatures.has(this.claudeFeatureFlags.fastModeBeta) + const { responseSpeed, requestSpeed } = this.extractSpeedSignal(usage) + const hasFastSpeedSignal = + responseSpeed === this.claudeFeatureFlags.fastModeSpeed || + requestSpeed === this.claudeFeatureFlags.fastModeSpeed + const isFastModeRequest = + hasFastModeBeta && + hasFastSpeedSignal && + this.isFastModeEligibleClaudeModel(normalizedModelName) + const standardPricing = this.getModelPricing(modelName) + const fastPricing = isFastModeRequest ? this.getFastModePricing(normalizedModelName) : null + const pricing = fastPricing || standardPricing + const isLongContextModeEnabled = isLongContextModel || hasContext1mBeta - // 当 [1m] 模型总输入超过 200K 且 model_pricing.json 有 above_200k 字段时,使用高档价格 + // When long-context mode is on ([1m] suffix or context-1m beta) and total input exceeds 200K, switch to 200K+ billing // 根据 Anthropic 官方文档:当总输入超过 200K 时,整个请求所有 token 类型都使用高档价格 - if (isLongContextModel && totalInputTokens > 200000) { + if (isLongContextModeEnabled && totalInputTokens > 200000) { isLongContextRequest = true - // 检查 model_pricing.json 是否有 above_200k 字段 - if ( - pricing?.input_cost_per_token_above_200k_tokens !== null && - pricing?.input_cost_per_token_above_200k_tokens !== undefined - ) { - useLongContextPricing = true - logger.info( - `💰 Using 200K+ pricing for ${modelName}: total input tokens = ${totalInputTokens.toLocaleString()}` - ) - } else { - logger.warn( - `⚠️ Model ${modelName} exceeds 200K tokens but no above_200k pricing found in model_pricing.json` - ) - } + useLongContextPricing = true + logger.info( + `💰 Using 200K+ pricing for ${modelName}: total input tokens = ${totalInputTokens.toLocaleString()}` + ) } if (!pricing) { @@ -535,32 +680,76 @@ class PricingService { } } - // 确定实际使用的价格(普通或 200K+ 高档价格) - const actualInputPrice = useLongContextPricing - ? 
pricing.input_cost_per_token_above_200k_tokens - : pricing.input_cost_per_token || 0 + const isClaudeModel = + (modelName && modelName.toLowerCase().includes('claude')) || + (typeof pricing?.litellm_provider === 'string' && + pricing.litellm_provider.toLowerCase().includes('anthropic')) - const actualOutputPrice = useLongContextPricing - ? pricing.output_cost_per_token_above_200k_tokens - : pricing.output_cost_per_token || 0 + if (isFastModeRequest && fastPricing) { + logger.info(`🚀 Fast mode pricing profile selected: fast/${normalizedModelName}`) + } else if (isFastModeRequest && !fastPricing) { + logger.warn( + `⚠️ Fast mode request detected but no fast pricing profile found for ${normalizedModelName}; fallback to standard profile` + ) + } - const actualCacheCreatePrice = useLongContextPricing - ? pricing.cache_creation_input_token_cost_above_200k_tokens || - pricing.cache_creation_input_token_cost || - 0 - : pricing.cache_creation_input_token_cost || 0 + const baseInputPrice = pricing.input_cost_per_token || 0 + const hasInput200kPrice = + pricing.input_cost_per_token_above_200k_tokens !== null && + pricing.input_cost_per_token_above_200k_tokens !== undefined - const actualCacheReadPrice = useLongContextPricing - ? pricing.cache_read_input_token_cost_above_200k_tokens || - pricing.cache_read_input_token_cost || - 0 - : pricing.cache_read_input_token_cost || 0 + // Pick the effective input price (standard or 200K+ tier) + // For Claude models lacking the official 200K+ field, fall back to 2x the base input price; other models keep the base price + let actualInputPrice = useLongContextPricing + ? hasInput200kPrice + ? pricing.input_cost_per_token_above_200k_tokens + : isClaudeModel + ? baseInputPrice * 2 + : baseInputPrice + : baseInputPrice - // 1小时缓存的 200K+ 价格 - const actualEphemeral1hPrice = useLongContextPricing - ? 
pricing.cache_creation_input_token_cost_above_1hr_above_200k_tokens || - this.getEphemeral1hPricing(modelName) - : this.getEphemeral1hPricing(modelName) + const baseOutputPrice = pricing.output_cost_per_token || 0 + const hasOutput200kPrice = + pricing.output_cost_per_token_above_200k_tokens !== null && + pricing.output_cost_per_token_above_200k_tokens !== undefined + let actualOutputPrice = useLongContextPricing + ? hasOutput200kPrice + ? pricing.output_cost_per_token_above_200k_tokens + : baseOutputPrice + : baseOutputPrice + + let actualCacheCreatePrice = 0 + let actualCacheReadPrice = 0 + let actualEphemeral1hPrice = 0 + + if (isClaudeModel) { + // Claude cache prices are always derived from the effective input price via multipliers, avoiding billing drift from inconsistent source fields + actualCacheCreatePrice = actualInputPrice * this.claudeCacheMultipliers.write5m + actualCacheReadPrice = actualInputPrice * this.claudeCacheMultipliers.read + actualEphemeral1hPrice = actualInputPrice * this.claudeCacheMultipliers.write1h + } else { + actualCacheCreatePrice = useLongContextPricing + ? pricing.cache_creation_input_token_cost_above_200k_tokens || + pricing.cache_creation_input_token_cost || + 0 + : pricing.cache_creation_input_token_cost || 0 + + actualCacheReadPrice = useLongContextPricing + ? pricing.cache_read_input_token_cost_above_200k_tokens || + pricing.cache_read_input_token_cost || + 0 + : pricing.cache_read_input_token_cost || 0 + + const defaultEphemeral1hPrice = this.getEphemeral1hPricing(modelName, pricing) + + // Non-Claude models keep the original field precedence + actualEphemeral1hPrice = useLongContextPricing + ? pricing.cache_creation_input_token_cost_above_1hr_above_200k_tokens !== null && + pricing.cache_creation_input_token_cost_above_1hr_above_200k_tokens !== undefined + ? 
pricing.cache_creation_input_token_cost_above_1hr_above_200k_tokens + : defaultEphemeral1hPrice + : defaultEphemeral1hPrice + } // 计算各项费用 const inputCost = inputTokens * actualInputPrice diff --git a/tests/pricingService.test.js b/tests/pricingService.test.js index 60e71281..40dc0dd7 100644 --- a/tests/pricingService.test.js +++ b/tests/pricingService.test.js @@ -2,8 +2,11 @@ * PricingService 长上下文(200K+)分层计费测试 * * 测试当 [1m] 模型总输入超过 200K tokens 时的分层计费逻辑: - * - 使用 model_pricing.json 中的 *_above_200k_tokens 字段 - * - 所有 token 类型(input/output/cache_create/cache_read)都切换到高档价格 + * - Input and output prices prefer the *_above_200k_tokens fields from model_pricing.json + * - Claude cache prices are derived from the input price via multipliers: + * - 5m cache write = input * 1.25 + * - 1h cache write = input * 2 + * - cache read = input * 0.1 */ // Mock logger to avoid console output during tests @@ -44,6 +47,7 @@ describe('PricingService - 200K+ Long Context Pricing', () => { output_cost_per_token: 0.000015, // $15/MTok cache_creation_input_token_cost: 0.00000375, // $3.75/MTok cache_read_input_token_cost: 0.0000003, // $0.30/MTok + max_input_tokens: 1000000, // 200K+ 高档价格 input_cost_per_token_above_200k_tokens: 0.000006, // $6/MTok (2x) output_cost_per_token_above_200k_tokens: 0.0000225, // $22.50/MTok (1.5x) @@ -59,6 +63,15 @@ describe('PricingService - 200K+ Long Context Pricing', () => { output_cost_per_token: 0.00000125, cache_creation_input_token_cost: 0.0000003, cache_read_input_token_cost: 0.00000003 + }, + // Model used by the Fast Mode tests (Opus 4.6). 
NOTE(review): no fast/ price entry is added in this hunk and getFastModePricing only returns fast/ keys; confirm the 6x expectations are reachable + 'claude-opus-4-6': { + input_cost_per_token: 0.000005, + output_cost_per_token: 0.000025, + cache_creation_input_token_cost: 0.00000625, + cache_read_input_token_cost: 0.0000005, + input_cost_per_token_above_200k_tokens: 0.00001, + output_cost_per_token_above_200k_tokens: 0.0000375 } } @@ -152,7 +165,7 @@ describe('PricingService - 200K+ Long Context Pricing', () => { expect(result.pricing.input).toBe(0.000006) expect(result.pricing.output).toBe(0.0000225) expect(result.pricing.cacheCreate).toBe(0.0000075) - 
expect(result.pricing.cacheRead).toBe(0.0000006) + expect(result.pricing.cacheRead).toBeCloseTo(0.0000006, 12) }) it('仅 cache_creation + cache_read 超过 200K 也应触发', () => { @@ -199,13 +212,13 @@ describe('PricingService - 200K+ Long Context Pricing', () => { const result = pricingService.calculateCost(usage, 'claude-sonnet-4-20250514[1m]') // cache_read_input_token_cost_above_200k_tokens = 0.0000006 - expect(result.pricing.cacheRead).toBe(0.0000006) + expect(result.pricing.cacheRead).toBeCloseTo(0.0000006, 12) expect(result.cacheReadCost).toBeCloseTo(60000 * 0.0000006, 10) }) }) describe('详细缓存创建数据(ephemeral_5m / ephemeral_1h)', () => { - it('200K+ 时 ephemeral_1h 应使用 cache_creation_input_token_cost_above_1hr_above_200k_tokens', () => { + it('200K+ 时 Claude ephemeral_1h 应按 input * 2 计算', () => { const usage = { input_tokens: 200001, output_tokens: 1000, @@ -222,26 +235,88 @@ describe('PricingService - 200K+ Long Context Pricing', () => { expect(result.isLongContextRequest).toBe(true) // ephemeral_5m: 5000 * 0.0000075 = 0.0000375 expect(result.ephemeral5mCost).toBeCloseTo(5000 * 0.0000075, 10) - // ephemeral_1h: 5000 * 0.000015 (above_1hr_above_200k) - expect(result.ephemeral1hCost).toBeCloseTo(5000 * 0.000015, 10) + // 200K+ input = 0.000006, ephemeral_1h = input * 2 = 0.000012 + expect(result.pricing.ephemeral1h).toBeCloseTo(0.000012, 10) + expect(result.ephemeral1hCost).toBeCloseTo(5000 * 0.000012, 10) }) }) describe('回退测试', () => { - it('模型无 above_200k 字段时回退到基础价格', () => { + it('Claude 模型无 above_200k 字段时,200K+ 输入价格按 2 倍并推导缓存价格', () => { const usage = { input_tokens: 250000, output_tokens: 1000, - cache_creation_input_tokens: 0, - cache_read_input_tokens: 0 + cache_creation_input_tokens: 10000, + cache_read_input_tokens: 10000 } const result = pricingService.calculateCost(usage, 'claude-3-haiku-20240307[1m]') - // 模型没有 above_200k 字段,使用基础价格 - expect(result.isLongContextRequest).toBe(true) // 超过 200K - expect(result.pricing.input).toBe(0.00000025) // 基础价格(没有 above_200k 
字段) - expect(result.pricing.cacheCreate).toBe(0.0000003) // 基础价格 + // No above_200k fields on this model, so the Claude 200K+ input price falls back to 2x + expect(result.isLongContextRequest).toBe(true) + expect(result.pricing.input).toBe(0.0000005) // 0.00000025 * 2 + // Cache prices are derived from the input price + expect(result.pricing.cacheCreate).toBeCloseTo(0.000000625, 12) // input * 1.25 + expect(result.pricing.cacheRead).toBeCloseTo(0.00000005, 12) // input * 0.1 + }) + }) + + describe('Header 与 Fast Mode 适配', () => { + it('无 [1m] 后缀但带 context-1m beta,超过 200K 时应触发长上下文计费', () => { + const usage = { + input_tokens: 210000, + output_tokens: 1000, + cache_creation_input_tokens: 0, + cache_read_input_tokens: 0, + request_anthropic_beta: 'context-1m-2025-08-07' + } + + const result = pricingService.calculateCost(usage, 'claude-sonnet-4-20250514') + + expect(result.isLongContextRequest).toBe(true) + expect(result.pricing.input).toBe(0.000006) + expect(result.pricing.output).toBe(0.0000225) + }) + + it('Opus 4.6 在 fast-mode beta + speed=fast 时应用 Fast Mode 6x', () => { + const usage = { + input_tokens: 100000, + output_tokens: 20000, + cache_creation_input_tokens: 10000, + cache_read_input_tokens: 5000, + request_anthropic_beta: 'fast-mode-2026-02-01', + speed: 'fast' + } + + const result = pricingService.calculateCost(usage, 'claude-opus-4-6') + + // input: 0.000005 * 6 = 0.00003 + expect(result.pricing.input).toBeCloseTo(0.00003, 12) + // output: 0.000025 * 6 = 0.00015 + expect(result.pricing.output).toBeCloseTo(0.00015, 12) + // cache create/read are derived from the fast-mode input price + expect(result.pricing.cacheCreate).toBeCloseTo(0.0000375, 12) // 0.00003 * 1.25 + expect(result.pricing.cacheRead).toBeCloseTo(0.000003, 12) // 0.00003 * 0.1 + expect(result.pricing.ephemeral1h).toBeCloseTo(0.00006, 12) // 0.00003 * 2 + }) + + it('Opus 4.6 在 fast-mode + [1m] 且超过 200K 时应叠加计费(12x input)', () => { + const usage = { + input_tokens: 210000, + output_tokens: 1000, + cache_creation_input_tokens: 10000, + cache_read_input_tokens: 10000, + request_anthropic_beta: 
'fast-mode-2026-02-01,context-1m-2025-08-07', + speed: 'fast' + } + + const result = pricingService.calculateCost(usage, 'claude-opus-4-6[1m]') + + expect(result.isLongContextRequest).toBe(true) + // input: 0.000005 -> long context 0.00001 -> fast 6x => 0.00006 (i.e. 12x standard) + expect(result.pricing.input).toBeCloseTo(0.00006, 12) + // output: 0.000025 -> long context 0.0000375 -> fast 6x => 0.000225 (i.e. 9x standard) + expect(result.pricing.output).toBeCloseTo(0.000225, 12) }) }) @@ -261,7 +336,7 @@ describe('PricingService - 200K+ Long Context Pricing', () => { expect(result.pricing.input).toBe(0.000003) // 基础价格 expect(result.pricing.output).toBe(0.000015) // 基础价格 expect(result.pricing.cacheCreate).toBe(0.00000375) // 基础价格 - expect(result.pricing.cacheRead).toBe(0.0000003) // 基础价格 + expect(result.pricing.cacheRead).toBeCloseTo(0.0000003, 12) // base price }) it('[1m] 模型未超过 200K 时使用基础价格', () => {