feat: 添加 Claude 计费特性和请求元信息支持,优化长上下文计费逻辑

This commit is contained in:
sczheng189
2026-02-14 21:32:09 +08:00
parent d40c891e4f
commit 3b25cf01ad
4 changed files with 387 additions and 61 deletions

View File

@@ -468,6 +468,17 @@ async function handleMessagesRequest(req, res) {
cache_creation_input_tokens: cacheCreateTokens,
cache_read_input_tokens: cacheReadTokens
}
const requestBetaHeader =
_headers['anthropic-beta'] || _headers['Anthropic-Beta'] || _headers['ANTHROPIC-BETA']
if (requestBetaHeader) {
usageObject.request_anthropic_beta = requestBetaHeader
}
if (typeof _requestBody?.speed === 'string' && _requestBody.speed.trim()) {
usageObject.request_speed = _requestBody.speed.trim().toLowerCase()
}
if (typeof usageData.speed === 'string' && usageData.speed.trim()) {
usageObject.speed = usageData.speed.trim().toLowerCase()
}
// 如果有详细的缓存创建数据,添加到 usage 对象中
if (ephemeral5mTokens > 0 || ephemeral1hTokens > 0) {
@@ -562,6 +573,22 @@ async function handleMessagesRequest(req, res) {
cache_creation_input_tokens: cacheCreateTokens,
cache_read_input_tokens: cacheReadTokens
}
const requestBetaHeader =
_headersConsole['anthropic-beta'] ||
_headersConsole['Anthropic-Beta'] ||
_headersConsole['ANTHROPIC-BETA']
if (requestBetaHeader) {
usageObject.request_anthropic_beta = requestBetaHeader
}
if (
typeof _requestBodyConsole?.speed === 'string' &&
_requestBodyConsole.speed.trim()
) {
usageObject.request_speed = _requestBodyConsole.speed.trim().toLowerCase()
}
if (typeof usageData.speed === 'string' && usageData.speed.trim()) {
usageObject.speed = usageData.speed.trim().toLowerCase()
}
// 如果有详细的缓存创建数据,添加到 usage 对象中
if (ephemeral5mTokens > 0 || ephemeral1hTokens > 0) {
@@ -728,6 +755,19 @@ async function handleMessagesRequest(req, res) {
cache_creation_input_tokens: cacheCreateTokens,
cache_read_input_tokens: cacheReadTokens
}
const requestBetaHeader =
_headersCcr['anthropic-beta'] ||
_headersCcr['Anthropic-Beta'] ||
_headersCcr['ANTHROPIC-BETA']
if (requestBetaHeader) {
usageObject.request_anthropic_beta = requestBetaHeader
}
if (typeof _requestBodyCcr?.speed === 'string' && _requestBodyCcr.speed.trim()) {
usageObject.request_speed = _requestBodyCcr.speed.trim().toLowerCase()
}
if (typeof usageData.speed === 'string' && usageData.speed.trim()) {
usageObject.speed = usageData.speed.trim().toLowerCase()
}
// 如果有详细的缓存创建数据,添加到 usage 对象中
if (ephemeral5mTokens > 0 || ephemeral1hTokens > 0) {

View File

@@ -285,12 +285,23 @@ async function handleChatCompletion(req, res, apiKeyData) {
(usage.cache_creation.ephemeral_1h_input_tokens || 0)
: usage.cache_creation_input_tokens || 0) || 0
const cacheReadTokens = usage.cache_read_input_tokens || 0
const usageWithRequestMeta = { ...usage }
const requestBetaHeader =
req.headers['anthropic-beta'] ||
req.headers['Anthropic-Beta'] ||
req.headers['ANTHROPIC-BETA']
if (requestBetaHeader) {
usageWithRequestMeta.request_anthropic_beta = requestBetaHeader
}
if (typeof claudeRequest?.speed === 'string' && claudeRequest.speed.trim()) {
usageWithRequestMeta.request_speed = claudeRequest.speed.trim().toLowerCase()
}
// 使用新的 recordUsageWithDetails 方法来支持详细的缓存数据
apiKeyService
.recordUsageWithDetails(
apiKeyData.id,
usage, // 直接传递整个 usage 对象,包含可能的 cache_creation 详细数据
usageWithRequestMeta, // 传递 usage + 请求模式元信息beta/speed
model,
accountId,
accountType
@@ -413,11 +424,22 @@ async function handleChatCompletion(req, res, apiKeyData) {
(usage.cache_creation.ephemeral_1h_input_tokens || 0)
: usage.cache_creation_input_tokens || 0) || 0
const cacheReadTokens = usage.cache_read_input_tokens || 0
const usageWithRequestMeta = { ...usage }
const requestBetaHeader =
req.headers['anthropic-beta'] ||
req.headers['Anthropic-Beta'] ||
req.headers['ANTHROPIC-BETA']
if (requestBetaHeader) {
usageWithRequestMeta.request_anthropic_beta = requestBetaHeader
}
if (typeof claudeRequest?.speed === 'string' && claudeRequest.speed.trim()) {
usageWithRequestMeta.request_speed = claudeRequest.speed.trim().toLowerCase()
}
// 使用新的 recordUsageWithDetails 方法来支持详细的缓存数据
apiKeyService
.recordUsageWithDetails(
apiKeyData.id,
usage, // 直接传递整个 usage 对象,包含可能的 cache_creation 详细数据
usageWithRequestMeta, // 传递 usage + 请求模式元信息beta/speed
claudeRequest.model,
accountId,
accountType

View File

@@ -63,6 +63,20 @@ class PricingService {
'claude-haiku-3': 0.0000016,
'claude-haiku-3-5': 0.0000016
}
// Claude Prompt Caching 官方倍率(基于输入价格)
this.claudeCacheMultipliers = {
write5m: 1.25,
write1h: 2,
read: 0.1
}
// Claude 扩展计费特性
this.claudeFeatureFlags = {
context1mBeta: 'context-1m-2025-08-07',
fastModeBeta: 'fast-mode-2026-02-01',
fastModeSpeed: 'fast'
}
}
// 初始化价格服务
@@ -451,14 +465,139 @@ class PricingService {
return pricing
}
// 获取 1 小时缓存价格
getEphemeral1hPricing(modelName) {
// 从 usage 对象中提取 beta 特性列表(小写)
extractBetaFeatures(usage) {
const features = new Set()
if (!usage || typeof usage !== 'object') {
return features
}
const requestHeaders = usage.request_headers || usage.requestHeaders || null
const headerBeta =
requestHeaders && typeof requestHeaders === 'object'
? requestHeaders['anthropic-beta'] ||
requestHeaders['Anthropic-Beta'] ||
requestHeaders['ANTHROPIC-BETA']
: null
const candidates = [
usage.anthropic_beta,
usage.anthropicBeta,
usage.request_anthropic_beta,
usage.requestAnthropicBeta,
usage.beta_header,
usage.betaHeader,
usage.beta_features,
headerBeta
]
const addFeature = (value) => {
if (!value || typeof value !== 'string') {
return
}
value
.split(',')
.map((item) => item.trim().toLowerCase())
.filter(Boolean)
.forEach((item) => features.add(item))
}
for (const candidate of candidates) {
if (Array.isArray(candidate)) {
candidate.forEach(addFeature)
} else {
addFeature(candidate)
}
}
return features
}
// 提取请求/响应中的 speed 字段(小写)
extractSpeedSignal(usage) {
if (!usage || typeof usage !== 'object') {
return { responseSpeed: '', requestSpeed: '' }
}
const normalize = (value) =>
typeof value === 'string' && value.trim() ? value.trim().toLowerCase() : ''
return {
responseSpeed: normalize(usage.speed),
requestSpeed: normalize(usage.request_speed || usage.requestSpeed)
}
}
// Claude Fast Mode 目前仅适用于 Opus 4.6 系列
isFastModeEligibleClaudeModel(modelName) {
return typeof modelName === 'string' && modelName.toLowerCase().includes('opus-4-6')
}
// 去掉模型名中的 [1m] 后缀,便于价格查找
stripLongContextSuffix(modelName) {
if (typeof modelName !== 'string') {
return modelName
}
return modelName.replace(/\[1m\]/gi, '').trim()
}
// 获取 Fast Mode 对应的价格条目(仅匹配 fast/ 前缀)
getFastModePricing(modelName) {
if (!this.pricingData || !modelName) {
return null
}
const cleanedModelName = this.stripLongContextSuffix(modelName)
const exactCandidates = new Set([`fast/${cleanedModelName}`])
if (cleanedModelName.startsWith('fast/')) {
exactCandidates.add(cleanedModelName)
}
for (const candidate of exactCandidates) {
if (this.pricingData[candidate]) {
logger.debug(`💰 Found exact fast pricing for ${modelName}: ${candidate}`)
return this.pricingData[candidate]
}
}
const normalizedModel = cleanedModelName.toLowerCase().replace(/[_-]/g, '')
for (const [key, value] of Object.entries(this.pricingData)) {
if (!key.startsWith('fast/')) {
continue
}
const normalizedFastKey = key
.slice('fast/'.length)
.toLowerCase()
.replace(/[_-]/g, '')
if (normalizedFastKey.includes(normalizedModel) || normalizedModel.includes(normalizedFastKey)) {
logger.debug(`💰 Found fuzzy fast pricing for ${modelName}: ${key}`)
return value
}
}
logger.debug(`💰 No fast pricing found for model: ${modelName}`)
return null
}
// 获取 1 小时缓存价格(优先使用 model_pricing.json 中的模型字段)
getEphemeral1hPricing(modelName, pricing = null) {
if (
pricing?.cache_creation_input_token_cost_above_1hr !== null &&
pricing?.cache_creation_input_token_cost_above_1hr !== undefined
) {
return pricing.cache_creation_input_token_cost_above_1hr
}
if (!modelName) {
return 0
}
// 尝试直接匹配
if (this.ephemeral1hPricing[modelName]) {
if (
this.ephemeral1hPricing[modelName] !== null &&
this.ephemeral1hPricing[modelName] !== undefined
) {
return this.ephemeral1hPricing[modelName]
}
@@ -487,8 +626,10 @@ class PricingService {
// 计算使用费用
calculateCost(usage, modelName) {
const normalizedModelName = this.stripLongContextSuffix(modelName)
// 检查是否为 1M 上下文模型(用户通过 [1m] 后缀主动选择长上下文模式)
const isLongContextModel = modelName && modelName.includes('[1m]')
const isLongContextModel = typeof modelName === 'string' && modelName.includes('[1m]')
let isLongContextRequest = false
let useLongContextPricing = false
@@ -498,27 +639,31 @@ class PricingService {
const cacheReadTokens = usage.cache_read_input_tokens || 0
const totalInputTokens = inputTokens + cacheCreationTokens + cacheReadTokens
// 获取模型定价信息
const pricing = this.getModelPricing(modelName)
// 识别 Claude 特性标识
const betaFeatures = this.extractBetaFeatures(usage)
const hasContext1mBeta = betaFeatures.has(this.claudeFeatureFlags.context1mBeta)
const hasFastModeBeta = betaFeatures.has(this.claudeFeatureFlags.fastModeBeta)
const { responseSpeed, requestSpeed } = this.extractSpeedSignal(usage)
const hasFastSpeedSignal =
responseSpeed === this.claudeFeatureFlags.fastModeSpeed ||
requestSpeed === this.claudeFeatureFlags.fastModeSpeed
const isFastModeRequest =
hasFastModeBeta &&
hasFastSpeedSignal &&
this.isFastModeEligibleClaudeModel(normalizedModelName)
const standardPricing = this.getModelPricing(modelName)
const fastPricing = isFastModeRequest ? this.getFastModePricing(normalizedModelName) : null
const pricing = fastPricing || standardPricing
const isLongContextModeEnabled = isLongContextModel || hasContext1mBeta
// 当 [1m] 模型总输入超过 200K 且 model_pricing.json 有 above_200k 字段时,使用高档价格
// 当 [1m] 模型总输入超过 200K 时,进入 200K+ 计费逻辑
// 根据 Anthropic 官方文档:当总输入超过 200K 时,整个请求所有 token 类型都使用高档价格
if (isLongContextModel && totalInputTokens > 200000) {
if (isLongContextModeEnabled && totalInputTokens > 200000) {
isLongContextRequest = true
// 检查 model_pricing.json 是否有 above_200k 字段
if (
pricing?.input_cost_per_token_above_200k_tokens !== null &&
pricing?.input_cost_per_token_above_200k_tokens !== undefined
) {
useLongContextPricing = true
logger.info(
`💰 Using 200K+ pricing for ${modelName}: total input tokens = ${totalInputTokens.toLocaleString()}`
)
} else {
logger.warn(
`⚠️ Model ${modelName} exceeds 200K tokens but no above_200k pricing found in model_pricing.json`
)
}
useLongContextPricing = true
logger.info(
`💰 Using 200K+ pricing for ${modelName}: total input tokens = ${totalInputTokens.toLocaleString()}`
)
}
if (!pricing) {
@@ -535,32 +680,76 @@ class PricingService {
}
}
// 确定实际使用的价格(普通或 200K+ 高档价格)
const actualInputPrice = useLongContextPricing
? pricing.input_cost_per_token_above_200k_tokens
: pricing.input_cost_per_token || 0
const isClaudeModel =
(modelName && modelName.toLowerCase().includes('claude')) ||
(typeof pricing?.litellm_provider === 'string' &&
pricing.litellm_provider.toLowerCase().includes('anthropic'))
const actualOutputPrice = useLongContextPricing
? pricing.output_cost_per_token_above_200k_tokens
: pricing.output_cost_per_token || 0
if (isFastModeRequest && fastPricing) {
logger.info(`🚀 Fast mode pricing profile selected: fast/${normalizedModelName}`)
} else if (isFastModeRequest && !fastPricing) {
logger.warn(
`⚠️ Fast mode request detected but no fast pricing profile found for ${normalizedModelName}; fallback to standard profile`
)
}
const actualCacheCreatePrice = useLongContextPricing
? pricing.cache_creation_input_token_cost_above_200k_tokens ||
pricing.cache_creation_input_token_cost ||
0
: pricing.cache_creation_input_token_cost || 0
const baseInputPrice = pricing.input_cost_per_token || 0
const hasInput200kPrice =
pricing.input_cost_per_token_above_200k_tokens !== null &&
pricing.input_cost_per_token_above_200k_tokens !== undefined
const actualCacheReadPrice = useLongContextPricing
? pricing.cache_read_input_token_cost_above_200k_tokens ||
pricing.cache_read_input_token_cost ||
0
: pricing.cache_read_input_token_cost || 0
// 确定实际使用的输入价格(普通或 200K+ 高档价格)
// Claude 模型在 200K+ 场景下如果缺少官方字段,按 2 倍输入价兜底
let actualInputPrice = useLongContextPricing
? hasInput200kPrice
? pricing.input_cost_per_token_above_200k_tokens
: isClaudeModel
? baseInputPrice * 2
: baseInputPrice
: baseInputPrice
// 1小时缓存的 200K+ 价格
const actualEphemeral1hPrice = useLongContextPricing
? pricing.cache_creation_input_token_cost_above_1hr_above_200k_tokens ||
this.getEphemeral1hPricing(modelName)
: this.getEphemeral1hPricing(modelName)
const baseOutputPrice = pricing.output_cost_per_token || 0
const hasOutput200kPrice =
pricing.output_cost_per_token_above_200k_tokens !== null &&
pricing.output_cost_per_token_above_200k_tokens !== undefined
let actualOutputPrice = useLongContextPricing
? hasOutput200kPrice
? pricing.output_cost_per_token_above_200k_tokens
: baseOutputPrice
: baseOutputPrice
let actualCacheCreatePrice = 0
let actualCacheReadPrice = 0
let actualEphemeral1hPrice = 0
if (isClaudeModel) {
// Claude 模型缓存价格统一按输入价格倍率推导,避免来源字段不一致导致计费偏差
actualCacheCreatePrice = actualInputPrice * this.claudeCacheMultipliers.write5m
actualCacheReadPrice = actualInputPrice * this.claudeCacheMultipliers.read
actualEphemeral1hPrice = actualInputPrice * this.claudeCacheMultipliers.write1h
} else {
actualCacheCreatePrice = useLongContextPricing
? pricing.cache_creation_input_token_cost_above_200k_tokens ||
pricing.cache_creation_input_token_cost ||
0
: pricing.cache_creation_input_token_cost || 0
actualCacheReadPrice = useLongContextPricing
? pricing.cache_read_input_token_cost_above_200k_tokens ||
pricing.cache_read_input_token_cost ||
0
: pricing.cache_read_input_token_cost || 0
const defaultEphemeral1hPrice = this.getEphemeral1hPricing(modelName, pricing)
// 非 Claude 模型维持原有字段优先级
actualEphemeral1hPrice = useLongContextPricing
? pricing.cache_creation_input_token_cost_above_1hr_above_200k_tokens !== null &&
pricing.cache_creation_input_token_cost_above_1hr_above_200k_tokens !== undefined
? pricing.cache_creation_input_token_cost_above_1hr_above_200k_tokens
: defaultEphemeral1hPrice
: defaultEphemeral1hPrice
}
// 计算各项费用
const inputCost = inputTokens * actualInputPrice

View File

@@ -2,8 +2,11 @@
* PricingService 长上下文(200K+)分层计费测试
*
* 测试当 [1m] 模型总输入超过 200K tokens 时的分层计费逻辑:
* - 使用 model_pricing.json 中的 *_above_200k_tokens 字段
* - 所有 token 类型(input/output/cache_create/cache_read)都切换到高档价格
* - 输入/输出优先使用 model_pricing.json 中的 *_above_200k_tokens 字段
* - Claude 缓存价格按输入价格倍率推导:
* - 5m cache write = input * 1.25
* - 1h cache write = input * 2
* - cache read = input * 0.1
*/
// Mock logger to avoid console output during tests
@@ -44,6 +47,7 @@ describe('PricingService - 200K+ Long Context Pricing', () => {
output_cost_per_token: 0.000015, // $15/MTok
cache_creation_input_token_cost: 0.00000375, // $3.75/MTok
cache_read_input_token_cost: 0.0000003, // $0.30/MTok
max_input_tokens: 1000000,
// 200K+ 高档价格
input_cost_per_token_above_200k_tokens: 0.000006, // $6/MTok (2x)
output_cost_per_token_above_200k_tokens: 0.0000225, // $22.50/MTok (1.5x)
@@ -59,6 +63,15 @@ describe('PricingService - 200K+ Long Context Pricing', () => {
output_cost_per_token: 0.00000125,
cache_creation_input_token_cost: 0.0000003,
cache_read_input_token_cost: 0.00000003
},
// Fast Mode 适配测试模型Opus 4.6
'claude-opus-4-6': {
input_cost_per_token: 0.000005,
output_cost_per_token: 0.000025,
cache_creation_input_token_cost: 0.00000625,
cache_read_input_token_cost: 0.0000005,
input_cost_per_token_above_200k_tokens: 0.00001,
output_cost_per_token_above_200k_tokens: 0.0000375
}
}
@@ -152,7 +165,7 @@ describe('PricingService - 200K+ Long Context Pricing', () => {
expect(result.pricing.input).toBe(0.000006)
expect(result.pricing.output).toBe(0.0000225)
expect(result.pricing.cacheCreate).toBe(0.0000075)
expect(result.pricing.cacheRead).toBe(0.0000006)
expect(result.pricing.cacheRead).toBeCloseTo(0.0000006, 12)
})
it('仅 cache_creation + cache_read 超过 200K 也应触发', () => {
@@ -199,13 +212,13 @@ describe('PricingService - 200K+ Long Context Pricing', () => {
const result = pricingService.calculateCost(usage, 'claude-sonnet-4-20250514[1m]')
// cache_read_input_token_cost_above_200k_tokens = 0.0000006
expect(result.pricing.cacheRead).toBe(0.0000006)
expect(result.pricing.cacheRead).toBeCloseTo(0.0000006, 12)
expect(result.cacheReadCost).toBeCloseTo(60000 * 0.0000006, 10)
})
})
describe('详细缓存创建数据ephemeral_5m / ephemeral_1h', () => {
it('200K+ 时 ephemeral_1h 应使用 cache_creation_input_token_cost_above_1hr_above_200k_tokens', () => {
it('200K+ 时 Claude ephemeral_1h 应按 input * 2 计算', () => {
const usage = {
input_tokens: 200001,
output_tokens: 1000,
@@ -222,26 +235,88 @@ describe('PricingService - 200K+ Long Context Pricing', () => {
expect(result.isLongContextRequest).toBe(true)
// ephemeral_5m: 5000 * 0.0000075 = 0.0000375
expect(result.ephemeral5mCost).toBeCloseTo(5000 * 0.0000075, 10)
// ephemeral_1h: 5000 * 0.000015 (above_1hr_above_200k)
expect(result.ephemeral1hCost).toBeCloseTo(5000 * 0.000015, 10)
// 200K+ input = 0.000006, ephemeral_1h = input * 2 = 0.000012
expect(result.pricing.ephemeral1h).toBeCloseTo(0.000012, 10)
expect(result.ephemeral1hCost).toBeCloseTo(5000 * 0.000012, 10)
})
})
describe('回退测试', () => {
it('模型无 above_200k 字段时回退到基础价格', () => {
it('Claude 模型无 above_200k 字段时200K+ 输入价格按 2 倍并推导缓存价格', () => {
const usage = {
input_tokens: 250000,
output_tokens: 1000,
cache_creation_input_tokens: 0,
cache_read_input_tokens: 0
cache_creation_input_tokens: 10000,
cache_read_input_tokens: 10000
}
const result = pricingService.calculateCost(usage, 'claude-3-haiku-20240307[1m]')
// 模型没有 above_200k 字段,使用基础价格
expect(result.isLongContextRequest).toBe(true) // 超过 200K
expect(result.pricing.input).toBe(0.00000025) // 基础价格(没有 above_200k 字段)
expect(result.pricing.cacheCreate).toBe(0.0000003) // 基础价格
// 模型没有 above_200k 字段,Claude 200K+ 输入按 2 倍兜底
expect(result.isLongContextRequest).toBe(true)
expect(result.pricing.input).toBe(0.0000005) // 0.00000025 * 2
// 缓存价格由输入价格推导
expect(result.pricing.cacheCreate).toBeCloseTo(0.000000625, 12) // input * 1.25
expect(result.pricing.cacheRead).toBeCloseTo(0.00000005, 12) // input * 0.1
})
})
// Billing behaviour driven by request metadata carried on the usage object:
// the `request_anthropic_beta` flag list and the `speed` field.
describe('Header 与 Fast Mode 适配', () => {
// Long-context mode is requested through the beta flag only; the model name
// has no [1m] suffix. 210000 input tokens is above the 200K threshold.
it('无 [1m] 后缀但带 context-1m beta超过 200K 时应触发长上下文计费', () => {
const usage = {
input_tokens: 210000,
output_tokens: 1000,
cache_creation_input_tokens: 0,
cache_read_input_tokens: 0,
request_anthropic_beta: 'context-1m-2025-08-07'
}
const result = pricingService.calculateCost(usage, 'claude-sonnet-4-20250514')
expect(result.isLongContextRequest).toBe(true)
expect(result.pricing.input).toBe(0.000006)
expect(result.pricing.output).toBe(0.0000225)
})
// Fast mode needs both the fast-mode beta flag and speed === 'fast'.
// NOTE(review): expects a 6x fast-mode price for claude-opus-4-6 — confirm
// that the mocked pricing data or calculateCost actually supplies it.
it('Opus 4.6 在 fast-mode beta + speed=fast 时应用 Fast Mode 6x', () => {
const usage = {
input_tokens: 100000,
output_tokens: 20000,
cache_creation_input_tokens: 10000,
cache_read_input_tokens: 5000,
request_anthropic_beta: 'fast-mode-2026-02-01',
speed: 'fast'
}
const result = pricingService.calculateCost(usage, 'claude-opus-4-6')
// input: 0.000005 * 6 = 0.00003
expect(result.pricing.input).toBeCloseTo(0.00003, 12)
// output: 0.000025 * 6 = 0.00015
expect(result.pricing.output).toBeCloseTo(0.00015, 12)
// cache create/read prices are derived from the fast-mode input price
expect(result.pricing.cacheCreate).toBeCloseTo(0.0000375, 12) // 0.00003 * 1.25
expect(result.pricing.cacheRead).toBeCloseTo(0.000003, 12) // 0.00003 * 0.1
expect(result.pricing.ephemeral1h).toBeCloseTo(0.00006, 12) // 0.00003 * 2
})
// Both beta flags are sent in one comma-separated value and the model name
// also carries the [1m] suffix; total input is above 200K.
it('Opus 4.6 在 fast-mode + [1m] 且超过 200K 时应叠加计费12x input', () => {
const usage = {
input_tokens: 210000,
output_tokens: 1000,
cache_creation_input_tokens: 10000,
cache_read_input_tokens: 10000,
request_anthropic_beta: 'fast-mode-2026-02-01,context-1m-2025-08-07',
speed: 'fast'
}
const result = pricingService.calculateCost(usage, 'claude-opus-4-6[1m]')
expect(result.isLongContextRequest).toBe(true)
// input: 0.000005 -> long context 0.00001 -> fast 6x => 0.00006 (i.e. 12x the standard price)
expect(result.pricing.input).toBeCloseTo(0.00006, 12)
// output: 0.000025 -> long context 0.0000375 -> fast 6x => 0.000225 (i.e. 9x the standard price)
expect(result.pricing.output).toBeCloseTo(0.000225, 12)
})
})
@@ -261,7 +336,7 @@ describe('PricingService - 200K+ Long Context Pricing', () => {
expect(result.pricing.input).toBe(0.000003) // 基础价格
expect(result.pricing.output).toBe(0.000015) // 基础价格
expect(result.pricing.cacheCreate).toBe(0.00000375) // 基础价格
expect(result.pricing.cacheRead).toBe(0.0000003) // 基础价格
expect(result.pricing.cacheRead).toBeCloseTo(0.0000003, 12) // 基础价格
})
it('[1m] 模型未超过 200K 时使用基础价格', () => {