feat: support service_tier priority pricing for OpenAI Responses endpoint

Preserve service_tier from request body before field stripping and pass
it through the cost calculation chain. When service_tier is "priority"
and the model has supports_service_tier in pricing data, use *_priority
price fields with automatic fallback to standard pricing.
This commit is contained in:
shaw
2026-03-10 09:55:32 +08:00
parent 3dfb8fa83e
commit cc5e1db259
4 changed files with 38 additions and 13 deletions

View File

@@ -283,6 +283,9 @@ const handleResponses = async (req, res) => {
const codexCliPattern = /^(codex_vscode|codex_cli_rs|codex_exec)\/[\d.]+/i
const isCodexCLI = codexCliPattern.test(userAgent)
// 提取 service_tier 用于后续费用计算(在字段被移除前保存)
req._serviceTier = req.body?.service_tier || null
// 如果不是 Codex CLI 请求且不是来自 unified 端点(已完成格式转换),则进行适配
if (!isCodexCLI && !req._fromUnifiedEndpoint) {
// 移除不需要的请求体字段
@@ -632,7 +635,8 @@ const handleResponses = async (req, res) => {
cacheReadTokens,
actualModel,
accountId,
'openai'
'openai',
req._serviceTier
)
logger.info(
@@ -749,7 +753,8 @@ const handleResponses = async (req, res) => {
cacheReadTokens,
modelToRecord,
accountId,
'openai'
'openai',
req._serviceTier
)
logger.info(

View File

@@ -1539,7 +1539,8 @@ class ApiKeyService {
cacheReadTokens = 0,
model = 'unknown',
accountId = null,
accountType = null
accountType = null,
serviceTier = null
) {
try {
const totalTokens = inputTokens + outputTokens + cacheCreateTokens + cacheReadTokens
@@ -1553,7 +1554,8 @@ class ApiKeyService {
cache_creation_input_tokens: cacheCreateTokens,
cache_read_input_tokens: cacheReadTokens
},
model
model,
serviceTier
)
// 检查是否为 1M 上下文请求

View File

@@ -602,6 +602,7 @@ class OpenAIResponsesRelayService {
usageData.total_tokens || totalInputTokens + outputTokens + cacheCreateTokens
const modelToRecord = actualModel || requestedModel || 'gpt-4'
const serviceTier = req._serviceTier || null
await apiKeyService.recordUsage(
apiKeyData.id,
actualInputTokens, // 传递实际输入(不含缓存)
@@ -610,7 +611,8 @@ class OpenAIResponsesRelayService {
cacheReadTokens,
modelToRecord,
account.id,
'openai-responses'
'openai-responses',
serviceTier
)
logger.info(
@@ -631,7 +633,8 @@ class OpenAIResponsesRelayService {
cache_creation_input_tokens: cacheCreateTokens,
cache_read_input_tokens: cacheReadTokens
},
modelToRecord
modelToRecord,
serviceTier
)
await openaiResponsesAccountService.updateUsageQuota(account.id, costInfo.costs.total)
}
@@ -731,6 +734,7 @@ class OpenAIResponsesRelayService {
const totalTokens =
usageData.total_tokens || totalInputTokens + outputTokens + cacheCreateTokens
const serviceTier = req._serviceTier || null
await apiKeyService.recordUsage(
apiKeyData.id,
actualInputTokens, // 传递实际输入(不含缓存)
@@ -739,7 +743,8 @@ class OpenAIResponsesRelayService {
cacheReadTokens,
actualModel,
account.id,
'openai-responses'
'openai-responses',
serviceTier
)
logger.info(
@@ -760,7 +765,8 @@ class OpenAIResponsesRelayService {
cache_creation_input_tokens: cacheCreateTokens,
cache_read_input_tokens: cacheReadTokens
},
actualModel
actualModel,
serviceTier
)
await openaiResponsesAccountService.updateUsageQuota(account.id, costInfo.costs.total)
}

View File

@@ -82,7 +82,7 @@ class CostCalculator {
* @param {string} model - 模型名称
* @returns {Object} 费用详情
*/
static calculateCost(usage, model = 'unknown') {
static calculateCost(usage, model = 'unknown', serviceTier = null) {
// 如果 usage 包含详细的 cache_creation 对象或是 1M 模型,使用 pricingService 来处理
if (
(usage.cache_creation && typeof usage.cache_creation === 'object') ||
@@ -148,10 +148,22 @@ class CostCalculator {
let usingDynamicPricing = false
if (pricingData) {
// 转换动态价格格式为内部格式
const inputPrice = (pricingData.input_cost_per_token || 0) * 1000000 // 转换为 per 1M tokens
const outputPrice = (pricingData.output_cost_per_token || 0) * 1000000
const cacheReadPrice = (pricingData.cache_read_input_token_cost || 0) * 1000000
// 检查是否使用 priority 定价
const usePriority = serviceTier === 'priority' && pricingData.supports_service_tier
// 转换动态价格格式为内部格式(priority 定价时使用 *_priority 字段,回退到标准价格)
const inputPrice =
((usePriority && pricingData.input_cost_per_token_priority) ||
pricingData.input_cost_per_token ||
0) * 1000000
const outputPrice =
((usePriority && pricingData.output_cost_per_token_priority) ||
pricingData.output_cost_per_token ||
0) * 1000000
const cacheReadPrice =
((usePriority && pricingData.cache_read_input_token_cost_priority) ||
pricingData.cache_read_input_token_cost ||
0) * 1000000
// OpenAI 模型的特殊处理:
// - 如果没有 cache_creation_input_token_cost,缓存创建按普通 input 价格计费