mirror of
https://github.com/Wei-Shaw/claude-relay-service.git
synced 2026-03-30 04:27:20 +00:00
feat: support service_tier priority pricing for OpenAI Responses endpoint
Preserve `service_tier` from the request body before that field is stripped, and pass it through the cost-calculation chain. When `service_tier` is "priority" and the model's pricing data sets `supports_service_tier`, the `*_priority` price fields are used; any priority price that is missing falls back to the corresponding standard price.
This commit is contained in:
@@ -283,6 +283,9 @@ const handleResponses = async (req, res) => {
|
||||
const codexCliPattern = /^(codex_vscode|codex_cli_rs|codex_exec)\/[\d.]+/i
|
||||
const isCodexCLI = codexCliPattern.test(userAgent)
|
||||
|
||||
// 提取 service_tier 用于后续费用计算(在字段被移除前保存)
|
||||
req._serviceTier = req.body?.service_tier || null
|
||||
|
||||
// 如果不是 Codex CLI 请求且不是来自 unified 端点(已完成格式转换),则进行适配
|
||||
if (!isCodexCLI && !req._fromUnifiedEndpoint) {
|
||||
// 移除不需要的请求体字段
|
||||
@@ -632,7 +635,8 @@ const handleResponses = async (req, res) => {
|
||||
cacheReadTokens,
|
||||
actualModel,
|
||||
accountId,
|
||||
'openai'
|
||||
'openai',
|
||||
req._serviceTier
|
||||
)
|
||||
|
||||
logger.info(
|
||||
@@ -749,7 +753,8 @@ const handleResponses = async (req, res) => {
|
||||
cacheReadTokens,
|
||||
modelToRecord,
|
||||
accountId,
|
||||
'openai'
|
||||
'openai',
|
||||
req._serviceTier
|
||||
)
|
||||
|
||||
logger.info(
|
||||
|
||||
@@ -1539,7 +1539,8 @@ class ApiKeyService {
|
||||
cacheReadTokens = 0,
|
||||
model = 'unknown',
|
||||
accountId = null,
|
||||
accountType = null
|
||||
accountType = null,
|
||||
serviceTier = null
|
||||
) {
|
||||
try {
|
||||
const totalTokens = inputTokens + outputTokens + cacheCreateTokens + cacheReadTokens
|
||||
@@ -1553,7 +1554,8 @@ class ApiKeyService {
|
||||
cache_creation_input_tokens: cacheCreateTokens,
|
||||
cache_read_input_tokens: cacheReadTokens
|
||||
},
|
||||
model
|
||||
model,
|
||||
serviceTier
|
||||
)
|
||||
|
||||
// 检查是否为 1M 上下文请求
|
||||
|
||||
@@ -602,6 +602,7 @@ class OpenAIResponsesRelayService {
|
||||
usageData.total_tokens || totalInputTokens + outputTokens + cacheCreateTokens
|
||||
const modelToRecord = actualModel || requestedModel || 'gpt-4'
|
||||
|
||||
const serviceTier = req._serviceTier || null
|
||||
await apiKeyService.recordUsage(
|
||||
apiKeyData.id,
|
||||
actualInputTokens, // 传递实际输入(不含缓存)
|
||||
@@ -610,7 +611,8 @@ class OpenAIResponsesRelayService {
|
||||
cacheReadTokens,
|
||||
modelToRecord,
|
||||
account.id,
|
||||
'openai-responses'
|
||||
'openai-responses',
|
||||
serviceTier
|
||||
)
|
||||
|
||||
logger.info(
|
||||
@@ -631,7 +633,8 @@ class OpenAIResponsesRelayService {
|
||||
cache_creation_input_tokens: cacheCreateTokens,
|
||||
cache_read_input_tokens: cacheReadTokens
|
||||
},
|
||||
modelToRecord
|
||||
modelToRecord,
|
||||
serviceTier
|
||||
)
|
||||
await openaiResponsesAccountService.updateUsageQuota(account.id, costInfo.costs.total)
|
||||
}
|
||||
@@ -731,6 +734,7 @@ class OpenAIResponsesRelayService {
|
||||
const totalTokens =
|
||||
usageData.total_tokens || totalInputTokens + outputTokens + cacheCreateTokens
|
||||
|
||||
const serviceTier = req._serviceTier || null
|
||||
await apiKeyService.recordUsage(
|
||||
apiKeyData.id,
|
||||
actualInputTokens, // 传递实际输入(不含缓存)
|
||||
@@ -739,7 +743,8 @@ class OpenAIResponsesRelayService {
|
||||
cacheReadTokens,
|
||||
actualModel,
|
||||
account.id,
|
||||
'openai-responses'
|
||||
'openai-responses',
|
||||
serviceTier
|
||||
)
|
||||
|
||||
logger.info(
|
||||
@@ -760,7 +765,8 @@ class OpenAIResponsesRelayService {
|
||||
cache_creation_input_tokens: cacheCreateTokens,
|
||||
cache_read_input_tokens: cacheReadTokens
|
||||
},
|
||||
actualModel
|
||||
actualModel,
|
||||
serviceTier
|
||||
)
|
||||
await openaiResponsesAccountService.updateUsageQuota(account.id, costInfo.costs.total)
|
||||
}
|
||||
|
||||
@@ -82,7 +82,7 @@ class CostCalculator {
|
||||
* @param {string} model - 模型名称
|
||||
* @returns {Object} 费用详情
|
||||
*/
|
||||
static calculateCost(usage, model = 'unknown') {
|
||||
static calculateCost(usage, model = 'unknown', serviceTier = null) {
|
||||
// 如果 usage 包含详细的 cache_creation 对象或是 1M 模型,使用 pricingService 来处理
|
||||
if (
|
||||
(usage.cache_creation && typeof usage.cache_creation === 'object') ||
|
||||
@@ -148,10 +148,22 @@ class CostCalculator {
|
||||
let usingDynamicPricing = false
|
||||
|
||||
if (pricingData) {
|
||||
// 转换动态价格格式为内部格式
|
||||
const inputPrice = (pricingData.input_cost_per_token || 0) * 1000000 // 转换为per 1M tokens
|
||||
const outputPrice = (pricingData.output_cost_per_token || 0) * 1000000
|
||||
const cacheReadPrice = (pricingData.cache_read_input_token_cost || 0) * 1000000
|
||||
// 检查是否使用 priority 定价
|
||||
const usePriority = serviceTier === 'priority' && pricingData.supports_service_tier
|
||||
|
||||
// 转换动态价格格式为内部格式(priority 定价时使用 *_priority 字段,回退到标准价格)
|
||||
const inputPrice =
|
||||
((usePriority && pricingData.input_cost_per_token_priority) ||
|
||||
pricingData.input_cost_per_token ||
|
||||
0) * 1000000
|
||||
const outputPrice =
|
||||
((usePriority && pricingData.output_cost_per_token_priority) ||
|
||||
pricingData.output_cost_per_token ||
|
||||
0) * 1000000
|
||||
const cacheReadPrice =
|
||||
((usePriority && pricingData.cache_read_input_token_cost_priority) ||
|
||||
pricingData.cache_read_input_token_cost ||
|
||||
0) * 1000000
|
||||
|
||||
// OpenAI 模型的特殊处理:
|
||||
// - 如果没有 cache_creation_input_token_cost,缓存创建按普通 input 价格计费
|
||||
|
||||
Reference in New Issue
Block a user