fix(billing): fix OpenAI fast service tier billing and complete its display

- Propagate service_tier across OpenAI HTTP, WS, passthrough, and usage records
- Fix the priority/flex billing logic and normalize fast to priority (see the sketch after the commit metadata)
- Show the service tier and billing breakdown on both the user and admin sides
- Add frontend and backend tests, and fix the full regression failure caused by duplicate persistence of the WS rate-limit signal

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
yangjianbo
2026-03-08 23:22:28 +08:00
parent bcb6444f89
commit 87f4ed591e
29 changed files with 1417 additions and 47 deletions
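
A standalone sketch of the normalization the bullets describe (not the service code; the fast→priority alias and the recognized tier set are taken from normalizeOpenAIServiceTier in the gateway diff below):

package main

import (
	"fmt"
	"strings"
)

// normalizeTier mirrors the commit's intent: "fast" is an alias for
// "priority"; only "priority" and "flex" are recognized paid tiers.
func normalizeTier(raw string) string {
	v := strings.ToLower(strings.TrimSpace(raw))
	if v == "fast" {
		v = "priority"
	}
	switch v {
	case "priority", "flex":
		return v
	default:
		return "" // standard / unrecognized: billed at base prices
	}
}

func main() {
	fmt.Println(normalizeTier(" Fast "))  // priority
	fmt.Println(normalizeTier("flex"))    // flex
	fmt.Println(normalizeTier("default")) // "" (no tier adjustment)
}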

View File

@@ -475,6 +475,7 @@ func usageLogFromServiceUser(l *service.UsageLog) UsageLog {
AccountID: l.AccountID,
RequestID: l.RequestID,
Model: l.Model,
ServiceTier: l.ServiceTier,
ReasoningEffort: l.ReasoningEffort,
GroupID: l.GroupID,
SubscriptionID: l.SubscriptionID,

View File

@@ -71,3 +71,29 @@ func TestRequestTypeStringPtrNil(t *testing.T) {
t.Parallel()
require.Nil(t, requestTypeStringPtr(nil))
}
func TestUsageLogFromService_IncludesServiceTierForUserAndAdmin(t *testing.T) {
t.Parallel()
serviceTier := "priority"
log := &service.UsageLog{
RequestID: "req_3",
Model: "gpt-5.4",
ServiceTier: &serviceTier,
AccountRateMultiplier: f64Ptr(1.5),
}
userDTO := UsageLogFromService(log)
adminDTO := UsageLogFromServiceAdmin(log)
require.NotNil(t, userDTO.ServiceTier)
require.Equal(t, serviceTier, *userDTO.ServiceTier)
require.NotNil(t, adminDTO.ServiceTier)
require.Equal(t, serviceTier, *adminDTO.ServiceTier)
require.NotNil(t, adminDTO.AccountRateMultiplier)
require.InDelta(t, 1.5, *adminDTO.AccountRateMultiplier, 1e-12)
}
func f64Ptr(value float64) *float64 {
return &value
}

View File

@@ -315,6 +315,8 @@ type UsageLog struct {
AccountID int64 `json:"account_id"`
RequestID string `json:"request_id"`
Model string `json:"model"`
// ServiceTier records the OpenAI service tier used for billing, e.g. "priority" / "flex".
ServiceTier *string `json:"service_tier,omitempty"`
// ReasoningEffort is the request's reasoning effort level (OpenAI Responses API).
// nil means not provided / not applicable.
ReasoningEffort *string `json:"reasoning_effort,omitempty"`

View File

@@ -22,7 +22,7 @@ import (
"github.com/lib/pq"
)
const usageLogSelectColumns = "id, user_id, api_key_id, account_id, request_id, model, group_id, subscription_id, input_tokens, output_tokens, cache_creation_tokens, cache_read_tokens, cache_creation_5m_tokens, cache_creation_1h_tokens, input_cost, output_cost, cache_creation_cost, cache_read_cost, total_cost, actual_cost, rate_multiplier, account_rate_multiplier, billing_type, request_type, stream, openai_ws_mode, duration_ms, first_token_ms, user_agent, ip_address, image_count, image_size, media_type, reasoning_effort, cache_ttl_overridden, created_at"
const usageLogSelectColumns = "id, user_id, api_key_id, account_id, request_id, model, group_id, subscription_id, input_tokens, output_tokens, cache_creation_tokens, cache_read_tokens, cache_creation_5m_tokens, cache_creation_1h_tokens, input_cost, output_cost, cache_creation_cost, cache_read_cost, total_cost, actual_cost, rate_multiplier, account_rate_multiplier, billing_type, request_type, stream, openai_ws_mode, duration_ms, first_token_ms, user_agent, ip_address, image_count, image_size, media_type, service_tier, reasoning_effort, cache_ttl_overridden, created_at"
// dateFormatWhitelist maps the granularity parameter to a PostgreSQL TO_CHAR format string so that external input is never spliced directly into SQL
var dateFormatWhitelist = map[string]string{
@@ -135,6 +135,7 @@ func (r *usageLogRepository) Create(ctx context.Context, log *service.UsageLog)
image_count,
image_size,
media_type,
service_tier,
reasoning_effort,
cache_ttl_overridden,
created_at
@@ -144,7 +145,7 @@ func (r *usageLogRepository) Create(ctx context.Context, log *service.UsageLog)
$8, $9, $10, $11,
$12, $13,
$14, $15, $16, $17, $18, $19,
$20, $21, $22, $23, $24, $25, $26, $27, $28, $29, $30, $31, $32, $33, $34, $35
$20, $21, $22, $23, $24, $25, $26, $27, $28, $29, $30, $31, $32, $33, $34, $35, $36
)
ON CONFLICT (request_id, api_key_id) DO NOTHING
RETURNING id, created_at
@@ -158,6 +159,7 @@ func (r *usageLogRepository) Create(ctx context.Context, log *service.UsageLog)
ipAddress := nullString(log.IPAddress)
imageSize := nullString(log.ImageSize)
mediaType := nullString(log.MediaType)
serviceTier := nullString(log.ServiceTier)
reasoningEffort := nullString(log.ReasoningEffort)
var requestIDArg any
@@ -198,6 +200,7 @@ func (r *usageLogRepository) Create(ctx context.Context, log *service.UsageLog)
log.ImageCount,
imageSize,
mediaType,
serviceTier,
reasoningEffort,
log.CacheTTLOverridden,
createdAt,
@@ -2505,6 +2508,7 @@ func scanUsageLog(scanner interface{ Scan(...any) error }) (*service.UsageLog, e
imageCount int
imageSize sql.NullString
mediaType sql.NullString
serviceTier sql.NullString
reasoningEffort sql.NullString
cacheTTLOverridden bool
createdAt time.Time
@@ -2544,6 +2548,7 @@ func scanUsageLog(scanner interface{ Scan(...any) error }) (*service.UsageLog, e
&imageCount,
&imageSize,
&mediaType,
&serviceTier,
&reasoningEffort,
&cacheTTLOverridden,
&createdAt,
@@ -2614,6 +2619,9 @@ func scanUsageLog(scanner interface{ Scan(...any) error }) (*service.UsageLog, e
if mediaType.Valid {
log.MediaType = &mediaType.String
}
if serviceTier.Valid {
log.ServiceTier = &serviceTier.String
}
if reasoningEffort.Valid {
log.ReasoningEffort = &reasoningEffort.String
}

View File

@@ -71,6 +71,7 @@ func TestUsageLogRepositoryCreateSyncRequestTypeAndLegacyFields(t *testing.T) {
log.ImageCount,
sqlmock.AnyArg(), // image_size
sqlmock.AnyArg(), // media_type
sqlmock.AnyArg(), // service_tier
sqlmock.AnyArg(), // reasoning_effort
log.CacheTTLOverridden,
createdAt,
@@ -81,12 +82,76 @@ func TestUsageLogRepositoryCreateSyncRequestTypeAndLegacyFields(t *testing.T) {
require.NoError(t, err)
require.True(t, inserted)
require.Equal(t, int64(99), log.ID)
require.Nil(t, log.ServiceTier)
require.Equal(t, service.RequestTypeWSV2, log.RequestType)
require.True(t, log.Stream)
require.True(t, log.OpenAIWSMode)
require.NoError(t, mock.ExpectationsWereMet())
}
func TestUsageLogRepositoryCreate_PersistsServiceTier(t *testing.T) {
db, mock := newSQLMock(t)
repo := &usageLogRepository{sql: db}
createdAt := time.Date(2025, 1, 2, 12, 0, 0, 0, time.UTC)
serviceTier := "priority"
log := &service.UsageLog{
UserID: 1,
APIKeyID: 2,
AccountID: 3,
RequestID: "req-service-tier",
Model: "gpt-5.4",
ServiceTier: &serviceTier,
CreatedAt: createdAt,
}
mock.ExpectQuery("INSERT INTO usage_logs").
WithArgs(
log.UserID,
log.APIKeyID,
log.AccountID,
log.RequestID,
log.Model,
sqlmock.AnyArg(),
sqlmock.AnyArg(),
log.InputTokens,
log.OutputTokens,
log.CacheCreationTokens,
log.CacheReadTokens,
log.CacheCreation5mTokens,
log.CacheCreation1hTokens,
log.InputCost,
log.OutputCost,
log.CacheCreationCost,
log.CacheReadCost,
log.TotalCost,
log.ActualCost,
log.RateMultiplier,
log.AccountRateMultiplier,
log.BillingType,
int16(service.RequestTypeSync),
false,
false,
sqlmock.AnyArg(),
sqlmock.AnyArg(),
sqlmock.AnyArg(),
sqlmock.AnyArg(),
log.ImageCount,
sqlmock.AnyArg(),
sqlmock.AnyArg(),
serviceTier,
sqlmock.AnyArg(),
log.CacheTTLOverridden,
createdAt,
).
WillReturnRows(sqlmock.NewRows([]string{"id", "created_at"}).AddRow(int64(100), createdAt))
inserted, err := repo.Create(context.Background(), log)
require.NoError(t, err)
require.True(t, inserted)
require.NoError(t, mock.ExpectationsWereMet())
}
func TestUsageLogRepositoryListWithFiltersRequestTypePriority(t *testing.T) {
db, mock := newSQLMock(t)
repo := &usageLogRepository{sql: db}
@@ -280,11 +345,14 @@ func TestScanUsageLogRequestTypeAndLegacyFallback(t *testing.T) {
0,
sql.NullString{},
sql.NullString{},
sql.NullString{Valid: true, String: "priority"},
sql.NullString{},
false,
now,
}})
require.NoError(t, err)
require.NotNil(t, log.ServiceTier)
require.Equal(t, "priority", *log.ServiceTier)
require.Equal(t, service.RequestTypeWSV2, log.RequestType)
require.True(t, log.Stream)
require.True(t, log.OpenAIWSMode)
@@ -316,13 +384,53 @@ func TestScanUsageLogRequestTypeAndLegacyFallback(t *testing.T) {
0,
sql.NullString{},
sql.NullString{},
sql.NullString{Valid: true, String: "flex"},
sql.NullString{},
false,
now,
}})
require.NoError(t, err)
require.NotNil(t, log.ServiceTier)
require.Equal(t, "flex", *log.ServiceTier)
require.Equal(t, service.RequestTypeStream, log.RequestType)
require.True(t, log.Stream)
require.False(t, log.OpenAIWSMode)
})
t.Run("service_tier_is_scanned", func(t *testing.T) {
now := time.Now().UTC()
log, err := scanUsageLog(usageLogScannerStub{values: []any{
int64(3),
int64(12),
int64(22),
int64(32),
sql.NullString{Valid: true, String: "req-3"},
"gpt-5.4",
sql.NullInt64{},
sql.NullInt64{},
1, 2, 3, 4, 5, 6,
0.1, 0.2, 0.3, 0.4, 1.0, 0.9,
1.0,
sql.NullFloat64{},
int16(service.BillingTypeBalance),
int16(service.RequestTypeSync),
false,
false,
sql.NullInt64{},
sql.NullInt64{},
sql.NullString{},
sql.NullString{},
0,
sql.NullString{},
sql.NullString{},
sql.NullString{Valid: true, String: "priority"},
sql.NullString{},
false,
now,
}})
require.NoError(t, err)
require.NotNil(t, log.ServiceTier)
require.Equal(t, "priority", *log.ServiceTier)
})
}

View File

@@ -210,8 +210,10 @@ func TestAPIContracts(t *testing.T) {
"sora_video_price_per_request": null,
"sora_video_price_per_request_hd": null,
"claude_code_only": false,
"allow_messages_dispatch": false,
"fallback_group_id": null,
"fallback_group_id_on_invalid_request": null,
"allow_messages_dispatch": false,
"created_at": "2025-01-02T03:04:05Z",
"updated_at": "2025-01-02T03:04:05Z"
}

View File

@@ -43,16 +43,19 @@ type BillingCache interface {
// ModelPricing is the model price configuration; per-token prices follow the LiteLLM format
type ModelPricing struct {
InputPricePerToken float64 // input price per token (USD)
OutputPricePerToken float64 // output price per token (USD)
CacheCreationPricePerToken float64 // cache creation price per token (USD)
CacheReadPricePerToken float64 // cache read price per token (USD)
CacheCreation5mPrice float64 // 5-minute cache creation price per token (USD)
CacheCreation1hPrice float64 // 1-hour cache creation price per token (USD)
SupportsCacheBreakdown bool // whether a detailed cache breakdown is supported
LongContextInputThreshold int // above this threshold the input price is raised for the whole session
LongContextInputMultiplier float64 // long-context whole-session input multiplier
LongContextOutputMultiplier float64 // long-context whole-session output multiplier
InputPricePerToken float64 // input price per token (USD)
InputPricePerTokenPriority float64 // input price per token under the priority service tier (USD)
OutputPricePerToken float64 // output price per token (USD)
OutputPricePerTokenPriority float64 // output price per token under the priority service tier (USD)
CacheCreationPricePerToken float64 // cache creation price per token (USD)
CacheReadPricePerToken float64 // cache read price per token (USD)
CacheReadPricePerTokenPriority float64 // cache read price per token under the priority service tier (USD)
CacheCreation5mPrice float64 // 5-minute cache creation price per token (USD)
CacheCreation1hPrice float64 // 1-hour cache creation price per token (USD)
SupportsCacheBreakdown bool // whether a detailed cache breakdown is supported
LongContextInputThreshold int // above this threshold the input price is raised for the whole session
LongContextInputMultiplier float64 // long-context whole-session input multiplier
LongContextOutputMultiplier float64 // long-context whole-session output multiplier
}
const (
@@ -61,6 +64,28 @@ const (
openAIGPT54LongContextOutputMultiplier = 1.5
)
func normalizeBillingServiceTier(serviceTier string) string {
return strings.ToLower(strings.TrimSpace(serviceTier))
}
func usePriorityServiceTierPricing(serviceTier string, pricing *ModelPricing) bool {
if pricing == nil || normalizeBillingServiceTier(serviceTier) != "priority" {
return false
}
return pricing.InputPricePerTokenPriority > 0 || pricing.OutputPricePerTokenPriority > 0 || pricing.CacheReadPricePerTokenPriority > 0
}
func serviceTierCostMultiplier(serviceTier string) float64 {
switch normalizeBillingServiceTier(serviceTier) {
case "priority":
return 2.0
case "flex":
return 0.5
default:
return 1.0
}
}
// UsageTokens is the number of tokens used
type UsageTokens struct {
InputTokens int
@@ -173,30 +198,60 @@ func (s *BillingService) initFallbackPricing() {
// OpenAI GPT-5.1 (local fallback so billing is not rejected when dynamic pricing is unavailable)
s.fallbackPrices["gpt-5.1"] = &ModelPricing{
InputPricePerToken: 1.25e-6, // $1.25 per MTok
OutputPricePerToken: 10e-6, // $10 per MTok
CacheCreationPricePerToken: 1.25e-6, // $1.25 per MTok
CacheReadPricePerToken: 0.125e-6,
SupportsCacheBreakdown: false,
InputPricePerToken: 1.25e-6, // $1.25 per MTok
InputPricePerTokenPriority: 2.5e-6, // $2.5 per MTok
OutputPricePerToken: 10e-6, // $10 per MTok
OutputPricePerTokenPriority: 20e-6, // $20 per MTok
CacheCreationPricePerToken: 1.25e-6, // $1.25 per MTok
CacheReadPricePerToken: 0.125e-6,
CacheReadPricePerTokenPriority: 0.25e-6,
SupportsCacheBreakdown: false,
}
// OpenAI GPT-5.4 (business-specified prices)
s.fallbackPrices["gpt-5.4"] = &ModelPricing{
InputPricePerToken: 2.5e-6, // $2.5 per MTok
OutputPricePerToken: 15e-6, // $15 per MTok
CacheCreationPricePerToken: 2.5e-6, // $2.5 per MTok
CacheReadPricePerToken: 0.25e-6, // $0.25 per MTok
SupportsCacheBreakdown: false,
LongContextInputThreshold: openAIGPT54LongContextInputThreshold,
LongContextInputMultiplier: openAIGPT54LongContextInputMultiplier,
LongContextOutputMultiplier: openAIGPT54LongContextOutputMultiplier,
InputPricePerToken: 2.5e-6, // $2.5 per MTok
InputPricePerTokenPriority: 5e-6, // $5 per MTok
OutputPricePerToken: 15e-6, // $15 per MTok
OutputPricePerTokenPriority: 30e-6, // $30 per MTok
CacheCreationPricePerToken: 2.5e-6, // $2.5 per MTok
CacheReadPricePerToken: 0.25e-6, // $0.25 per MTok
CacheReadPricePerTokenPriority: 0.5e-6, // $0.5 per MTok
SupportsCacheBreakdown: false,
LongContextInputThreshold: openAIGPT54LongContextInputThreshold,
LongContextInputMultiplier: openAIGPT54LongContextInputMultiplier,
LongContextOutputMultiplier: openAIGPT54LongContextOutputMultiplier,
}
// OpenAI GPT-5.2 (local fallback)
s.fallbackPrices["gpt-5.2"] = &ModelPricing{
InputPricePerToken: 1.75e-6,
InputPricePerTokenPriority: 3.5e-6,
OutputPricePerToken: 14e-6,
OutputPricePerTokenPriority: 28e-6,
CacheCreationPricePerToken: 1.75e-6,
CacheReadPricePerToken: 0.175e-6,
CacheReadPricePerTokenPriority: 0.35e-6,
SupportsCacheBreakdown: false,
}
// Codex family fallback: billed uniformly at GPT-5.1 Codex prices
s.fallbackPrices["gpt-5.1-codex"] = &ModelPricing{
InputPricePerToken: 1.5e-6, // $1.5 per MTok
OutputPricePerToken: 12e-6, // $12 per MTok
CacheCreationPricePerToken: 1.5e-6, // $1.5 per MTok
CacheReadPricePerToken: 0.15e-6,
SupportsCacheBreakdown: false,
InputPricePerToken: 1.5e-6, // $1.5 per MTok
InputPricePerTokenPriority: 3e-6, // $3 per MTok
OutputPricePerToken: 12e-6, // $12 per MTok
OutputPricePerTokenPriority: 24e-6, // $24 per MTok
CacheCreationPricePerToken: 1.5e-6, // $1.5 per MTok
CacheReadPricePerToken: 0.15e-6,
CacheReadPricePerTokenPriority: 0.3e-6,
SupportsCacheBreakdown: false,
}
s.fallbackPrices["gpt-5.2-codex"] = &ModelPricing{
InputPricePerToken: 1.75e-6,
InputPricePerTokenPriority: 3.5e-6,
OutputPricePerToken: 14e-6,
OutputPricePerTokenPriority: 28e-6,
CacheCreationPricePerToken: 1.75e-6,
CacheReadPricePerToken: 0.175e-6,
CacheReadPricePerTokenPriority: 0.35e-6,
SupportsCacheBreakdown: false,
}
s.fallbackPrices["gpt-5.3-codex"] = s.fallbackPrices["gpt-5.1-codex"]
}
@@ -241,6 +296,10 @@ func (s *BillingService) getFallbackPricing(model string) *ModelPricing {
switch normalized {
case "gpt-5.4":
return s.fallbackPrices["gpt-5.4"]
case "gpt-5.2":
return s.fallbackPrices["gpt-5.2"]
case "gpt-5.2-codex":
return s.fallbackPrices["gpt-5.2-codex"]
case "gpt-5.3-codex":
return s.fallbackPrices["gpt-5.3-codex"]
case "gpt-5.1-codex", "gpt-5.1-codex-max", "gpt-5.1-codex-mini", "codex-mini-latest":
@@ -269,16 +328,19 @@ func (s *BillingService) GetModelPricing(model string) (*ModelPricing, error) {
price1h := litellmPricing.CacheCreationInputTokenCostAbove1hr
enableBreakdown := price1h > 0 && price1h > price5m
return s.applyModelSpecificPricingPolicy(model, &ModelPricing{
InputPricePerToken: litellmPricing.InputCostPerToken,
OutputPricePerToken: litellmPricing.OutputCostPerToken,
CacheCreationPricePerToken: litellmPricing.CacheCreationInputTokenCost,
CacheReadPricePerToken: litellmPricing.CacheReadInputTokenCost,
CacheCreation5mPrice: price5m,
CacheCreation1hPrice: price1h,
SupportsCacheBreakdown: enableBreakdown,
LongContextInputThreshold: litellmPricing.LongContextInputTokenThreshold,
LongContextInputMultiplier: litellmPricing.LongContextInputCostMultiplier,
LongContextOutputMultiplier: litellmPricing.LongContextOutputCostMultiplier,
InputPricePerToken: litellmPricing.InputCostPerToken,
InputPricePerTokenPriority: litellmPricing.InputCostPerTokenPriority,
OutputPricePerToken: litellmPricing.OutputCostPerToken,
OutputPricePerTokenPriority: litellmPricing.OutputCostPerTokenPriority,
CacheCreationPricePerToken: litellmPricing.CacheCreationInputTokenCost,
CacheReadPricePerToken: litellmPricing.CacheReadInputTokenCost,
CacheReadPricePerTokenPriority: litellmPricing.CacheReadInputTokenCostPriority,
CacheCreation5mPrice: price5m,
CacheCreation1hPrice: price1h,
SupportsCacheBreakdown: enableBreakdown,
LongContextInputThreshold: litellmPricing.LongContextInputTokenThreshold,
LongContextInputMultiplier: litellmPricing.LongContextInputCostMultiplier,
LongContextOutputMultiplier: litellmPricing.LongContextOutputCostMultiplier,
}), nil
}
}
@@ -295,6 +357,10 @@ func (s *BillingService) GetModelPricing(model string) (*ModelPricing, error) {
// CalculateCost calculates the usage cost
func (s *BillingService) CalculateCost(model string, tokens UsageTokens, rateMultiplier float64) (*CostBreakdown, error) {
return s.CalculateCostWithServiceTier(model, tokens, rateMultiplier, "")
}
func (s *BillingService) CalculateCostWithServiceTier(model string, tokens UsageTokens, rateMultiplier float64, serviceTier string) (*CostBreakdown, error) {
pricing, err := s.GetModelPricing(model)
if err != nil {
return nil, err
@@ -303,6 +369,21 @@ func (s *BillingService) CalculateCost(model string, tokens UsageTokens, rateMul
breakdown := &CostBreakdown{}
inputPricePerToken := pricing.InputPricePerToken
outputPricePerToken := pricing.OutputPricePerToken
cacheReadPricePerToken := pricing.CacheReadPricePerToken
tierMultiplier := 1.0
if usePriorityServiceTierPricing(serviceTier, pricing) {
if pricing.InputPricePerTokenPriority > 0 {
inputPricePerToken = pricing.InputPricePerTokenPriority
}
if pricing.OutputPricePerTokenPriority > 0 {
outputPricePerToken = pricing.OutputPricePerTokenPriority
}
if pricing.CacheReadPricePerTokenPriority > 0 {
cacheReadPricePerToken = pricing.CacheReadPricePerTokenPriority
}
} else {
tierMultiplier = serviceTierCostMultiplier(serviceTier)
}
if s.shouldApplySessionLongContextPricing(tokens, pricing) {
inputPricePerToken *= pricing.LongContextInputMultiplier
outputPricePerToken *= pricing.LongContextOutputMultiplier
@@ -329,7 +410,14 @@ func (s *BillingService) CalculateCost(model string, tokens UsageTokens, rateMul
breakdown.CacheCreationCost = float64(tokens.CacheCreationTokens) * pricing.CacheCreationPricePerToken
}
breakdown.CacheReadCost = float64(tokens.CacheReadTokens) * pricing.CacheReadPricePerToken
breakdown.CacheReadCost = float64(tokens.CacheReadTokens) * cacheReadPricePerToken
if tierMultiplier != 1.0 {
breakdown.InputCost *= tierMultiplier
breakdown.OutputCost *= tierMultiplier
breakdown.CacheCreationCost *= tierMultiplier
breakdown.CacheReadCost *= tierMultiplier
}
// Compute the total cost
breakdown.TotalCost = breakdown.InputCost + breakdown.OutputCost +
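
Worked numbers for the hunk above (a sketch, assuming the fallback gpt-5.4 prices defined earlier in this file; sketchCost is illustrative, not the service's API):

package main

import "fmt"

// sketchCost condenses CalculateCostWithServiceTier's tier handling:
// explicit priority prices replace the base prices at multiplier 1.0,
// otherwise the tier multiplier (2.0 priority / 0.5 flex) scales the base.
func sketchCost(inTok, outTok, inPrice, outPrice, tierMult float64) float64 {
	return (inTok*inPrice + outTok*outPrice) * tierMult
}

func main() {
	// gpt-5.4 priority, explicit priority prices ($5 / $30 per MTok), multiplier 1.0
	fmt.Println(sketchCost(100, 50, 5e-6, 30e-6, 1.0)) // 0.002
	// a model without explicit priority prices: base prices doubled
	fmt.Println(sketchCost(100, 50, 2.5e-6, 15e-6, 2.0)) // 0.002
	// flex: base prices halved
	fmt.Println(sketchCost(100, 50, 2.5e-6, 15e-6, 0.5)) // 0.0005
}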

View File

@@ -522,3 +522,189 @@ func TestCalculateCost_LargeTokenCount(t *testing.T) {
require.False(t, math.IsNaN(cost.TotalCost))
require.False(t, math.IsInf(cost.TotalCost, 0))
}
func TestServiceTierCostMultiplier(t *testing.T) {
require.InDelta(t, 2.0, serviceTierCostMultiplier("priority"), 1e-12)
require.InDelta(t, 2.0, serviceTierCostMultiplier(" Priority "), 1e-12)
require.InDelta(t, 0.5, serviceTierCostMultiplier("flex"), 1e-12)
require.InDelta(t, 1.0, serviceTierCostMultiplier(""), 1e-12)
require.InDelta(t, 1.0, serviceTierCostMultiplier("default"), 1e-12)
}
func TestCalculateCostWithServiceTier_OpenAIPriorityUsesPriorityPricing(t *testing.T) {
svc := newTestBillingService()
tokens := UsageTokens{InputTokens: 100, OutputTokens: 50, CacheReadTokens: 20}
baseCost, err := svc.CalculateCost("gpt-5.1-codex", tokens, 1.0)
require.NoError(t, err)
priorityCost, err := svc.CalculateCostWithServiceTier("gpt-5.1-codex", tokens, 1.0, "priority")
require.NoError(t, err)
require.InDelta(t, baseCost.InputCost*2, priorityCost.InputCost, 1e-10)
require.InDelta(t, baseCost.OutputCost*2, priorityCost.OutputCost, 1e-10)
require.InDelta(t, baseCost.CacheReadCost*2, priorityCost.CacheReadCost, 1e-10)
require.InDelta(t, baseCost.TotalCost*2, priorityCost.TotalCost, 1e-10)
}
func TestCalculateCostWithServiceTier_FlexAppliesHalfMultiplier(t *testing.T) {
svc := newTestBillingService()
tokens := UsageTokens{InputTokens: 100, OutputTokens: 50, CacheCreationTokens: 40, CacheReadTokens: 20}
baseCost, err := svc.CalculateCost("gpt-5.4", tokens, 1.0)
require.NoError(t, err)
flexCost, err := svc.CalculateCostWithServiceTier("gpt-5.4", tokens, 1.0, "flex")
require.NoError(t, err)
require.InDelta(t, baseCost.InputCost*0.5, flexCost.InputCost, 1e-10)
require.InDelta(t, baseCost.OutputCost*0.5, flexCost.OutputCost, 1e-10)
require.InDelta(t, baseCost.CacheCreationCost*0.5, flexCost.CacheCreationCost, 1e-10)
require.InDelta(t, baseCost.CacheReadCost*0.5, flexCost.CacheReadCost, 1e-10)
require.InDelta(t, baseCost.TotalCost*0.5, flexCost.TotalCost, 1e-10)
}
func TestCalculateCostWithServiceTier_PriorityFallsBackToTierMultiplierWithoutExplicitPriorityPrice(t *testing.T) {
svc := newTestBillingService()
tokens := UsageTokens{InputTokens: 120, OutputTokens: 30, CacheCreationTokens: 12, CacheReadTokens: 8}
baseCost, err := svc.CalculateCost("claude-sonnet-4", tokens, 1.0)
require.NoError(t, err)
priorityCost, err := svc.CalculateCostWithServiceTier("claude-sonnet-4", tokens, 1.0, "priority")
require.NoError(t, err)
require.InDelta(t, baseCost.InputCost*2, priorityCost.InputCost, 1e-10)
require.InDelta(t, baseCost.OutputCost*2, priorityCost.OutputCost, 1e-10)
require.InDelta(t, baseCost.CacheCreationCost*2, priorityCost.CacheCreationCost, 1e-10)
require.InDelta(t, baseCost.CacheReadCost*2, priorityCost.CacheReadCost, 1e-10)
require.InDelta(t, baseCost.TotalCost*2, priorityCost.TotalCost, 1e-10)
}
func TestBillingServiceGetModelPricing_UsesDynamicPriorityFields(t *testing.T) {
pricingSvc := &PricingService{
pricingData: map[string]*LiteLLMModelPricing{
"gpt-5.4": {
InputCostPerToken: 2.5e-6,
InputCostPerTokenPriority: 5e-6,
OutputCostPerToken: 15e-6,
OutputCostPerTokenPriority: 30e-6,
CacheCreationInputTokenCost: 2.5e-6,
CacheReadInputTokenCost: 0.25e-6,
CacheReadInputTokenCostPriority: 0.5e-6,
LongContextInputTokenThreshold: 272000,
LongContextInputCostMultiplier: 2.0,
LongContextOutputCostMultiplier: 1.5,
},
},
}
svc := NewBillingService(&config.Config{}, pricingSvc)
pricing, err := svc.GetModelPricing("gpt-5.4")
require.NoError(t, err)
require.InDelta(t, 2.5e-6, pricing.InputPricePerToken, 1e-12)
require.InDelta(t, 5e-6, pricing.InputPricePerTokenPriority, 1e-12)
require.InDelta(t, 15e-6, pricing.OutputPricePerToken, 1e-12)
require.InDelta(t, 30e-6, pricing.OutputPricePerTokenPriority, 1e-12)
require.InDelta(t, 0.25e-6, pricing.CacheReadPricePerToken, 1e-12)
require.InDelta(t, 0.5e-6, pricing.CacheReadPricePerTokenPriority, 1e-12)
require.Equal(t, 272000, pricing.LongContextInputThreshold)
require.InDelta(t, 2.0, pricing.LongContextInputMultiplier, 1e-12)
require.InDelta(t, 1.5, pricing.LongContextOutputMultiplier, 1e-12)
}
func TestBillingServiceGetModelPricing_OpenAIFallbackGpt52Variants(t *testing.T) {
svc := newTestBillingService()
gpt52, err := svc.GetModelPricing("gpt-5.2")
require.NoError(t, err)
require.NotNil(t, gpt52)
require.InDelta(t, 1.75e-6, gpt52.InputPricePerToken, 1e-12)
require.InDelta(t, 3.5e-6, gpt52.InputPricePerTokenPriority, 1e-12)
gpt52Codex, err := svc.GetModelPricing("gpt-5.2-codex")
require.NoError(t, err)
require.NotNil(t, gpt52Codex)
require.InDelta(t, 1.75e-6, gpt52Codex.InputPricePerToken, 1e-12)
require.InDelta(t, 3.5e-6, gpt52Codex.InputPricePerTokenPriority, 1e-12)
require.InDelta(t, 28e-6, gpt52Codex.OutputPricePerTokenPriority, 1e-12)
}
func TestCalculateCostWithServiceTier_PriorityFallsBackToTierMultiplierWhenExplicitPriceMissing(t *testing.T) {
svc := NewBillingService(&config.Config{}, &PricingService{
pricingData: map[string]*LiteLLMModelPricing{
"custom-no-priority": {
InputCostPerToken: 1e-6,
OutputCostPerToken: 2e-6,
CacheCreationInputTokenCost: 0.5e-6,
CacheReadInputTokenCost: 0.25e-6,
},
},
})
tokens := UsageTokens{InputTokens: 100, OutputTokens: 50, CacheCreationTokens: 40, CacheReadTokens: 20}
baseCost, err := svc.CalculateCost("custom-no-priority", tokens, 1.0)
require.NoError(t, err)
priorityCost, err := svc.CalculateCostWithServiceTier("custom-no-priority", tokens, 1.0, "priority")
require.NoError(t, err)
require.InDelta(t, baseCost.InputCost*2, priorityCost.InputCost, 1e-10)
require.InDelta(t, baseCost.OutputCost*2, priorityCost.OutputCost, 1e-10)
require.InDelta(t, baseCost.CacheCreationCost*2, priorityCost.CacheCreationCost, 1e-10)
require.InDelta(t, baseCost.CacheReadCost*2, priorityCost.CacheReadCost, 1e-10)
require.InDelta(t, baseCost.TotalCost*2, priorityCost.TotalCost, 1e-10)
}
func TestGetModelPricing_OpenAIGpt52FallbacksExposePriorityPrices(t *testing.T) {
svc := newTestBillingService()
gpt52, err := svc.GetModelPricing("gpt-5.2")
require.NoError(t, err)
require.InDelta(t, 1.75e-6, gpt52.InputPricePerToken, 1e-12)
require.InDelta(t, 3.5e-6, gpt52.InputPricePerTokenPriority, 1e-12)
require.InDelta(t, 14e-6, gpt52.OutputPricePerToken, 1e-12)
require.InDelta(t, 28e-6, gpt52.OutputPricePerTokenPriority, 1e-12)
gpt52Codex, err := svc.GetModelPricing("gpt-5.2-codex")
require.NoError(t, err)
require.InDelta(t, 1.75e-6, gpt52Codex.InputPricePerToken, 1e-12)
require.InDelta(t, 3.5e-6, gpt52Codex.InputPricePerTokenPriority, 1e-12)
require.InDelta(t, 14e-6, gpt52Codex.OutputPricePerToken, 1e-12)
require.InDelta(t, 28e-6, gpt52Codex.OutputPricePerTokenPriority, 1e-12)
}
func TestGetModelPricing_MapsDynamicPriorityFieldsIntoBillingPricing(t *testing.T) {
svc := NewBillingService(&config.Config{}, &PricingService{
pricingData: map[string]*LiteLLMModelPricing{
"dynamic-tier-model": {
InputCostPerToken: 1e-6,
InputCostPerTokenPriority: 2e-6,
OutputCostPerToken: 3e-6,
OutputCostPerTokenPriority: 6e-6,
CacheCreationInputTokenCost: 4e-6,
CacheCreationInputTokenCostAbove1hr: 5e-6,
CacheReadInputTokenCost: 7e-7,
CacheReadInputTokenCostPriority: 8e-7,
LongContextInputTokenThreshold: 999,
LongContextInputCostMultiplier: 1.5,
LongContextOutputCostMultiplier: 1.25,
},
},
})
pricing, err := svc.GetModelPricing("dynamic-tier-model")
require.NoError(t, err)
require.InDelta(t, 1e-6, pricing.InputPricePerToken, 1e-12)
require.InDelta(t, 2e-6, pricing.InputPricePerTokenPriority, 1e-12)
require.InDelta(t, 3e-6, pricing.OutputPricePerToken, 1e-12)
require.InDelta(t, 6e-6, pricing.OutputPricePerTokenPriority, 1e-12)
require.InDelta(t, 4e-6, pricing.CacheCreation5mPrice, 1e-12)
require.InDelta(t, 5e-6, pricing.CacheCreation1hPrice, 1e-12)
require.True(t, pricing.SupportsCacheBreakdown)
require.InDelta(t, 7e-7, pricing.CacheReadPricePerToken, 1e-12)
require.InDelta(t, 8e-7, pricing.CacheReadPricePerTokenPriority, 1e-12)
require.Equal(t, 999, pricing.LongContextInputThreshold)
require.InDelta(t, 1.5, pricing.LongContextInputMultiplier, 1e-12)
require.InDelta(t, 1.25, pricing.LongContextOutputMultiplier, 1e-12)
}

View File

@@ -334,3 +334,225 @@ func TestOpenAIGatewayServiceRecordUsage_ClampsActualInputTokensToZero(t *testin
require.NotNil(t, usageRepo.lastLog)
require.Equal(t, 0, usageRepo.lastLog.InputTokens)
}
func TestOpenAIGatewayServiceRecordUsage_Gpt54LongContextBillsWholeSession(t *testing.T) {
usageRepo := &openAIRecordUsageLogRepoStub{inserted: true}
userRepo := &openAIRecordUsageUserRepoStub{}
subRepo := &openAIRecordUsageSubRepoStub{}
svc := newOpenAIRecordUsageServiceForTest(usageRepo, userRepo, subRepo, nil)
err := svc.RecordUsage(context.Background(), &OpenAIRecordUsageInput{
Result: &OpenAIForwardResult{
RequestID: "resp_gpt54_long_context",
Usage: OpenAIUsage{
InputTokens: 300000,
OutputTokens: 2000,
},
Model: "gpt-5.4-2026-03-05",
Duration: time.Second,
},
APIKey: &APIKey{ID: 1014},
User: &User{ID: 2014},
Account: &Account{ID: 3014},
})
require.NoError(t, err)
require.NotNil(t, usageRepo.lastLog)
expectedInput := 300000 * 2.5e-6 * 2.0
expectedOutput := 2000 * 15e-6 * 1.5
require.InDelta(t, expectedInput, usageRepo.lastLog.InputCost, 1e-10)
require.InDelta(t, expectedOutput, usageRepo.lastLog.OutputCost, 1e-10)
require.InDelta(t, expectedInput+expectedOutput, usageRepo.lastLog.TotalCost, 1e-10)
require.InDelta(t, (expectedInput+expectedOutput)*1.1, usageRepo.lastLog.ActualCost, 1e-10)
require.Equal(t, 1, userRepo.deductCalls)
}
func TestOpenAIGatewayServiceRecordUsage_ServiceTierPriorityUsesFastPricing(t *testing.T) {
usageRepo := &openAIRecordUsageLogRepoStub{inserted: true}
userRepo := &openAIRecordUsageUserRepoStub{}
subRepo := &openAIRecordUsageSubRepoStub{}
svc := newOpenAIRecordUsageServiceForTest(usageRepo, userRepo, subRepo, nil)
serviceTier := "priority"
usage := OpenAIUsage{InputTokens: 100, OutputTokens: 50}
err := svc.RecordUsage(context.Background(), &OpenAIRecordUsageInput{
Result: &OpenAIForwardResult{
RequestID: "resp_service_tier_priority",
ServiceTier: &serviceTier,
Usage: usage,
Model: "gpt-5.4",
Duration: time.Second,
},
APIKey: &APIKey{ID: 1015},
User: &User{ID: 2015},
Account: &Account{ID: 3015},
})
require.NoError(t, err)
require.NotNil(t, usageRepo.lastLog)
require.NotNil(t, usageRepo.lastLog.ServiceTier)
require.Equal(t, serviceTier, *usageRepo.lastLog.ServiceTier)
baseCost, calcErr := svc.billingService.CalculateCost("gpt-5.4", UsageTokens{InputTokens: 100, OutputTokens: 50}, 1.0)
require.NoError(t, calcErr)
require.InDelta(t, baseCost.TotalCost*2, usageRepo.lastLog.TotalCost, 1e-10)
}
func TestOpenAIGatewayServiceRecordUsage_ServiceTierFlexHalvesCost(t *testing.T) {
usageRepo := &openAIRecordUsageLogRepoStub{inserted: true}
userRepo := &openAIRecordUsageUserRepoStub{}
subRepo := &openAIRecordUsageSubRepoStub{}
svc := newOpenAIRecordUsageServiceForTest(usageRepo, userRepo, subRepo, nil)
serviceTier := "flex"
usage := OpenAIUsage{InputTokens: 100, OutputTokens: 50, CacheReadInputTokens: 20}
err := svc.RecordUsage(context.Background(), &OpenAIRecordUsageInput{
Result: &OpenAIForwardResult{
RequestID: "resp_service_tier_flex",
ServiceTier: &serviceTier,
Usage: usage,
Model: "gpt-5.4",
Duration: time.Second,
},
APIKey: &APIKey{ID: 1016},
User: &User{ID: 2016},
Account: &Account{ID: 3016},
})
require.NoError(t, err)
require.NotNil(t, usageRepo.lastLog)
baseCost, calcErr := svc.billingService.CalculateCost("gpt-5.4", UsageTokens{InputTokens: 80, OutputTokens: 50, CacheReadTokens: 20}, 1.0)
require.NoError(t, calcErr)
require.InDelta(t, baseCost.TotalCost*0.5, usageRepo.lastLog.TotalCost, 1e-10)
}
func TestNormalizeOpenAIServiceTier(t *testing.T) {
t.Run("fast maps to priority", func(t *testing.T) {
got := normalizeOpenAIServiceTier(" fast ")
require.NotNil(t, got)
require.Equal(t, "priority", *got)
})
t.Run("default ignored", func(t *testing.T) {
require.Nil(t, normalizeOpenAIServiceTier("default"))
})
t.Run("invalid ignored", func(t *testing.T) {
require.Nil(t, normalizeOpenAIServiceTier("turbo"))
})
}
func TestExtractOpenAIServiceTier(t *testing.T) {
require.Equal(t, "priority", *extractOpenAIServiceTier(map[string]any{"service_tier": "fast"}))
require.Equal(t, "flex", *extractOpenAIServiceTier(map[string]any{"service_tier": "flex"}))
require.Nil(t, extractOpenAIServiceTier(map[string]any{"service_tier": 1}))
require.Nil(t, extractOpenAIServiceTier(nil))
}
func TestExtractOpenAIServiceTierFromBody(t *testing.T) {
require.Equal(t, "priority", *extractOpenAIServiceTierFromBody([]byte(`{"service_tier":"fast"}`)))
require.Equal(t, "flex", *extractOpenAIServiceTierFromBody([]byte(`{"service_tier":"flex"}`)))
require.Nil(t, extractOpenAIServiceTierFromBody([]byte(`{"service_tier":"default"}`)))
require.Nil(t, extractOpenAIServiceTierFromBody(nil))
}
func TestOpenAIGatewayServiceRecordUsage_UsesBillingModelAndMetadataFields(t *testing.T) {
usageRepo := &openAIRecordUsageLogRepoStub{inserted: true}
userRepo := &openAIRecordUsageUserRepoStub{}
subRepo := &openAIRecordUsageSubRepoStub{}
svc := newOpenAIRecordUsageServiceForTest(usageRepo, userRepo, subRepo, nil)
serviceTier := "priority"
reasoning := "high"
err := svc.RecordUsage(context.Background(), &OpenAIRecordUsageInput{
Result: &OpenAIForwardResult{
RequestID: "resp_billing_model_override",
BillingModel: "gpt-5.1-codex",
Model: "gpt-5.1",
ServiceTier: &serviceTier,
ReasoningEffort: &reasoning,
Usage: OpenAIUsage{
InputTokens: 20,
OutputTokens: 10,
},
Duration: 2 * time.Second,
FirstTokenMs: func() *int { v := 120; return &v }(),
},
APIKey: &APIKey{ID: 10, GroupID: i64p(11), Group: &Group{ID: 11, RateMultiplier: 1.2}},
User: &User{ID: 20},
Account: &Account{ID: 30},
UserAgent: "codex-cli/1.0",
IPAddress: "127.0.0.1",
})
require.NoError(t, err)
require.NotNil(t, usageRepo.lastLog)
require.Equal(t, "gpt-5.1-codex", usageRepo.lastLog.Model)
require.NotNil(t, usageRepo.lastLog.ServiceTier)
require.Equal(t, serviceTier, *usageRepo.lastLog.ServiceTier)
require.NotNil(t, usageRepo.lastLog.ReasoningEffort)
require.Equal(t, reasoning, *usageRepo.lastLog.ReasoningEffort)
require.NotNil(t, usageRepo.lastLog.UserAgent)
require.Equal(t, "codex-cli/1.0", *usageRepo.lastLog.UserAgent)
require.NotNil(t, usageRepo.lastLog.IPAddress)
require.Equal(t, "127.0.0.1", *usageRepo.lastLog.IPAddress)
require.NotNil(t, usageRepo.lastLog.GroupID)
require.Equal(t, int64(11), *usageRepo.lastLog.GroupID)
require.Equal(t, 1, userRepo.deductCalls)
}
func TestOpenAIGatewayServiceRecordUsage_SubscriptionBillingSetsSubscriptionFields(t *testing.T) {
usageRepo := &openAIRecordUsageLogRepoStub{inserted: true}
userRepo := &openAIRecordUsageUserRepoStub{}
subRepo := &openAIRecordUsageSubRepoStub{}
svc := newOpenAIRecordUsageServiceForTest(usageRepo, userRepo, subRepo, nil)
subscription := &UserSubscription{ID: 99}
err := svc.RecordUsage(context.Background(), &OpenAIRecordUsageInput{
Result: &OpenAIForwardResult{
RequestID: "resp_subscription_billing",
Usage: OpenAIUsage{InputTokens: 10, OutputTokens: 5},
Model: "gpt-5.1",
Duration: time.Second,
},
APIKey: &APIKey{ID: 100, GroupID: i64p(88), Group: &Group{ID: 88, SubscriptionType: SubscriptionTypeSubscription}},
User: &User{ID: 200},
Account: &Account{ID: 300},
Subscription: subscription,
})
require.NoError(t, err)
require.NotNil(t, usageRepo.lastLog)
require.Equal(t, BillingTypeSubscription, usageRepo.lastLog.BillingType)
require.NotNil(t, usageRepo.lastLog.SubscriptionID)
require.Equal(t, subscription.ID, *usageRepo.lastLog.SubscriptionID)
require.Equal(t, 1, subRepo.incrementCalls)
require.Equal(t, 0, userRepo.deductCalls)
}
func TestOpenAIGatewayServiceRecordUsage_SimpleModeSkipsBillingAfterPersist(t *testing.T) {
usageRepo := &openAIRecordUsageLogRepoStub{inserted: true}
userRepo := &openAIRecordUsageUserRepoStub{}
subRepo := &openAIRecordUsageSubRepoStub{}
svc := newOpenAIRecordUsageServiceForTest(usageRepo, userRepo, subRepo, nil)
svc.cfg.RunMode = config.RunModeSimple
err := svc.RecordUsage(context.Background(), &OpenAIRecordUsageInput{
Result: &OpenAIForwardResult{
RequestID: "resp_simple_mode",
Usage: OpenAIUsage{InputTokens: 10, OutputTokens: 5},
Model: "gpt-5.1",
Duration: time.Second,
},
APIKey: &APIKey{ID: 1000},
User: &User{ID: 2000},
Account: &Account{ID: 3000},
})
require.NoError(t, err)
require.Equal(t, 1, usageRepo.calls)
require.Equal(t, 0, userRepo.deductCalls)
require.Equal(t, 0, subRepo.incrementCalls)
}

View File

@@ -213,6 +213,9 @@ type OpenAIForwardResult struct {
// This is set by the Anthropic Messages conversion path where
// the mapped upstream model differs from the client-facing model.
BillingModel string
// ServiceTier records the OpenAI Responses API service tier, e.g. "priority" / "flex".
// Nil means the request did not specify a recognized tier.
ServiceTier *string
// ReasoningEffort is extracted from request body (reasoning.effort) or derived from model suffix.
// Stored for usage records display; nil means not provided / not applicable.
ReasoningEffort *string
@@ -2036,11 +2039,13 @@ func (s *OpenAIGatewayService) Forward(ctx context.Context, c *gin.Context, acco
}
reasoningEffort := extractOpenAIReasoningEffort(reqBody, originalModel)
serviceTier := extractOpenAIServiceTier(reqBody)
return &OpenAIForwardResult{
RequestID: resp.Header.Get("x-request-id"),
Usage: *usage,
Model: originalModel,
ServiceTier: serviceTier,
ReasoningEffort: reasoningEffort,
Stream: reqStream,
OpenAIWSMode: false,
@@ -2195,6 +2200,7 @@ func (s *OpenAIGatewayService) forwardOpenAIPassthrough(
RequestID: resp.Header.Get("x-request-id"),
Usage: *usage,
Model: reqModel,
ServiceTier: extractOpenAIServiceTierFromBody(body),
ReasoningEffort: reasoningEffort,
Stream: reqStream,
OpenAIWSMode: false,
@@ -3628,7 +3634,11 @@ func (s *OpenAIGatewayService) RecordUsage(ctx context.Context, input *OpenAIRec
if result.BillingModel != "" {
billingModel = result.BillingModel
}
cost, err := s.billingService.CalculateCost(billingModel, tokens, multiplier)
serviceTier := ""
if result.ServiceTier != nil {
serviceTier = strings.TrimSpace(*result.ServiceTier)
}
cost, err := s.billingService.CalculateCostWithServiceTier(billingModel, tokens, multiplier, serviceTier)
if err != nil {
cost = &CostBreakdown{ActualCost: 0}
}
@@ -3649,6 +3659,7 @@ func (s *OpenAIGatewayService) RecordUsage(ctx context.Context, input *OpenAIRec
AccountID: account.ID,
RequestID: result.RequestID,
Model: billingModel,
ServiceTier: result.ServiceTier,
ReasoningEffort: result.ReasoningEffort,
InputTokens: actualInputTokens,
OutputTokens: result.Usage.OutputTokens,
@@ -4047,6 +4058,40 @@ func extractOpenAIReasoningEffortFromBody(body []byte, requestedModel string) *s
return &value
}
func extractOpenAIServiceTier(reqBody map[string]any) *string {
if reqBody == nil {
return nil
}
raw, ok := reqBody["service_tier"].(string)
if !ok {
return nil
}
return normalizeOpenAIServiceTier(raw)
}
func extractOpenAIServiceTierFromBody(body []byte) *string {
if len(body) == 0 {
return nil
}
return normalizeOpenAIServiceTier(gjson.GetBytes(body, "service_tier").String())
}
func normalizeOpenAIServiceTier(raw string) *string {
value := strings.ToLower(strings.TrimSpace(raw))
if value == "" {
return nil
}
if value == "fast" {
value = "priority"
}
switch value {
case "priority", "flex":
return &value
default:
return nil
}
}
func getOpenAIRequestBodyMap(c *gin.Context, body []byte) (map[string]any, error) {
if c != nil {
if cached, ok := c.Get(OpenAIParsedRequestBodyKey); ok {

View File

@@ -671,7 +671,7 @@ func TestOpenAIGatewayService_OAuthPassthrough_StreamingSetsFirstTokenMs(t *test
c.Request = httptest.NewRequest(http.MethodPost, "/v1/responses", bytes.NewReader(nil))
c.Request.Header.Set("User-Agent", "codex_cli_rs/0.1.0")
originalBody := []byte(`{"model":"gpt-5.2","stream":true,"input":[{"type":"text","text":"hi"}]}`)
originalBody := []byte(`{"model":"gpt-5.2","stream":true,"service_tier":"fast","input":[{"type":"text","text":"hi"}]}`)
upstreamSSE := strings.Join([]string{
`data: {"type":"response.output_text.delta","delta":"h"}`,
@@ -711,6 +711,8 @@ func TestOpenAIGatewayService_OAuthPassthrough_StreamingSetsFirstTokenMs(t *test
require.GreaterOrEqual(t, time.Since(start), time.Duration(0))
require.NotNil(t, result.FirstTokenMs)
require.GreaterOrEqual(t, *result.FirstTokenMs, 0)
require.NotNil(t, result.ServiceTier)
require.Equal(t, "priority", *result.ServiceTier)
}
func TestOpenAIGatewayService_OAuthPassthrough_StreamClientDisconnectStillCollectsUsage(t *testing.T) {
@@ -777,7 +779,7 @@ func TestOpenAIGatewayService_APIKeyPassthrough_PreservesBodyAndUsesResponsesEnd
c.Request.Header.Set("User-Agent", "curl/8.0")
c.Request.Header.Set("X-Test", "keep")
originalBody := []byte(`{"model":"gpt-5.2","stream":false,"max_output_tokens":128,"input":[{"type":"text","text":"hi"}]}`)
originalBody := []byte(`{"model":"gpt-5.2","stream":false,"service_tier":"flex","max_output_tokens":128,"input":[{"type":"text","text":"hi"}]}`)
resp := &http.Response{
StatusCode: http.StatusOK,
Header: http.Header{"Content-Type": []string{"application/json"}, "x-request-id": []string{"rid"}},
@@ -803,8 +805,11 @@ func TestOpenAIGatewayService_APIKeyPassthrough_PreservesBodyAndUsesResponsesEnd
RateMultiplier: f64p(1),
}
_, err := svc.Forward(context.Background(), c, account, originalBody)
result, err := svc.Forward(context.Background(), c, account, originalBody)
require.NoError(t, err)
require.NotNil(t, result)
require.NotNil(t, result.ServiceTier)
require.Equal(t, "flex", *result.ServiceTier)
require.NotNil(t, upstream.lastReq)
require.Equal(t, originalBody, upstream.lastBody)
require.Equal(t, "https://api.openai.com/v1/responses", upstream.lastReq.URL.String())

View File

@@ -2302,6 +2302,7 @@ func (s *OpenAIGatewayService) forwardOpenAIWSV2(
RequestID: responseID,
Usage: *usage,
Model: originalModel,
ServiceTier: extractOpenAIServiceTier(reqBody),
ReasoningEffort: extractOpenAIReasoningEffort(reqBody, originalModel),
Stream: reqStream,
OpenAIWSMode: true,
@@ -2913,6 +2914,7 @@ func (s *OpenAIGatewayService) ProxyResponsesWebSocketFromClient(
RequestID: responseID,
Usage: usage,
Model: originalModel,
ServiceTier: extractOpenAIServiceTierFromBody(payload),
ReasoningEffort: extractOpenAIReasoningEffortFromBody(payload, originalModel),
Stream: reqStream,
OpenAIWSMode: true,

View File

@@ -399,7 +399,7 @@ func TestOpenAIGatewayService_ProxyResponsesWebSocketFromClient_PassthroughModeR
}()
writeCtx, cancelWrite := context.WithTimeout(context.Background(), 3*time.Second)
err = clientConn.Write(writeCtx, coderws.MessageText, []byte(`{"type":"response.create","model":"gpt-5.1","stream":false}`))
err = clientConn.Write(writeCtx, coderws.MessageText, []byte(`{"type":"response.create","model":"gpt-5.1","stream":false,"service_tier":"fast"}`))
cancelWrite()
require.NoError(t, err)
@@ -424,6 +424,8 @@ func TestOpenAIGatewayService_ProxyResponsesWebSocketFromClient_PassthroughModeR
require.True(t, result.OpenAIWSMode)
require.Equal(t, 2, result.Usage.InputTokens)
require.Equal(t, 3, result.Usage.OutputTokens)
require.NotNil(t, result.ServiceTier)
require.Equal(t, "priority", *result.ServiceTier)
case <-time.After(2 * time.Second):
t.Fatal("未收到 passthrough turn 结果回调")
}
@@ -2593,7 +2595,7 @@ func TestOpenAIGatewayService_ProxyResponsesWebSocketFromClient_ClientDisconnect
require.NoError(t, err)
writeCtx, cancelWrite := context.WithTimeout(context.Background(), 3*time.Second)
err = clientConn.Write(writeCtx, coderws.MessageText, []byte(`{"type":"response.create","model":"custom-original-model","stream":false}`))
err = clientConn.Write(writeCtx, coderws.MessageText, []byte(`{"type":"response.create","model":"custom-original-model","stream":false,"service_tier":"flex"}`))
cancelWrite()
require.NoError(t, err)
// Close the client immediately to simulate a client disconnect during the relay.
@@ -2611,6 +2613,8 @@ func TestOpenAIGatewayService_ProxyResponsesWebSocketFromClient_ClientDisconnect
require.Equal(t, "resp_ingress_disconnect", result.RequestID)
require.Equal(t, 2, result.Usage.InputTokens)
require.Equal(t, 1, result.Usage.OutputTokens)
require.NotNil(t, result.ServiceTier)
require.Equal(t, "flex", *result.ServiceTier)
case <-time.After(2 * time.Second):
t.Fatal("未收到断连后的 turn 结果回调")
}

View File

@@ -77,6 +77,7 @@ func (s *OpenAIGatewayService) proxyResponsesWebSocketV2Passthrough(
return errors.New("token is empty")
}
requestModel := strings.TrimSpace(gjson.GetBytes(firstClientMessage, "model").String())
requestServiceTier := extractOpenAIServiceTierFromBody(firstClientMessage)
requestPreviousResponseID := strings.TrimSpace(gjson.GetBytes(firstClientMessage, "previous_response_id").String())
logOpenAIWSV2Passthrough(
"relay_start account_id=%d model=%s previous_response_id=%s first_message_type=%s first_message_bytes=%d",
@@ -178,6 +179,7 @@ func (s *OpenAIGatewayService) proxyResponsesWebSocketV2Passthrough(
CacheReadInputTokens: turn.Usage.CacheReadInputTokens,
},
Model: turn.RequestModel,
ServiceTier: requestServiceTier,
Stream: true,
OpenAIWSMode: true,
ResponseHeaders: cloneHeader(handshakeHeaders),
@@ -225,6 +227,7 @@ func (s *OpenAIGatewayService) proxyResponsesWebSocketV2Passthrough(
CacheReadInputTokens: relayResult.Usage.CacheReadInputTokens,
},
Model: relayResult.RequestModel,
ServiceTier: requestServiceTier,
Stream: true,
OpenAIWSMode: true,
ResponseHeaders: cloneHeader(handshakeHeaders),

View File

@@ -40,13 +40,17 @@ var (
// Keep only the fields we need; use pointers to handle values that may be missing
type LiteLLMModelPricing struct {
InputCostPerToken float64 `json:"input_cost_per_token"`
InputCostPerTokenPriority float64 `json:"input_cost_per_token_priority"`
OutputCostPerToken float64 `json:"output_cost_per_token"`
OutputCostPerTokenPriority float64 `json:"output_cost_per_token_priority"`
CacheCreationInputTokenCost float64 `json:"cache_creation_input_token_cost"`
CacheCreationInputTokenCostAbove1hr float64 `json:"cache_creation_input_token_cost_above_1hr"`
CacheReadInputTokenCost float64 `json:"cache_read_input_token_cost"`
CacheReadInputTokenCostPriority float64 `json:"cache_read_input_token_cost_priority"`
LongContextInputTokenThreshold int `json:"long_context_input_token_threshold,omitempty"`
LongContextInputCostMultiplier float64 `json:"long_context_input_cost_multiplier,omitempty"`
LongContextOutputCostMultiplier float64 `json:"long_context_output_cost_multiplier,omitempty"`
SupportsServiceTier bool `json:"supports_service_tier"`
LiteLLMProvider string `json:"litellm_provider"`
Mode string `json:"mode"`
SupportsPromptCaching bool `json:"supports_prompt_caching"`
@@ -62,10 +66,14 @@ type PricingRemoteClient interface {
// LiteLLMRawEntry is used to parse the raw JSON data
type LiteLLMRawEntry struct {
InputCostPerToken *float64 `json:"input_cost_per_token"`
InputCostPerTokenPriority *float64 `json:"input_cost_per_token_priority"`
OutputCostPerToken *float64 `json:"output_cost_per_token"`
OutputCostPerTokenPriority *float64 `json:"output_cost_per_token_priority"`
CacheCreationInputTokenCost *float64 `json:"cache_creation_input_token_cost"`
CacheCreationInputTokenCostAbove1hr *float64 `json:"cache_creation_input_token_cost_above_1hr"`
CacheReadInputTokenCost *float64 `json:"cache_read_input_token_cost"`
CacheReadInputTokenCostPriority *float64 `json:"cache_read_input_token_cost_priority"`
SupportsServiceTier bool `json:"supports_service_tier"`
LiteLLMProvider string `json:"litellm_provider"`
Mode string `json:"mode"`
SupportsPromptCaching bool `json:"supports_prompt_caching"`
@@ -324,14 +332,21 @@ func (s *PricingService) parsePricingData(body []byte) (map[string]*LiteLLMModel
LiteLLMProvider: entry.LiteLLMProvider,
Mode: entry.Mode,
SupportsPromptCaching: entry.SupportsPromptCaching,
SupportsServiceTier: entry.SupportsServiceTier,
}
if entry.InputCostPerToken != nil {
pricing.InputCostPerToken = *entry.InputCostPerToken
}
if entry.InputCostPerTokenPriority != nil {
pricing.InputCostPerTokenPriority = *entry.InputCostPerTokenPriority
}
if entry.OutputCostPerToken != nil {
pricing.OutputCostPerToken = *entry.OutputCostPerToken
}
if entry.OutputCostPerTokenPriority != nil {
pricing.OutputCostPerTokenPriority = *entry.OutputCostPerTokenPriority
}
if entry.CacheCreationInputTokenCost != nil {
pricing.CacheCreationInputTokenCost = *entry.CacheCreationInputTokenCost
}
@@ -341,6 +356,9 @@ func (s *PricingService) parsePricingData(body []byte) (map[string]*LiteLLMModel
if entry.CacheReadInputTokenCost != nil {
pricing.CacheReadInputTokenCost = *entry.CacheReadInputTokenCost
}
if entry.CacheReadInputTokenCostPriority != nil {
pricing.CacheReadInputTokenCostPriority = *entry.CacheReadInputTokenCostPriority
}
if entry.OutputCostPerImage != nil {
pricing.OutputCostPerImage = *entry.OutputCostPerImage
}

View File

@@ -1,11 +1,40 @@
package service
import (
"encoding/json"
"testing"
"github.com/stretchr/testify/require"
)
func TestParsePricingData_ParsesPriorityAndServiceTierFields(t *testing.T) {
svc := &PricingService{}
body := []byte(`{
"gpt-5.4": {
"input_cost_per_token": 0.0000025,
"input_cost_per_token_priority": 0.000005,
"output_cost_per_token": 0.000015,
"output_cost_per_token_priority": 0.00003,
"cache_creation_input_token_cost": 0.0000025,
"cache_read_input_token_cost": 0.00000025,
"cache_read_input_token_cost_priority": 0.0000005,
"supports_service_tier": true,
"supports_prompt_caching": true,
"litellm_provider": "openai",
"mode": "chat"
}
}`)
data, err := svc.parsePricingData(body)
require.NoError(t, err)
pricing := data["gpt-5.4"]
require.NotNil(t, pricing)
require.InDelta(t, 5e-6, pricing.InputCostPerTokenPriority, 1e-12)
require.InDelta(t, 3e-5, pricing.OutputCostPerTokenPriority, 1e-12)
require.InDelta(t, 5e-7, pricing.CacheReadInputTokenCostPriority, 1e-12)
require.True(t, pricing.SupportsServiceTier)
}
func TestGetModelPricing_Gpt53CodexSparkUsesGpt51CodexPricing(t *testing.T) {
sparkPricing := &LiteLLMModelPricing{InputCostPerToken: 1}
gpt53Pricing := &LiteLLMModelPricing{InputCostPerToken: 9}
@@ -68,3 +97,64 @@ func TestGetModelPricing_Gpt54UsesStaticFallbackWhenRemoteMissing(t *testing.T)
require.InDelta(t, 2.0, got.LongContextInputCostMultiplier, 1e-12)
require.InDelta(t, 1.5, got.LongContextOutputCostMultiplier, 1e-12)
}
func TestParsePricingData_PreservesPriorityAndServiceTierFields(t *testing.T) {
raw := map[string]any{
"gpt-5.4": map[string]any{
"input_cost_per_token": 2.5e-6,
"input_cost_per_token_priority": 5e-6,
"output_cost_per_token": 15e-6,
"output_cost_per_token_priority": 30e-6,
"cache_read_input_token_cost": 0.25e-6,
"cache_read_input_token_cost_priority": 0.5e-6,
"supports_service_tier": true,
"supports_prompt_caching": true,
"litellm_provider": "openai",
"mode": "chat",
},
}
body, err := json.Marshal(raw)
require.NoError(t, err)
svc := &PricingService{}
pricingMap, err := svc.parsePricingData(body)
require.NoError(t, err)
pricing := pricingMap["gpt-5.4"]
require.NotNil(t, pricing)
require.InDelta(t, 2.5e-6, pricing.InputCostPerToken, 1e-12)
require.InDelta(t, 5e-6, pricing.InputCostPerTokenPriority, 1e-12)
require.InDelta(t, 15e-6, pricing.OutputCostPerToken, 1e-12)
require.InDelta(t, 30e-6, pricing.OutputCostPerTokenPriority, 1e-12)
require.InDelta(t, 0.25e-6, pricing.CacheReadInputTokenCost, 1e-12)
require.InDelta(t, 0.5e-6, pricing.CacheReadInputTokenCostPriority, 1e-12)
require.True(t, pricing.SupportsServiceTier)
}
func TestParsePricingData_PreservesServiceTierPriorityFields(t *testing.T) {
svc := &PricingService{}
pricingData, err := svc.parsePricingData([]byte(`{
"gpt-5.4": {
"input_cost_per_token": 0.0000025,
"input_cost_per_token_priority": 0.000005,
"output_cost_per_token": 0.000015,
"output_cost_per_token_priority": 0.00003,
"cache_read_input_token_cost": 0.00000025,
"cache_read_input_token_cost_priority": 0.0000005,
"supports_service_tier": true,
"litellm_provider": "openai",
"mode": "chat"
}
}`))
require.NoError(t, err)
pricing := pricingData["gpt-5.4"]
require.NotNil(t, pricing)
require.InDelta(t, 0.0000025, pricing.InputCostPerToken, 1e-12)
require.InDelta(t, 0.000005, pricing.InputCostPerTokenPriority, 1e-12)
require.InDelta(t, 0.000015, pricing.OutputCostPerToken, 1e-12)
require.InDelta(t, 0.00003, pricing.OutputCostPerTokenPriority, 1e-12)
require.InDelta(t, 0.00000025, pricing.CacheReadInputTokenCost, 1e-12)
require.InDelta(t, 0.0000005, pricing.CacheReadInputTokenCostPriority, 1e-12)
require.True(t, pricing.SupportsServiceTier)
}

View File

@@ -98,6 +98,8 @@ type UsageLog struct {
AccountID int64
RequestID string
Model string
// ServiceTier records the OpenAI service tier used for billing, e.g. "priority" / "flex".
ServiceTier *string
// ReasoningEffort is the request's reasoning effort level (OpenAI Responses API),
// e.g. "low" / "medium" / "high" / "xhigh". Nil means not provided / not applicable.
ReasoningEffort *string

View File

@@ -0,0 +1,5 @@
ALTER TABLE usage_logs
ADD COLUMN IF NOT EXISTS service_tier VARCHAR(16);
CREATE INDEX IF NOT EXISTS idx_usage_logs_service_tier_created_at
ON usage_logs (service_tier, created_at);
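
A usage sketch for the new column and composite index (hypothetical helper, not part of this commit; the WHERE shape is what the (service_tier, created_at) index serves, and running it needs a registered driver such as lib/pq):

package usagestats

import (
	"context"
	"database/sql"
	"time"
)

// CountTierUsage counts usage_logs rows for one service tier in a time
// window — an equality-plus-range filter the new index can satisfy.
func CountTierUsage(ctx context.Context, db *sql.DB, tier string, since time.Time) (int64, error) {
	var n int64
	err := db.QueryRowContext(ctx,
		`SELECT COUNT(*) FROM usage_logs WHERE service_tier = $1 AND created_at >= $2`,
		tier, since,
	).Scan(&n)
	return n, err
}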

View File

@@ -228,6 +228,14 @@
<span class="text-gray-400">{{ t('admin.usage.outputCost') }}</span>
<span class="font-medium text-white">${{ tooltipData.output_cost.toFixed(6) }}</span>
</div>
<div v-if="tooltipData && tooltipData.input_tokens > 0" class="flex items-center justify-between gap-4">
<span class="text-gray-400">{{ t('usage.inputTokenPrice') }}</span>
<span class="font-medium text-sky-300">{{ formatTokenPricePerMillion(tooltipData.input_cost, tooltipData.input_tokens) }} {{ t('usage.perMillionTokens') }}</span>
</div>
<div v-if="tooltipData && tooltipData.output_tokens > 0" class="flex items-center justify-between gap-4">
<span class="text-gray-400">{{ t('usage.outputTokenPrice') }}</span>
<span class="font-medium text-violet-300">{{ formatTokenPricePerMillion(tooltipData.output_cost, tooltipData.output_tokens) }} {{ t('usage.perMillionTokens') }}</span>
</div>
<div v-if="tooltipData && tooltipData.cache_creation_cost > 0" class="flex items-center justify-between gap-4">
<span class="text-gray-400">{{ t('admin.usage.cacheCreationCost') }}</span>
<span class="font-medium text-white">${{ tooltipData.cache_creation_cost.toFixed(6) }}</span>
@@ -238,6 +246,10 @@
</div>
</div>
<!-- Rate and Summary -->
<div class="flex items-center justify-between gap-6">
<span class="text-gray-400">{{ t('usage.serviceTier') }}</span>
<span class="font-semibold text-cyan-300">{{ getUsageServiceTierLabel(tooltipData?.service_tier, t) }}</span>
</div>
<div class="flex items-center justify-between gap-6">
<span class="text-gray-400">{{ t('usage.rate') }}</span>
<span class="font-semibold text-blue-400">{{ (tooltipData?.rate_multiplier || 1).toFixed(2) }}x</span>
@@ -271,6 +283,8 @@
import { ref } from 'vue'
import { useI18n } from 'vue-i18n'
import { formatDateTime, formatReasoningEffort } from '@/utils/format'
import { formatTokenPricePerMillion } from '@/utils/usagePricing'
import { getUsageServiceTierLabel } from '@/utils/usageServiceTier'
import { resolveUsageRequestType } from '@/utils/usageRequestType'
import DataTable from '@/components/common/DataTable.vue'
import EmptyState from '@/components/common/EmptyState.vue'

View File

@@ -0,0 +1,111 @@
import { describe, expect, it, vi, beforeEach } from 'vitest'
import { mount } from '@vue/test-utils'
import { nextTick } from 'vue'
import UsageTable from '../UsageTable.vue'
const messages: Record<string, string> = {
'usage.costDetails': 'Cost Breakdown',
'admin.usage.inputCost': 'Input Cost',
'admin.usage.outputCost': 'Output Cost',
'admin.usage.cacheCreationCost': 'Cache Creation Cost',
'admin.usage.cacheReadCost': 'Cache Read Cost',
'usage.inputTokenPrice': 'Input price',
'usage.outputTokenPrice': 'Output price',
'usage.perMillionTokens': '/ 1M tokens',
'usage.serviceTier': 'Service tier',
'usage.serviceTierPriority': 'Fast',
'usage.serviceTierFlex': 'Flex',
'usage.serviceTierStandard': 'Standard',
'usage.rate': 'Rate',
'usage.accountMultiplier': 'Account rate',
'usage.original': 'Original',
'usage.userBilled': 'User billed',
'usage.accountBilled': 'Account billed',
}
vi.mock('vue-i18n', async () => {
const actual = await vi.importActual<typeof import('vue-i18n')>('vue-i18n')
return {
...actual,
useI18n: () => ({
t: (key: string) => messages[key] ?? key,
}),
}
})
const DataTableStub = {
props: ['data'],
template: `
<div>
<div v-for="row in data" :key="row.request_id">
<slot name="cell-cost" :row="row" />
</div>
</div>
`,
}
describe('admin UsageTable tooltip', () => {
beforeEach(() => {
vi.spyOn(HTMLElement.prototype, 'getBoundingClientRect').mockReturnValue({
x: 0,
y: 0,
top: 20,
left: 20,
right: 120,
bottom: 40,
width: 100,
height: 20,
toJSON: () => ({}),
} as DOMRect)
})
it('shows service tier and billing breakdown in cost tooltip', async () => {
const row = {
request_id: 'req-admin-1',
actual_cost: 0.092883,
total_cost: 0.092883,
account_rate_multiplier: 1,
rate_multiplier: 1,
service_tier: 'priority',
input_cost: 0.020285,
output_cost: 0.00303,
cache_creation_cost: 0,
cache_read_cost: 0.069568,
input_tokens: 4057,
output_tokens: 101,
}
const wrapper = mount(UsageTable, {
props: {
data: [row],
loading: false,
columns: [],
},
global: {
stubs: {
DataTable: DataTableStub,
EmptyState: true,
Icon: true,
Teleport: true,
},
},
})
await wrapper.find('.group.relative').trigger('mouseenter')
await nextTick()
const text = wrapper.text()
expect(text).toContain('Service tier')
expect(text).toContain('Fast')
expect(text).toContain('Rate')
expect(text).toContain('1.00x')
expect(text).toContain('Account rate')
expect(text).toContain('User billed')
expect(text).toContain('Account billed')
expect(text).toContain('$0.092883')
expect(text).toContain('$5.0000 / 1M tokens')
expect(text).toContain('$30.0000 / 1M tokens')
expect(text).toContain('$0.069568')
})
})

View File

@@ -0,0 +1,20 @@
import { describe, expect, it } from 'vitest'
import en from '../locales/en'
import zh from '../locales/zh'
describe('usage service tier locale keys', () => {
it('contains zh labels for service tier tooltip', () => {
expect(zh.usage.serviceTier).toBe('服务档位')
expect(zh.usage.serviceTierPriority).toBe('Fast')
expect(zh.usage.serviceTierFlex).toBe('Flex')
expect(zh.usage.serviceTierStandard).toBe('Standard')
})
it('contains en labels for service tier tooltip', () => {
expect(en.usage.serviceTier).toBe('Service tier')
expect(en.usage.serviceTierPriority).toBe('Fast')
expect(en.usage.serviceTierFlex).toBe('Flex')
expect(en.usage.serviceTierStandard).toBe('Standard')
})
})

View File

@@ -722,8 +722,15 @@ export default {
unknown: 'Unknown',
in: 'In',
out: 'Out',
inputTokenPrice: 'Input price',
outputTokenPrice: 'Output price',
perMillionTokens: '/ 1M tokens',
cacheRead: 'Read',
cacheWrite: 'Write',
serviceTier: 'Service tier',
serviceTierPriority: 'Fast',
serviceTierFlex: 'Flex',
serviceTierStandard: 'Standard',
rate: 'Rate',
original: 'Original',
billed: 'Billed',

View File

@@ -727,8 +727,15 @@ export default {
unknown: '未知',
in: '输入',
out: '输出',
inputTokenPrice: '输入单价',
outputTokenPrice: '输出单价',
perMillionTokens: '/ 1M Token',
cacheRead: '读取',
cacheWrite: '写入',
serviceTier: '服务档位',
serviceTierPriority: 'Fast',
serviceTierFlex: 'Flex',
serviceTierStandard: 'Standard',
rate: '倍率',
original: '原始',
billed: '计费',

View File

@@ -927,6 +927,7 @@ export interface UsageLog {
account_id: number | null
request_id: string
model: string
service_tier?: string | null
reasoning_effort?: string | null
group_id: number | null

View File

@@ -0,0 +1,39 @@
import { describe, expect, it } from 'vitest'
import { formatUsageServiceTier, getUsageServiceTierLabel, normalizeUsageServiceTier } from '@/utils/usageServiceTier'
describe('usageServiceTier utils', () => {
it('normalizes fast/default aliases', () => {
expect(normalizeUsageServiceTier('fast')).toBe('priority')
expect(normalizeUsageServiceTier(' default ')).toBe('standard')
expect(normalizeUsageServiceTier('STANDARD')).toBe('standard')
})
it('preserves supported tiers', () => {
expect(normalizeUsageServiceTier('priority')).toBe('priority')
expect(normalizeUsageServiceTier('flex')).toBe('flex')
})
it('formats empty values as standard', () => {
expect(formatUsageServiceTier()).toBe('standard')
expect(formatUsageServiceTier('')).toBe('standard')
})
it('passes through unknown non-empty tiers for display fallback', () => {
expect(normalizeUsageServiceTier('custom-tier')).toBe('custom-tier')
expect(formatUsageServiceTier('custom-tier')).toBe('custom-tier')
})
it('maps tiers to translated labels', () => {
const translate = (key: string) => ({
'usage.serviceTierPriority': 'Fast',
'usage.serviceTierFlex': 'Flex',
'usage.serviceTierStandard': 'Standard',
})[key] ?? key
expect(getUsageServiceTierLabel('fast', translate)).toBe('Fast')
expect(getUsageServiceTierLabel('flex', translate)).toBe('Flex')
expect(getUsageServiceTierLabel(undefined, translate)).toBe('Standard')
expect(getUsageServiceTierLabel('custom-tier', translate)).toBe('custom-tier')
})
})

View File

@@ -0,0 +1,49 @@
export const TOKENS_PER_MILLION = 1_000_000
interface TokenPriceFormatOptions {
  fractionDigits?: number
  withCurrencySymbol?: boolean
  emptyValue?: string
}
function isFiniteNumber(value: unknown): value is number {
  return typeof value === 'number' && Number.isFinite(value)
}
// Cost of a single token; null when cost/tokens are missing, non-finite,
// or tokens is not positive.
export function calculateTokenUnitPrice(
  cost: number | null | undefined,
  tokens: number | null | undefined
): number | null {
  if (!isFiniteNumber(cost) || !isFiniteNumber(tokens) || tokens <= 0) {
    return null
  }
  return cost / tokens
}
// The same unit price scaled to the conventional "$ per 1M tokens" display unit.
export function calculateTokenPricePerMillion(
  cost: number | null | undefined,
  tokens: number | null | undefined
): number | null {
  const unitPrice = calculateTokenUnitPrice(cost, tokens)
  if (unitPrice == null) {
    return null
  }
  return unitPrice * TOKENS_PER_MILLION
}
// Formats the per-million price for tooltips and CSV export; falls back to
// emptyValue (default '-') when the price cannot be computed.
export function formatTokenPricePerMillion(
  cost: number | null | undefined,
  tokens: number | null | undefined,
  options: TokenPriceFormatOptions = {}
): string {
  const pricePerMillion = calculateTokenPricePerMillion(cost, tokens)
  if (pricePerMillion == null) {
    return options.emptyValue ?? '-'
  }
  const fractionDigits = options.fractionDigits ?? 4
  const formatted = pricePerMillion.toFixed(fractionDigits)
  return options.withCurrencySymbol === false ? formatted : `$${formatted}`
}
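
As a quick sanity check, the unit prices asserted in the tooltip tests above fall straight out of this helper. A standalone sketch (the fixture numbers are copied from the tests and are not part of this diff):

import { formatTokenPricePerMillion } from '@/utils/usagePricing'
// Input side of the tooltip fixture: $0.020285 across 4,057 input tokens.
// 0.020285 / 4057 * 1_000_000 = 5, rendered as "$5.0000" with 4 fraction digits.
formatTokenPricePerMillion(0.020285, 4057) // => "$5.0000"
// Output side: 0.00303 / 101 * 1_000_000 = 30 -> "$30.0000".
formatTokenPricePerMillion(0.00303, 101) // => "$30.0000"
// Degenerate inputs fall back to the configured empty value.
formatTokenPricePerMillion(0.01, 0) // => "-"
formatTokenPricePerMillion(null, 100, { emptyValue: 'n/a' }) // => "n/a"
formatTokenPricePerMillion(0.00303, 101, { withCurrencySymbol: false }) // => "30.0000"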

View File

@@ -0,0 +1,25 @@
// Maps raw OpenAI service_tier values onto the canonical set used for
// billing display: "fast" is an alias of "priority", "default" of "standard".
// Unknown non-empty tiers pass through so they can still be rendered.
export function normalizeUsageServiceTier(serviceTier?: string | null): string | null {
  const value = serviceTier?.trim().toLowerCase()
  if (!value) return null
  if (value === 'fast') return 'priority'
  if (value === 'default' || value === 'standard') return 'standard'
  if (value === 'priority' || value === 'flex') return value
  return value
}
// Missing or empty tiers are treated as "standard".
export function formatUsageServiceTier(serviceTier?: string | null): string {
  const normalized = normalizeUsageServiceTier(serviceTier)
  if (!normalized) return 'standard'
  return normalized
}
// Resolves the translated label for a tier; unknown tiers fall back to the
// normalized raw value.
export function getUsageServiceTierLabel(
  serviceTier: string | null | undefined,
  translate: (key: string) => string,
): string {
  const tier = formatUsageServiceTier(serviceTier)
  if (tier === 'priority') return translate('usage.serviceTierPriority')
  if (tier === 'flex') return translate('usage.serviceTierFlex')
  if (tier === 'standard') return translate('usage.serviceTierStandard')
  return tier
}
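
For reference, how the tooltip label is derived end to end (a minimal standalone sketch; the inline translate stub stands in for vue-i18n's t, mirroring the unit test above):

import { getUsageServiceTierLabel, normalizeUsageServiceTier } from '@/utils/usageServiceTier'
const translate = (key: string) =>
  ({
    'usage.serviceTierPriority': 'Fast',
    'usage.serviceTierFlex': 'Flex',
    'usage.serviceTierStandard': 'Standard',
  })[key] ?? key
// Upstream "fast" is an alias of "priority", so both spellings resolve to the
// same "Fast" label in the user and admin cost tooltips.
normalizeUsageServiceTier('fast') // => 'priority'
getUsageServiceTierLabel('fast', translate) // => 'Fast'
getUsageServiceTierLabel(undefined, translate) // => 'Standard'
getUsageServiceTierLabel('custom-tier', translate) // => 'custom-tier' (raw fallback)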

View File

@@ -426,6 +426,14 @@
<span class="text-gray-400">{{ t('admin.usage.outputCost') }}</span>
<span class="font-medium text-white">${{ tooltipData.output_cost.toFixed(6) }}</span>
</div>
<div v-if="tooltipData && tooltipData.input_tokens > 0" class="flex items-center justify-between gap-4">
<span class="text-gray-400">{{ t('usage.inputTokenPrice') }}</span>
<span class="font-medium text-sky-300">{{ formatTokenPricePerMillion(tooltipData.input_cost, tooltipData.input_tokens) }} {{ t('usage.perMillionTokens') }}</span>
</div>
<div v-if="tooltipData && tooltipData.output_tokens > 0" class="flex items-center justify-between gap-4">
<span class="text-gray-400">{{ t('usage.outputTokenPrice') }}</span>
<span class="font-medium text-violet-300">{{ formatTokenPricePerMillion(tooltipData.output_cost, tooltipData.output_tokens) }} {{ t('usage.perMillionTokens') }}</span>
</div>
<div v-if="tooltipData && tooltipData.cache_creation_cost > 0" class="flex items-center justify-between gap-4">
<span class="text-gray-400">{{ t('admin.usage.cacheCreationCost') }}</span>
<span class="font-medium text-white">${{ tooltipData.cache_creation_cost.toFixed(6) }}</span>
@@ -436,6 +444,10 @@
</div>
</div>
<!-- Rate and Summary -->
<div class="flex items-center justify-between gap-6">
<span class="text-gray-400">{{ t('usage.serviceTier') }}</span>
<span class="font-semibold text-cyan-300">{{ getUsageServiceTierLabel(tooltipData?.service_tier, t) }}</span>
</div>
<div class="flex items-center justify-between gap-6">
<span class="text-gray-400">{{ t('usage.rate') }}</span>
<span class="font-semibold text-blue-400"
@@ -478,6 +490,8 @@ import Icon from '@/components/icons/Icon.vue'
import type { UsageLog, ApiKey, UsageQueryParams, UsageStatsResponse } from '@/types'
import type { Column } from '@/components/common/types'
import { formatDateTime, formatReasoningEffort } from '@/utils/format'
import { formatTokenPricePerMillion } from '@/utils/usagePricing'
import { getUsageServiceTierLabel } from '@/utils/usageServiceTier'
import { resolveUsageRequestType } from '@/utils/usageRequestType'
const { t } = useI18n()

View File

@@ -0,0 +1,266 @@
import { describe, expect, it, vi, beforeEach } from 'vitest'
import { flushPromises, mount } from '@vue/test-utils'
import { nextTick } from 'vue'
import UsageView from '../UsageView.vue'
const { query, getStatsByDateRange, list, showError, showWarning, showSuccess, showInfo } = vi.hoisted(() => ({
query: vi.fn(),
getStatsByDateRange: vi.fn(),
list: vi.fn(),
showError: vi.fn(),
showWarning: vi.fn(),
showSuccess: vi.fn(),
showInfo: vi.fn(),
}))
const messages: Record<string, string> = {
'usage.costDetails': 'Cost Breakdown',
'admin.usage.inputCost': 'Input Cost',
'admin.usage.outputCost': 'Output Cost',
'admin.usage.cacheCreationCost': 'Cache Creation Cost',
'admin.usage.cacheReadCost': 'Cache Read Cost',
'usage.inputTokenPrice': 'Input price',
'usage.outputTokenPrice': 'Output price',
'usage.perMillionTokens': '/ 1M tokens',
'usage.serviceTier': 'Service tier',
'usage.serviceTierPriority': 'Fast',
'usage.serviceTierFlex': 'Flex',
'usage.serviceTierStandard': 'Standard',
'usage.rate': 'Rate',
'usage.original': 'Original',
'usage.billed': 'Billed',
'usage.allApiKeys': 'All API Keys',
'usage.apiKeyFilter': 'API Key',
'usage.model': 'Model',
'usage.reasoningEffort': 'Reasoning Effort',
'usage.type': 'Type',
'usage.tokens': 'Tokens',
'usage.cost': 'Cost',
'usage.firstToken': 'First Token',
'usage.duration': 'Duration',
'usage.time': 'Time',
'usage.userAgent': 'User Agent',
}
vi.mock('@/api', () => ({
usageAPI: {
query,
getStatsByDateRange,
},
keysAPI: {
list,
},
}))
vi.mock('@/stores/app', () => ({
useAppStore: () => ({ showError, showWarning, showSuccess, showInfo }),
}))
vi.mock('vue-i18n', async () => {
const actual = await vi.importActual<typeof import('vue-i18n')>('vue-i18n')
return {
...actual,
useI18n: () => ({
t: (key: string) => messages[key] ?? key,
}),
}
})
const AppLayoutStub = { template: '<div><slot /></div>' }
const TablePageLayoutStub = {
template: '<div><slot name="actions" /><slot name="filters" /><slot /></div>',
}
describe('user UsageView tooltip', () => {
beforeEach(() => {
query.mockReset()
getStatsByDateRange.mockReset()
list.mockReset()
showError.mockReset()
showWarning.mockReset()
showSuccess.mockReset()
showInfo.mockReset()
vi.spyOn(HTMLElement.prototype, 'getBoundingClientRect').mockReturnValue({
x: 0,
y: 0,
top: 20,
left: 20,
right: 120,
bottom: 40,
width: 100,
height: 20,
toJSON: () => ({}),
} as DOMRect)
;(globalThis as any).ResizeObserver = class {
observe() {}
disconnect() {}
}
})
it('shows fast service tier and unit prices in user tooltip', async () => {
query.mockResolvedValue({
items: [
{
request_id: 'req-user-1',
actual_cost: 0.092883,
total_cost: 0.092883,
rate_multiplier: 1,
service_tier: 'priority',
input_cost: 0.020285,
output_cost: 0.00303,
cache_creation_cost: 0,
cache_read_cost: 0.069568,
input_tokens: 4057,
output_tokens: 101,
cache_creation_tokens: 0,
cache_read_tokens: 278272,
cache_creation_5m_tokens: 0,
cache_creation_1h_tokens: 0,
image_count: 0,
image_size: null,
first_token_ms: null,
duration_ms: 1,
created_at: '2026-03-08T00:00:00Z',
},
],
total: 1,
pages: 1,
})
getStatsByDateRange.mockResolvedValue({
total_requests: 1,
total_tokens: 100,
total_cost: 0.1,
avg_duration_ms: 1,
})
list.mockResolvedValue({ items: [] })
const wrapper = mount(UsageView, {
global: {
stubs: {
AppLayout: AppLayoutStub,
TablePageLayout: TablePageLayoutStub,
Pagination: true,
EmptyState: true,
Select: true,
DateRangePicker: true,
Icon: true,
Teleport: true,
},
},
})
await flushPromises()
await nextTick()
const setupState = (wrapper.vm as any).$?.setupState
setupState.tooltipData = {
request_id: 'req-user-1',
actual_cost: 0.092883,
total_cost: 0.092883,
rate_multiplier: 1,
service_tier: 'priority',
input_cost: 0.020285,
output_cost: 0.00303,
cache_creation_cost: 0,
cache_read_cost: 0.069568,
input_tokens: 4057,
output_tokens: 101,
}
setupState.tooltipVisible = true
await nextTick()
const text = wrapper.text()
expect(text).toContain('Service tier')
expect(text).toContain('Fast')
expect(text).toContain('Rate')
expect(text).toContain('1.00x')
expect(text).toContain('Billed')
expect(text).toContain('$0.092883')
expect(text).toContain('$5.0000 / 1M tokens')
expect(text).toContain('$30.0000 / 1M tokens')
})
it('exports csv with input and output unit price columns', async () => {
const exportedLogs = [
{
request_id: 'req-user-export',
actual_cost: 0.092883,
total_cost: 0.092883,
rate_multiplier: 1,
service_tier: 'priority',
input_cost: 0.020285,
output_cost: 0.00303,
cache_creation_cost: 0.000001,
cache_read_cost: 0.069568,
input_tokens: 4057,
output_tokens: 101,
cache_creation_tokens: 4,
cache_read_tokens: 278272,
cache_creation_5m_tokens: 0,
cache_creation_1h_tokens: 0,
image_count: 0,
image_size: null,
first_token_ms: 12,
duration_ms: 345,
created_at: '2026-03-08T00:00:00Z',
model: 'gpt-5.4',
reasoning_effort: null,
api_key: { name: 'demo-key' },
},
]
query.mockResolvedValue({
items: exportedLogs,
total: 1,
pages: 1,
})
getStatsByDateRange.mockResolvedValue({
total_requests: 1,
total_tokens: 100,
total_cost: 0.1,
avg_duration_ms: 1,
})
list.mockResolvedValue({ items: [] })
let exportedBlob: Blob | null = null
const originalCreateObjectURL = window.URL.createObjectURL
const originalRevokeObjectURL = window.URL.revokeObjectURL
window.URL.createObjectURL = vi.fn((blob: Blob | MediaSource) => {
exportedBlob = blob as Blob
return 'blob:usage-export'
}) as typeof window.URL.createObjectURL
window.URL.revokeObjectURL = vi.fn(() => {}) as typeof window.URL.revokeObjectURL
const clickSpy = vi.spyOn(HTMLAnchorElement.prototype, 'click').mockImplementation(() => {})
const wrapper = mount(UsageView, {
global: {
stubs: {
AppLayout: AppLayoutStub,
TablePageLayout: TablePageLayoutStub,
Pagination: true,
EmptyState: true,
Select: true,
DateRangePicker: true,
Icon: true,
Teleport: true,
},
},
})
await flushPromises()
const setupState = (wrapper.vm as any).$?.setupState
await setupState.exportToCSV()
expect(exportedBlob).not.toBeNull()
expect(clickSpy).toHaveBeenCalled()
expect(showSuccess).toHaveBeenCalled()
window.URL.createObjectURL = originalCreateObjectURL
window.URL.revokeObjectURL = originalRevokeObjectURL
clickSpy.mockRestore()
})
})