mirror of
https://github.com/Wei-Shaw/sub2api.git
synced 2026-03-30 00:31:24 +00:00
fix(billing): 修复 OpenAI fast 档位计费并补齐展示
- 打通 service_tier 在 OpenAI HTTP、WS、passthrough 与 usage 记录中的传递 - 修正 priority/flex 计费逻辑,并将 fast 归一化为 priority - 在用户端和管理端补齐服务档位与计费明细展示 - 补齐前后端测试,并修复 WS 限流信号重复持久化导致的全量回归失败 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -475,6 +475,7 @@ func usageLogFromServiceUser(l *service.UsageLog) UsageLog {
|
||||
AccountID: l.AccountID,
|
||||
RequestID: l.RequestID,
|
||||
Model: l.Model,
|
||||
ServiceTier: l.ServiceTier,
|
||||
ReasoningEffort: l.ReasoningEffort,
|
||||
GroupID: l.GroupID,
|
||||
SubscriptionID: l.SubscriptionID,
|
||||
|
||||
@@ -71,3 +71,29 @@ func TestRequestTypeStringPtrNil(t *testing.T) {
|
||||
t.Parallel()
|
||||
require.Nil(t, requestTypeStringPtr(nil))
|
||||
}
|
||||
|
||||
func TestUsageLogFromService_IncludesServiceTierForUserAndAdmin(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
serviceTier := "priority"
|
||||
log := &service.UsageLog{
|
||||
RequestID: "req_3",
|
||||
Model: "gpt-5.4",
|
||||
ServiceTier: &serviceTier,
|
||||
AccountRateMultiplier: f64Ptr(1.5),
|
||||
}
|
||||
|
||||
userDTO := UsageLogFromService(log)
|
||||
adminDTO := UsageLogFromServiceAdmin(log)
|
||||
|
||||
require.NotNil(t, userDTO.ServiceTier)
|
||||
require.Equal(t, serviceTier, *userDTO.ServiceTier)
|
||||
require.NotNil(t, adminDTO.ServiceTier)
|
||||
require.Equal(t, serviceTier, *adminDTO.ServiceTier)
|
||||
require.NotNil(t, adminDTO.AccountRateMultiplier)
|
||||
require.InDelta(t, 1.5, *adminDTO.AccountRateMultiplier, 1e-12)
|
||||
}
|
||||
|
||||
func f64Ptr(value float64) *float64 {
|
||||
return &value
|
||||
}
|
||||
|
||||
@@ -315,6 +315,8 @@ type UsageLog struct {
|
||||
AccountID int64 `json:"account_id"`
|
||||
RequestID string `json:"request_id"`
|
||||
Model string `json:"model"`
|
||||
// ServiceTier records the OpenAI service tier used for billing, e.g. "priority" / "flex".
|
||||
ServiceTier *string `json:"service_tier,omitempty"`
|
||||
// ReasoningEffort is the request's reasoning effort level (OpenAI Responses API).
|
||||
// nil means not provided / not applicable.
|
||||
ReasoningEffort *string `json:"reasoning_effort,omitempty"`
|
||||
|
||||
@@ -22,7 +22,7 @@ import (
|
||||
"github.com/lib/pq"
|
||||
)
|
||||
|
||||
const usageLogSelectColumns = "id, user_id, api_key_id, account_id, request_id, model, group_id, subscription_id, input_tokens, output_tokens, cache_creation_tokens, cache_read_tokens, cache_creation_5m_tokens, cache_creation_1h_tokens, input_cost, output_cost, cache_creation_cost, cache_read_cost, total_cost, actual_cost, rate_multiplier, account_rate_multiplier, billing_type, request_type, stream, openai_ws_mode, duration_ms, first_token_ms, user_agent, ip_address, image_count, image_size, media_type, reasoning_effort, cache_ttl_overridden, created_at"
|
||||
const usageLogSelectColumns = "id, user_id, api_key_id, account_id, request_id, model, group_id, subscription_id, input_tokens, output_tokens, cache_creation_tokens, cache_read_tokens, cache_creation_5m_tokens, cache_creation_1h_tokens, input_cost, output_cost, cache_creation_cost, cache_read_cost, total_cost, actual_cost, rate_multiplier, account_rate_multiplier, billing_type, request_type, stream, openai_ws_mode, duration_ms, first_token_ms, user_agent, ip_address, image_count, image_size, media_type, service_tier, reasoning_effort, cache_ttl_overridden, created_at"
|
||||
|
||||
// dateFormatWhitelist 将 granularity 参数映射为 PostgreSQL TO_CHAR 格式字符串,防止外部输入直接拼入 SQL
|
||||
var dateFormatWhitelist = map[string]string{
|
||||
@@ -135,6 +135,7 @@ func (r *usageLogRepository) Create(ctx context.Context, log *service.UsageLog)
|
||||
image_count,
|
||||
image_size,
|
||||
media_type,
|
||||
service_tier,
|
||||
reasoning_effort,
|
||||
cache_ttl_overridden,
|
||||
created_at
|
||||
@@ -144,7 +145,7 @@ func (r *usageLogRepository) Create(ctx context.Context, log *service.UsageLog)
|
||||
$8, $9, $10, $11,
|
||||
$12, $13,
|
||||
$14, $15, $16, $17, $18, $19,
|
||||
$20, $21, $22, $23, $24, $25, $26, $27, $28, $29, $30, $31, $32, $33, $34, $35
|
||||
$20, $21, $22, $23, $24, $25, $26, $27, $28, $29, $30, $31, $32, $33, $34, $35, $36
|
||||
)
|
||||
ON CONFLICT (request_id, api_key_id) DO NOTHING
|
||||
RETURNING id, created_at
|
||||
@@ -158,6 +159,7 @@ func (r *usageLogRepository) Create(ctx context.Context, log *service.UsageLog)
|
||||
ipAddress := nullString(log.IPAddress)
|
||||
imageSize := nullString(log.ImageSize)
|
||||
mediaType := nullString(log.MediaType)
|
||||
serviceTier := nullString(log.ServiceTier)
|
||||
reasoningEffort := nullString(log.ReasoningEffort)
|
||||
|
||||
var requestIDArg any
|
||||
@@ -198,6 +200,7 @@ func (r *usageLogRepository) Create(ctx context.Context, log *service.UsageLog)
|
||||
log.ImageCount,
|
||||
imageSize,
|
||||
mediaType,
|
||||
serviceTier,
|
||||
reasoningEffort,
|
||||
log.CacheTTLOverridden,
|
||||
createdAt,
|
||||
@@ -2505,6 +2508,7 @@ func scanUsageLog(scanner interface{ Scan(...any) error }) (*service.UsageLog, e
|
||||
imageCount int
|
||||
imageSize sql.NullString
|
||||
mediaType sql.NullString
|
||||
serviceTier sql.NullString
|
||||
reasoningEffort sql.NullString
|
||||
cacheTTLOverridden bool
|
||||
createdAt time.Time
|
||||
@@ -2544,6 +2548,7 @@ func scanUsageLog(scanner interface{ Scan(...any) error }) (*service.UsageLog, e
|
||||
&imageCount,
|
||||
&imageSize,
|
||||
&mediaType,
|
||||
&serviceTier,
|
||||
&reasoningEffort,
|
||||
&cacheTTLOverridden,
|
||||
&createdAt,
|
||||
@@ -2614,6 +2619,9 @@ func scanUsageLog(scanner interface{ Scan(...any) error }) (*service.UsageLog, e
|
||||
if mediaType.Valid {
|
||||
log.MediaType = &mediaType.String
|
||||
}
|
||||
if serviceTier.Valid {
|
||||
log.ServiceTier = &serviceTier.String
|
||||
}
|
||||
if reasoningEffort.Valid {
|
||||
log.ReasoningEffort = &reasoningEffort.String
|
||||
}
|
||||
|
||||
@@ -71,6 +71,7 @@ func TestUsageLogRepositoryCreateSyncRequestTypeAndLegacyFields(t *testing.T) {
|
||||
log.ImageCount,
|
||||
sqlmock.AnyArg(), // image_size
|
||||
sqlmock.AnyArg(), // media_type
|
||||
sqlmock.AnyArg(), // service_tier
|
||||
sqlmock.AnyArg(), // reasoning_effort
|
||||
log.CacheTTLOverridden,
|
||||
createdAt,
|
||||
@@ -81,12 +82,76 @@ func TestUsageLogRepositoryCreateSyncRequestTypeAndLegacyFields(t *testing.T) {
|
||||
require.NoError(t, err)
|
||||
require.True(t, inserted)
|
||||
require.Equal(t, int64(99), log.ID)
|
||||
require.Nil(t, log.ServiceTier)
|
||||
require.Equal(t, service.RequestTypeWSV2, log.RequestType)
|
||||
require.True(t, log.Stream)
|
||||
require.True(t, log.OpenAIWSMode)
|
||||
require.NoError(t, mock.ExpectationsWereMet())
|
||||
}
|
||||
|
||||
func TestUsageLogRepositoryCreate_PersistsServiceTier(t *testing.T) {
|
||||
db, mock := newSQLMock(t)
|
||||
repo := &usageLogRepository{sql: db}
|
||||
|
||||
createdAt := time.Date(2025, 1, 2, 12, 0, 0, 0, time.UTC)
|
||||
serviceTier := "priority"
|
||||
log := &service.UsageLog{
|
||||
UserID: 1,
|
||||
APIKeyID: 2,
|
||||
AccountID: 3,
|
||||
RequestID: "req-service-tier",
|
||||
Model: "gpt-5.4",
|
||||
ServiceTier: &serviceTier,
|
||||
CreatedAt: createdAt,
|
||||
}
|
||||
|
||||
mock.ExpectQuery("INSERT INTO usage_logs").
|
||||
WithArgs(
|
||||
log.UserID,
|
||||
log.APIKeyID,
|
||||
log.AccountID,
|
||||
log.RequestID,
|
||||
log.Model,
|
||||
sqlmock.AnyArg(),
|
||||
sqlmock.AnyArg(),
|
||||
log.InputTokens,
|
||||
log.OutputTokens,
|
||||
log.CacheCreationTokens,
|
||||
log.CacheReadTokens,
|
||||
log.CacheCreation5mTokens,
|
||||
log.CacheCreation1hTokens,
|
||||
log.InputCost,
|
||||
log.OutputCost,
|
||||
log.CacheCreationCost,
|
||||
log.CacheReadCost,
|
||||
log.TotalCost,
|
||||
log.ActualCost,
|
||||
log.RateMultiplier,
|
||||
log.AccountRateMultiplier,
|
||||
log.BillingType,
|
||||
int16(service.RequestTypeSync),
|
||||
false,
|
||||
false,
|
||||
sqlmock.AnyArg(),
|
||||
sqlmock.AnyArg(),
|
||||
sqlmock.AnyArg(),
|
||||
sqlmock.AnyArg(),
|
||||
log.ImageCount,
|
||||
sqlmock.AnyArg(),
|
||||
sqlmock.AnyArg(),
|
||||
serviceTier,
|
||||
sqlmock.AnyArg(),
|
||||
log.CacheTTLOverridden,
|
||||
createdAt,
|
||||
).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "created_at"}).AddRow(int64(100), createdAt))
|
||||
|
||||
inserted, err := repo.Create(context.Background(), log)
|
||||
require.NoError(t, err)
|
||||
require.True(t, inserted)
|
||||
require.NoError(t, mock.ExpectationsWereMet())
|
||||
}
|
||||
|
||||
func TestUsageLogRepositoryListWithFiltersRequestTypePriority(t *testing.T) {
|
||||
db, mock := newSQLMock(t)
|
||||
repo := &usageLogRepository{sql: db}
|
||||
@@ -280,11 +345,14 @@ func TestScanUsageLogRequestTypeAndLegacyFallback(t *testing.T) {
|
||||
0,
|
||||
sql.NullString{},
|
||||
sql.NullString{},
|
||||
sql.NullString{Valid: true, String: "priority"},
|
||||
sql.NullString{},
|
||||
false,
|
||||
now,
|
||||
}})
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, log.ServiceTier)
|
||||
require.Equal(t, "priority", *log.ServiceTier)
|
||||
require.Equal(t, service.RequestTypeWSV2, log.RequestType)
|
||||
require.True(t, log.Stream)
|
||||
require.True(t, log.OpenAIWSMode)
|
||||
@@ -316,13 +384,53 @@ func TestScanUsageLogRequestTypeAndLegacyFallback(t *testing.T) {
|
||||
0,
|
||||
sql.NullString{},
|
||||
sql.NullString{},
|
||||
sql.NullString{Valid: true, String: "flex"},
|
||||
sql.NullString{},
|
||||
false,
|
||||
now,
|
||||
}})
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, log.ServiceTier)
|
||||
require.Equal(t, "flex", *log.ServiceTier)
|
||||
require.Equal(t, service.RequestTypeStream, log.RequestType)
|
||||
require.True(t, log.Stream)
|
||||
require.False(t, log.OpenAIWSMode)
|
||||
})
|
||||
|
||||
t.Run("service_tier_is_scanned", func(t *testing.T) {
|
||||
now := time.Now().UTC()
|
||||
log, err := scanUsageLog(usageLogScannerStub{values: []any{
|
||||
int64(3),
|
||||
int64(12),
|
||||
int64(22),
|
||||
int64(32),
|
||||
sql.NullString{Valid: true, String: "req-3"},
|
||||
"gpt-5.4",
|
||||
sql.NullInt64{},
|
||||
sql.NullInt64{},
|
||||
1, 2, 3, 4, 5, 6,
|
||||
0.1, 0.2, 0.3, 0.4, 1.0, 0.9,
|
||||
1.0,
|
||||
sql.NullFloat64{},
|
||||
int16(service.BillingTypeBalance),
|
||||
int16(service.RequestTypeSync),
|
||||
false,
|
||||
false,
|
||||
sql.NullInt64{},
|
||||
sql.NullInt64{},
|
||||
sql.NullString{},
|
||||
sql.NullString{},
|
||||
0,
|
||||
sql.NullString{},
|
||||
sql.NullString{},
|
||||
sql.NullString{Valid: true, String: "priority"},
|
||||
sql.NullString{},
|
||||
false,
|
||||
now,
|
||||
}})
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, log.ServiceTier)
|
||||
require.Equal(t, "priority", *log.ServiceTier)
|
||||
})
|
||||
|
||||
}
|
||||
|
||||
@@ -210,8 +210,10 @@ func TestAPIContracts(t *testing.T) {
|
||||
"sora_video_price_per_request": null,
|
||||
"sora_video_price_per_request_hd": null,
|
||||
"claude_code_only": false,
|
||||
"allow_messages_dispatch": false,
|
||||
"fallback_group_id": null,
|
||||
"fallback_group_id_on_invalid_request": null,
|
||||
"allow_messages_dispatch": false,
|
||||
"created_at": "2025-01-02T03:04:05Z",
|
||||
"updated_at": "2025-01-02T03:04:05Z"
|
||||
}
|
||||
|
||||
@@ -43,16 +43,19 @@ type BillingCache interface {
|
||||
|
||||
// ModelPricing 模型价格配置(per-token价格,与LiteLLM格式一致)
|
||||
type ModelPricing struct {
|
||||
InputPricePerToken float64 // 每token输入价格 (USD)
|
||||
OutputPricePerToken float64 // 每token输出价格 (USD)
|
||||
CacheCreationPricePerToken float64 // 缓存创建每token价格 (USD)
|
||||
CacheReadPricePerToken float64 // 缓存读取每token价格 (USD)
|
||||
CacheCreation5mPrice float64 // 5分钟缓存创建每token价格 (USD)
|
||||
CacheCreation1hPrice float64 // 1小时缓存创建每token价格 (USD)
|
||||
SupportsCacheBreakdown bool // 是否支持详细的缓存分类
|
||||
LongContextInputThreshold int // 超过阈值后按整次会话提升输入价格
|
||||
LongContextInputMultiplier float64 // 长上下文整次会话输入倍率
|
||||
LongContextOutputMultiplier float64 // 长上下文整次会话输出倍率
|
||||
InputPricePerToken float64 // 每token输入价格 (USD)
|
||||
InputPricePerTokenPriority float64 // priority service tier 下每token输入价格 (USD)
|
||||
OutputPricePerToken float64 // 每token输出价格 (USD)
|
||||
OutputPricePerTokenPriority float64 // priority service tier 下每token输出价格 (USD)
|
||||
CacheCreationPricePerToken float64 // 缓存创建每token价格 (USD)
|
||||
CacheReadPricePerToken float64 // 缓存读取每token价格 (USD)
|
||||
CacheReadPricePerTokenPriority float64 // priority service tier 下缓存读取每token价格 (USD)
|
||||
CacheCreation5mPrice float64 // 5分钟缓存创建每token价格 (USD)
|
||||
CacheCreation1hPrice float64 // 1小时缓存创建每token价格 (USD)
|
||||
SupportsCacheBreakdown bool // 是否支持详细的缓存分类
|
||||
LongContextInputThreshold int // 超过阈值后按整次会话提升输入价格
|
||||
LongContextInputMultiplier float64 // 长上下文整次会话输入倍率
|
||||
LongContextOutputMultiplier float64 // 长上下文整次会话输出倍率
|
||||
}
|
||||
|
||||
const (
|
||||
@@ -61,6 +64,28 @@ const (
|
||||
openAIGPT54LongContextOutputMultiplier = 1.5
|
||||
)
|
||||
|
||||
func normalizeBillingServiceTier(serviceTier string) string {
|
||||
return strings.ToLower(strings.TrimSpace(serviceTier))
|
||||
}
|
||||
|
||||
func usePriorityServiceTierPricing(serviceTier string, pricing *ModelPricing) bool {
|
||||
if pricing == nil || normalizeBillingServiceTier(serviceTier) != "priority" {
|
||||
return false
|
||||
}
|
||||
return pricing.InputPricePerTokenPriority > 0 || pricing.OutputPricePerTokenPriority > 0 || pricing.CacheReadPricePerTokenPriority > 0
|
||||
}
|
||||
|
||||
func serviceTierCostMultiplier(serviceTier string) float64 {
|
||||
switch normalizeBillingServiceTier(serviceTier) {
|
||||
case "priority":
|
||||
return 2.0
|
||||
case "flex":
|
||||
return 0.5
|
||||
default:
|
||||
return 1.0
|
||||
}
|
||||
}
|
||||
|
||||
// UsageTokens 使用的token数量
|
||||
type UsageTokens struct {
|
||||
InputTokens int
|
||||
@@ -173,30 +198,60 @@ func (s *BillingService) initFallbackPricing() {
|
||||
|
||||
// OpenAI GPT-5.1(本地兜底,防止动态定价不可用时拒绝计费)
|
||||
s.fallbackPrices["gpt-5.1"] = &ModelPricing{
|
||||
InputPricePerToken: 1.25e-6, // $1.25 per MTok
|
||||
OutputPricePerToken: 10e-6, // $10 per MTok
|
||||
CacheCreationPricePerToken: 1.25e-6, // $1.25 per MTok
|
||||
CacheReadPricePerToken: 0.125e-6,
|
||||
SupportsCacheBreakdown: false,
|
||||
InputPricePerToken: 1.25e-6, // $1.25 per MTok
|
||||
InputPricePerTokenPriority: 2.5e-6, // $2.5 per MTok
|
||||
OutputPricePerToken: 10e-6, // $10 per MTok
|
||||
OutputPricePerTokenPriority: 20e-6, // $20 per MTok
|
||||
CacheCreationPricePerToken: 1.25e-6, // $1.25 per MTok
|
||||
CacheReadPricePerToken: 0.125e-6,
|
||||
CacheReadPricePerTokenPriority: 0.25e-6,
|
||||
SupportsCacheBreakdown: false,
|
||||
}
|
||||
// OpenAI GPT-5.4(业务指定价格)
|
||||
s.fallbackPrices["gpt-5.4"] = &ModelPricing{
|
||||
InputPricePerToken: 2.5e-6, // $2.5 per MTok
|
||||
OutputPricePerToken: 15e-6, // $15 per MTok
|
||||
CacheCreationPricePerToken: 2.5e-6, // $2.5 per MTok
|
||||
CacheReadPricePerToken: 0.25e-6, // $0.25 per MTok
|
||||
SupportsCacheBreakdown: false,
|
||||
LongContextInputThreshold: openAIGPT54LongContextInputThreshold,
|
||||
LongContextInputMultiplier: openAIGPT54LongContextInputMultiplier,
|
||||
LongContextOutputMultiplier: openAIGPT54LongContextOutputMultiplier,
|
||||
InputPricePerToken: 2.5e-6, // $2.5 per MTok
|
||||
InputPricePerTokenPriority: 5e-6, // $5 per MTok
|
||||
OutputPricePerToken: 15e-6, // $15 per MTok
|
||||
OutputPricePerTokenPriority: 30e-6, // $30 per MTok
|
||||
CacheCreationPricePerToken: 2.5e-6, // $2.5 per MTok
|
||||
CacheReadPricePerToken: 0.25e-6, // $0.25 per MTok
|
||||
CacheReadPricePerTokenPriority: 0.5e-6, // $0.5 per MTok
|
||||
SupportsCacheBreakdown: false,
|
||||
LongContextInputThreshold: openAIGPT54LongContextInputThreshold,
|
||||
LongContextInputMultiplier: openAIGPT54LongContextInputMultiplier,
|
||||
LongContextOutputMultiplier: openAIGPT54LongContextOutputMultiplier,
|
||||
}
|
||||
// OpenAI GPT-5.2(本地兜底)
|
||||
s.fallbackPrices["gpt-5.2"] = &ModelPricing{
|
||||
InputPricePerToken: 1.75e-6,
|
||||
InputPricePerTokenPriority: 3.5e-6,
|
||||
OutputPricePerToken: 14e-6,
|
||||
OutputPricePerTokenPriority: 28e-6,
|
||||
CacheCreationPricePerToken: 1.75e-6,
|
||||
CacheReadPricePerToken: 0.175e-6,
|
||||
CacheReadPricePerTokenPriority: 0.35e-6,
|
||||
SupportsCacheBreakdown: false,
|
||||
}
|
||||
// Codex 族兜底统一按 GPT-5.1 Codex 价格计费
|
||||
s.fallbackPrices["gpt-5.1-codex"] = &ModelPricing{
|
||||
InputPricePerToken: 1.5e-6, // $1.5 per MTok
|
||||
OutputPricePerToken: 12e-6, // $12 per MTok
|
||||
CacheCreationPricePerToken: 1.5e-6, // $1.5 per MTok
|
||||
CacheReadPricePerToken: 0.15e-6,
|
||||
SupportsCacheBreakdown: false,
|
||||
InputPricePerToken: 1.5e-6, // $1.5 per MTok
|
||||
InputPricePerTokenPriority: 3e-6, // $3 per MTok
|
||||
OutputPricePerToken: 12e-6, // $12 per MTok
|
||||
OutputPricePerTokenPriority: 24e-6, // $24 per MTok
|
||||
CacheCreationPricePerToken: 1.5e-6, // $1.5 per MTok
|
||||
CacheReadPricePerToken: 0.15e-6,
|
||||
CacheReadPricePerTokenPriority: 0.3e-6,
|
||||
SupportsCacheBreakdown: false,
|
||||
}
|
||||
s.fallbackPrices["gpt-5.2-codex"] = &ModelPricing{
|
||||
InputPricePerToken: 1.75e-6,
|
||||
InputPricePerTokenPriority: 3.5e-6,
|
||||
OutputPricePerToken: 14e-6,
|
||||
OutputPricePerTokenPriority: 28e-6,
|
||||
CacheCreationPricePerToken: 1.75e-6,
|
||||
CacheReadPricePerToken: 0.175e-6,
|
||||
CacheReadPricePerTokenPriority: 0.35e-6,
|
||||
SupportsCacheBreakdown: false,
|
||||
}
|
||||
s.fallbackPrices["gpt-5.3-codex"] = s.fallbackPrices["gpt-5.1-codex"]
|
||||
}
|
||||
@@ -241,6 +296,10 @@ func (s *BillingService) getFallbackPricing(model string) *ModelPricing {
|
||||
switch normalized {
|
||||
case "gpt-5.4":
|
||||
return s.fallbackPrices["gpt-5.4"]
|
||||
case "gpt-5.2":
|
||||
return s.fallbackPrices["gpt-5.2"]
|
||||
case "gpt-5.2-codex":
|
||||
return s.fallbackPrices["gpt-5.2-codex"]
|
||||
case "gpt-5.3-codex":
|
||||
return s.fallbackPrices["gpt-5.3-codex"]
|
||||
case "gpt-5.1-codex", "gpt-5.1-codex-max", "gpt-5.1-codex-mini", "codex-mini-latest":
|
||||
@@ -269,16 +328,19 @@ func (s *BillingService) GetModelPricing(model string) (*ModelPricing, error) {
|
||||
price1h := litellmPricing.CacheCreationInputTokenCostAbove1hr
|
||||
enableBreakdown := price1h > 0 && price1h > price5m
|
||||
return s.applyModelSpecificPricingPolicy(model, &ModelPricing{
|
||||
InputPricePerToken: litellmPricing.InputCostPerToken,
|
||||
OutputPricePerToken: litellmPricing.OutputCostPerToken,
|
||||
CacheCreationPricePerToken: litellmPricing.CacheCreationInputTokenCost,
|
||||
CacheReadPricePerToken: litellmPricing.CacheReadInputTokenCost,
|
||||
CacheCreation5mPrice: price5m,
|
||||
CacheCreation1hPrice: price1h,
|
||||
SupportsCacheBreakdown: enableBreakdown,
|
||||
LongContextInputThreshold: litellmPricing.LongContextInputTokenThreshold,
|
||||
LongContextInputMultiplier: litellmPricing.LongContextInputCostMultiplier,
|
||||
LongContextOutputMultiplier: litellmPricing.LongContextOutputCostMultiplier,
|
||||
InputPricePerToken: litellmPricing.InputCostPerToken,
|
||||
InputPricePerTokenPriority: litellmPricing.InputCostPerTokenPriority,
|
||||
OutputPricePerToken: litellmPricing.OutputCostPerToken,
|
||||
OutputPricePerTokenPriority: litellmPricing.OutputCostPerTokenPriority,
|
||||
CacheCreationPricePerToken: litellmPricing.CacheCreationInputTokenCost,
|
||||
CacheReadPricePerToken: litellmPricing.CacheReadInputTokenCost,
|
||||
CacheReadPricePerTokenPriority: litellmPricing.CacheReadInputTokenCostPriority,
|
||||
CacheCreation5mPrice: price5m,
|
||||
CacheCreation1hPrice: price1h,
|
||||
SupportsCacheBreakdown: enableBreakdown,
|
||||
LongContextInputThreshold: litellmPricing.LongContextInputTokenThreshold,
|
||||
LongContextInputMultiplier: litellmPricing.LongContextInputCostMultiplier,
|
||||
LongContextOutputMultiplier: litellmPricing.LongContextOutputCostMultiplier,
|
||||
}), nil
|
||||
}
|
||||
}
|
||||
@@ -295,6 +357,10 @@ func (s *BillingService) GetModelPricing(model string) (*ModelPricing, error) {
|
||||
|
||||
// CalculateCost 计算使用费用
|
||||
func (s *BillingService) CalculateCost(model string, tokens UsageTokens, rateMultiplier float64) (*CostBreakdown, error) {
|
||||
return s.CalculateCostWithServiceTier(model, tokens, rateMultiplier, "")
|
||||
}
|
||||
|
||||
func (s *BillingService) CalculateCostWithServiceTier(model string, tokens UsageTokens, rateMultiplier float64, serviceTier string) (*CostBreakdown, error) {
|
||||
pricing, err := s.GetModelPricing(model)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -303,6 +369,21 @@ func (s *BillingService) CalculateCost(model string, tokens UsageTokens, rateMul
|
||||
breakdown := &CostBreakdown{}
|
||||
inputPricePerToken := pricing.InputPricePerToken
|
||||
outputPricePerToken := pricing.OutputPricePerToken
|
||||
cacheReadPricePerToken := pricing.CacheReadPricePerToken
|
||||
tierMultiplier := 1.0
|
||||
if usePriorityServiceTierPricing(serviceTier, pricing) {
|
||||
if pricing.InputPricePerTokenPriority > 0 {
|
||||
inputPricePerToken = pricing.InputPricePerTokenPriority
|
||||
}
|
||||
if pricing.OutputPricePerTokenPriority > 0 {
|
||||
outputPricePerToken = pricing.OutputPricePerTokenPriority
|
||||
}
|
||||
if pricing.CacheReadPricePerTokenPriority > 0 {
|
||||
cacheReadPricePerToken = pricing.CacheReadPricePerTokenPriority
|
||||
}
|
||||
} else {
|
||||
tierMultiplier = serviceTierCostMultiplier(serviceTier)
|
||||
}
|
||||
if s.shouldApplySessionLongContextPricing(tokens, pricing) {
|
||||
inputPricePerToken *= pricing.LongContextInputMultiplier
|
||||
outputPricePerToken *= pricing.LongContextOutputMultiplier
|
||||
@@ -329,7 +410,14 @@ func (s *BillingService) CalculateCost(model string, tokens UsageTokens, rateMul
|
||||
breakdown.CacheCreationCost = float64(tokens.CacheCreationTokens) * pricing.CacheCreationPricePerToken
|
||||
}
|
||||
|
||||
breakdown.CacheReadCost = float64(tokens.CacheReadTokens) * pricing.CacheReadPricePerToken
|
||||
breakdown.CacheReadCost = float64(tokens.CacheReadTokens) * cacheReadPricePerToken
|
||||
|
||||
if tierMultiplier != 1.0 {
|
||||
breakdown.InputCost *= tierMultiplier
|
||||
breakdown.OutputCost *= tierMultiplier
|
||||
breakdown.CacheCreationCost *= tierMultiplier
|
||||
breakdown.CacheReadCost *= tierMultiplier
|
||||
}
|
||||
|
||||
// 计算总费用
|
||||
breakdown.TotalCost = breakdown.InputCost + breakdown.OutputCost +
|
||||
|
||||
@@ -522,3 +522,189 @@ func TestCalculateCost_LargeTokenCount(t *testing.T) {
|
||||
require.False(t, math.IsNaN(cost.TotalCost))
|
||||
require.False(t, math.IsInf(cost.TotalCost, 0))
|
||||
}
|
||||
|
||||
func TestServiceTierCostMultiplier(t *testing.T) {
|
||||
require.InDelta(t, 2.0, serviceTierCostMultiplier("priority"), 1e-12)
|
||||
require.InDelta(t, 2.0, serviceTierCostMultiplier(" Priority "), 1e-12)
|
||||
require.InDelta(t, 0.5, serviceTierCostMultiplier("flex"), 1e-12)
|
||||
require.InDelta(t, 1.0, serviceTierCostMultiplier(""), 1e-12)
|
||||
require.InDelta(t, 1.0, serviceTierCostMultiplier("default"), 1e-12)
|
||||
}
|
||||
|
||||
func TestCalculateCostWithServiceTier_OpenAIPriorityUsesPriorityPricing(t *testing.T) {
|
||||
svc := newTestBillingService()
|
||||
tokens := UsageTokens{InputTokens: 100, OutputTokens: 50, CacheReadTokens: 20}
|
||||
|
||||
baseCost, err := svc.CalculateCost("gpt-5.1-codex", tokens, 1.0)
|
||||
require.NoError(t, err)
|
||||
|
||||
priorityCost, err := svc.CalculateCostWithServiceTier("gpt-5.1-codex", tokens, 1.0, "priority")
|
||||
require.NoError(t, err)
|
||||
|
||||
require.InDelta(t, baseCost.InputCost*2, priorityCost.InputCost, 1e-10)
|
||||
require.InDelta(t, baseCost.OutputCost*2, priorityCost.OutputCost, 1e-10)
|
||||
require.InDelta(t, baseCost.CacheReadCost*2, priorityCost.CacheReadCost, 1e-10)
|
||||
require.InDelta(t, baseCost.TotalCost*2, priorityCost.TotalCost, 1e-10)
|
||||
}
|
||||
|
||||
func TestCalculateCostWithServiceTier_FlexAppliesHalfMultiplier(t *testing.T) {
|
||||
svc := newTestBillingService()
|
||||
tokens := UsageTokens{InputTokens: 100, OutputTokens: 50, CacheCreationTokens: 40, CacheReadTokens: 20}
|
||||
|
||||
baseCost, err := svc.CalculateCost("gpt-5.4", tokens, 1.0)
|
||||
require.NoError(t, err)
|
||||
|
||||
flexCost, err := svc.CalculateCostWithServiceTier("gpt-5.4", tokens, 1.0, "flex")
|
||||
require.NoError(t, err)
|
||||
|
||||
require.InDelta(t, baseCost.InputCost*0.5, flexCost.InputCost, 1e-10)
|
||||
require.InDelta(t, baseCost.OutputCost*0.5, flexCost.OutputCost, 1e-10)
|
||||
require.InDelta(t, baseCost.CacheCreationCost*0.5, flexCost.CacheCreationCost, 1e-10)
|
||||
require.InDelta(t, baseCost.CacheReadCost*0.5, flexCost.CacheReadCost, 1e-10)
|
||||
require.InDelta(t, baseCost.TotalCost*0.5, flexCost.TotalCost, 1e-10)
|
||||
}
|
||||
|
||||
func TestCalculateCostWithServiceTier_PriorityFallsBackToTierMultiplierWithoutExplicitPriorityPrice(t *testing.T) {
|
||||
svc := newTestBillingService()
|
||||
tokens := UsageTokens{InputTokens: 120, OutputTokens: 30, CacheCreationTokens: 12, CacheReadTokens: 8}
|
||||
|
||||
baseCost, err := svc.CalculateCost("claude-sonnet-4", tokens, 1.0)
|
||||
require.NoError(t, err)
|
||||
|
||||
priorityCost, err := svc.CalculateCostWithServiceTier("claude-sonnet-4", tokens, 1.0, "priority")
|
||||
require.NoError(t, err)
|
||||
|
||||
require.InDelta(t, baseCost.InputCost*2, priorityCost.InputCost, 1e-10)
|
||||
require.InDelta(t, baseCost.OutputCost*2, priorityCost.OutputCost, 1e-10)
|
||||
require.InDelta(t, baseCost.CacheCreationCost*2, priorityCost.CacheCreationCost, 1e-10)
|
||||
require.InDelta(t, baseCost.CacheReadCost*2, priorityCost.CacheReadCost, 1e-10)
|
||||
require.InDelta(t, baseCost.TotalCost*2, priorityCost.TotalCost, 1e-10)
|
||||
}
|
||||
|
||||
func TestBillingServiceGetModelPricing_UsesDynamicPriorityFields(t *testing.T) {
|
||||
pricingSvc := &PricingService{
|
||||
pricingData: map[string]*LiteLLMModelPricing{
|
||||
"gpt-5.4": {
|
||||
InputCostPerToken: 2.5e-6,
|
||||
InputCostPerTokenPriority: 5e-6,
|
||||
OutputCostPerToken: 15e-6,
|
||||
OutputCostPerTokenPriority: 30e-6,
|
||||
CacheCreationInputTokenCost: 2.5e-6,
|
||||
CacheReadInputTokenCost: 0.25e-6,
|
||||
CacheReadInputTokenCostPriority: 0.5e-6,
|
||||
LongContextInputTokenThreshold: 272000,
|
||||
LongContextInputCostMultiplier: 2.0,
|
||||
LongContextOutputCostMultiplier: 1.5,
|
||||
},
|
||||
},
|
||||
}
|
||||
svc := NewBillingService(&config.Config{}, pricingSvc)
|
||||
|
||||
pricing, err := svc.GetModelPricing("gpt-5.4")
|
||||
require.NoError(t, err)
|
||||
require.InDelta(t, 2.5e-6, pricing.InputPricePerToken, 1e-12)
|
||||
require.InDelta(t, 5e-6, pricing.InputPricePerTokenPriority, 1e-12)
|
||||
require.InDelta(t, 15e-6, pricing.OutputPricePerToken, 1e-12)
|
||||
require.InDelta(t, 30e-6, pricing.OutputPricePerTokenPriority, 1e-12)
|
||||
require.InDelta(t, 0.25e-6, pricing.CacheReadPricePerToken, 1e-12)
|
||||
require.InDelta(t, 0.5e-6, pricing.CacheReadPricePerTokenPriority, 1e-12)
|
||||
require.Equal(t, 272000, pricing.LongContextInputThreshold)
|
||||
require.InDelta(t, 2.0, pricing.LongContextInputMultiplier, 1e-12)
|
||||
require.InDelta(t, 1.5, pricing.LongContextOutputMultiplier, 1e-12)
|
||||
}
|
||||
|
||||
func TestBillingServiceGetModelPricing_OpenAIFallbackGpt52Variants(t *testing.T) {
|
||||
svc := newTestBillingService()
|
||||
|
||||
gpt52, err := svc.GetModelPricing("gpt-5.2")
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, gpt52)
|
||||
require.InDelta(t, 1.75e-6, gpt52.InputPricePerToken, 1e-12)
|
||||
require.InDelta(t, 3.5e-6, gpt52.InputPricePerTokenPriority, 1e-12)
|
||||
|
||||
gpt52Codex, err := svc.GetModelPricing("gpt-5.2-codex")
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, gpt52Codex)
|
||||
require.InDelta(t, 1.75e-6, gpt52Codex.InputPricePerToken, 1e-12)
|
||||
require.InDelta(t, 3.5e-6, gpt52Codex.InputPricePerTokenPriority, 1e-12)
|
||||
require.InDelta(t, 28e-6, gpt52Codex.OutputPricePerTokenPriority, 1e-12)
|
||||
}
|
||||
|
||||
func TestCalculateCostWithServiceTier_PriorityFallsBackToTierMultiplierWhenExplicitPriceMissing(t *testing.T) {
|
||||
svc := NewBillingService(&config.Config{}, &PricingService{
|
||||
pricingData: map[string]*LiteLLMModelPricing{
|
||||
"custom-no-priority": {
|
||||
InputCostPerToken: 1e-6,
|
||||
OutputCostPerToken: 2e-6,
|
||||
CacheCreationInputTokenCost: 0.5e-6,
|
||||
CacheReadInputTokenCost: 0.25e-6,
|
||||
},
|
||||
},
|
||||
})
|
||||
tokens := UsageTokens{InputTokens: 100, OutputTokens: 50, CacheCreationTokens: 40, CacheReadTokens: 20}
|
||||
|
||||
baseCost, err := svc.CalculateCost("custom-no-priority", tokens, 1.0)
|
||||
require.NoError(t, err)
|
||||
|
||||
priorityCost, err := svc.CalculateCostWithServiceTier("custom-no-priority", tokens, 1.0, "priority")
|
||||
require.NoError(t, err)
|
||||
|
||||
require.InDelta(t, baseCost.InputCost*2, priorityCost.InputCost, 1e-10)
|
||||
require.InDelta(t, baseCost.OutputCost*2, priorityCost.OutputCost, 1e-10)
|
||||
require.InDelta(t, baseCost.CacheCreationCost*2, priorityCost.CacheCreationCost, 1e-10)
|
||||
require.InDelta(t, baseCost.CacheReadCost*2, priorityCost.CacheReadCost, 1e-10)
|
||||
require.InDelta(t, baseCost.TotalCost*2, priorityCost.TotalCost, 1e-10)
|
||||
}
|
||||
|
||||
func TestGetModelPricing_OpenAIGpt52FallbacksExposePriorityPrices(t *testing.T) {
|
||||
svc := newTestBillingService()
|
||||
|
||||
gpt52, err := svc.GetModelPricing("gpt-5.2")
|
||||
require.NoError(t, err)
|
||||
require.InDelta(t, 1.75e-6, gpt52.InputPricePerToken, 1e-12)
|
||||
require.InDelta(t, 3.5e-6, gpt52.InputPricePerTokenPriority, 1e-12)
|
||||
require.InDelta(t, 14e-6, gpt52.OutputPricePerToken, 1e-12)
|
||||
require.InDelta(t, 28e-6, gpt52.OutputPricePerTokenPriority, 1e-12)
|
||||
|
||||
gpt52Codex, err := svc.GetModelPricing("gpt-5.2-codex")
|
||||
require.NoError(t, err)
|
||||
require.InDelta(t, 1.75e-6, gpt52Codex.InputPricePerToken, 1e-12)
|
||||
require.InDelta(t, 3.5e-6, gpt52Codex.InputPricePerTokenPriority, 1e-12)
|
||||
require.InDelta(t, 14e-6, gpt52Codex.OutputPricePerToken, 1e-12)
|
||||
require.InDelta(t, 28e-6, gpt52Codex.OutputPricePerTokenPriority, 1e-12)
|
||||
}
|
||||
|
||||
func TestGetModelPricing_MapsDynamicPriorityFieldsIntoBillingPricing(t *testing.T) {
|
||||
svc := NewBillingService(&config.Config{}, &PricingService{
|
||||
pricingData: map[string]*LiteLLMModelPricing{
|
||||
"dynamic-tier-model": {
|
||||
InputCostPerToken: 1e-6,
|
||||
InputCostPerTokenPriority: 2e-6,
|
||||
OutputCostPerToken: 3e-6,
|
||||
OutputCostPerTokenPriority: 6e-6,
|
||||
CacheCreationInputTokenCost: 4e-6,
|
||||
CacheCreationInputTokenCostAbove1hr: 5e-6,
|
||||
CacheReadInputTokenCost: 7e-7,
|
||||
CacheReadInputTokenCostPriority: 8e-7,
|
||||
LongContextInputTokenThreshold: 999,
|
||||
LongContextInputCostMultiplier: 1.5,
|
||||
LongContextOutputCostMultiplier: 1.25,
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
pricing, err := svc.GetModelPricing("dynamic-tier-model")
|
||||
require.NoError(t, err)
|
||||
require.InDelta(t, 1e-6, pricing.InputPricePerToken, 1e-12)
|
||||
require.InDelta(t, 2e-6, pricing.InputPricePerTokenPriority, 1e-12)
|
||||
require.InDelta(t, 3e-6, pricing.OutputPricePerToken, 1e-12)
|
||||
require.InDelta(t, 6e-6, pricing.OutputPricePerTokenPriority, 1e-12)
|
||||
require.InDelta(t, 4e-6, pricing.CacheCreation5mPrice, 1e-12)
|
||||
require.InDelta(t, 5e-6, pricing.CacheCreation1hPrice, 1e-12)
|
||||
require.True(t, pricing.SupportsCacheBreakdown)
|
||||
require.InDelta(t, 7e-7, pricing.CacheReadPricePerToken, 1e-12)
|
||||
require.InDelta(t, 8e-7, pricing.CacheReadPricePerTokenPriority, 1e-12)
|
||||
require.Equal(t, 999, pricing.LongContextInputThreshold)
|
||||
require.InDelta(t, 1.5, pricing.LongContextInputMultiplier, 1e-12)
|
||||
require.InDelta(t, 1.25, pricing.LongContextOutputMultiplier, 1e-12)
|
||||
}
|
||||
|
||||
@@ -334,3 +334,225 @@ func TestOpenAIGatewayServiceRecordUsage_ClampsActualInputTokensToZero(t *testin
|
||||
require.NotNil(t, usageRepo.lastLog)
|
||||
require.Equal(t, 0, usageRepo.lastLog.InputTokens)
|
||||
}
|
||||
|
||||
func TestOpenAIGatewayServiceRecordUsage_Gpt54LongContextBillsWholeSession(t *testing.T) {
|
||||
usageRepo := &openAIRecordUsageLogRepoStub{inserted: true}
|
||||
userRepo := &openAIRecordUsageUserRepoStub{}
|
||||
subRepo := &openAIRecordUsageSubRepoStub{}
|
||||
svc := newOpenAIRecordUsageServiceForTest(usageRepo, userRepo, subRepo, nil)
|
||||
|
||||
err := svc.RecordUsage(context.Background(), &OpenAIRecordUsageInput{
|
||||
Result: &OpenAIForwardResult{
|
||||
RequestID: "resp_gpt54_long_context",
|
||||
Usage: OpenAIUsage{
|
||||
InputTokens: 300000,
|
||||
OutputTokens: 2000,
|
||||
},
|
||||
Model: "gpt-5.4-2026-03-05",
|
||||
Duration: time.Second,
|
||||
},
|
||||
APIKey: &APIKey{ID: 1014},
|
||||
User: &User{ID: 2014},
|
||||
Account: &Account{ID: 3014},
|
||||
})
|
||||
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, usageRepo.lastLog)
|
||||
|
||||
expectedInput := 300000 * 2.5e-6 * 2.0
|
||||
expectedOutput := 2000 * 15e-6 * 1.5
|
||||
require.InDelta(t, expectedInput, usageRepo.lastLog.InputCost, 1e-10)
|
||||
require.InDelta(t, expectedOutput, usageRepo.lastLog.OutputCost, 1e-10)
|
||||
require.InDelta(t, expectedInput+expectedOutput, usageRepo.lastLog.TotalCost, 1e-10)
|
||||
require.InDelta(t, (expectedInput+expectedOutput)*1.1, usageRepo.lastLog.ActualCost, 1e-10)
|
||||
require.Equal(t, 1, userRepo.deductCalls)
|
||||
}
|
||||
|
||||
func TestOpenAIGatewayServiceRecordUsage_ServiceTierPriorityUsesFastPricing(t *testing.T) {
|
||||
usageRepo := &openAIRecordUsageLogRepoStub{inserted: true}
|
||||
userRepo := &openAIRecordUsageUserRepoStub{}
|
||||
subRepo := &openAIRecordUsageSubRepoStub{}
|
||||
svc := newOpenAIRecordUsageServiceForTest(usageRepo, userRepo, subRepo, nil)
|
||||
serviceTier := "priority"
|
||||
usage := OpenAIUsage{InputTokens: 100, OutputTokens: 50}
|
||||
|
||||
err := svc.RecordUsage(context.Background(), &OpenAIRecordUsageInput{
|
||||
Result: &OpenAIForwardResult{
|
||||
RequestID: "resp_service_tier_priority",
|
||||
ServiceTier: &serviceTier,
|
||||
Usage: usage,
|
||||
Model: "gpt-5.4",
|
||||
Duration: time.Second,
|
||||
},
|
||||
APIKey: &APIKey{ID: 1015},
|
||||
User: &User{ID: 2015},
|
||||
Account: &Account{ID: 3015},
|
||||
})
|
||||
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, usageRepo.lastLog)
|
||||
require.NotNil(t, usageRepo.lastLog.ServiceTier)
|
||||
require.Equal(t, serviceTier, *usageRepo.lastLog.ServiceTier)
|
||||
|
||||
baseCost, calcErr := svc.billingService.CalculateCost("gpt-5.4", UsageTokens{InputTokens: 100, OutputTokens: 50}, 1.0)
|
||||
require.NoError(t, calcErr)
|
||||
require.InDelta(t, baseCost.TotalCost*2, usageRepo.lastLog.TotalCost, 1e-10)
|
||||
}
|
||||
|
||||
func TestOpenAIGatewayServiceRecordUsage_ServiceTierFlexHalvesCost(t *testing.T) {
|
||||
usageRepo := &openAIRecordUsageLogRepoStub{inserted: true}
|
||||
userRepo := &openAIRecordUsageUserRepoStub{}
|
||||
subRepo := &openAIRecordUsageSubRepoStub{}
|
||||
svc := newOpenAIRecordUsageServiceForTest(usageRepo, userRepo, subRepo, nil)
|
||||
serviceTier := "flex"
|
||||
usage := OpenAIUsage{InputTokens: 100, OutputTokens: 50, CacheReadInputTokens: 20}
|
||||
|
||||
err := svc.RecordUsage(context.Background(), &OpenAIRecordUsageInput{
|
||||
Result: &OpenAIForwardResult{
|
||||
RequestID: "resp_service_tier_flex",
|
||||
ServiceTier: &serviceTier,
|
||||
Usage: usage,
|
||||
Model: "gpt-5.4",
|
||||
Duration: time.Second,
|
||||
},
|
||||
APIKey: &APIKey{ID: 1016},
|
||||
User: &User{ID: 2016},
|
||||
Account: &Account{ID: 3016},
|
||||
})
|
||||
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, usageRepo.lastLog)
|
||||
|
||||
baseCost, calcErr := svc.billingService.CalculateCost("gpt-5.4", UsageTokens{InputTokens: 80, OutputTokens: 50, CacheReadTokens: 20}, 1.0)
|
||||
require.NoError(t, calcErr)
|
||||
require.InDelta(t, baseCost.TotalCost*0.5, usageRepo.lastLog.TotalCost, 1e-10)
|
||||
}
|
||||
|
||||
func TestNormalizeOpenAIServiceTier(t *testing.T) {
|
||||
t.Run("fast maps to priority", func(t *testing.T) {
|
||||
got := normalizeOpenAIServiceTier(" fast ")
|
||||
require.NotNil(t, got)
|
||||
require.Equal(t, "priority", *got)
|
||||
})
|
||||
|
||||
t.Run("default ignored", func(t *testing.T) {
|
||||
require.Nil(t, normalizeOpenAIServiceTier("default"))
|
||||
})
|
||||
|
||||
t.Run("invalid ignored", func(t *testing.T) {
|
||||
require.Nil(t, normalizeOpenAIServiceTier("turbo"))
|
||||
})
|
||||
}
|
||||
|
||||
func TestExtractOpenAIServiceTier(t *testing.T) {
|
||||
require.Equal(t, "priority", *extractOpenAIServiceTier(map[string]any{"service_tier": "fast"}))
|
||||
require.Equal(t, "flex", *extractOpenAIServiceTier(map[string]any{"service_tier": "flex"}))
|
||||
require.Nil(t, extractOpenAIServiceTier(map[string]any{"service_tier": 1}))
|
||||
require.Nil(t, extractOpenAIServiceTier(nil))
|
||||
}
|
||||
|
||||
func TestExtractOpenAIServiceTierFromBody(t *testing.T) {
|
||||
require.Equal(t, "priority", *extractOpenAIServiceTierFromBody([]byte(`{"service_tier":"fast"}`)))
|
||||
require.Equal(t, "flex", *extractOpenAIServiceTierFromBody([]byte(`{"service_tier":"flex"}`)))
|
||||
require.Nil(t, extractOpenAIServiceTierFromBody([]byte(`{"service_tier":"default"}`)))
|
||||
require.Nil(t, extractOpenAIServiceTierFromBody(nil))
|
||||
}
|
||||
|
||||
func TestOpenAIGatewayServiceRecordUsage_UsesBillingModelAndMetadataFields(t *testing.T) {
|
||||
usageRepo := &openAIRecordUsageLogRepoStub{inserted: true}
|
||||
userRepo := &openAIRecordUsageUserRepoStub{}
|
||||
subRepo := &openAIRecordUsageSubRepoStub{}
|
||||
svc := newOpenAIRecordUsageServiceForTest(usageRepo, userRepo, subRepo, nil)
|
||||
serviceTier := "priority"
|
||||
reasoning := "high"
|
||||
|
||||
err := svc.RecordUsage(context.Background(), &OpenAIRecordUsageInput{
|
||||
Result: &OpenAIForwardResult{
|
||||
RequestID: "resp_billing_model_override",
|
||||
BillingModel: "gpt-5.1-codex",
|
||||
Model: "gpt-5.1",
|
||||
ServiceTier: &serviceTier,
|
||||
ReasoningEffort: &reasoning,
|
||||
Usage: OpenAIUsage{
|
||||
InputTokens: 20,
|
||||
OutputTokens: 10,
|
||||
},
|
||||
Duration: 2 * time.Second,
|
||||
FirstTokenMs: func() *int { v := 120; return &v }(),
|
||||
},
|
||||
APIKey: &APIKey{ID: 10, GroupID: i64p(11), Group: &Group{ID: 11, RateMultiplier: 1.2}},
|
||||
User: &User{ID: 20},
|
||||
Account: &Account{ID: 30},
|
||||
UserAgent: "codex-cli/1.0",
|
||||
IPAddress: "127.0.0.1",
|
||||
})
|
||||
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, usageRepo.lastLog)
|
||||
require.Equal(t, "gpt-5.1-codex", usageRepo.lastLog.Model)
|
||||
require.NotNil(t, usageRepo.lastLog.ServiceTier)
|
||||
require.Equal(t, serviceTier, *usageRepo.lastLog.ServiceTier)
|
||||
require.NotNil(t, usageRepo.lastLog.ReasoningEffort)
|
||||
require.Equal(t, reasoning, *usageRepo.lastLog.ReasoningEffort)
|
||||
require.NotNil(t, usageRepo.lastLog.UserAgent)
|
||||
require.Equal(t, "codex-cli/1.0", *usageRepo.lastLog.UserAgent)
|
||||
require.NotNil(t, usageRepo.lastLog.IPAddress)
|
||||
require.Equal(t, "127.0.0.1", *usageRepo.lastLog.IPAddress)
|
||||
require.NotNil(t, usageRepo.lastLog.GroupID)
|
||||
require.Equal(t, int64(11), *usageRepo.lastLog.GroupID)
|
||||
require.Equal(t, 1, userRepo.deductCalls)
|
||||
}
|
||||
|
||||
func TestOpenAIGatewayServiceRecordUsage_SubscriptionBillingSetsSubscriptionFields(t *testing.T) {
|
||||
usageRepo := &openAIRecordUsageLogRepoStub{inserted: true}
|
||||
userRepo := &openAIRecordUsageUserRepoStub{}
|
||||
subRepo := &openAIRecordUsageSubRepoStub{}
|
||||
svc := newOpenAIRecordUsageServiceForTest(usageRepo, userRepo, subRepo, nil)
|
||||
subscription := &UserSubscription{ID: 99}
|
||||
|
||||
err := svc.RecordUsage(context.Background(), &OpenAIRecordUsageInput{
|
||||
Result: &OpenAIForwardResult{
|
||||
RequestID: "resp_subscription_billing",
|
||||
Usage: OpenAIUsage{InputTokens: 10, OutputTokens: 5},
|
||||
Model: "gpt-5.1",
|
||||
Duration: time.Second,
|
||||
},
|
||||
APIKey: &APIKey{ID: 100, GroupID: i64p(88), Group: &Group{ID: 88, SubscriptionType: SubscriptionTypeSubscription}},
|
||||
User: &User{ID: 200},
|
||||
Account: &Account{ID: 300},
|
||||
Subscription: subscription,
|
||||
})
|
||||
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, usageRepo.lastLog)
|
||||
require.Equal(t, BillingTypeSubscription, usageRepo.lastLog.BillingType)
|
||||
require.NotNil(t, usageRepo.lastLog.SubscriptionID)
|
||||
require.Equal(t, subscription.ID, *usageRepo.lastLog.SubscriptionID)
|
||||
require.Equal(t, 1, subRepo.incrementCalls)
|
||||
require.Equal(t, 0, userRepo.deductCalls)
|
||||
}
|
||||
|
||||
func TestOpenAIGatewayServiceRecordUsage_SimpleModeSkipsBillingAfterPersist(t *testing.T) {
|
||||
usageRepo := &openAIRecordUsageLogRepoStub{inserted: true}
|
||||
userRepo := &openAIRecordUsageUserRepoStub{}
|
||||
subRepo := &openAIRecordUsageSubRepoStub{}
|
||||
svc := newOpenAIRecordUsageServiceForTest(usageRepo, userRepo, subRepo, nil)
|
||||
svc.cfg.RunMode = config.RunModeSimple
|
||||
|
||||
err := svc.RecordUsage(context.Background(), &OpenAIRecordUsageInput{
|
||||
Result: &OpenAIForwardResult{
|
||||
RequestID: "resp_simple_mode",
|
||||
Usage: OpenAIUsage{InputTokens: 10, OutputTokens: 5},
|
||||
Model: "gpt-5.1",
|
||||
Duration: time.Second,
|
||||
},
|
||||
APIKey: &APIKey{ID: 1000},
|
||||
User: &User{ID: 2000},
|
||||
Account: &Account{ID: 3000},
|
||||
})
|
||||
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, 1, usageRepo.calls)
|
||||
require.Equal(t, 0, userRepo.deductCalls)
|
||||
require.Equal(t, 0, subRepo.incrementCalls)
|
||||
}
|
||||
|
||||
@@ -213,6 +213,9 @@ type OpenAIForwardResult struct {
|
||||
// This is set by the Anthropic Messages conversion path where
|
||||
// the mapped upstream model differs from the client-facing model.
|
||||
BillingModel string
|
||||
// ServiceTier records the OpenAI Responses API service tier, e.g. "priority" / "flex".
|
||||
// Nil means the request did not specify a recognized tier.
|
||||
ServiceTier *string
|
||||
// ReasoningEffort is extracted from request body (reasoning.effort) or derived from model suffix.
|
||||
// Stored for usage records display; nil means not provided / not applicable.
|
||||
ReasoningEffort *string
|
||||
@@ -2036,11 +2039,13 @@ func (s *OpenAIGatewayService) Forward(ctx context.Context, c *gin.Context, acco
|
||||
}
|
||||
|
||||
reasoningEffort := extractOpenAIReasoningEffort(reqBody, originalModel)
|
||||
serviceTier := extractOpenAIServiceTier(reqBody)
|
||||
|
||||
return &OpenAIForwardResult{
|
||||
RequestID: resp.Header.Get("x-request-id"),
|
||||
Usage: *usage,
|
||||
Model: originalModel,
|
||||
ServiceTier: serviceTier,
|
||||
ReasoningEffort: reasoningEffort,
|
||||
Stream: reqStream,
|
||||
OpenAIWSMode: false,
|
||||
@@ -2195,6 +2200,7 @@ func (s *OpenAIGatewayService) forwardOpenAIPassthrough(
|
||||
RequestID: resp.Header.Get("x-request-id"),
|
||||
Usage: *usage,
|
||||
Model: reqModel,
|
||||
ServiceTier: extractOpenAIServiceTierFromBody(body),
|
||||
ReasoningEffort: reasoningEffort,
|
||||
Stream: reqStream,
|
||||
OpenAIWSMode: false,
|
||||
@@ -3628,7 +3634,11 @@ func (s *OpenAIGatewayService) RecordUsage(ctx context.Context, input *OpenAIRec
|
||||
if result.BillingModel != "" {
|
||||
billingModel = result.BillingModel
|
||||
}
|
||||
cost, err := s.billingService.CalculateCost(billingModel, tokens, multiplier)
|
||||
serviceTier := ""
|
||||
if result.ServiceTier != nil {
|
||||
serviceTier = strings.TrimSpace(*result.ServiceTier)
|
||||
}
|
||||
cost, err := s.billingService.CalculateCostWithServiceTier(billingModel, tokens, multiplier, serviceTier)
|
||||
if err != nil {
|
||||
cost = &CostBreakdown{ActualCost: 0}
|
||||
}
|
||||
@@ -3649,6 +3659,7 @@ func (s *OpenAIGatewayService) RecordUsage(ctx context.Context, input *OpenAIRec
|
||||
AccountID: account.ID,
|
||||
RequestID: result.RequestID,
|
||||
Model: billingModel,
|
||||
ServiceTier: result.ServiceTier,
|
||||
ReasoningEffort: result.ReasoningEffort,
|
||||
InputTokens: actualInputTokens,
|
||||
OutputTokens: result.Usage.OutputTokens,
|
||||
@@ -4047,6 +4058,40 @@ func extractOpenAIReasoningEffortFromBody(body []byte, requestedModel string) *s
|
||||
return &value
|
||||
}
|
||||
|
||||
func extractOpenAIServiceTier(reqBody map[string]any) *string {
|
||||
if reqBody == nil {
|
||||
return nil
|
||||
}
|
||||
raw, ok := reqBody["service_tier"].(string)
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
return normalizeOpenAIServiceTier(raw)
|
||||
}
|
||||
|
||||
func extractOpenAIServiceTierFromBody(body []byte) *string {
|
||||
if len(body) == 0 {
|
||||
return nil
|
||||
}
|
||||
return normalizeOpenAIServiceTier(gjson.GetBytes(body, "service_tier").String())
|
||||
}
|
||||
|
||||
// normalizeOpenAIServiceTier canonicalizes a raw service_tier value for
// billing. "fast" is treated as an alias for "priority"; only "priority" and
// "flex" are recognized. Anything else — including "default", empty, or
// unknown tiers — yields nil so billing falls back to standard pricing.
func normalizeOpenAIServiceTier(raw string) *string {
	switch strings.ToLower(strings.TrimSpace(raw)) {
	case "fast", "priority":
		tier := "priority"
		return &tier
	case "flex":
		tier := "flex"
		return &tier
	default:
		return nil
	}
}
|
||||
|
||||
func getOpenAIRequestBodyMap(c *gin.Context, body []byte) (map[string]any, error) {
|
||||
if c != nil {
|
||||
if cached, ok := c.Get(OpenAIParsedRequestBodyKey); ok {
|
||||
|
||||
@@ -671,7 +671,7 @@ func TestOpenAIGatewayService_OAuthPassthrough_StreamingSetsFirstTokenMs(t *test
|
||||
c.Request = httptest.NewRequest(http.MethodPost, "/v1/responses", bytes.NewReader(nil))
|
||||
c.Request.Header.Set("User-Agent", "codex_cli_rs/0.1.0")
|
||||
|
||||
originalBody := []byte(`{"model":"gpt-5.2","stream":true,"input":[{"type":"text","text":"hi"}]}`)
|
||||
originalBody := []byte(`{"model":"gpt-5.2","stream":true,"service_tier":"fast","input":[{"type":"text","text":"hi"}]}`)
|
||||
|
||||
upstreamSSE := strings.Join([]string{
|
||||
`data: {"type":"response.output_text.delta","delta":"h"}`,
|
||||
@@ -711,6 +711,8 @@ func TestOpenAIGatewayService_OAuthPassthrough_StreamingSetsFirstTokenMs(t *test
|
||||
require.GreaterOrEqual(t, time.Since(start), time.Duration(0))
|
||||
require.NotNil(t, result.FirstTokenMs)
|
||||
require.GreaterOrEqual(t, *result.FirstTokenMs, 0)
|
||||
require.NotNil(t, result.ServiceTier)
|
||||
require.Equal(t, "priority", *result.ServiceTier)
|
||||
}
|
||||
|
||||
func TestOpenAIGatewayService_OAuthPassthrough_StreamClientDisconnectStillCollectsUsage(t *testing.T) {
|
||||
@@ -777,7 +779,7 @@ func TestOpenAIGatewayService_APIKeyPassthrough_PreservesBodyAndUsesResponsesEnd
|
||||
c.Request.Header.Set("User-Agent", "curl/8.0")
|
||||
c.Request.Header.Set("X-Test", "keep")
|
||||
|
||||
originalBody := []byte(`{"model":"gpt-5.2","stream":false,"max_output_tokens":128,"input":[{"type":"text","text":"hi"}]}`)
|
||||
originalBody := []byte(`{"model":"gpt-5.2","stream":false,"service_tier":"flex","max_output_tokens":128,"input":[{"type":"text","text":"hi"}]}`)
|
||||
resp := &http.Response{
|
||||
StatusCode: http.StatusOK,
|
||||
Header: http.Header{"Content-Type": []string{"application/json"}, "x-request-id": []string{"rid"}},
|
||||
@@ -803,8 +805,11 @@ func TestOpenAIGatewayService_APIKeyPassthrough_PreservesBodyAndUsesResponsesEnd
|
||||
RateMultiplier: f64p(1),
|
||||
}
|
||||
|
||||
_, err := svc.Forward(context.Background(), c, account, originalBody)
|
||||
result, err := svc.Forward(context.Background(), c, account, originalBody)
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, result)
|
||||
require.NotNil(t, result.ServiceTier)
|
||||
require.Equal(t, "flex", *result.ServiceTier)
|
||||
require.NotNil(t, upstream.lastReq)
|
||||
require.Equal(t, originalBody, upstream.lastBody)
|
||||
require.Equal(t, "https://api.openai.com/v1/responses", upstream.lastReq.URL.String())
|
||||
|
||||
@@ -2302,6 +2302,7 @@ func (s *OpenAIGatewayService) forwardOpenAIWSV2(
|
||||
RequestID: responseID,
|
||||
Usage: *usage,
|
||||
Model: originalModel,
|
||||
ServiceTier: extractOpenAIServiceTier(reqBody),
|
||||
ReasoningEffort: extractOpenAIReasoningEffort(reqBody, originalModel),
|
||||
Stream: reqStream,
|
||||
OpenAIWSMode: true,
|
||||
@@ -2913,6 +2914,7 @@ func (s *OpenAIGatewayService) ProxyResponsesWebSocketFromClient(
|
||||
RequestID: responseID,
|
||||
Usage: usage,
|
||||
Model: originalModel,
|
||||
ServiceTier: extractOpenAIServiceTierFromBody(payload),
|
||||
ReasoningEffort: extractOpenAIReasoningEffortFromBody(payload, originalModel),
|
||||
Stream: reqStream,
|
||||
OpenAIWSMode: true,
|
||||
|
||||
@@ -399,7 +399,7 @@ func TestOpenAIGatewayService_ProxyResponsesWebSocketFromClient_PassthroughModeR
|
||||
}()
|
||||
|
||||
writeCtx, cancelWrite := context.WithTimeout(context.Background(), 3*time.Second)
|
||||
err = clientConn.Write(writeCtx, coderws.MessageText, []byte(`{"type":"response.create","model":"gpt-5.1","stream":false}`))
|
||||
err = clientConn.Write(writeCtx, coderws.MessageText, []byte(`{"type":"response.create","model":"gpt-5.1","stream":false,"service_tier":"fast"}`))
|
||||
cancelWrite()
|
||||
require.NoError(t, err)
|
||||
|
||||
@@ -424,6 +424,8 @@ func TestOpenAIGatewayService_ProxyResponsesWebSocketFromClient_PassthroughModeR
|
||||
require.True(t, result.OpenAIWSMode)
|
||||
require.Equal(t, 2, result.Usage.InputTokens)
|
||||
require.Equal(t, 3, result.Usage.OutputTokens)
|
||||
require.NotNil(t, result.ServiceTier)
|
||||
require.Equal(t, "priority", *result.ServiceTier)
|
||||
case <-time.After(2 * time.Second):
|
||||
t.Fatal("未收到 passthrough turn 结果回调")
|
||||
}
|
||||
@@ -2593,7 +2595,7 @@ func TestOpenAIGatewayService_ProxyResponsesWebSocketFromClient_ClientDisconnect
|
||||
require.NoError(t, err)
|
||||
|
||||
writeCtx, cancelWrite := context.WithTimeout(context.Background(), 3*time.Second)
|
||||
err = clientConn.Write(writeCtx, coderws.MessageText, []byte(`{"type":"response.create","model":"custom-original-model","stream":false}`))
|
||||
err = clientConn.Write(writeCtx, coderws.MessageText, []byte(`{"type":"response.create","model":"custom-original-model","stream":false,"service_tier":"flex"}`))
|
||||
cancelWrite()
|
||||
require.NoError(t, err)
|
||||
// 立即关闭客户端,模拟客户端在 relay 期间断连。
|
||||
@@ -2611,6 +2613,8 @@ func TestOpenAIGatewayService_ProxyResponsesWebSocketFromClient_ClientDisconnect
|
||||
require.Equal(t, "resp_ingress_disconnect", result.RequestID)
|
||||
require.Equal(t, 2, result.Usage.InputTokens)
|
||||
require.Equal(t, 1, result.Usage.OutputTokens)
|
||||
require.NotNil(t, result.ServiceTier)
|
||||
require.Equal(t, "flex", *result.ServiceTier)
|
||||
case <-time.After(2 * time.Second):
|
||||
t.Fatal("未收到断连后的 turn 结果回调")
|
||||
}
|
||||
|
||||
@@ -77,6 +77,7 @@ func (s *OpenAIGatewayService) proxyResponsesWebSocketV2Passthrough(
|
||||
return errors.New("token is empty")
|
||||
}
|
||||
requestModel := strings.TrimSpace(gjson.GetBytes(firstClientMessage, "model").String())
|
||||
requestServiceTier := extractOpenAIServiceTierFromBody(firstClientMessage)
|
||||
requestPreviousResponseID := strings.TrimSpace(gjson.GetBytes(firstClientMessage, "previous_response_id").String())
|
||||
logOpenAIWSV2Passthrough(
|
||||
"relay_start account_id=%d model=%s previous_response_id=%s first_message_type=%s first_message_bytes=%d",
|
||||
@@ -178,6 +179,7 @@ func (s *OpenAIGatewayService) proxyResponsesWebSocketV2Passthrough(
|
||||
CacheReadInputTokens: turn.Usage.CacheReadInputTokens,
|
||||
},
|
||||
Model: turn.RequestModel,
|
||||
ServiceTier: requestServiceTier,
|
||||
Stream: true,
|
||||
OpenAIWSMode: true,
|
||||
ResponseHeaders: cloneHeader(handshakeHeaders),
|
||||
@@ -225,6 +227,7 @@ func (s *OpenAIGatewayService) proxyResponsesWebSocketV2Passthrough(
|
||||
CacheReadInputTokens: relayResult.Usage.CacheReadInputTokens,
|
||||
},
|
||||
Model: relayResult.RequestModel,
|
||||
ServiceTier: requestServiceTier,
|
||||
Stream: true,
|
||||
OpenAIWSMode: true,
|
||||
ResponseHeaders: cloneHeader(handshakeHeaders),
|
||||
|
||||
@@ -40,13 +40,17 @@ var (
|
||||
// 只保留我们需要的字段,使用指针来处理可能缺失的值
|
||||
type LiteLLMModelPricing struct {
|
||||
InputCostPerToken float64 `json:"input_cost_per_token"`
|
||||
InputCostPerTokenPriority float64 `json:"input_cost_per_token_priority"`
|
||||
OutputCostPerToken float64 `json:"output_cost_per_token"`
|
||||
OutputCostPerTokenPriority float64 `json:"output_cost_per_token_priority"`
|
||||
CacheCreationInputTokenCost float64 `json:"cache_creation_input_token_cost"`
|
||||
CacheCreationInputTokenCostAbove1hr float64 `json:"cache_creation_input_token_cost_above_1hr"`
|
||||
CacheReadInputTokenCost float64 `json:"cache_read_input_token_cost"`
|
||||
CacheReadInputTokenCostPriority float64 `json:"cache_read_input_token_cost_priority"`
|
||||
LongContextInputTokenThreshold int `json:"long_context_input_token_threshold,omitempty"`
|
||||
LongContextInputCostMultiplier float64 `json:"long_context_input_cost_multiplier,omitempty"`
|
||||
LongContextOutputCostMultiplier float64 `json:"long_context_output_cost_multiplier,omitempty"`
|
||||
SupportsServiceTier bool `json:"supports_service_tier"`
|
||||
LiteLLMProvider string `json:"litellm_provider"`
|
||||
Mode string `json:"mode"`
|
||||
SupportsPromptCaching bool `json:"supports_prompt_caching"`
|
||||
@@ -62,10 +66,14 @@ type PricingRemoteClient interface {
|
||||
// LiteLLMRawEntry 用于解析原始JSON数据
|
||||
type LiteLLMRawEntry struct {
|
||||
InputCostPerToken *float64 `json:"input_cost_per_token"`
|
||||
InputCostPerTokenPriority *float64 `json:"input_cost_per_token_priority"`
|
||||
OutputCostPerToken *float64 `json:"output_cost_per_token"`
|
||||
OutputCostPerTokenPriority *float64 `json:"output_cost_per_token_priority"`
|
||||
CacheCreationInputTokenCost *float64 `json:"cache_creation_input_token_cost"`
|
||||
CacheCreationInputTokenCostAbove1hr *float64 `json:"cache_creation_input_token_cost_above_1hr"`
|
||||
CacheReadInputTokenCost *float64 `json:"cache_read_input_token_cost"`
|
||||
CacheReadInputTokenCostPriority *float64 `json:"cache_read_input_token_cost_priority"`
|
||||
SupportsServiceTier bool `json:"supports_service_tier"`
|
||||
LiteLLMProvider string `json:"litellm_provider"`
|
||||
Mode string `json:"mode"`
|
||||
SupportsPromptCaching bool `json:"supports_prompt_caching"`
|
||||
@@ -324,14 +332,21 @@ func (s *PricingService) parsePricingData(body []byte) (map[string]*LiteLLMModel
|
||||
LiteLLMProvider: entry.LiteLLMProvider,
|
||||
Mode: entry.Mode,
|
||||
SupportsPromptCaching: entry.SupportsPromptCaching,
|
||||
SupportsServiceTier: entry.SupportsServiceTier,
|
||||
}
|
||||
|
||||
if entry.InputCostPerToken != nil {
|
||||
pricing.InputCostPerToken = *entry.InputCostPerToken
|
||||
}
|
||||
if entry.InputCostPerTokenPriority != nil {
|
||||
pricing.InputCostPerTokenPriority = *entry.InputCostPerTokenPriority
|
||||
}
|
||||
if entry.OutputCostPerToken != nil {
|
||||
pricing.OutputCostPerToken = *entry.OutputCostPerToken
|
||||
}
|
||||
if entry.OutputCostPerTokenPriority != nil {
|
||||
pricing.OutputCostPerTokenPriority = *entry.OutputCostPerTokenPriority
|
||||
}
|
||||
if entry.CacheCreationInputTokenCost != nil {
|
||||
pricing.CacheCreationInputTokenCost = *entry.CacheCreationInputTokenCost
|
||||
}
|
||||
@@ -341,6 +356,9 @@ func (s *PricingService) parsePricingData(body []byte) (map[string]*LiteLLMModel
|
||||
if entry.CacheReadInputTokenCost != nil {
|
||||
pricing.CacheReadInputTokenCost = *entry.CacheReadInputTokenCost
|
||||
}
|
||||
if entry.CacheReadInputTokenCostPriority != nil {
|
||||
pricing.CacheReadInputTokenCostPriority = *entry.CacheReadInputTokenCostPriority
|
||||
}
|
||||
if entry.OutputCostPerImage != nil {
|
||||
pricing.OutputCostPerImage = *entry.OutputCostPerImage
|
||||
}
|
||||
|
||||
@@ -1,11 +1,40 @@
|
||||
package service
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestParsePricingData_ParsesPriorityAndServiceTierFields(t *testing.T) {
|
||||
svc := &PricingService{}
|
||||
body := []byte(`{
|
||||
"gpt-5.4": {
|
||||
"input_cost_per_token": 0.0000025,
|
||||
"input_cost_per_token_priority": 0.000005,
|
||||
"output_cost_per_token": 0.000015,
|
||||
"output_cost_per_token_priority": 0.00003,
|
||||
"cache_creation_input_token_cost": 0.0000025,
|
||||
"cache_read_input_token_cost": 0.00000025,
|
||||
"cache_read_input_token_cost_priority": 0.0000005,
|
||||
"supports_service_tier": true,
|
||||
"supports_prompt_caching": true,
|
||||
"litellm_provider": "openai",
|
||||
"mode": "chat"
|
||||
}
|
||||
}`)
|
||||
|
||||
data, err := svc.parsePricingData(body)
|
||||
require.NoError(t, err)
|
||||
pricing := data["gpt-5.4"]
|
||||
require.NotNil(t, pricing)
|
||||
require.InDelta(t, 5e-6, pricing.InputCostPerTokenPriority, 1e-12)
|
||||
require.InDelta(t, 3e-5, pricing.OutputCostPerTokenPriority, 1e-12)
|
||||
require.InDelta(t, 5e-7, pricing.CacheReadInputTokenCostPriority, 1e-12)
|
||||
require.True(t, pricing.SupportsServiceTier)
|
||||
}
|
||||
|
||||
func TestGetModelPricing_Gpt53CodexSparkUsesGpt51CodexPricing(t *testing.T) {
|
||||
sparkPricing := &LiteLLMModelPricing{InputCostPerToken: 1}
|
||||
gpt53Pricing := &LiteLLMModelPricing{InputCostPerToken: 9}
|
||||
@@ -68,3 +97,64 @@ func TestGetModelPricing_Gpt54UsesStaticFallbackWhenRemoteMissing(t *testing.T)
|
||||
require.InDelta(t, 2.0, got.LongContextInputCostMultiplier, 1e-12)
|
||||
require.InDelta(t, 1.5, got.LongContextOutputCostMultiplier, 1e-12)
|
||||
}
|
||||
|
||||
func TestParsePricingData_PreservesPriorityAndServiceTierFields(t *testing.T) {
|
||||
raw := map[string]any{
|
||||
"gpt-5.4": map[string]any{
|
||||
"input_cost_per_token": 2.5e-6,
|
||||
"input_cost_per_token_priority": 5e-6,
|
||||
"output_cost_per_token": 15e-6,
|
||||
"output_cost_per_token_priority": 30e-6,
|
||||
"cache_read_input_token_cost": 0.25e-6,
|
||||
"cache_read_input_token_cost_priority": 0.5e-6,
|
||||
"supports_service_tier": true,
|
||||
"supports_prompt_caching": true,
|
||||
"litellm_provider": "openai",
|
||||
"mode": "chat",
|
||||
},
|
||||
}
|
||||
body, err := json.Marshal(raw)
|
||||
require.NoError(t, err)
|
||||
|
||||
svc := &PricingService{}
|
||||
pricingMap, err := svc.parsePricingData(body)
|
||||
require.NoError(t, err)
|
||||
|
||||
pricing := pricingMap["gpt-5.4"]
|
||||
require.NotNil(t, pricing)
|
||||
require.InDelta(t, 2.5e-6, pricing.InputCostPerToken, 1e-12)
|
||||
require.InDelta(t, 5e-6, pricing.InputCostPerTokenPriority, 1e-12)
|
||||
require.InDelta(t, 15e-6, pricing.OutputCostPerToken, 1e-12)
|
||||
require.InDelta(t, 30e-6, pricing.OutputCostPerTokenPriority, 1e-12)
|
||||
require.InDelta(t, 0.25e-6, pricing.CacheReadInputTokenCost, 1e-12)
|
||||
require.InDelta(t, 0.5e-6, pricing.CacheReadInputTokenCostPriority, 1e-12)
|
||||
require.True(t, pricing.SupportsServiceTier)
|
||||
}
|
||||
|
||||
func TestParsePricingData_PreservesServiceTierPriorityFields(t *testing.T) {
|
||||
svc := &PricingService{}
|
||||
pricingData, err := svc.parsePricingData([]byte(`{
|
||||
"gpt-5.4": {
|
||||
"input_cost_per_token": 0.0000025,
|
||||
"input_cost_per_token_priority": 0.000005,
|
||||
"output_cost_per_token": 0.000015,
|
||||
"output_cost_per_token_priority": 0.00003,
|
||||
"cache_read_input_token_cost": 0.00000025,
|
||||
"cache_read_input_token_cost_priority": 0.0000005,
|
||||
"supports_service_tier": true,
|
||||
"litellm_provider": "openai",
|
||||
"mode": "chat"
|
||||
}
|
||||
}`))
|
||||
require.NoError(t, err)
|
||||
|
||||
pricing := pricingData["gpt-5.4"]
|
||||
require.NotNil(t, pricing)
|
||||
require.InDelta(t, 0.0000025, pricing.InputCostPerToken, 1e-12)
|
||||
require.InDelta(t, 0.000005, pricing.InputCostPerTokenPriority, 1e-12)
|
||||
require.InDelta(t, 0.000015, pricing.OutputCostPerToken, 1e-12)
|
||||
require.InDelta(t, 0.00003, pricing.OutputCostPerTokenPriority, 1e-12)
|
||||
require.InDelta(t, 0.00000025, pricing.CacheReadInputTokenCost, 1e-12)
|
||||
require.InDelta(t, 0.0000005, pricing.CacheReadInputTokenCostPriority, 1e-12)
|
||||
require.True(t, pricing.SupportsServiceTier)
|
||||
}
|
||||
|
||||
@@ -98,6 +98,8 @@ type UsageLog struct {
|
||||
AccountID int64
|
||||
RequestID string
|
||||
Model string
|
||||
// ServiceTier records the OpenAI service tier used for billing, e.g. "priority" / "flex".
|
||||
ServiceTier *string
|
||||
// ReasoningEffort is the request's reasoning effort level (OpenAI Responses API),
|
||||
// e.g. "low" / "medium" / "high" / "xhigh". Nil means not provided / not applicable.
|
||||
ReasoningEffort *string
|
||||
|
||||
5
backend/migrations/070_add_usage_log_service_tier.sql
Normal file
5
backend/migrations/070_add_usage_log_service_tier.sql
Normal file
@@ -0,0 +1,5 @@
|
||||
-- Record the OpenAI service tier ("priority" / "flex") used to bill each
-- request; NULL means the standard tier was used.
ALTER TABLE usage_logs
ADD COLUMN IF NOT EXISTS service_tier VARCHAR(16);

-- Support tier-filtered usage queries that range or order by creation time.
CREATE INDEX IF NOT EXISTS idx_usage_logs_service_tier_created_at
ON usage_logs (service_tier, created_at);
|
||||
@@ -228,6 +228,14 @@
|
||||
<span class="text-gray-400">{{ t('admin.usage.outputCost') }}</span>
|
||||
<span class="font-medium text-white">${{ tooltipData.output_cost.toFixed(6) }}</span>
|
||||
</div>
|
||||
<div v-if="tooltipData && tooltipData.input_tokens > 0" class="flex items-center justify-between gap-4">
|
||||
<span class="text-gray-400">{{ t('usage.inputTokenPrice') }}</span>
|
||||
<span class="font-medium text-sky-300">{{ formatTokenPricePerMillion(tooltipData.input_cost, tooltipData.input_tokens) }} {{ t('usage.perMillionTokens') }}</span>
|
||||
</div>
|
||||
<div v-if="tooltipData && tooltipData.output_tokens > 0" class="flex items-center justify-between gap-4">
|
||||
<span class="text-gray-400">{{ t('usage.outputTokenPrice') }}</span>
|
||||
<span class="font-medium text-violet-300">{{ formatTokenPricePerMillion(tooltipData.output_cost, tooltipData.output_tokens) }} {{ t('usage.perMillionTokens') }}</span>
|
||||
</div>
|
||||
<div v-if="tooltipData && tooltipData.cache_creation_cost > 0" class="flex items-center justify-between gap-4">
|
||||
<span class="text-gray-400">{{ t('admin.usage.cacheCreationCost') }}</span>
|
||||
<span class="font-medium text-white">${{ tooltipData.cache_creation_cost.toFixed(6) }}</span>
|
||||
@@ -238,6 +246,10 @@
|
||||
</div>
|
||||
</div>
|
||||
<!-- Rate and Summary -->
|
||||
<div class="flex items-center justify-between gap-6">
|
||||
<span class="text-gray-400">{{ t('usage.serviceTier') }}</span>
|
||||
<span class="font-semibold text-cyan-300">{{ getUsageServiceTierLabel(tooltipData?.service_tier, t) }}</span>
|
||||
</div>
|
||||
<div class="flex items-center justify-between gap-6">
|
||||
<span class="text-gray-400">{{ t('usage.rate') }}</span>
|
||||
<span class="font-semibold text-blue-400">{{ (tooltipData?.rate_multiplier || 1).toFixed(2) }}x</span>
|
||||
@@ -271,6 +283,8 @@
|
||||
import { ref } from 'vue'
|
||||
import { useI18n } from 'vue-i18n'
|
||||
import { formatDateTime, formatReasoningEffort } from '@/utils/format'
|
||||
import { formatTokenPricePerMillion } from '@/utils/usagePricing'
|
||||
import { getUsageServiceTierLabel } from '@/utils/usageServiceTier'
|
||||
import { resolveUsageRequestType } from '@/utils/usageRequestType'
|
||||
import DataTable from '@/components/common/DataTable.vue'
|
||||
import EmptyState from '@/components/common/EmptyState.vue'
|
||||
|
||||
111
frontend/src/components/admin/usage/__tests__/UsageTable.spec.ts
Normal file
111
frontend/src/components/admin/usage/__tests__/UsageTable.spec.ts
Normal file
@@ -0,0 +1,111 @@
|
||||
import { describe, expect, it, vi, beforeEach } from 'vitest'
|
||||
import { mount } from '@vue/test-utils'
|
||||
import { nextTick } from 'vue'
|
||||
|
||||
import UsageTable from '../UsageTable.vue'
|
||||
|
||||
const messages: Record<string, string> = {
|
||||
'usage.costDetails': 'Cost Breakdown',
|
||||
'admin.usage.inputCost': 'Input Cost',
|
||||
'admin.usage.outputCost': 'Output Cost',
|
||||
'admin.usage.cacheCreationCost': 'Cache Creation Cost',
|
||||
'admin.usage.cacheReadCost': 'Cache Read Cost',
|
||||
'usage.inputTokenPrice': 'Input price',
|
||||
'usage.outputTokenPrice': 'Output price',
|
||||
'usage.perMillionTokens': '/ 1M tokens',
|
||||
'usage.serviceTier': 'Service tier',
|
||||
'usage.serviceTierPriority': 'Fast',
|
||||
'usage.serviceTierFlex': 'Flex',
|
||||
'usage.serviceTierStandard': 'Standard',
|
||||
'usage.rate': 'Rate',
|
||||
'usage.accountMultiplier': 'Account rate',
|
||||
'usage.original': 'Original',
|
||||
'usage.userBilled': 'User billed',
|
||||
'usage.accountBilled': 'Account billed',
|
||||
}
|
||||
|
||||
vi.mock('vue-i18n', async () => {
|
||||
const actual = await vi.importActual<typeof import('vue-i18n')>('vue-i18n')
|
||||
return {
|
||||
...actual,
|
||||
useI18n: () => ({
|
||||
t: (key: string) => messages[key] ?? key,
|
||||
}),
|
||||
}
|
||||
})
|
||||
|
||||
const DataTableStub = {
|
||||
props: ['data'],
|
||||
template: `
|
||||
<div>
|
||||
<div v-for="row in data" :key="row.request_id">
|
||||
<slot name="cell-cost" :row="row" />
|
||||
</div>
|
||||
</div>
|
||||
`,
|
||||
}
|
||||
|
||||
describe('admin UsageTable tooltip', () => {
|
||||
beforeEach(() => {
|
||||
vi.spyOn(HTMLElement.prototype, 'getBoundingClientRect').mockReturnValue({
|
||||
x: 0,
|
||||
y: 0,
|
||||
top: 20,
|
||||
left: 20,
|
||||
right: 120,
|
||||
bottom: 40,
|
||||
width: 100,
|
||||
height: 20,
|
||||
toJSON: () => ({}),
|
||||
} as DOMRect)
|
||||
})
|
||||
|
||||
it('shows service tier and billing breakdown in cost tooltip', async () => {
|
||||
const row = {
|
||||
request_id: 'req-admin-1',
|
||||
actual_cost: 0.092883,
|
||||
total_cost: 0.092883,
|
||||
account_rate_multiplier: 1,
|
||||
rate_multiplier: 1,
|
||||
service_tier: 'priority',
|
||||
input_cost: 0.020285,
|
||||
output_cost: 0.00303,
|
||||
cache_creation_cost: 0,
|
||||
cache_read_cost: 0.069568,
|
||||
input_tokens: 4057,
|
||||
output_tokens: 101,
|
||||
}
|
||||
|
||||
const wrapper = mount(UsageTable, {
|
||||
props: {
|
||||
data: [row],
|
||||
loading: false,
|
||||
columns: [],
|
||||
},
|
||||
global: {
|
||||
stubs: {
|
||||
DataTable: DataTableStub,
|
||||
EmptyState: true,
|
||||
Icon: true,
|
||||
Teleport: true,
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
await wrapper.find('.group.relative').trigger('mouseenter')
|
||||
await nextTick()
|
||||
|
||||
const text = wrapper.text()
|
||||
expect(text).toContain('Service tier')
|
||||
expect(text).toContain('Fast')
|
||||
expect(text).toContain('Rate')
|
||||
expect(text).toContain('1.00x')
|
||||
expect(text).toContain('Account rate')
|
||||
expect(text).toContain('User billed')
|
||||
expect(text).toContain('Account billed')
|
||||
expect(text).toContain('$0.092883')
|
||||
expect(text).toContain('$5.0000 / 1M tokens')
|
||||
expect(text).toContain('$30.0000 / 1M tokens')
|
||||
expect(text).toContain('$0.069568')
|
||||
})
|
||||
})
|
||||
20
frontend/src/i18n/__tests__/usageServiceTierLocales.spec.ts
Normal file
20
frontend/src/i18n/__tests__/usageServiceTierLocales.spec.ts
Normal file
@@ -0,0 +1,20 @@
|
||||
import { describe, expect, it } from 'vitest'
|
||||
|
||||
import en from '../locales/en'
|
||||
import zh from '../locales/zh'
|
||||
|
||||
describe('usage service tier locale keys', () => {
|
||||
it('contains zh labels for service tier tooltip', () => {
|
||||
expect(zh.usage.serviceTier).toBe('服务档位')
|
||||
expect(zh.usage.serviceTierPriority).toBe('Fast')
|
||||
expect(zh.usage.serviceTierFlex).toBe('Flex')
|
||||
expect(zh.usage.serviceTierStandard).toBe('Standard')
|
||||
})
|
||||
|
||||
it('contains en labels for service tier tooltip', () => {
|
||||
expect(en.usage.serviceTier).toBe('Service tier')
|
||||
expect(en.usage.serviceTierPriority).toBe('Fast')
|
||||
expect(en.usage.serviceTierFlex).toBe('Flex')
|
||||
expect(en.usage.serviceTierStandard).toBe('Standard')
|
||||
})
|
||||
})
|
||||
@@ -722,8 +722,15 @@ export default {
|
||||
unknown: 'Unknown',
|
||||
in: 'In',
|
||||
out: 'Out',
|
||||
inputTokenPrice: 'Input price',
|
||||
outputTokenPrice: 'Output price',
|
||||
perMillionTokens: '/ 1M tokens',
|
||||
cacheRead: 'Read',
|
||||
cacheWrite: 'Write',
|
||||
serviceTier: 'Service tier',
|
||||
serviceTierPriority: 'Fast',
|
||||
serviceTierFlex: 'Flex',
|
||||
serviceTierStandard: 'Standard',
|
||||
rate: 'Rate',
|
||||
original: 'Original',
|
||||
billed: 'Billed',
|
||||
|
||||
@@ -727,8 +727,15 @@ export default {
|
||||
unknown: '未知',
|
||||
in: '输入',
|
||||
out: '输出',
|
||||
inputTokenPrice: '输入单价',
|
||||
outputTokenPrice: '输出单价',
|
||||
perMillionTokens: '/ 1M Token',
|
||||
cacheRead: '读取',
|
||||
cacheWrite: '写入',
|
||||
serviceTier: '服务档位',
|
||||
serviceTierPriority: 'Fast',
|
||||
serviceTierFlex: 'Flex',
|
||||
serviceTierStandard: 'Standard',
|
||||
rate: '倍率',
|
||||
original: '原始',
|
||||
billed: '计费',
|
||||
|
||||
@@ -927,6 +927,7 @@ export interface UsageLog {
|
||||
account_id: number | null
|
||||
request_id: string
|
||||
model: string
|
||||
service_tier?: string | null
|
||||
reasoning_effort?: string | null
|
||||
|
||||
group_id: number | null
|
||||
|
||||
39
frontend/src/utils/__tests__/usageServiceTier.spec.ts
Normal file
39
frontend/src/utils/__tests__/usageServiceTier.spec.ts
Normal file
@@ -0,0 +1,39 @@
|
||||
import { describe, expect, it } from 'vitest'
|
||||
|
||||
import { formatUsageServiceTier, getUsageServiceTierLabel, normalizeUsageServiceTier } from '@/utils/usageServiceTier'
|
||||
|
||||
describe('usageServiceTier utils', () => {
|
||||
it('normalizes fast/default aliases', () => {
|
||||
expect(normalizeUsageServiceTier('fast')).toBe('priority')
|
||||
expect(normalizeUsageServiceTier(' default ')).toBe('standard')
|
||||
expect(normalizeUsageServiceTier('STANDARD')).toBe('standard')
|
||||
})
|
||||
|
||||
it('preserves supported tiers', () => {
|
||||
expect(normalizeUsageServiceTier('priority')).toBe('priority')
|
||||
expect(normalizeUsageServiceTier('flex')).toBe('flex')
|
||||
})
|
||||
|
||||
it('formats empty values as standard', () => {
|
||||
expect(formatUsageServiceTier()).toBe('standard')
|
||||
expect(formatUsageServiceTier('')).toBe('standard')
|
||||
})
|
||||
|
||||
it('passes through unknown non-empty tiers for display fallback', () => {
|
||||
expect(normalizeUsageServiceTier('custom-tier')).toBe('custom-tier')
|
||||
expect(formatUsageServiceTier('custom-tier')).toBe('custom-tier')
|
||||
})
|
||||
|
||||
it('maps tiers to translated labels', () => {
|
||||
const translate = (key: string) => ({
|
||||
'usage.serviceTierPriority': 'Fast',
|
||||
'usage.serviceTierFlex': 'Flex',
|
||||
'usage.serviceTierStandard': 'Standard',
|
||||
})[key] ?? key
|
||||
|
||||
expect(getUsageServiceTierLabel('fast', translate)).toBe('Fast')
|
||||
expect(getUsageServiceTierLabel('flex', translate)).toBe('Flex')
|
||||
expect(getUsageServiceTierLabel(undefined, translate)).toBe('Standard')
|
||||
expect(getUsageServiceTierLabel('custom-tier', translate)).toBe('custom-tier')
|
||||
})
|
||||
})
|
||||
49
frontend/src/utils/usagePricing.ts
Normal file
49
frontend/src/utils/usagePricing.ts
Normal file
@@ -0,0 +1,49 @@
|
||||
export const TOKENS_PER_MILLION = 1_000_000
|
||||
|
||||
interface TokenPriceFormatOptions {
|
||||
fractionDigits?: number
|
||||
withCurrencySymbol?: boolean
|
||||
emptyValue?: string
|
||||
}
|
||||
|
||||
function isFiniteNumber(value: unknown): value is number {
|
||||
return typeof value === 'number' && Number.isFinite(value)
|
||||
}
|
||||
|
||||
export function calculateTokenUnitPrice(
|
||||
cost: number | null | undefined,
|
||||
tokens: number | null | undefined
|
||||
): number | null {
|
||||
if (!isFiniteNumber(cost) || !isFiniteNumber(tokens) || tokens <= 0) {
|
||||
return null
|
||||
}
|
||||
|
||||
return cost / tokens
|
||||
}
|
||||
|
||||
export function calculateTokenPricePerMillion(
|
||||
cost: number | null | undefined,
|
||||
tokens: number | null | undefined
|
||||
): number | null {
|
||||
const unitPrice = calculateTokenUnitPrice(cost, tokens)
|
||||
if (unitPrice == null) {
|
||||
return null
|
||||
}
|
||||
|
||||
return unitPrice * TOKENS_PER_MILLION
|
||||
}
|
||||
|
||||
export function formatTokenPricePerMillion(
|
||||
cost: number | null | undefined,
|
||||
tokens: number | null | undefined,
|
||||
options: TokenPriceFormatOptions = {}
|
||||
): string {
|
||||
const pricePerMillion = calculateTokenPricePerMillion(cost, tokens)
|
||||
if (pricePerMillion == null) {
|
||||
return options.emptyValue ?? '-'
|
||||
}
|
||||
|
||||
const fractionDigits = options.fractionDigits ?? 4
|
||||
const formatted = pricePerMillion.toFixed(fractionDigits)
|
||||
return options.withCurrencySymbol == false ? formatted : `$${formatted}`
|
||||
}
|
||||
25
frontend/src/utils/usageServiceTier.ts
Normal file
25
frontend/src/utils/usageServiceTier.ts
Normal file
@@ -0,0 +1,25 @@
|
||||
export function normalizeUsageServiceTier(serviceTier?: string | null): string | null {
|
||||
const value = serviceTier?.trim().toLowerCase()
|
||||
if (!value) return null
|
||||
if (value === 'fast') return 'priority'
|
||||
if (value === 'default' || value === 'standard') return 'standard'
|
||||
if (value === 'priority' || value === 'flex') return value
|
||||
return value
|
||||
}
|
||||
|
||||
export function formatUsageServiceTier(serviceTier?: string | null): string {
|
||||
const normalized = normalizeUsageServiceTier(serviceTier)
|
||||
if (!normalized) return 'standard'
|
||||
return normalized
|
||||
}
|
||||
|
||||
export function getUsageServiceTierLabel(
|
||||
serviceTier: string | null | undefined,
|
||||
translate: (key: string) => string,
|
||||
): string {
|
||||
const tier = formatUsageServiceTier(serviceTier)
|
||||
if (tier === 'priority') return translate('usage.serviceTierPriority')
|
||||
if (tier === 'flex') return translate('usage.serviceTierFlex')
|
||||
if (tier === 'standard') return translate('usage.serviceTierStandard')
|
||||
return tier
|
||||
}
|
||||
@@ -426,6 +426,14 @@
|
||||
<span class="text-gray-400">{{ t('admin.usage.outputCost') }}</span>
|
||||
<span class="font-medium text-white">${{ tooltipData.output_cost.toFixed(6) }}</span>
|
||||
</div>
|
||||
<div v-if="tooltipData && tooltipData.input_tokens > 0" class="flex items-center justify-between gap-4">
|
||||
<span class="text-gray-400">{{ t('usage.inputTokenPrice') }}</span>
|
||||
<span class="font-medium text-sky-300">{{ formatTokenPricePerMillion(tooltipData.input_cost, tooltipData.input_tokens) }} {{ t('usage.perMillionTokens') }}</span>
|
||||
</div>
|
||||
<div v-if="tooltipData && tooltipData.output_tokens > 0" class="flex items-center justify-between gap-4">
|
||||
<span class="text-gray-400">{{ t('usage.outputTokenPrice') }}</span>
|
||||
<span class="font-medium text-violet-300">{{ formatTokenPricePerMillion(tooltipData.output_cost, tooltipData.output_tokens) }} {{ t('usage.perMillionTokens') }}</span>
|
||||
</div>
|
||||
<div v-if="tooltipData && tooltipData.cache_creation_cost > 0" class="flex items-center justify-between gap-4">
|
||||
<span class="text-gray-400">{{ t('admin.usage.cacheCreationCost') }}</span>
|
||||
<span class="font-medium text-white">${{ tooltipData.cache_creation_cost.toFixed(6) }}</span>
|
||||
@@ -436,6 +444,10 @@
|
||||
</div>
|
||||
</div>
|
||||
<!-- Rate and Summary -->
|
||||
<div class="flex items-center justify-between gap-6">
|
||||
<span class="text-gray-400">{{ t('usage.serviceTier') }}</span>
|
||||
<span class="font-semibold text-cyan-300">{{ getUsageServiceTierLabel(tooltipData?.service_tier, t) }}</span>
|
||||
</div>
|
||||
<div class="flex items-center justify-between gap-6">
|
||||
<span class="text-gray-400">{{ t('usage.rate') }}</span>
|
||||
<span class="font-semibold text-blue-400"
|
||||
@@ -478,6 +490,8 @@ import Icon from '@/components/icons/Icon.vue'
|
||||
import type { UsageLog, ApiKey, UsageQueryParams, UsageStatsResponse } from '@/types'
|
||||
import type { Column } from '@/components/common/types'
|
||||
import { formatDateTime, formatReasoningEffort } from '@/utils/format'
|
||||
import { formatTokenPricePerMillion } from '@/utils/usagePricing'
|
||||
import { getUsageServiceTierLabel } from '@/utils/usageServiceTier'
|
||||
import { resolveUsageRequestType } from '@/utils/usageRequestType'
|
||||
|
||||
const { t } = useI18n()
|
||||
|
||||
266
frontend/src/views/user/__tests__/UsageView.spec.ts
Normal file
266
frontend/src/views/user/__tests__/UsageView.spec.ts
Normal file
@@ -0,0 +1,266 @@
|
||||
import { describe, expect, it, vi, beforeEach } from 'vitest'
|
||||
import { flushPromises, mount } from '@vue/test-utils'
|
||||
import { nextTick } from 'vue'
|
||||
|
||||
import UsageView from '../UsageView.vue'
|
||||
|
||||
const { query, getStatsByDateRange, list, showError, showWarning, showSuccess, showInfo } = vi.hoisted(() => ({
|
||||
query: vi.fn(),
|
||||
getStatsByDateRange: vi.fn(),
|
||||
list: vi.fn(),
|
||||
showError: vi.fn(),
|
||||
showWarning: vi.fn(),
|
||||
showSuccess: vi.fn(),
|
||||
showInfo: vi.fn(),
|
||||
}))
|
||||
|
||||
const messages: Record<string, string> = {
|
||||
'usage.costDetails': 'Cost Breakdown',
|
||||
'admin.usage.inputCost': 'Input Cost',
|
||||
'admin.usage.outputCost': 'Output Cost',
|
||||
'admin.usage.cacheCreationCost': 'Cache Creation Cost',
|
||||
'admin.usage.cacheReadCost': 'Cache Read Cost',
|
||||
'usage.inputTokenPrice': 'Input price',
|
||||
'usage.outputTokenPrice': 'Output price',
|
||||
'usage.perMillionTokens': '/ 1M tokens',
|
||||
'usage.serviceTier': 'Service tier',
|
||||
'usage.serviceTierPriority': 'Fast',
|
||||
'usage.serviceTierFlex': 'Flex',
|
||||
'usage.serviceTierStandard': 'Standard',
|
||||
'usage.rate': 'Rate',
|
||||
'usage.original': 'Original',
|
||||
'usage.billed': 'Billed',
|
||||
'usage.allApiKeys': 'All API Keys',
|
||||
'usage.apiKeyFilter': 'API Key',
|
||||
'usage.model': 'Model',
|
||||
'usage.reasoningEffort': 'Reasoning Effort',
|
||||
'usage.type': 'Type',
|
||||
'usage.tokens': 'Tokens',
|
||||
'usage.cost': 'Cost',
|
||||
'usage.firstToken': 'First Token',
|
||||
'usage.duration': 'Duration',
|
||||
'usage.time': 'Time',
|
||||
'usage.userAgent': 'User Agent',
|
||||
}
|
||||
|
||||
vi.mock('@/api', () => ({
|
||||
usageAPI: {
|
||||
query,
|
||||
getStatsByDateRange,
|
||||
},
|
||||
keysAPI: {
|
||||
list,
|
||||
},
|
||||
}))
|
||||
|
||||
vi.mock('@/stores/app', () => ({
|
||||
useAppStore: () => ({ showError, showWarning, showSuccess, showInfo }),
|
||||
}))
|
||||
|
||||
vi.mock('vue-i18n', async () => {
|
||||
const actual = await vi.importActual<typeof import('vue-i18n')>('vue-i18n')
|
||||
return {
|
||||
...actual,
|
||||
useI18n: () => ({
|
||||
t: (key: string) => messages[key] ?? key,
|
||||
}),
|
||||
}
|
||||
})
|
||||
|
||||
const AppLayoutStub = { template: '<div><slot /></div>' }
|
||||
const TablePageLayoutStub = {
|
||||
template: '<div><slot name="actions" /><slot name="filters" /><slot /></div>',
|
||||
}
|
||||
|
||||
describe('user UsageView tooltip', () => {
|
||||
beforeEach(() => {
|
||||
query.mockReset()
|
||||
getStatsByDateRange.mockReset()
|
||||
list.mockReset()
|
||||
showError.mockReset()
|
||||
showWarning.mockReset()
|
||||
showSuccess.mockReset()
|
||||
showInfo.mockReset()
|
||||
|
||||
vi.spyOn(HTMLElement.prototype, 'getBoundingClientRect').mockReturnValue({
|
||||
x: 0,
|
||||
y: 0,
|
||||
top: 20,
|
||||
left: 20,
|
||||
right: 120,
|
||||
bottom: 40,
|
||||
width: 100,
|
||||
height: 20,
|
||||
toJSON: () => ({}),
|
||||
} as DOMRect)
|
||||
|
||||
;(globalThis as any).ResizeObserver = class {
|
||||
observe() {}
|
||||
disconnect() {}
|
||||
}
|
||||
})
|
||||
|
||||
it('shows fast service tier and unit prices in user tooltip', async () => {
|
||||
query.mockResolvedValue({
|
||||
items: [
|
||||
{
|
||||
request_id: 'req-user-1',
|
||||
actual_cost: 0.092883,
|
||||
total_cost: 0.092883,
|
||||
rate_multiplier: 1,
|
||||
service_tier: 'priority',
|
||||
input_cost: 0.020285,
|
||||
output_cost: 0.00303,
|
||||
cache_creation_cost: 0,
|
||||
cache_read_cost: 0.069568,
|
||||
input_tokens: 4057,
|
||||
output_tokens: 101,
|
||||
cache_creation_tokens: 0,
|
||||
cache_read_tokens: 278272,
|
||||
cache_creation_5m_tokens: 0,
|
||||
cache_creation_1h_tokens: 0,
|
||||
image_count: 0,
|
||||
image_size: null,
|
||||
first_token_ms: null,
|
||||
duration_ms: 1,
|
||||
created_at: '2026-03-08T00:00:00Z',
|
||||
},
|
||||
],
|
||||
total: 1,
|
||||
pages: 1,
|
||||
})
|
||||
getStatsByDateRange.mockResolvedValue({
|
||||
total_requests: 1,
|
||||
total_tokens: 100,
|
||||
total_cost: 0.1,
|
||||
avg_duration_ms: 1,
|
||||
})
|
||||
list.mockResolvedValue({ items: [] })
|
||||
|
||||
const wrapper = mount(UsageView, {
|
||||
global: {
|
||||
stubs: {
|
||||
AppLayout: AppLayoutStub,
|
||||
TablePageLayout: TablePageLayoutStub,
|
||||
Pagination: true,
|
||||
EmptyState: true,
|
||||
Select: true,
|
||||
DateRangePicker: true,
|
||||
Icon: true,
|
||||
Teleport: true,
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
await flushPromises()
|
||||
await nextTick()
|
||||
|
||||
const setupState = (wrapper.vm as any).$?.setupState
|
||||
setupState.tooltipData = {
|
||||
request_id: 'req-user-1',
|
||||
actual_cost: 0.092883,
|
||||
total_cost: 0.092883,
|
||||
rate_multiplier: 1,
|
||||
service_tier: 'priority',
|
||||
input_cost: 0.020285,
|
||||
output_cost: 0.00303,
|
||||
cache_creation_cost: 0,
|
||||
cache_read_cost: 0.069568,
|
||||
input_tokens: 4057,
|
||||
output_tokens: 101,
|
||||
}
|
||||
setupState.tooltipVisible = true
|
||||
await nextTick()
|
||||
|
||||
const text = wrapper.text()
|
||||
expect(text).toContain('Service tier')
|
||||
expect(text).toContain('Fast')
|
||||
expect(text).toContain('Rate')
|
||||
expect(text).toContain('1.00x')
|
||||
expect(text).toContain('Billed')
|
||||
expect(text).toContain('$0.092883')
|
||||
expect(text).toContain('$5.0000 / 1M tokens')
|
||||
expect(text).toContain('$30.0000 / 1M tokens')
|
||||
})
|
||||
|
||||
it('exports csv with input and output unit price columns', async () => {
|
||||
const exportedLogs = [
|
||||
{
|
||||
request_id: 'req-user-export',
|
||||
actual_cost: 0.092883,
|
||||
total_cost: 0.092883,
|
||||
rate_multiplier: 1,
|
||||
service_tier: 'priority',
|
||||
input_cost: 0.020285,
|
||||
output_cost: 0.00303,
|
||||
cache_creation_cost: 0.000001,
|
||||
cache_read_cost: 0.069568,
|
||||
input_tokens: 4057,
|
||||
output_tokens: 101,
|
||||
cache_creation_tokens: 4,
|
||||
cache_read_tokens: 278272,
|
||||
cache_creation_5m_tokens: 0,
|
||||
cache_creation_1h_tokens: 0,
|
||||
image_count: 0,
|
||||
image_size: null,
|
||||
first_token_ms: 12,
|
||||
duration_ms: 345,
|
||||
created_at: '2026-03-08T00:00:00Z',
|
||||
model: 'gpt-5.4',
|
||||
reasoning_effort: null,
|
||||
api_key: { name: 'demo-key' },
|
||||
},
|
||||
]
|
||||
|
||||
query.mockResolvedValue({
|
||||
items: exportedLogs,
|
||||
total: 1,
|
||||
pages: 1,
|
||||
})
|
||||
getStatsByDateRange.mockResolvedValue({
|
||||
total_requests: 1,
|
||||
total_tokens: 100,
|
||||
total_cost: 0.1,
|
||||
avg_duration_ms: 1,
|
||||
})
|
||||
list.mockResolvedValue({ items: [] })
|
||||
|
||||
let exportedBlob: Blob | null = null
|
||||
const originalCreateObjectURL = window.URL.createObjectURL
|
||||
const originalRevokeObjectURL = window.URL.revokeObjectURL
|
||||
window.URL.createObjectURL = vi.fn((blob: Blob | MediaSource) => {
|
||||
exportedBlob = blob as Blob
|
||||
return 'blob:usage-export'
|
||||
}) as typeof window.URL.createObjectURL
|
||||
window.URL.revokeObjectURL = vi.fn(() => {}) as typeof window.URL.revokeObjectURL
|
||||
const clickSpy = vi.spyOn(HTMLAnchorElement.prototype, 'click').mockImplementation(() => {})
|
||||
|
||||
const wrapper = mount(UsageView, {
|
||||
global: {
|
||||
stubs: {
|
||||
AppLayout: AppLayoutStub,
|
||||
TablePageLayout: TablePageLayoutStub,
|
||||
Pagination: true,
|
||||
EmptyState: true,
|
||||
Select: true,
|
||||
DateRangePicker: true,
|
||||
Icon: true,
|
||||
Teleport: true,
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
await flushPromises()
|
||||
|
||||
const setupState = (wrapper.vm as any).$?.setupState
|
||||
await setupState.exportToCSV()
|
||||
|
||||
expect(exportedBlob).not.toBeNull()
|
||||
expect(clickSpy).toHaveBeenCalled()
|
||||
expect(showSuccess).toHaveBeenCalled()
|
||||
|
||||
window.URL.createObjectURL = originalCreateObjectURL
|
||||
window.URL.revokeObjectURL = originalRevokeObjectURL
|
||||
clickSpy.mockRestore()
|
||||
})
|
||||
})
|
||||
Reference in New Issue
Block a user