fix(billing): 修复 OpenAI fast 档位计费并补齐展示

- 打通 service_tier 在 OpenAI HTTP、WS、passthrough 与 usage 记录中的传递
- 修正 priority/flex 计费逻辑，并将 fast 归一化为 priority
- 在用户端和管理端补齐服务档位与计费明细展示
- 补齐前后端测试，并修复 WS 限流信号重复持久化导致的全量回归失败

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
yangjianbo
2026-03-08 23:22:28 +08:00
parent bcb6444f89
commit 87f4ed591e
29 changed files with 1417 additions and 47 deletions

View File

@@ -40,13 +40,17 @@ var (
// 只保留我们需要的字段；可能缺失的值由 LiteLLMRawEntry 的指针字段在解析时处理，缺失时此处保持零值
type LiteLLMModelPricing struct {
InputCostPerToken float64 `json:"input_cost_per_token"`
InputCostPerTokenPriority float64 `json:"input_cost_per_token_priority"`
OutputCostPerToken float64 `json:"output_cost_per_token"`
OutputCostPerTokenPriority float64 `json:"output_cost_per_token_priority"`
CacheCreationInputTokenCost float64 `json:"cache_creation_input_token_cost"`
CacheCreationInputTokenCostAbove1hr float64 `json:"cache_creation_input_token_cost_above_1hr"`
CacheReadInputTokenCost float64 `json:"cache_read_input_token_cost"`
CacheReadInputTokenCostPriority float64 `json:"cache_read_input_token_cost_priority"`
LongContextInputTokenThreshold int `json:"long_context_input_token_threshold,omitempty"`
LongContextInputCostMultiplier float64 `json:"long_context_input_cost_multiplier,omitempty"`
LongContextOutputCostMultiplier float64 `json:"long_context_output_cost_multiplier,omitempty"`
SupportsServiceTier bool `json:"supports_service_tier"`
LiteLLMProvider string `json:"litellm_provider"`
Mode string `json:"mode"`
SupportsPromptCaching bool `json:"supports_prompt_caching"`
@@ -62,10 +66,14 @@ type PricingRemoteClient interface {
// LiteLLMRawEntry 用于解析原始JSON数据
type LiteLLMRawEntry struct {
InputCostPerToken *float64 `json:"input_cost_per_token"`
InputCostPerTokenPriority *float64 `json:"input_cost_per_token_priority"`
OutputCostPerToken *float64 `json:"output_cost_per_token"`
OutputCostPerTokenPriority *float64 `json:"output_cost_per_token_priority"`
CacheCreationInputTokenCost *float64 `json:"cache_creation_input_token_cost"`
CacheCreationInputTokenCostAbove1hr *float64 `json:"cache_creation_input_token_cost_above_1hr"`
CacheReadInputTokenCost *float64 `json:"cache_read_input_token_cost"`
CacheReadInputTokenCostPriority *float64 `json:"cache_read_input_token_cost_priority"`
SupportsServiceTier bool `json:"supports_service_tier"`
LiteLLMProvider string `json:"litellm_provider"`
Mode string `json:"mode"`
SupportsPromptCaching bool `json:"supports_prompt_caching"`
@@ -324,14 +332,21 @@ func (s *PricingService) parsePricingData(body []byte) (map[string]*LiteLLMModel
LiteLLMProvider: entry.LiteLLMProvider,
Mode: entry.Mode,
SupportsPromptCaching: entry.SupportsPromptCaching,
SupportsServiceTier: entry.SupportsServiceTier,
}
if entry.InputCostPerToken != nil {
pricing.InputCostPerToken = *entry.InputCostPerToken
}
if entry.InputCostPerTokenPriority != nil {
pricing.InputCostPerTokenPriority = *entry.InputCostPerTokenPriority
}
if entry.OutputCostPerToken != nil {
pricing.OutputCostPerToken = *entry.OutputCostPerToken
}
if entry.OutputCostPerTokenPriority != nil {
pricing.OutputCostPerTokenPriority = *entry.OutputCostPerTokenPriority
}
if entry.CacheCreationInputTokenCost != nil {
pricing.CacheCreationInputTokenCost = *entry.CacheCreationInputTokenCost
}
@@ -341,6 +356,9 @@ func (s *PricingService) parsePricingData(body []byte) (map[string]*LiteLLMModel
if entry.CacheReadInputTokenCost != nil {
pricing.CacheReadInputTokenCost = *entry.CacheReadInputTokenCost
}
if entry.CacheReadInputTokenCostPriority != nil {
pricing.CacheReadInputTokenCostPriority = *entry.CacheReadInputTokenCostPriority
}
if entry.OutputCostPerImage != nil {
pricing.OutputCostPerImage = *entry.OutputCostPerImage
}