mirror of
https://github.com/QuantumNous/new-api.git
synced 2026-03-30 04:40:59 +00:00
fix: channel affinity (#2799)
* fix: channel affinity log styles * fix: Issue with incorrect data storage when switching key sources * feat: support not retrying after a single rule configuration fails * fix: render channel affinity tooltip as multiline content * feat: channel affinity cache hit * fix: prevent ChannelAffinityUsageCacheModal infinite loading and hide data before fetch * chore: format backend with gofmt and frontend with prettier/eslint autofix
This commit is contained in:
@@ -2,6 +2,7 @@ package service
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"hash/fnv"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
@@ -9,6 +10,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/QuantumNous/new-api/common"
|
||||
"github.com/QuantumNous/new-api/dto"
|
||||
"github.com/QuantumNous/new-api/pkg/cachex"
|
||||
"github.com/QuantumNous/new-api/setting/operation_setting"
|
||||
"github.com/gin-gonic/gin"
|
||||
@@ -21,14 +23,19 @@ const (
|
||||
ginKeyChannelAffinityTTLSeconds = "channel_affinity_ttl_seconds"
|
||||
ginKeyChannelAffinityMeta = "channel_affinity_meta"
|
||||
ginKeyChannelAffinityLogInfo = "channel_affinity_log_info"
|
||||
ginKeyChannelAffinitySkipRetry = "channel_affinity_skip_retry_on_failure"
|
||||
|
||||
channelAffinityCacheNamespace = "new-api:channel_affinity:v1"
|
||||
channelAffinityCacheNamespace = "new-api:channel_affinity:v1"
|
||||
channelAffinityUsageCacheStatsNamespace = "new-api:channel_affinity_usage_cache_stats:v1"
|
||||
)
|
||||
|
||||
var (
|
||||
channelAffinityCacheOnce sync.Once
|
||||
channelAffinityCache *cachex.HybridCache[int]
|
||||
|
||||
channelAffinityUsageCacheStatsOnce sync.Once
|
||||
channelAffinityUsageCacheStatsCache *cachex.HybridCache[ChannelAffinityUsageCacheCounters]
|
||||
|
||||
channelAffinityRegexCache sync.Map // map[string]*regexp.Regexp
|
||||
)
|
||||
|
||||
@@ -36,15 +43,24 @@ type channelAffinityMeta struct {
|
||||
CacheKey string
|
||||
TTLSeconds int
|
||||
RuleName string
|
||||
SkipRetry bool
|
||||
KeySourceType string
|
||||
KeySourceKey string
|
||||
KeySourcePath string
|
||||
KeyHint string
|
||||
KeyFingerprint string
|
||||
UsingGroup string
|
||||
ModelName string
|
||||
RequestPath string
|
||||
}
|
||||
|
||||
type ChannelAffinityStatsContext struct {
|
||||
RuleName string
|
||||
UsingGroup string
|
||||
KeyFingerprint string
|
||||
TTLSeconds int64
|
||||
}
|
||||
|
||||
type ChannelAffinityCacheStats struct {
|
||||
Enabled bool `json:"enabled"`
|
||||
Total int `json:"total"`
|
||||
@@ -338,6 +354,32 @@ func getChannelAffinityMeta(c *gin.Context) (channelAffinityMeta, bool) {
|
||||
return meta, true
|
||||
}
|
||||
|
||||
func GetChannelAffinityStatsContext(c *gin.Context) (ChannelAffinityStatsContext, bool) {
|
||||
if c == nil {
|
||||
return ChannelAffinityStatsContext{}, false
|
||||
}
|
||||
meta, ok := getChannelAffinityMeta(c)
|
||||
if !ok {
|
||||
return ChannelAffinityStatsContext{}, false
|
||||
}
|
||||
ruleName := strings.TrimSpace(meta.RuleName)
|
||||
keyFp := strings.TrimSpace(meta.KeyFingerprint)
|
||||
usingGroup := strings.TrimSpace(meta.UsingGroup)
|
||||
if ruleName == "" || keyFp == "" {
|
||||
return ChannelAffinityStatsContext{}, false
|
||||
}
|
||||
ttlSeconds := int64(meta.TTLSeconds)
|
||||
if ttlSeconds <= 0 {
|
||||
return ChannelAffinityStatsContext{}, false
|
||||
}
|
||||
return ChannelAffinityStatsContext{
|
||||
RuleName: ruleName,
|
||||
UsingGroup: usingGroup,
|
||||
KeyFingerprint: keyFp,
|
||||
TTLSeconds: ttlSeconds,
|
||||
}, true
|
||||
}
|
||||
|
||||
func affinityFingerprint(s string) string {
|
||||
if s == "" {
|
||||
return ""
|
||||
@@ -349,6 +391,19 @@ func affinityFingerprint(s string) string {
|
||||
return hex
|
||||
}
|
||||
|
||||
func buildChannelAffinityKeyHint(s string) string {
|
||||
s = strings.TrimSpace(s)
|
||||
if s == "" {
|
||||
return ""
|
||||
}
|
||||
s = strings.ReplaceAll(s, "\n", " ")
|
||||
s = strings.ReplaceAll(s, "\r", " ")
|
||||
if len(s) <= 12 {
|
||||
return s
|
||||
}
|
||||
return s[:4] + "..." + s[len(s)-4:]
|
||||
}
|
||||
|
||||
func GetPreferredChannelByAffinity(c *gin.Context, modelName string, usingGroup string) (int, bool) {
|
||||
setting := operation_setting.GetChannelAffinitySetting()
|
||||
if setting == nil || !setting.Enabled {
|
||||
@@ -399,9 +454,11 @@ func GetPreferredChannelByAffinity(c *gin.Context, modelName string, usingGroup
|
||||
CacheKey: cacheKeyFull,
|
||||
TTLSeconds: ttlSeconds,
|
||||
RuleName: rule.Name,
|
||||
SkipRetry: rule.SkipRetryOnFailure,
|
||||
KeySourceType: strings.TrimSpace(usedSource.Type),
|
||||
KeySourceKey: strings.TrimSpace(usedSource.Key),
|
||||
KeySourcePath: strings.TrimSpace(usedSource.Path),
|
||||
KeyHint: buildChannelAffinityKeyHint(affinityValue),
|
||||
KeyFingerprint: affinityFingerprint(affinityValue),
|
||||
UsingGroup: usingGroup,
|
||||
ModelName: modelName,
|
||||
@@ -422,6 +479,21 @@ func GetPreferredChannelByAffinity(c *gin.Context, modelName string, usingGroup
|
||||
return 0, false
|
||||
}
|
||||
|
||||
func ShouldSkipRetryAfterChannelAffinityFailure(c *gin.Context) bool {
|
||||
if c == nil {
|
||||
return false
|
||||
}
|
||||
v, ok := c.Get(ginKeyChannelAffinitySkipRetry)
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
b, ok := v.(bool)
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
func MarkChannelAffinityUsed(c *gin.Context, selectedGroup string, channelID int) {
|
||||
if c == nil || channelID <= 0 {
|
||||
return
|
||||
@@ -430,6 +502,7 @@ func MarkChannelAffinityUsed(c *gin.Context, selectedGroup string, channelID int
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
c.Set(ginKeyChannelAffinitySkipRetry, meta.SkipRetry)
|
||||
info := map[string]interface{}{
|
||||
"reason": meta.RuleName,
|
||||
"rule_name": meta.RuleName,
|
||||
@@ -441,6 +514,7 @@ func MarkChannelAffinityUsed(c *gin.Context, selectedGroup string, channelID int
|
||||
"key_source": meta.KeySourceType,
|
||||
"key_key": meta.KeySourceKey,
|
||||
"key_path": meta.KeySourcePath,
|
||||
"key_hint": meta.KeyHint,
|
||||
"key_fp": meta.KeyFingerprint,
|
||||
}
|
||||
c.Set(ginKeyChannelAffinityLogInfo, info)
|
||||
@@ -485,3 +559,225 @@ func RecordChannelAffinity(c *gin.Context, channelID int) {
|
||||
common.SysError(fmt.Sprintf("channel affinity cache set failed: key=%s, err=%v", cacheKey, err))
|
||||
}
|
||||
}
|
||||
|
||||
type ChannelAffinityUsageCacheStats struct {
|
||||
RuleName string `json:"rule_name"`
|
||||
UsingGroup string `json:"using_group"`
|
||||
KeyFingerprint string `json:"key_fp"`
|
||||
|
||||
Hit int64 `json:"hit"`
|
||||
Total int64 `json:"total"`
|
||||
WindowSeconds int64 `json:"window_seconds"`
|
||||
|
||||
PromptTokens int64 `json:"prompt_tokens"`
|
||||
CompletionTokens int64 `json:"completion_tokens"`
|
||||
TotalTokens int64 `json:"total_tokens"`
|
||||
CachedTokens int64 `json:"cached_tokens"`
|
||||
PromptCacheHitTokens int64 `json:"prompt_cache_hit_tokens"`
|
||||
LastSeenAt int64 `json:"last_seen_at"`
|
||||
}
|
||||
|
||||
type ChannelAffinityUsageCacheCounters struct {
|
||||
Hit int64 `json:"hit"`
|
||||
Total int64 `json:"total"`
|
||||
WindowSeconds int64 `json:"window_seconds"`
|
||||
|
||||
PromptTokens int64 `json:"prompt_tokens"`
|
||||
CompletionTokens int64 `json:"completion_tokens"`
|
||||
TotalTokens int64 `json:"total_tokens"`
|
||||
CachedTokens int64 `json:"cached_tokens"`
|
||||
PromptCacheHitTokens int64 `json:"prompt_cache_hit_tokens"`
|
||||
LastSeenAt int64 `json:"last_seen_at"`
|
||||
}
|
||||
|
||||
var channelAffinityUsageCacheStatsLocks [64]sync.Mutex
|
||||
|
||||
func ObserveChannelAffinityUsageCacheFromContext(c *gin.Context, usage *dto.Usage) {
|
||||
statsCtx, ok := GetChannelAffinityStatsContext(c)
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
observeChannelAffinityUsageCache(statsCtx, usage)
|
||||
}
|
||||
|
||||
func GetChannelAffinityUsageCacheStats(ruleName, usingGroup, keyFp string) ChannelAffinityUsageCacheStats {
|
||||
ruleName = strings.TrimSpace(ruleName)
|
||||
usingGroup = strings.TrimSpace(usingGroup)
|
||||
keyFp = strings.TrimSpace(keyFp)
|
||||
|
||||
entryKey := channelAffinityUsageCacheEntryKey(ruleName, usingGroup, keyFp)
|
||||
if entryKey == "" {
|
||||
return ChannelAffinityUsageCacheStats{
|
||||
RuleName: ruleName,
|
||||
UsingGroup: usingGroup,
|
||||
KeyFingerprint: keyFp,
|
||||
}
|
||||
}
|
||||
|
||||
cache := getChannelAffinityUsageCacheStatsCache()
|
||||
v, found, err := cache.Get(entryKey)
|
||||
if err != nil || !found {
|
||||
return ChannelAffinityUsageCacheStats{
|
||||
RuleName: ruleName,
|
||||
UsingGroup: usingGroup,
|
||||
KeyFingerprint: keyFp,
|
||||
}
|
||||
}
|
||||
return ChannelAffinityUsageCacheStats{
|
||||
RuleName: ruleName,
|
||||
UsingGroup: usingGroup,
|
||||
KeyFingerprint: keyFp,
|
||||
Hit: v.Hit,
|
||||
Total: v.Total,
|
||||
WindowSeconds: v.WindowSeconds,
|
||||
PromptTokens: v.PromptTokens,
|
||||
CompletionTokens: v.CompletionTokens,
|
||||
TotalTokens: v.TotalTokens,
|
||||
CachedTokens: v.CachedTokens,
|
||||
PromptCacheHitTokens: v.PromptCacheHitTokens,
|
||||
LastSeenAt: v.LastSeenAt,
|
||||
}
|
||||
}
|
||||
|
||||
func observeChannelAffinityUsageCache(statsCtx ChannelAffinityStatsContext, usage *dto.Usage) {
|
||||
entryKey := channelAffinityUsageCacheEntryKey(statsCtx.RuleName, statsCtx.UsingGroup, statsCtx.KeyFingerprint)
|
||||
if entryKey == "" {
|
||||
return
|
||||
}
|
||||
|
||||
windowSeconds := statsCtx.TTLSeconds
|
||||
if windowSeconds <= 0 {
|
||||
return
|
||||
}
|
||||
|
||||
cache := getChannelAffinityUsageCacheStatsCache()
|
||||
ttl := time.Duration(windowSeconds) * time.Second
|
||||
|
||||
lock := channelAffinityUsageCacheStatsLock(entryKey)
|
||||
lock.Lock()
|
||||
defer lock.Unlock()
|
||||
|
||||
prev, found, err := cache.Get(entryKey)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
next := prev
|
||||
if !found {
|
||||
next = ChannelAffinityUsageCacheCounters{}
|
||||
}
|
||||
next.Total++
|
||||
hit, cachedTokens, promptCacheHitTokens := usageCacheSignals(usage)
|
||||
if hit {
|
||||
next.Hit++
|
||||
}
|
||||
next.WindowSeconds = windowSeconds
|
||||
next.LastSeenAt = time.Now().Unix()
|
||||
next.CachedTokens += cachedTokens
|
||||
next.PromptCacheHitTokens += promptCacheHitTokens
|
||||
next.PromptTokens += int64(usagePromptTokens(usage))
|
||||
next.CompletionTokens += int64(usageCompletionTokens(usage))
|
||||
next.TotalTokens += int64(usageTotalTokens(usage))
|
||||
_ = cache.SetWithTTL(entryKey, next, ttl)
|
||||
}
|
||||
|
||||
func channelAffinityUsageCacheEntryKey(ruleName, usingGroup, keyFp string) string {
|
||||
ruleName = strings.TrimSpace(ruleName)
|
||||
usingGroup = strings.TrimSpace(usingGroup)
|
||||
keyFp = strings.TrimSpace(keyFp)
|
||||
if ruleName == "" || keyFp == "" {
|
||||
return ""
|
||||
}
|
||||
return ruleName + "\n" + usingGroup + "\n" + keyFp
|
||||
}
|
||||
|
||||
func usageCacheSignals(usage *dto.Usage) (hit bool, cachedTokens int64, promptCacheHitTokens int64) {
|
||||
if usage == nil {
|
||||
return false, 0, 0
|
||||
}
|
||||
|
||||
cached := int64(0)
|
||||
if usage.PromptTokensDetails.CachedTokens > 0 {
|
||||
cached = int64(usage.PromptTokensDetails.CachedTokens)
|
||||
} else if usage.InputTokensDetails != nil && usage.InputTokensDetails.CachedTokens > 0 {
|
||||
cached = int64(usage.InputTokensDetails.CachedTokens)
|
||||
}
|
||||
pcht := int64(0)
|
||||
if usage.PromptCacheHitTokens > 0 {
|
||||
pcht = int64(usage.PromptCacheHitTokens)
|
||||
}
|
||||
return cached > 0 || pcht > 0, cached, pcht
|
||||
}
|
||||
|
||||
func usagePromptTokens(usage *dto.Usage) int {
|
||||
if usage == nil {
|
||||
return 0
|
||||
}
|
||||
if usage.PromptTokens > 0 {
|
||||
return usage.PromptTokens
|
||||
}
|
||||
return usage.InputTokens
|
||||
}
|
||||
|
||||
func usageCompletionTokens(usage *dto.Usage) int {
|
||||
if usage == nil {
|
||||
return 0
|
||||
}
|
||||
if usage.CompletionTokens > 0 {
|
||||
return usage.CompletionTokens
|
||||
}
|
||||
return usage.OutputTokens
|
||||
}
|
||||
|
||||
func usageTotalTokens(usage *dto.Usage) int {
|
||||
if usage == nil {
|
||||
return 0
|
||||
}
|
||||
if usage.TotalTokens > 0 {
|
||||
return usage.TotalTokens
|
||||
}
|
||||
pt := usagePromptTokens(usage)
|
||||
ct := usageCompletionTokens(usage)
|
||||
if pt > 0 || ct > 0 {
|
||||
return pt + ct
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func getChannelAffinityUsageCacheStatsCache() *cachex.HybridCache[ChannelAffinityUsageCacheCounters] {
|
||||
channelAffinityUsageCacheStatsOnce.Do(func() {
|
||||
setting := operation_setting.GetChannelAffinitySetting()
|
||||
capacity := 100_000
|
||||
defaultTTLSeconds := 3600
|
||||
if setting != nil {
|
||||
if setting.MaxEntries > 0 {
|
||||
capacity = setting.MaxEntries
|
||||
}
|
||||
if setting.DefaultTTLSeconds > 0 {
|
||||
defaultTTLSeconds = setting.DefaultTTLSeconds
|
||||
}
|
||||
}
|
||||
|
||||
channelAffinityUsageCacheStatsCache = cachex.NewHybridCache[ChannelAffinityUsageCacheCounters](cachex.HybridCacheConfig[ChannelAffinityUsageCacheCounters]{
|
||||
Namespace: cachex.Namespace(channelAffinityUsageCacheStatsNamespace),
|
||||
Redis: common.RDB,
|
||||
RedisEnabled: func() bool {
|
||||
return common.RedisEnabled && common.RDB != nil
|
||||
},
|
||||
RedisCodec: cachex.JSONCodec[ChannelAffinityUsageCacheCounters]{},
|
||||
Memory: func() *hot.HotCache[string, ChannelAffinityUsageCacheCounters] {
|
||||
return hot.NewHotCache[string, ChannelAffinityUsageCacheCounters](hot.LRU, capacity).
|
||||
WithTTL(time.Duration(defaultTTLSeconds) * time.Second).
|
||||
WithJanitor().
|
||||
Build()
|
||||
},
|
||||
})
|
||||
})
|
||||
return channelAffinityUsageCacheStatsCache
|
||||
}
|
||||
|
||||
func channelAffinityUsageCacheStatsLock(key string) *sync.Mutex {
|
||||
h := fnv.New32a()
|
||||
_, _ = h.Write([]byte(key))
|
||||
idx := h.Sum32() % uint32(len(channelAffinityUsageCacheStatsLocks))
|
||||
return &channelAffinityUsageCacheStatsLocks[idx]
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user