fix: channel affinity (#2799)

* fix: channel affinity log styles

* fix: Issue with incorrect data storage when switching key sources

* feat: support not retrying after a single rule configuration fails

* fix: render channel affinity tooltip as multiline content

* feat: channel affinity cache hit

* fix: prevent ChannelAffinityUsageCacheModal infinite loading and hide data before fetch

* chore: format backend with gofmt and frontend with prettier/eslint autofix
This commit is contained in:
Seefs
2026-02-02 14:37:31 +08:00
committed by GitHub
parent 6c0e9403a2
commit 540cf6c991
61 changed files with 2012 additions and 1004 deletions

View File

@@ -2,6 +2,7 @@ package service
import (
"fmt"
"hash/fnv"
"regexp"
"strconv"
"strings"
@@ -9,6 +10,7 @@ import (
"time"
"github.com/QuantumNous/new-api/common"
"github.com/QuantumNous/new-api/dto"
"github.com/QuantumNous/new-api/pkg/cachex"
"github.com/QuantumNous/new-api/setting/operation_setting"
"github.com/gin-gonic/gin"
@@ -21,14 +23,19 @@ const (
ginKeyChannelAffinityTTLSeconds = "channel_affinity_ttl_seconds"
ginKeyChannelAffinityMeta = "channel_affinity_meta"
ginKeyChannelAffinityLogInfo = "channel_affinity_log_info"
ginKeyChannelAffinitySkipRetry = "channel_affinity_skip_retry_on_failure"
channelAffinityCacheNamespace = "new-api:channel_affinity:v1"
channelAffinityCacheNamespace = "new-api:channel_affinity:v1"
channelAffinityUsageCacheStatsNamespace = "new-api:channel_affinity_usage_cache_stats:v1"
)
var (
channelAffinityCacheOnce sync.Once
channelAffinityCache *cachex.HybridCache[int]
channelAffinityUsageCacheStatsOnce sync.Once
channelAffinityUsageCacheStatsCache *cachex.HybridCache[ChannelAffinityUsageCacheCounters]
channelAffinityRegexCache sync.Map // map[string]*regexp.Regexp
)
@@ -36,15 +43,24 @@ type channelAffinityMeta struct {
CacheKey string
TTLSeconds int
RuleName string
SkipRetry bool
KeySourceType string
KeySourceKey string
KeySourcePath string
KeyHint string
KeyFingerprint string
UsingGroup string
ModelName string
RequestPath string
}
type ChannelAffinityStatsContext struct {
RuleName string
UsingGroup string
KeyFingerprint string
TTLSeconds int64
}
type ChannelAffinityCacheStats struct {
Enabled bool `json:"enabled"`
Total int `json:"total"`
@@ -338,6 +354,32 @@ func getChannelAffinityMeta(c *gin.Context) (channelAffinityMeta, bool) {
return meta, true
}
func GetChannelAffinityStatsContext(c *gin.Context) (ChannelAffinityStatsContext, bool) {
if c == nil {
return ChannelAffinityStatsContext{}, false
}
meta, ok := getChannelAffinityMeta(c)
if !ok {
return ChannelAffinityStatsContext{}, false
}
ruleName := strings.TrimSpace(meta.RuleName)
keyFp := strings.TrimSpace(meta.KeyFingerprint)
usingGroup := strings.TrimSpace(meta.UsingGroup)
if ruleName == "" || keyFp == "" {
return ChannelAffinityStatsContext{}, false
}
ttlSeconds := int64(meta.TTLSeconds)
if ttlSeconds <= 0 {
return ChannelAffinityStatsContext{}, false
}
return ChannelAffinityStatsContext{
RuleName: ruleName,
UsingGroup: usingGroup,
KeyFingerprint: keyFp,
TTLSeconds: ttlSeconds,
}, true
}
func affinityFingerprint(s string) string {
if s == "" {
return ""
@@ -349,6 +391,19 @@ func affinityFingerprint(s string) string {
return hex
}
func buildChannelAffinityKeyHint(s string) string {
s = strings.TrimSpace(s)
if s == "" {
return ""
}
s = strings.ReplaceAll(s, "\n", " ")
s = strings.ReplaceAll(s, "\r", " ")
if len(s) <= 12 {
return s
}
return s[:4] + "..." + s[len(s)-4:]
}
func GetPreferredChannelByAffinity(c *gin.Context, modelName string, usingGroup string) (int, bool) {
setting := operation_setting.GetChannelAffinitySetting()
if setting == nil || !setting.Enabled {
@@ -399,9 +454,11 @@ func GetPreferredChannelByAffinity(c *gin.Context, modelName string, usingGroup
CacheKey: cacheKeyFull,
TTLSeconds: ttlSeconds,
RuleName: rule.Name,
SkipRetry: rule.SkipRetryOnFailure,
KeySourceType: strings.TrimSpace(usedSource.Type),
KeySourceKey: strings.TrimSpace(usedSource.Key),
KeySourcePath: strings.TrimSpace(usedSource.Path),
KeyHint: buildChannelAffinityKeyHint(affinityValue),
KeyFingerprint: affinityFingerprint(affinityValue),
UsingGroup: usingGroup,
ModelName: modelName,
@@ -422,6 +479,21 @@ func GetPreferredChannelByAffinity(c *gin.Context, modelName string, usingGroup
return 0, false
}
func ShouldSkipRetryAfterChannelAffinityFailure(c *gin.Context) bool {
if c == nil {
return false
}
v, ok := c.Get(ginKeyChannelAffinitySkipRetry)
if !ok {
return false
}
b, ok := v.(bool)
if !ok {
return false
}
return b
}
func MarkChannelAffinityUsed(c *gin.Context, selectedGroup string, channelID int) {
if c == nil || channelID <= 0 {
return
@@ -430,6 +502,7 @@ func MarkChannelAffinityUsed(c *gin.Context, selectedGroup string, channelID int
if !ok {
return
}
c.Set(ginKeyChannelAffinitySkipRetry, meta.SkipRetry)
info := map[string]interface{}{
"reason": meta.RuleName,
"rule_name": meta.RuleName,
@@ -441,6 +514,7 @@ func MarkChannelAffinityUsed(c *gin.Context, selectedGroup string, channelID int
"key_source": meta.KeySourceType,
"key_key": meta.KeySourceKey,
"key_path": meta.KeySourcePath,
"key_hint": meta.KeyHint,
"key_fp": meta.KeyFingerprint,
}
c.Set(ginKeyChannelAffinityLogInfo, info)
@@ -485,3 +559,225 @@ func RecordChannelAffinity(c *gin.Context, channelID int) {
common.SysError(fmt.Sprintf("channel affinity cache set failed: key=%s, err=%v", cacheKey, err))
}
}
type ChannelAffinityUsageCacheStats struct {
RuleName string `json:"rule_name"`
UsingGroup string `json:"using_group"`
KeyFingerprint string `json:"key_fp"`
Hit int64 `json:"hit"`
Total int64 `json:"total"`
WindowSeconds int64 `json:"window_seconds"`
PromptTokens int64 `json:"prompt_tokens"`
CompletionTokens int64 `json:"completion_tokens"`
TotalTokens int64 `json:"total_tokens"`
CachedTokens int64 `json:"cached_tokens"`
PromptCacheHitTokens int64 `json:"prompt_cache_hit_tokens"`
LastSeenAt int64 `json:"last_seen_at"`
}
type ChannelAffinityUsageCacheCounters struct {
Hit int64 `json:"hit"`
Total int64 `json:"total"`
WindowSeconds int64 `json:"window_seconds"`
PromptTokens int64 `json:"prompt_tokens"`
CompletionTokens int64 `json:"completion_tokens"`
TotalTokens int64 `json:"total_tokens"`
CachedTokens int64 `json:"cached_tokens"`
PromptCacheHitTokens int64 `json:"prompt_cache_hit_tokens"`
LastSeenAt int64 `json:"last_seen_at"`
}
var channelAffinityUsageCacheStatsLocks [64]sync.Mutex
func ObserveChannelAffinityUsageCacheFromContext(c *gin.Context, usage *dto.Usage) {
statsCtx, ok := GetChannelAffinityStatsContext(c)
if !ok {
return
}
observeChannelAffinityUsageCache(statsCtx, usage)
}
func GetChannelAffinityUsageCacheStats(ruleName, usingGroup, keyFp string) ChannelAffinityUsageCacheStats {
ruleName = strings.TrimSpace(ruleName)
usingGroup = strings.TrimSpace(usingGroup)
keyFp = strings.TrimSpace(keyFp)
entryKey := channelAffinityUsageCacheEntryKey(ruleName, usingGroup, keyFp)
if entryKey == "" {
return ChannelAffinityUsageCacheStats{
RuleName: ruleName,
UsingGroup: usingGroup,
KeyFingerprint: keyFp,
}
}
cache := getChannelAffinityUsageCacheStatsCache()
v, found, err := cache.Get(entryKey)
if err != nil || !found {
return ChannelAffinityUsageCacheStats{
RuleName: ruleName,
UsingGroup: usingGroup,
KeyFingerprint: keyFp,
}
}
return ChannelAffinityUsageCacheStats{
RuleName: ruleName,
UsingGroup: usingGroup,
KeyFingerprint: keyFp,
Hit: v.Hit,
Total: v.Total,
WindowSeconds: v.WindowSeconds,
PromptTokens: v.PromptTokens,
CompletionTokens: v.CompletionTokens,
TotalTokens: v.TotalTokens,
CachedTokens: v.CachedTokens,
PromptCacheHitTokens: v.PromptCacheHitTokens,
LastSeenAt: v.LastSeenAt,
}
}
func observeChannelAffinityUsageCache(statsCtx ChannelAffinityStatsContext, usage *dto.Usage) {
entryKey := channelAffinityUsageCacheEntryKey(statsCtx.RuleName, statsCtx.UsingGroup, statsCtx.KeyFingerprint)
if entryKey == "" {
return
}
windowSeconds := statsCtx.TTLSeconds
if windowSeconds <= 0 {
return
}
cache := getChannelAffinityUsageCacheStatsCache()
ttl := time.Duration(windowSeconds) * time.Second
lock := channelAffinityUsageCacheStatsLock(entryKey)
lock.Lock()
defer lock.Unlock()
prev, found, err := cache.Get(entryKey)
if err != nil {
return
}
next := prev
if !found {
next = ChannelAffinityUsageCacheCounters{}
}
next.Total++
hit, cachedTokens, promptCacheHitTokens := usageCacheSignals(usage)
if hit {
next.Hit++
}
next.WindowSeconds = windowSeconds
next.LastSeenAt = time.Now().Unix()
next.CachedTokens += cachedTokens
next.PromptCacheHitTokens += promptCacheHitTokens
next.PromptTokens += int64(usagePromptTokens(usage))
next.CompletionTokens += int64(usageCompletionTokens(usage))
next.TotalTokens += int64(usageTotalTokens(usage))
_ = cache.SetWithTTL(entryKey, next, ttl)
}
func channelAffinityUsageCacheEntryKey(ruleName, usingGroup, keyFp string) string {
ruleName = strings.TrimSpace(ruleName)
usingGroup = strings.TrimSpace(usingGroup)
keyFp = strings.TrimSpace(keyFp)
if ruleName == "" || keyFp == "" {
return ""
}
return ruleName + "\n" + usingGroup + "\n" + keyFp
}
func usageCacheSignals(usage *dto.Usage) (hit bool, cachedTokens int64, promptCacheHitTokens int64) {
if usage == nil {
return false, 0, 0
}
cached := int64(0)
if usage.PromptTokensDetails.CachedTokens > 0 {
cached = int64(usage.PromptTokensDetails.CachedTokens)
} else if usage.InputTokensDetails != nil && usage.InputTokensDetails.CachedTokens > 0 {
cached = int64(usage.InputTokensDetails.CachedTokens)
}
pcht := int64(0)
if usage.PromptCacheHitTokens > 0 {
pcht = int64(usage.PromptCacheHitTokens)
}
return cached > 0 || pcht > 0, cached, pcht
}
func usagePromptTokens(usage *dto.Usage) int {
if usage == nil {
return 0
}
if usage.PromptTokens > 0 {
return usage.PromptTokens
}
return usage.InputTokens
}
func usageCompletionTokens(usage *dto.Usage) int {
if usage == nil {
return 0
}
if usage.CompletionTokens > 0 {
return usage.CompletionTokens
}
return usage.OutputTokens
}
func usageTotalTokens(usage *dto.Usage) int {
if usage == nil {
return 0
}
if usage.TotalTokens > 0 {
return usage.TotalTokens
}
pt := usagePromptTokens(usage)
ct := usageCompletionTokens(usage)
if pt > 0 || ct > 0 {
return pt + ct
}
return 0
}
func getChannelAffinityUsageCacheStatsCache() *cachex.HybridCache[ChannelAffinityUsageCacheCounters] {
channelAffinityUsageCacheStatsOnce.Do(func() {
setting := operation_setting.GetChannelAffinitySetting()
capacity := 100_000
defaultTTLSeconds := 3600
if setting != nil {
if setting.MaxEntries > 0 {
capacity = setting.MaxEntries
}
if setting.DefaultTTLSeconds > 0 {
defaultTTLSeconds = setting.DefaultTTLSeconds
}
}
channelAffinityUsageCacheStatsCache = cachex.NewHybridCache[ChannelAffinityUsageCacheCounters](cachex.HybridCacheConfig[ChannelAffinityUsageCacheCounters]{
Namespace: cachex.Namespace(channelAffinityUsageCacheStatsNamespace),
Redis: common.RDB,
RedisEnabled: func() bool {
return common.RedisEnabled && common.RDB != nil
},
RedisCodec: cachex.JSONCodec[ChannelAffinityUsageCacheCounters]{},
Memory: func() *hot.HotCache[string, ChannelAffinityUsageCacheCounters] {
return hot.NewHotCache[string, ChannelAffinityUsageCacheCounters](hot.LRU, capacity).
WithTTL(time.Duration(defaultTTLSeconds) * time.Second).
WithJanitor().
Build()
},
})
})
return channelAffinityUsageCacheStatsCache
}
func channelAffinityUsageCacheStatsLock(key string) *sync.Mutex {
h := fnv.New32a()
_, _ = h.Write([]byte(key))
idx := h.Sum32() % uint32(len(channelAffinityUsageCacheStatsLocks))
return &channelAffinityUsageCacheStatsLocks[idx]
}