mirror of
https://github.com/Wei-Shaw/claude-relay-service.git
synced 2026-01-23 09:38:02 +00:00
feat: enhance concurrency queue with health check and admin endpoints
- Add queue health check for fast-fail when overloaded (P90 > threshold)
- Implement socket identity verification with UUID token
- Add wait time statistics (P50/P90/P99) and queue stats tracking
- Add admin endpoints for queue stats and cleanup
- Add CLEAR_CONCURRENCY_QUEUES_ON_STARTUP config option
- Update documentation with troubleshooting and proxy config guide
This commit is contained in:
@@ -43,7 +43,11 @@ router.put('/claude-relay-config', authenticateAdmin, async (req, res) => {
|
||||
sessionBindingTtlDays,
|
||||
userMessageQueueEnabled,
|
||||
userMessageQueueDelayMs,
|
||||
userMessageQueueTimeoutMs
|
||||
userMessageQueueTimeoutMs,
|
||||
concurrentRequestQueueEnabled,
|
||||
concurrentRequestQueueMaxSize,
|
||||
concurrentRequestQueueMaxSizeMultiplier,
|
||||
concurrentRequestQueueTimeoutMs
|
||||
} = req.body
|
||||
|
||||
// 验证输入
|
||||
@@ -110,6 +114,54 @@ router.put('/claude-relay-config', authenticateAdmin, async (req, res) => {
|
||||
}
|
||||
}
|
||||
|
||||
// 验证并发请求排队配置
|
||||
if (
|
||||
concurrentRequestQueueEnabled !== undefined &&
|
||||
typeof concurrentRequestQueueEnabled !== 'boolean'
|
||||
) {
|
||||
return res.status(400).json({ error: 'concurrentRequestQueueEnabled must be a boolean' })
|
||||
}
|
||||
|
||||
if (concurrentRequestQueueMaxSize !== undefined) {
|
||||
if (
|
||||
typeof concurrentRequestQueueMaxSize !== 'number' ||
|
||||
!Number.isInteger(concurrentRequestQueueMaxSize) ||
|
||||
concurrentRequestQueueMaxSize < 1 ||
|
||||
concurrentRequestQueueMaxSize > 100
|
||||
) {
|
||||
return res
|
||||
.status(400)
|
||||
.json({ error: 'concurrentRequestQueueMaxSize must be an integer between 1 and 100' })
|
||||
}
|
||||
}
|
||||
|
||||
if (concurrentRequestQueueMaxSizeMultiplier !== undefined) {
|
||||
// 使用 Number.isFinite() 同时排除 NaN、Infinity、-Infinity 和非数字类型
|
||||
if (
|
||||
!Number.isFinite(concurrentRequestQueueMaxSizeMultiplier) ||
|
||||
concurrentRequestQueueMaxSizeMultiplier < 0 ||
|
||||
concurrentRequestQueueMaxSizeMultiplier > 10
|
||||
) {
|
||||
return res.status(400).json({
|
||||
error: 'concurrentRequestQueueMaxSizeMultiplier must be a finite number between 0 and 10'
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
if (concurrentRequestQueueTimeoutMs !== undefined) {
|
||||
if (
|
||||
typeof concurrentRequestQueueTimeoutMs !== 'number' ||
|
||||
!Number.isInteger(concurrentRequestQueueTimeoutMs) ||
|
||||
concurrentRequestQueueTimeoutMs < 5000 ||
|
||||
concurrentRequestQueueTimeoutMs > 300000
|
||||
) {
|
||||
return res.status(400).json({
|
||||
error:
|
||||
'concurrentRequestQueueTimeoutMs must be an integer between 5000 and 300000 (5 seconds to 5 minutes)'
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
const updateData = {}
|
||||
if (claudeCodeOnlyEnabled !== undefined) {
|
||||
updateData.claudeCodeOnlyEnabled = claudeCodeOnlyEnabled
|
||||
@@ -132,6 +184,18 @@ router.put('/claude-relay-config', authenticateAdmin, async (req, res) => {
|
||||
if (userMessageQueueTimeoutMs !== undefined) {
|
||||
updateData.userMessageQueueTimeoutMs = userMessageQueueTimeoutMs
|
||||
}
|
||||
if (concurrentRequestQueueEnabled !== undefined) {
|
||||
updateData.concurrentRequestQueueEnabled = concurrentRequestQueueEnabled
|
||||
}
|
||||
if (concurrentRequestQueueMaxSize !== undefined) {
|
||||
updateData.concurrentRequestQueueMaxSize = concurrentRequestQueueMaxSize
|
||||
}
|
||||
if (concurrentRequestQueueMaxSizeMultiplier !== undefined) {
|
||||
updateData.concurrentRequestQueueMaxSizeMultiplier = concurrentRequestQueueMaxSizeMultiplier
|
||||
}
|
||||
if (concurrentRequestQueueTimeoutMs !== undefined) {
|
||||
updateData.concurrentRequestQueueTimeoutMs = concurrentRequestQueueTimeoutMs
|
||||
}
|
||||
|
||||
const updatedConfig = await claudeRelayConfigService.updateConfig(
|
||||
updateData,
|
||||
|
||||
@@ -8,6 +8,7 @@ const router = express.Router()
|
||||
const redis = require('../../models/redis')
|
||||
const logger = require('../../utils/logger')
|
||||
const { authenticateAdmin } = require('../../middleware/auth')
|
||||
const { calculateWaitTimeStats } = require('../../utils/statsHelper')
|
||||
|
||||
/**
|
||||
* GET /admin/concurrency
|
||||
@@ -17,17 +18,29 @@ router.get('/concurrency', authenticateAdmin, async (req, res) => {
|
||||
try {
|
||||
const status = await redis.getAllConcurrencyStatus()
|
||||
|
||||
// 为每个 API Key 获取排队计数
|
||||
const statusWithQueue = await Promise.all(
|
||||
status.map(async (s) => {
|
||||
const queueCount = await redis.getConcurrencyQueueCount(s.apiKeyId)
|
||||
return {
|
||||
...s,
|
||||
queueCount
|
||||
}
|
||||
})
|
||||
)
|
||||
|
||||
// 计算汇总统计
|
||||
const summary = {
|
||||
totalKeys: status.length,
|
||||
totalActiveRequests: status.reduce((sum, s) => sum + s.activeCount, 0),
|
||||
totalExpiredRequests: status.reduce((sum, s) => sum + s.expiredCount, 0)
|
||||
totalKeys: statusWithQueue.length,
|
||||
totalActiveRequests: statusWithQueue.reduce((sum, s) => sum + s.activeCount, 0),
|
||||
totalExpiredRequests: statusWithQueue.reduce((sum, s) => sum + s.expiredCount, 0),
|
||||
totalQueuedRequests: statusWithQueue.reduce((sum, s) => sum + s.queueCount, 0)
|
||||
}
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
summary,
|
||||
concurrencyStatus: status
|
||||
concurrencyStatus: statusWithQueue
|
||||
})
|
||||
} catch (error) {
|
||||
logger.error('❌ Failed to get concurrency status:', error)
|
||||
@@ -39,6 +52,156 @@ router.get('/concurrency', authenticateAdmin, async (req, res) => {
|
||||
}
|
||||
})
|
||||
|
||||
/**
 * GET /admin/concurrency-queue/stats
 * Returns concurrency-queue statistics: a global summary, global wait-time
 * statistics, and a per-API-key breakdown.
 *
 * Success response: { success: true, globalStats, globalWaitTimeStats, perKeyStats }
 * Failure response: HTTP 500 with { success: false, error, message }
 */
router.get('/concurrency-queue/stats', authenticateAdmin, async (req, res) => {
  try {
    // Collect every API key that has recorded stats or a queue entry.
    // Neither scan takes input from the other, so they run concurrently.
    const [statsKeys, queueKeys] = await Promise.all([
      redis.scanConcurrencyQueueStatsKeys(),
      redis.scanConcurrencyQueueKeys()
    ])

    // Merge both lists and drop duplicates
    const allApiKeyIds = [...new Set([...statsKeys, ...queueKeys])]

    // Fetch the detailed statistics for each API key
    const perKeyStats = await Promise.all(
      allApiKeyIds.map(async (apiKeyId) => {
        const [queueCount, stats, waitTimes] = await Promise.all([
          redis.getConcurrencyQueueCount(apiKeyId),
          redis.getConcurrencyQueueStats(apiKeyId),
          redis.getQueueWaitTimes(apiKeyId)
        ])

        return {
          apiKeyId,
          currentQueueCount: queueCount,
          stats,
          waitTimeStats: calculateWaitTimeStats(waitTimes)
        }
      })
    )

    // Global wait-time statistics
    const globalWaitTimes = await redis.getGlobalQueueWaitTimes()
    const globalWaitTimeStats = calculateWaitTimeStats(globalWaitTimes)

    // Sum one counter across all keys. A missing counter (or a missing stats
    // object) counts as 0 so that a single incomplete record cannot turn the
    // global totals and rates into NaN.
    const sumCounter = (field) =>
      perKeyStats.reduce((sum, s) => sum + (s.stats?.[field] || 0), 0)

    const queueCounts = perKeyStats.map((s) => s.currentQueueCount)
    const currentTotalQueued = queueCounts.reduce((sum, count) => sum + count, 0)

    // Global summary
    const globalStats = {
      totalEntered: sumCounter('entered'),
      totalSuccess: sumCounter('success'),
      totalTimeout: sumCounter('timeout'),
      totalCancelled: sumCounter('cancelled'),
      totalSocketChanged: sumCounter('socket_changed'),
      totalRejectedOverload: sumCounter('rejected_overload'),
      currentTotalQueued,
      // Queue utilisation metrics. Note that "peak" is the largest *current*
      // per-key queue size, not a historical maximum. A reduce is used instead
      // of Math.max(...queueCounts) because spreading a very large array into
      // call arguments can exceed the engine's argument limit.
      peakQueueSize:
        queueCounts.length > 0
          ? queueCounts.reduce((max, count) => Math.max(max, count), -Infinity)
          : 0,
      avgQueueSize:
        queueCounts.length > 0 ? Math.round(currentTotalQueued / queueCounts.length) : 0,
      activeApiKeys: queueCounts.filter((count) => count > 0).length
    }

    // Rates (whole percentages) are only reported once at least one request
    // has entered the queue, which also avoids dividing by zero.
    if (globalStats.totalEntered > 0) {
      const toPercent = (value) => Math.round((value / globalStats.totalEntered) * 100)
      globalStats.successRate = toPercent(globalStats.totalSuccess)
      globalStats.timeoutRate = toPercent(globalStats.totalTimeout)
      globalStats.cancelledRate = toPercent(globalStats.totalCancelled)
    }

    // Copy the key wait-time metrics into the global summary
    if (globalWaitTimeStats) {
      globalStats.avgWaitTimeMs = globalWaitTimeStats.avg
      globalStats.p50WaitTimeMs = globalWaitTimeStats.p50
      globalStats.p90WaitTimeMs = globalWaitTimeStats.p90
      globalStats.p99WaitTimeMs = globalWaitTimeStats.p99
      // Multi-instance sampling strategy marker (see design.md, Decision 9):
      // the global P90 is for visualisation and monitoring only and is not used
      // for system decisions. Health checks use the per-API-key P90 (each key is
      // sampled independently).
      globalWaitTimeStats.globalP90ForVisualizationOnly = true
    }

    res.json({
      success: true,
      globalStats,
      globalWaitTimeStats,
      perKeyStats
    })
  } catch (error) {
    logger.error('❌ Failed to get queue stats:', error)
    res.status(500).json({
      success: false,
      error: 'Failed to get queue stats',
      message: error.message
    })
  }
})
|
||||
|
||||
/**
 * DELETE /admin/concurrency-queue/:apiKeyId
 * Clears the queue count of one specific API key.
 *
 * Success response: { success: true, message }
 * Failure response: HTTP 500 with { success: false, error, message }
 */
router.delete('/concurrency-queue/:apiKeyId', authenticateAdmin, async (req, res) => {
  try {
    const targetKeyId = req.params.apiKeyId
    await redis.clearConcurrencyQueue(targetKeyId)

    // Record which admin triggered the cleanup; falls back to 'unknown'
    // when no admin username is attached to the request.
    const adminName = req.admin?.username || 'unknown'
    logger.warn(`🧹 Admin ${adminName} cleared queue for key ${targetKeyId}`)

    const payload = {
      success: true,
      message: `Successfully cleared queue for API key ${targetKeyId}`
    }
    res.json(payload)
  } catch (err) {
    logger.error(`❌ Failed to clear queue for ${req.params.apiKeyId}:`, err)

    const failure = {
      success: false,
      error: 'Failed to clear queue',
      message: err.message
    }
    res.status(500).json(failure)
  }
})
|
||||
|
||||
/**
 * DELETE /admin/concurrency-queue
 * Clears the queue counts of all API keys.
 *
 * Success response: { success: true, message, cleared }, where `cleared` is
 * whatever redis.clearAllConcurrencyQueues() resolved with.
 * Failure response: HTTP 500 with { success: false, error, message }
 */
router.delete('/concurrency-queue', authenticateAdmin, async (req, res) => {
  try {
    const clearResult = await redis.clearAllConcurrencyQueues()

    // Record which admin triggered the cleanup; falls back to 'unknown'
    // when no admin username is attached to the request.
    const adminName = req.admin?.username || 'unknown'
    logger.warn(`🧹 Admin ${adminName} cleared ALL queues`)

    const payload = {
      success: true,
      message: 'Successfully cleared all queues',
      cleared: clearResult
    }
    res.json(payload)
  } catch (err) {
    logger.error('❌ Failed to clear all queues:', err)

    const failure = {
      success: false,
      error: 'Failed to clear all queues',
      message: err.message
    }
    res.status(500).json(failure)
  }
})
|
||||
|
||||
/**
|
||||
* GET /admin/concurrency/:apiKeyId
|
||||
* 获取特定 API Key 的并发状态详情
|
||||
@@ -47,10 +210,14 @@ router.get('/concurrency/:apiKeyId', authenticateAdmin, async (req, res) => {
|
||||
try {
|
||||
const { apiKeyId } = req.params
|
||||
const status = await redis.getConcurrencyStatus(apiKeyId)
|
||||
const queueCount = await redis.getConcurrencyQueueCount(apiKeyId)
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
concurrencyStatus: status
|
||||
concurrencyStatus: {
|
||||
...status,
|
||||
queueCount
|
||||
}
|
||||
})
|
||||
} catch (error) {
|
||||
logger.error(`❌ Failed to get concurrency status for ${req.params.apiKeyId}:`, error)
|
||||
|
||||
Reference in New Issue
Block a user