feat: enhance concurrency queue with health check and admin endpoints

- Add queue health check for fast-fail when overloaded (P90 > threshold)
  - Implement socket identity verification with UUID token
  - Add wait time statistics (P50/P90/P99) and queue stats tracking
  - Add admin endpoints for queue stats and cleanup
  - Add CLEAR_CONCURRENCY_QUEUES_ON_STARTUP config option
  - Update documentation with troubleshooting and proxy config guide
This commit is contained in:
DaydreamCoding
2025-12-12 14:08:30 +08:00
committed by QTom
parent 403f609f69
commit 07633ddbf8
18 changed files with 3039 additions and 86 deletions

View File

@@ -43,7 +43,11 @@ router.put('/claude-relay-config', authenticateAdmin, async (req, res) => {
sessionBindingTtlDays,
userMessageQueueEnabled,
userMessageQueueDelayMs,
userMessageQueueTimeoutMs
userMessageQueueTimeoutMs,
concurrentRequestQueueEnabled,
concurrentRequestQueueMaxSize,
concurrentRequestQueueMaxSizeMultiplier,
concurrentRequestQueueTimeoutMs
} = req.body
// 验证输入
@@ -110,6 +114,54 @@ router.put('/claude-relay-config', authenticateAdmin, async (req, res) => {
}
}
// 验证并发请求排队配置
if (
concurrentRequestQueueEnabled !== undefined &&
typeof concurrentRequestQueueEnabled !== 'boolean'
) {
return res.status(400).json({ error: 'concurrentRequestQueueEnabled must be a boolean' })
}
if (concurrentRequestQueueMaxSize !== undefined) {
if (
typeof concurrentRequestQueueMaxSize !== 'number' ||
!Number.isInteger(concurrentRequestQueueMaxSize) ||
concurrentRequestQueueMaxSize < 1 ||
concurrentRequestQueueMaxSize > 100
) {
return res
.status(400)
.json({ error: 'concurrentRequestQueueMaxSize must be an integer between 1 and 100' })
}
}
if (concurrentRequestQueueMaxSizeMultiplier !== undefined) {
// 使用 Number.isFinite() 同时排除 NaN、Infinity、-Infinity 和非数字类型
if (
!Number.isFinite(concurrentRequestQueueMaxSizeMultiplier) ||
concurrentRequestQueueMaxSizeMultiplier < 0 ||
concurrentRequestQueueMaxSizeMultiplier > 10
) {
return res.status(400).json({
error: 'concurrentRequestQueueMaxSizeMultiplier must be a finite number between 0 and 10'
})
}
}
if (concurrentRequestQueueTimeoutMs !== undefined) {
if (
typeof concurrentRequestQueueTimeoutMs !== 'number' ||
!Number.isInteger(concurrentRequestQueueTimeoutMs) ||
concurrentRequestQueueTimeoutMs < 5000 ||
concurrentRequestQueueTimeoutMs > 300000
) {
return res.status(400).json({
error:
'concurrentRequestQueueTimeoutMs must be an integer between 5000 and 300000 (5 seconds to 5 minutes)'
})
}
}
const updateData = {}
if (claudeCodeOnlyEnabled !== undefined) {
updateData.claudeCodeOnlyEnabled = claudeCodeOnlyEnabled
@@ -132,6 +184,18 @@ router.put('/claude-relay-config', authenticateAdmin, async (req, res) => {
if (userMessageQueueTimeoutMs !== undefined) {
updateData.userMessageQueueTimeoutMs = userMessageQueueTimeoutMs
}
if (concurrentRequestQueueEnabled !== undefined) {
updateData.concurrentRequestQueueEnabled = concurrentRequestQueueEnabled
}
if (concurrentRequestQueueMaxSize !== undefined) {
updateData.concurrentRequestQueueMaxSize = concurrentRequestQueueMaxSize
}
if (concurrentRequestQueueMaxSizeMultiplier !== undefined) {
updateData.concurrentRequestQueueMaxSizeMultiplier = concurrentRequestQueueMaxSizeMultiplier
}
if (concurrentRequestQueueTimeoutMs !== undefined) {
updateData.concurrentRequestQueueTimeoutMs = concurrentRequestQueueTimeoutMs
}
const updatedConfig = await claudeRelayConfigService.updateConfig(
updateData,

View File

@@ -8,6 +8,7 @@ const router = express.Router()
const redis = require('../../models/redis')
const logger = require('../../utils/logger')
const { authenticateAdmin } = require('../../middleware/auth')
const { calculateWaitTimeStats } = require('../../utils/statsHelper')
/**
* GET /admin/concurrency
@@ -17,17 +18,29 @@ router.get('/concurrency', authenticateAdmin, async (req, res) => {
try {
const status = await redis.getAllConcurrencyStatus()
// 为每个 API Key 获取排队计数
const statusWithQueue = await Promise.all(
status.map(async (s) => {
const queueCount = await redis.getConcurrencyQueueCount(s.apiKeyId)
return {
...s,
queueCount
}
})
)
// 计算汇总统计
const summary = {
totalKeys: status.length,
totalActiveRequests: status.reduce((sum, s) => sum + s.activeCount, 0),
totalExpiredRequests: status.reduce((sum, s) => sum + s.expiredCount, 0)
totalKeys: statusWithQueue.length,
totalActiveRequests: statusWithQueue.reduce((sum, s) => sum + s.activeCount, 0),
totalExpiredRequests: statusWithQueue.reduce((sum, s) => sum + s.expiredCount, 0),
totalQueuedRequests: statusWithQueue.reduce((sum, s) => sum + s.queueCount, 0)
}
res.json({
success: true,
summary,
concurrencyStatus: status
concurrencyStatus: statusWithQueue
})
} catch (error) {
logger.error('❌ Failed to get concurrency status:', error)
@@ -39,6 +52,156 @@ router.get('/concurrency', authenticateAdmin, async (req, res) => {
}
})
/**
* GET /admin/concurrency-queue/stats
* 获取排队统计信息
*/
router.get('/concurrency-queue/stats', authenticateAdmin, async (req, res) => {
try {
// 获取所有有统计数据的 API Key
const statsKeys = await redis.scanConcurrencyQueueStatsKeys()
const queueKeys = await redis.scanConcurrencyQueueKeys()
// 合并所有相关的 API Key
const allApiKeyIds = [...new Set([...statsKeys, ...queueKeys])]
// 获取各 API Key 的详细统计
const perKeyStats = await Promise.all(
allApiKeyIds.map(async (apiKeyId) => {
const [queueCount, stats, waitTimes] = await Promise.all([
redis.getConcurrencyQueueCount(apiKeyId),
redis.getConcurrencyQueueStats(apiKeyId),
redis.getQueueWaitTimes(apiKeyId)
])
return {
apiKeyId,
currentQueueCount: queueCount,
stats,
waitTimeStats: calculateWaitTimeStats(waitTimes)
}
})
)
// 获取全局等待时间统计
const globalWaitTimes = await redis.getGlobalQueueWaitTimes()
const globalWaitTimeStats = calculateWaitTimeStats(globalWaitTimes)
// 计算全局汇总
const globalStats = {
totalEntered: perKeyStats.reduce((sum, s) => sum + s.stats.entered, 0),
totalSuccess: perKeyStats.reduce((sum, s) => sum + s.stats.success, 0),
totalTimeout: perKeyStats.reduce((sum, s) => sum + s.stats.timeout, 0),
totalCancelled: perKeyStats.reduce((sum, s) => sum + s.stats.cancelled, 0),
totalSocketChanged: perKeyStats.reduce((sum, s) => sum + (s.stats.socket_changed || 0), 0),
totalRejectedOverload: perKeyStats.reduce(
(sum, s) => sum + (s.stats.rejected_overload || 0),
0
),
currentTotalQueued: perKeyStats.reduce((sum, s) => sum + s.currentQueueCount, 0),
// 队列资源利用率指标
peakQueueSize:
perKeyStats.length > 0 ? Math.max(...perKeyStats.map((s) => s.currentQueueCount)) : 0,
avgQueueSize:
perKeyStats.length > 0
? Math.round(
perKeyStats.reduce((sum, s) => sum + s.currentQueueCount, 0) / perKeyStats.length
)
: 0,
activeApiKeys: perKeyStats.filter((s) => s.currentQueueCount > 0).length
}
// 计算成功率
if (globalStats.totalEntered > 0) {
globalStats.successRate = Math.round(
(globalStats.totalSuccess / globalStats.totalEntered) * 100
)
globalStats.timeoutRate = Math.round(
(globalStats.totalTimeout / globalStats.totalEntered) * 100
)
globalStats.cancelledRate = Math.round(
(globalStats.totalCancelled / globalStats.totalEntered) * 100
)
}
// 从全局等待时间统计中提取关键指标
if (globalWaitTimeStats) {
globalStats.avgWaitTimeMs = globalWaitTimeStats.avg
globalStats.p50WaitTimeMs = globalWaitTimeStats.p50
globalStats.p90WaitTimeMs = globalWaitTimeStats.p90
globalStats.p99WaitTimeMs = globalWaitTimeStats.p99
// 多实例采样策略标记(详见 design.md Decision 9
// 全局 P90 仅用于可视化和监控,不用于系统决策
// 健康检查使用 API Key 级别的 P90每 Key 独立采样)
globalWaitTimeStats.globalP90ForVisualizationOnly = true
}
res.json({
success: true,
globalStats,
globalWaitTimeStats,
perKeyStats
})
} catch (error) {
logger.error('❌ Failed to get queue stats:', error)
res.status(500).json({
success: false,
error: 'Failed to get queue stats',
message: error.message
})
}
})
/**
* DELETE /admin/concurrency-queue/:apiKeyId
* 清理特定 API Key 的排队计数
*/
router.delete('/concurrency-queue/:apiKeyId', authenticateAdmin, async (req, res) => {
try {
const { apiKeyId } = req.params
await redis.clearConcurrencyQueue(apiKeyId)
logger.warn(`🧹 Admin ${req.admin?.username || 'unknown'} cleared queue for key ${apiKeyId}`)
res.json({
success: true,
message: `Successfully cleared queue for API key ${apiKeyId}`
})
} catch (error) {
logger.error(`❌ Failed to clear queue for ${req.params.apiKeyId}:`, error)
res.status(500).json({
success: false,
error: 'Failed to clear queue',
message: error.message
})
}
})
/**
* DELETE /admin/concurrency-queue
* 清理所有排队计数
*/
router.delete('/concurrency-queue', authenticateAdmin, async (req, res) => {
try {
const cleared = await redis.clearAllConcurrencyQueues()
logger.warn(`🧹 Admin ${req.admin?.username || 'unknown'} cleared ALL queues`)
res.json({
success: true,
message: 'Successfully cleared all queues',
cleared
})
} catch (error) {
logger.error('❌ Failed to clear all queues:', error)
res.status(500).json({
success: false,
error: 'Failed to clear all queues',
message: error.message
})
}
})
/**
* GET /admin/concurrency/:apiKeyId
* 获取特定 API Key 的并发状态详情
@@ -47,10 +210,14 @@ router.get('/concurrency/:apiKeyId', authenticateAdmin, async (req, res) => {
try {
const { apiKeyId } = req.params
const status = await redis.getConcurrencyStatus(apiKeyId)
const queueCount = await redis.getConcurrencyQueueCount(apiKeyId)
res.json({
success: true,
concurrencyStatus: status
concurrencyStatus: {
...status,
queueCount
}
})
} catch (error) {
logger.error(`❌ Failed to get concurrency status for ${req.params.apiKeyId}:`, error)

View File

@@ -190,12 +190,42 @@ async function handleMessagesRequest(req, res) {
)
if (isStream) {
// 🔍 检查客户端连接是否仍然有效(可能在并发排队等待期间断开)
if (res.destroyed || res.socket?.destroyed || res.writableEnded) {
logger.warn(
`⚠️ Client disconnected before stream response could start for key: ${req.apiKey?.name || 'unknown'}`
)
return undefined
}
// 流式响应 - 只使用官方真实usage数据
res.setHeader('Content-Type', 'text/event-stream')
res.setHeader('Cache-Control', 'no-cache')
res.setHeader('Connection', 'keep-alive')
res.setHeader('Access-Control-Allow-Origin', '*')
res.setHeader('X-Accel-Buffering', 'no') // 禁用 Nginx 缓冲
// ⚠️ 检查 headers 是否已发送(可能在排队心跳时已设置)
if (!res.headersSent) {
res.setHeader('Content-Type', 'text/event-stream')
res.setHeader('Cache-Control', 'no-cache')
// ⚠️ 关键修复:尊重 auth.js 提前设置的 Connection: close
// 当并发队列功能启用时auth.js 会设置 Connection: close 来禁用 Keep-Alive
// 这里只在没有设置过 Connection 头时才设置 keep-alive
const existingConnection = res.getHeader('Connection')
if (!existingConnection) {
res.setHeader('Connection', 'keep-alive')
} else {
logger.api(
`🔌 [STREAM] Preserving existing Connection header: ${existingConnection} for key: ${req.apiKey?.name || 'unknown'}`
)
}
res.setHeader('Access-Control-Allow-Origin', '*')
res.setHeader('X-Accel-Buffering', 'no') // 禁用 Nginx 缓冲
} else {
logger.debug(
`📤 [STREAM] Headers already sent, skipping setHeader for key: ${req.apiKey?.name || 'unknown'}`
)
}
// 禁用 Nagle 算法,确保数据立即发送
if (res.socket && typeof res.socket.setNoDelay === 'function') {
@@ -657,12 +687,61 @@ async function handleMessagesRequest(req, res) {
}
}, 1000) // 1秒后检查
} else {
// 🔍 检查客户端连接是否仍然有效(可能在并发排队等待期间断开)
if (res.destroyed || res.socket?.destroyed || res.writableEnded) {
logger.warn(
`⚠️ Client disconnected before non-stream request could start for key: ${req.apiKey?.name || 'unknown'}`
)
return undefined
}
// 非流式响应 - 只使用官方真实usage数据
logger.info('📄 Starting non-streaming request', {
apiKeyId: req.apiKey.id,
apiKeyName: req.apiKey.name
})
// 📊 监听 socket 事件以追踪连接状态变化
const nonStreamSocket = res.socket
let _clientClosedConnection = false
let _socketCloseTime = null
if (nonStreamSocket) {
const onSocketEnd = () => {
_clientClosedConnection = true
_socketCloseTime = Date.now()
logger.warn(
`⚠️ [NON-STREAM] Socket 'end' event - client sent FIN | key: ${req.apiKey?.name}, ` +
`requestId: ${req.requestId}, elapsed: ${Date.now() - startTime}ms`
)
}
const onSocketClose = () => {
_clientClosedConnection = true
logger.warn(
`⚠️ [NON-STREAM] Socket 'close' event | key: ${req.apiKey?.name}, ` +
`requestId: ${req.requestId}, elapsed: ${Date.now() - startTime}ms, ` +
`hadError: ${nonStreamSocket.destroyed}`
)
}
const onSocketError = (err) => {
logger.error(
`❌ [NON-STREAM] Socket error | key: ${req.apiKey?.name}, ` +
`requestId: ${req.requestId}, error: ${err.message}`
)
}
nonStreamSocket.once('end', onSocketEnd)
nonStreamSocket.once('close', onSocketClose)
nonStreamSocket.once('error', onSocketError)
// 清理监听器(在响应结束后)
res.once('finish', () => {
nonStreamSocket.removeListener('end', onSocketEnd)
nonStreamSocket.removeListener('close', onSocketClose)
nonStreamSocket.removeListener('error', onSocketError)
})
}
// 生成会话哈希用于sticky会话
const sessionHash = sessionHelper.generateSessionHash(req.body)
@@ -867,6 +946,15 @@ async function handleMessagesRequest(req, res) {
bodyLength: response.body ? response.body.length : 0
})
// 🔍 检查客户端连接是否仍然有效
// 在长时间请求过程中,客户端可能已经断开连接(超时、用户取消等)
if (res.destroyed || res.socket?.destroyed || res.writableEnded) {
logger.warn(
`⚠️ Client disconnected before non-stream response could be sent for key: ${req.apiKey?.name || 'unknown'}`
)
return undefined
}
res.status(response.statusCode)
// 设置响应头,避免 Content-Length 和 Transfer-Encoding 冲突
@@ -932,10 +1020,12 @@ async function handleMessagesRequest(req, res) {
logger.warn('⚠️ No usage data found in Claude API JSON response')
}
// 使用 Express 内建的 res.json() 发送响应(简单可靠)
res.json(jsonData)
} catch (parseError) {
logger.warn('⚠️ Failed to parse Claude API response as JSON:', parseError.message)
logger.info('📄 Raw response body:', response.body)
// 使用 Express 内建的 res.send() 发送响应(简单可靠)
res.send(response.body)
}