feat: enhance concurrency queue with health check and admin endpoints

- Add queue health check for fast-fail when overloaded (P90 > threshold)
  - Implement socket identity verification with UUID token
  - Add wait time statistics (P50/P90/P99) and queue stats tracking
  - Add admin endpoints for queue stats and cleanup
  - Add CLEAR_CONCURRENCY_QUEUES_ON_STARTUP config option
  - Update documentation with troubleshooting and proxy config guide
This commit is contained in:
DaydreamCoding
2025-12-12 14:08:30 +08:00
committed by QTom
parent 403f609f69
commit 07633ddbf8
18 changed files with 3039 additions and 86 deletions

View File

@@ -584,6 +584,20 @@ class Application {
// 使用 Lua 脚本批量清理所有过期项
for (const key of keys) {
// 跳过非 Sorted Set 类型的键(这些键有各自的清理逻辑)
// - concurrency:queue:stats:* 是 Hash 类型
// - concurrency:queue:wait_times:* 是 List 类型
// - concurrency:queue:* (不含stats/wait_times) 是 String 类型
if (
key.startsWith('concurrency:queue:stats:') ||
key.startsWith('concurrency:queue:wait_times:') ||
(key.startsWith('concurrency:queue:') &&
!key.includes(':stats:') &&
!key.includes(':wait_times:'))
) {
continue
}
try {
const cleaned = await redis.client.eval(
`
@@ -633,6 +647,20 @@ class Application {
// 然后启动定时清理任务
userMessageQueueService.startCleanupTask()
})
// 🚦 清理服务重启后残留的并发排队计数器
// 多实例部署时建议关闭此开关,避免新实例启动时清空其他实例的队列计数
// 可通过 DELETE /admin/concurrency/queue 接口手动清理
const clearQueuesOnStartup = process.env.CLEAR_CONCURRENCY_QUEUES_ON_STARTUP !== 'false'
if (clearQueuesOnStartup) {
redis.clearAllConcurrencyQueues().catch((error) => {
logger.error('❌ Error clearing concurrency queues on startup:', error)
})
} else {
logger.info(
'🚦 Skipping concurrency queue cleanup on startup (CLEAR_CONCURRENCY_QUEUES_ON_STARTUP=false)'
)
}
}
setupGracefulShutdown() {