feat: enhance concurrency queue with health check and admin endpoints

- Add queue health check for fast-fail when overloaded (P90 wait time >= queue timeout × health threshold)
- Implement socket identity verification with UUID token
- Add wait time statistics (P50/P90/P99) and queue stats tracking
- Add admin endpoints for queue stats and cleanup
- Add CLEAR_CONCURRENCY_QUEUES_ON_STARTUP config option
- Update documentation with troubleshooting and proxy config guide
This commit is contained in:
DaydreamCoding
2025-12-12 14:08:30 +08:00
committed by QTom
parent 403f609f69
commit 07633ddbf8
18 changed files with 3039 additions and 86 deletions

View File

@@ -243,10 +243,11 @@ class BedrockRelayService {
isBackendError ? { backendError: queueResult.errorMessage } : {}
)
if (!res.headersSent) {
const existingConnection = res.getHeader ? res.getHeader('Connection') : null
res.writeHead(statusCode, {
'Content-Type': 'text/event-stream',
'Cache-Control': 'no-cache',
Connection: 'keep-alive',
Connection: existingConnection || 'keep-alive',
'x-user-message-queue-error': errorType
})
}
@@ -309,10 +310,17 @@ class BedrockRelayService {
}
// 设置SSE响应头
// ⚠️ 关键修复:尊重 auth.js 提前设置的 Connection: close
const existingConnection = res.getHeader ? res.getHeader('Connection') : null
if (existingConnection) {
logger.debug(
`🔌 [Bedrock Stream] Preserving existing Connection header: ${existingConnection}`
)
}
res.writeHead(200, {
'Content-Type': 'text/event-stream',
'Cache-Control': 'no-cache',
Connection: 'keep-alive',
Connection: existingConnection || 'keep-alive',
'Access-Control-Allow-Origin': '*',
'Access-Control-Allow-Headers': 'Content-Type, Authorization'
})

View File

@@ -4,6 +4,7 @@ const logger = require('../utils/logger')
const config = require('../../config/config')
const { parseVendorPrefixedModel } = require('../utils/modelHelper')
const userMessageQueueService = require('./userMessageQueueService')
const { isStreamWritable } = require('../utils/streamHelper')
class CcrRelayService {
constructor() {
@@ -379,10 +380,13 @@ class CcrRelayService {
isBackendError ? { backendError: queueResult.errorMessage } : {}
)
if (!responseStream.headersSent) {
const existingConnection = responseStream.getHeader
? responseStream.getHeader('Connection')
: null
responseStream.writeHead(statusCode, {
'Content-Type': 'text/event-stream',
'Cache-Control': 'no-cache',
Connection: 'keep-alive',
Connection: existingConnection || 'keep-alive',
'x-user-message-queue-error': errorType
})
}
@@ -606,10 +610,13 @@ class CcrRelayService {
// 设置错误响应的状态码和响应头
if (!responseStream.headersSent) {
const existingConnection = responseStream.getHeader
? responseStream.getHeader('Connection')
: null
const errorHeaders = {
'Content-Type': response.headers['content-type'] || 'application/json',
'Cache-Control': 'no-cache',
Connection: 'keep-alive'
Connection: existingConnection || 'keep-alive'
}
// 避免 Transfer-Encoding 冲突,让 Express 自动处理
delete errorHeaders['Transfer-Encoding']
@@ -619,13 +626,13 @@ class CcrRelayService {
// 直接透传错误数据,不进行包装
response.data.on('data', (chunk) => {
if (!responseStream.destroyed) {
if (isStreamWritable(responseStream)) {
responseStream.write(chunk)
}
})
response.data.on('end', () => {
if (!responseStream.destroyed) {
if (isStreamWritable(responseStream)) {
responseStream.end()
}
resolve() // 不抛出异常,正常完成流处理
@@ -659,11 +666,20 @@ class CcrRelayService {
})
// 设置响应头
// ⚠️ 关键修复:尊重 auth.js 提前设置的 Connection: close
if (!responseStream.headersSent) {
const existingConnection = responseStream.getHeader
? responseStream.getHeader('Connection')
: null
if (existingConnection) {
logger.debug(
`🔌 [CCR Stream] Preserving existing Connection header: ${existingConnection}`
)
}
const headers = {
'Content-Type': 'text/event-stream',
'Cache-Control': 'no-cache',
Connection: 'keep-alive',
Connection: existingConnection || 'keep-alive',
'Access-Control-Allow-Origin': '*',
'Access-Control-Allow-Headers': 'Cache-Control'
}
@@ -702,12 +718,17 @@ class CcrRelayService {
}
// 写入到响应流
if (outputLine && !responseStream.destroyed) {
if (outputLine && isStreamWritable(responseStream)) {
responseStream.write(`${outputLine}\n`)
} else if (outputLine) {
// 客户端连接已断开,记录警告
logger.warn(
`⚠️ [CCR] Client disconnected during stream, skipping data for account: ${accountId}`
)
}
} else {
// 空行也需要传递
if (!responseStream.destroyed) {
if (isStreamWritable(responseStream)) {
responseStream.write('\n')
}
}
@@ -718,10 +739,6 @@ class CcrRelayService {
})
response.data.on('end', () => {
if (!responseStream.destroyed) {
responseStream.end()
}
// 如果收集到使用统计数据,调用回调
if (usageCallback && Object.keys(collectedUsage).length > 0) {
try {
@@ -733,12 +750,26 @@ class CcrRelayService {
}
}
resolve()
if (isStreamWritable(responseStream)) {
// 等待数据完全 flush 到客户端后再 resolve
responseStream.end(() => {
logger.debug(
`🌊 CCR stream response completed and flushed | bytesWritten: ${responseStream.bytesWritten || 'unknown'}`
)
resolve()
})
} else {
// 连接已断开,记录警告
logger.warn(
`⚠️ [CCR] Client disconnected before stream end, data may not have been received | account: ${accountId}`
)
resolve()
}
})
response.data.on('error', (err) => {
logger.error('❌ Stream data error:', err)
if (!responseStream.destroyed) {
if (isStreamWritable(responseStream)) {
responseStream.end()
}
reject(err)
@@ -770,7 +801,7 @@ class CcrRelayService {
}
}
if (!responseStream.destroyed) {
if (isStreamWritable(responseStream)) {
responseStream.write(`data: ${JSON.stringify(errorResponse)}\n\n`)
responseStream.end()
}

View File

@@ -10,6 +10,7 @@ const {
isAccountDisabledError
} = require('../utils/errorSanitizer')
const userMessageQueueService = require('./userMessageQueueService')
const { isStreamWritable } = require('../utils/streamHelper')
class ClaudeConsoleRelayService {
constructor() {
@@ -517,10 +518,13 @@ class ClaudeConsoleRelayService {
isBackendError ? { backendError: queueResult.errorMessage } : {}
)
if (!responseStream.headersSent) {
const existingConnection = responseStream.getHeader
? responseStream.getHeader('Connection')
: null
responseStream.writeHead(statusCode, {
'Content-Type': 'text/event-stream',
'Cache-Control': 'no-cache',
Connection: 'keep-alive',
Connection: existingConnection || 'keep-alive',
'x-user-message-queue-error': errorType
})
}
@@ -878,7 +882,7 @@ class ClaudeConsoleRelayService {
`🧹 [Stream] [SANITIZED] Error response to client: ${JSON.stringify(sanitizedError)}`
)
if (!responseStream.destroyed) {
if (isStreamWritable(responseStream)) {
responseStream.write(JSON.stringify(sanitizedError))
responseStream.end()
}
@@ -886,7 +890,7 @@ class ClaudeConsoleRelayService {
const sanitizedText = sanitizeErrorMessage(errorDataForCheck)
logger.error(`🧹 [Stream] [SANITIZED] Error response to client: ${sanitizedText}`)
if (!responseStream.destroyed) {
if (isStreamWritable(responseStream)) {
responseStream.write(sanitizedText)
responseStream.end()
}
@@ -923,11 +927,22 @@ class ClaudeConsoleRelayService {
})
// 设置响应头
// ⚠️ 关键修复:尊重 auth.js 提前设置的 Connection: close
// 当并发队列功能启用时auth.js 会设置 Connection: close 来禁用 Keep-Alive
if (!responseStream.headersSent) {
const existingConnection = responseStream.getHeader
? responseStream.getHeader('Connection')
: null
const connectionHeader = existingConnection || 'keep-alive'
if (existingConnection) {
logger.debug(
`🔌 [Console Stream] Preserving existing Connection header: ${existingConnection}`
)
}
responseStream.writeHead(200, {
'Content-Type': 'text/event-stream',
'Cache-Control': 'no-cache',
Connection: 'keep-alive',
Connection: connectionHeader,
'X-Accel-Buffering': 'no'
})
}
@@ -953,20 +968,33 @@ class ClaudeConsoleRelayService {
buffer = lines.pop() || ''
// 转发数据并解析usage
if (lines.length > 0 && !responseStream.destroyed) {
const linesToForward = lines.join('\n') + (lines.length > 0 ? '\n' : '')
if (lines.length > 0) {
// 检查流是否可写(客户端连接是否有效)
if (isStreamWritable(responseStream)) {
const linesToForward = lines.join('\n') + (lines.length > 0 ? '\n' : '')
// 应用流转换器如果有
if (streamTransformer) {
const transformed = streamTransformer(linesToForward)
if (transformed) {
responseStream.write(transformed)
// 应用流转换器如果有
let dataToWrite = linesToForward
if (streamTransformer) {
const transformed = streamTransformer(linesToForward)
if (transformed) {
dataToWrite = transformed
} else {
dataToWrite = null
}
}
if (dataToWrite) {
responseStream.write(dataToWrite)
}
} else {
responseStream.write(linesToForward)
// 客户端连接已断开记录警告但仍继续解析usage
logger.warn(
`⚠️ [Console] Client disconnected during stream, skipping ${lines.length} lines for account: ${account?.name || accountId}`
)
}
// 解析SSE数据寻找usage信息
// 解析SSE数据寻找usage信息(无论连接状态如何)
for (const line of lines) {
if (line.startsWith('data:')) {
const jsonStr = line.slice(5).trimStart()
@@ -1074,7 +1102,7 @@ class ClaudeConsoleRelayService {
`❌ Error processing Claude Console stream data (Account: ${account?.name || accountId}):`,
error
)
if (!responseStream.destroyed) {
if (isStreamWritable(responseStream)) {
// 如果有 streamTransformer如测试请求使用前端期望的格式
if (streamTransformer) {
responseStream.write(
@@ -1097,7 +1125,7 @@ class ClaudeConsoleRelayService {
response.data.on('end', () => {
try {
// 处理缓冲区中剩余的数据
if (buffer.trim() && !responseStream.destroyed) {
if (buffer.trim() && isStreamWritable(responseStream)) {
if (streamTransformer) {
const transformed = streamTransformer(buffer)
if (transformed) {
@@ -1146,12 +1174,33 @@ class ClaudeConsoleRelayService {
}
// 确保流正确结束
if (!responseStream.destroyed) {
responseStream.end()
}
if (isStreamWritable(responseStream)) {
// 📊 诊断日志:流结束前状态
logger.info(
`📤 [STREAM] Ending response | destroyed: ${responseStream.destroyed}, ` +
`socketDestroyed: ${responseStream.socket?.destroyed}, ` +
`socketBytesWritten: ${responseStream.socket?.bytesWritten || 0}`
)
logger.debug('🌊 Claude Console Claude stream response completed')
resolve()
// 禁用 Nagle 算法确保数据立即发送
if (responseStream.socket && !responseStream.socket.destroyed) {
responseStream.socket.setNoDelay(true)
}
// 等待数据完全 flush 到客户端后再 resolve
responseStream.end(() => {
logger.info(
`✅ [STREAM] Response ended and flushed | socketBytesWritten: ${responseStream.socket?.bytesWritten || 'unknown'}`
)
resolve()
})
} else {
// 连接已断开,记录警告
logger.warn(
`⚠️ [Console] Client disconnected before stream end, data may not have been received | account: ${account?.name || accountId}`
)
resolve()
}
} catch (error) {
logger.error('❌ Error processing stream end:', error)
reject(error)
@@ -1163,7 +1212,7 @@ class ClaudeConsoleRelayService {
`❌ Claude Console stream error (Account: ${account?.name || accountId}):`,
error
)
if (!responseStream.destroyed) {
if (isStreamWritable(responseStream)) {
// 如果有 streamTransformer如测试请求使用前端期望的格式
if (streamTransformer) {
responseStream.write(
@@ -1211,14 +1260,17 @@ class ClaudeConsoleRelayService {
// 发送错误响应
if (!responseStream.headersSent) {
const existingConnection = responseStream.getHeader
? responseStream.getHeader('Connection')
: null
responseStream.writeHead(error.response?.status || 500, {
'Content-Type': 'text/event-stream',
'Cache-Control': 'no-cache',
Connection: 'keep-alive'
Connection: existingConnection || 'keep-alive'
})
}
if (!responseStream.destroyed) {
if (isStreamWritable(responseStream)) {
// 如果有 streamTransformer如测试请求使用前端期望的格式
if (streamTransformer) {
responseStream.write(
@@ -1388,7 +1440,7 @@ class ClaudeConsoleRelayService {
'Cache-Control': 'no-cache'
})
}
if (!responseStream.destroyed && !responseStream.writableEnded) {
if (isStreamWritable(responseStream)) {
responseStream.write(
`data: ${JSON.stringify({ type: 'test_complete', success: false, error: error.message })}\n\n`
)

View File

@@ -20,6 +20,15 @@ const DEFAULT_CONFIG = {
userMessageQueueDelayMs: 200, // 请求间隔(毫秒)
userMessageQueueTimeoutMs: 5000, // 队列等待超时(毫秒),优化后锁持有时间短无需长等待
userMessageQueueLockTtlMs: 5000, // 锁TTL毫秒请求发送后立即释放无需长TTL
// 并发请求排队配置
concurrentRequestQueueEnabled: false, // 是否启用并发请求排队(默认关闭)
concurrentRequestQueueMaxSize: 3, // 固定最小排队数默认3
concurrentRequestQueueMaxSizeMultiplier: 0, // 并发数的倍数默认0仅使用固定值
concurrentRequestQueueTimeoutMs: 10000, // 排队超时毫秒默认10秒
concurrentRequestQueueMaxRedisFailCount: 5, // 连续 Redis 失败阈值默认5次
// 排队健康检查配置
concurrentRequestQueueHealthCheckEnabled: true, // 是否启用排队健康检查(默认开启)
concurrentRequestQueueHealthThreshold: 0.8, // 健康检查阈值P90 >= 超时 × 阈值时拒绝新请求)
updatedAt: null,
updatedBy: null
}
@@ -105,7 +114,8 @@ class ClaudeRelayConfigService {
logger.info(`✅ Claude relay config updated by ${updatedBy}:`, {
claudeCodeOnlyEnabled: updatedConfig.claudeCodeOnlyEnabled,
globalSessionBindingEnabled: updatedConfig.globalSessionBindingEnabled
globalSessionBindingEnabled: updatedConfig.globalSessionBindingEnabled,
concurrentRequestQueueEnabled: updatedConfig.concurrentRequestQueueEnabled
})
return updatedConfig

View File

@@ -16,6 +16,7 @@ const { formatDateWithTimezone } = require('../utils/dateHelper')
const requestIdentityService = require('./requestIdentityService')
const { createClaudeTestPayload } = require('../utils/testPayloadHelper')
const userMessageQueueService = require('./userMessageQueueService')
const { isStreamWritable } = require('../utils/streamHelper')
class ClaudeRelayService {
constructor() {
@@ -1057,6 +1058,8 @@ class ClaudeRelayService {
logger.info(`🔗 指纹是这个: ${headers['User-Agent']}`)
logger.info(`🔗 指纹是这个: ${headers['User-Agent']}`)
// 根据模型和客户端传递的 anthropic-beta 动态设置 header
const modelId = requestPayload?.model || body?.model
const clientBetaHeader = clientHeaders?.['anthropic-beta']
@@ -1338,10 +1341,13 @@ class ClaudeRelayService {
isBackendError ? { backendError: queueResult.errorMessage } : {}
)
if (!responseStream.headersSent) {
const existingConnection = responseStream.getHeader
? responseStream.getHeader('Connection')
: null
responseStream.writeHead(statusCode, {
'Content-Type': 'text/event-stream',
'Cache-Control': 'no-cache',
Connection: 'keep-alive',
Connection: existingConnection || 'keep-alive',
'x-user-message-queue-error': errorType
})
}
@@ -1699,7 +1705,7 @@ class ClaudeRelayService {
}
})()
}
if (!responseStream.destroyed) {
if (isStreamWritable(responseStream)) {
// 解析 Claude API 返回的错误详情
let errorMessage = `Claude API error: ${res.statusCode}`
try {
@@ -1764,16 +1770,23 @@ class ClaudeRelayService {
buffer = lines.pop() || '' // 保留最后的不完整行
// 转发已处理的完整行到客户端
if (lines.length > 0 && !responseStream.destroyed) {
const linesToForward = lines.join('\n') + (lines.length > 0 ? '\n' : '')
// 如果有流转换器,应用转换
if (streamTransformer) {
const transformed = streamTransformer(linesToForward)
if (transformed) {
responseStream.write(transformed)
if (lines.length > 0) {
if (isStreamWritable(responseStream)) {
const linesToForward = lines.join('\n') + (lines.length > 0 ? '\n' : '')
// 如果有流转换器,应用转换
if (streamTransformer) {
const transformed = streamTransformer(linesToForward)
if (transformed) {
responseStream.write(transformed)
}
} else {
responseStream.write(linesToForward)
}
} else {
responseStream.write(linesToForward)
// 客户端连接已断开记录警告但仍继续解析usage
logger.warn(
`⚠️ [Official] Client disconnected during stream, skipping ${lines.length} lines for account: ${accountId}`
)
}
}
@@ -1878,7 +1891,7 @@ class ClaudeRelayService {
} catch (error) {
logger.error('❌ Error processing stream data:', error)
// 发送错误但不破坏流,让它自然结束
if (!responseStream.destroyed) {
if (isStreamWritable(responseStream)) {
responseStream.write('event: error\n')
responseStream.write(
`data: ${JSON.stringify({
@@ -1894,7 +1907,7 @@ class ClaudeRelayService {
res.on('end', async () => {
try {
// 处理缓冲区中剩余的数据
if (buffer.trim() && !responseStream.destroyed) {
if (buffer.trim() && isStreamWritable(responseStream)) {
if (streamTransformer) {
const transformed = streamTransformer(buffer)
if (transformed) {
@@ -1906,8 +1919,16 @@ class ClaudeRelayService {
}
// 确保流正确结束
if (!responseStream.destroyed) {
if (isStreamWritable(responseStream)) {
responseStream.end()
logger.debug(
`🌊 Stream end called | bytesWritten: ${responseStream.bytesWritten || 'unknown'}`
)
} else {
// 连接已断开,记录警告
logger.warn(
`⚠️ [Official] Client disconnected before stream end, data may not have been received | account: ${account?.name || accountId}`
)
}
} catch (error) {
logger.error('❌ Error processing stream end:', error)
@@ -2105,14 +2126,17 @@ class ClaudeRelayService {
}
if (!responseStream.headersSent) {
const existingConnection = responseStream.getHeader
? responseStream.getHeader('Connection')
: null
responseStream.writeHead(statusCode, {
'Content-Type': 'text/event-stream',
'Cache-Control': 'no-cache',
Connection: 'keep-alive'
Connection: existingConnection || 'keep-alive'
})
}
if (!responseStream.destroyed) {
if (isStreamWritable(responseStream)) {
// 发送 SSE 错误事件
responseStream.write('event: error\n')
responseStream.write(
@@ -2132,13 +2156,16 @@ class ClaudeRelayService {
logger.error(`❌ Claude stream request timeout | Account: ${account?.name || accountId}`)
if (!responseStream.headersSent) {
const existingConnection = responseStream.getHeader
? responseStream.getHeader('Connection')
: null
responseStream.writeHead(504, {
'Content-Type': 'text/event-stream',
'Cache-Control': 'no-cache',
Connection: 'keep-alive'
Connection: existingConnection || 'keep-alive'
})
}
if (!responseStream.destroyed) {
if (isStreamWritable(responseStream)) {
// 发送 SSE 错误事件
responseStream.write('event: error\n')
responseStream.write(
@@ -2453,10 +2480,13 @@ class ClaudeRelayService {
// 设置响应头
if (!responseStream.headersSent) {
const existingConnection = responseStream.getHeader
? responseStream.getHeader('Connection')
: null
responseStream.writeHead(200, {
'Content-Type': 'text/event-stream',
'Cache-Control': 'no-cache',
Connection: 'keep-alive',
Connection: existingConnection || 'keep-alive',
'X-Accel-Buffering': 'no'
})
}
@@ -2484,7 +2514,7 @@ class ClaudeRelayService {
} catch (error) {
logger.error(`❌ Test account connection failed:`, error)
// 发送错误事件给前端
if (!responseStream.destroyed && !responseStream.writableEnded) {
if (isStreamWritable(responseStream)) {
try {
const errorMsg = error.message || '测试失败'
responseStream.write(`data: ${JSON.stringify({ type: 'error', error: errorMsg })}\n\n`)