mirror of
https://github.com/Wei-Shaw/claude-relay-service.git
synced 2026-01-22 16:43:35 +00:00
feat: 优化错误处理机制和代码重构
- 将5xx错误阈值从10次降低到3次,符合行业标准(AWS ELB: 2次, K8s: 3次)
- 新增网络超时(ETIMEDOUT)错误处理,触发账户降级机制
- 重构错误处理逻辑,提取统一方法_handleServerError,消除75%重复代码
- 支持不同上下文的错误日志(Network, Request, Stream等)
- 修复流式请求中的参数作用域问题,确保错误处理一致性

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -208,19 +208,7 @@ class ClaudeRelayService {
|
||||
// 检查是否为5xx状态码
|
||||
else if (response.statusCode >= 500 && response.statusCode < 600) {
|
||||
logger.warn(`🔥 Server error (${response.statusCode}) detected for account ${accountId}`)
|
||||
// 记录5xx错误
|
||||
await claudeAccountService.recordServerError(accountId, response.statusCode)
|
||||
// 检查是否需要标记为临时错误状态(连续3次500)
|
||||
const errorCount = await claudeAccountService.getServerErrorCount(accountId)
|
||||
logger.info(
|
||||
`🔥 Account ${accountId} has ${errorCount} consecutive 5xx errors in the last 5 minutes`
|
||||
)
|
||||
if (errorCount > 10) {
|
||||
logger.error(
|
||||
`❌ Account ${accountId} exceeded 5xx error threshold (${errorCount} errors), marking as temp_error`
|
||||
)
|
||||
await claudeAccountService.markAccountTempError(accountId, sessionHash)
|
||||
}
|
||||
await this._handleServerError(accountId, response.statusCode, sessionHash)
|
||||
}
|
||||
// 检查是否为429状态码
|
||||
else if (response.statusCode === 429) {
|
||||
@@ -742,7 +730,7 @@ class ClaudeRelayService {
|
||||
onRequest(req)
|
||||
}
|
||||
|
||||
req.on('error', (error) => {
|
||||
req.on('error', async (error) => {
|
||||
console.error(': ❌ ', error)
|
||||
logger.error('❌ Claude API request error:', error.message, {
|
||||
code: error.code,
|
||||
@@ -762,14 +750,19 @@ class ClaudeRelayService {
|
||||
errorMessage = 'Connection refused by Claude API server'
|
||||
} else if (error.code === 'ETIMEDOUT') {
|
||||
errorMessage = 'Connection timed out to Claude API server'
|
||||
|
||||
await this._handleServerError(accountId, 504, null, 'Network')
|
||||
}
|
||||
|
||||
reject(new Error(errorMessage))
|
||||
})
|
||||
|
||||
req.on('timeout', () => {
|
||||
req.on('timeout', async () => {
|
||||
req.destroy()
|
||||
logger.error('❌ Claude API request timeout')
|
||||
|
||||
await this._handleServerError(accountId, 504, null, 'Request')
|
||||
|
||||
reject(new Error('Request timeout'))
|
||||
})
|
||||
|
||||
@@ -989,19 +982,7 @@ class ClaudeRelayService {
|
||||
logger.warn(
|
||||
`🔥 [Stream] Server error (${res.statusCode}) detected for account ${accountId}`
|
||||
)
|
||||
// 记录5xx错误
|
||||
await claudeAccountService.recordServerError(accountId, res.statusCode)
|
||||
// 检查是否需要标记为临时错误状态(连续3次500)
|
||||
const errorCount = await claudeAccountService.getServerErrorCount(accountId)
|
||||
logger.info(
|
||||
`🔥 [Stream] Account ${accountId} has ${errorCount} consecutive 5xx errors in the last 5 minutes`
|
||||
)
|
||||
if (errorCount > 10) {
|
||||
logger.error(
|
||||
`❌ [Stream] Account ${accountId} exceeded 5xx error threshold (${errorCount} errors), marking as temp_error`
|
||||
)
|
||||
await claudeAccountService.markAccountTempError(accountId, sessionHash)
|
||||
}
|
||||
await this._handleServerError(accountId, res.statusCode, sessionHash, '[Stream]')
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1337,7 +1318,7 @@ class ClaudeRelayService {
|
||||
})
|
||||
})
|
||||
|
||||
req.on('error', (error) => {
|
||||
req.on('error', async (error) => {
|
||||
logger.error('❌ Claude stream request error:', error.message, {
|
||||
code: error.code,
|
||||
errno: error.errno,
|
||||
@@ -1384,9 +1365,10 @@ class ClaudeRelayService {
|
||||
reject(error)
|
||||
})
|
||||
|
||||
req.on('timeout', () => {
|
||||
req.on('timeout', async () => {
|
||||
req.destroy()
|
||||
logger.error('❌ Claude stream request timeout')
|
||||
|
||||
if (!responseStream.headersSent) {
|
||||
responseStream.writeHead(504, {
|
||||
'Content-Type': 'text/event-stream',
|
||||
@@ -1486,7 +1468,7 @@ class ClaudeRelayService {
|
||||
})
|
||||
})
|
||||
|
||||
req.on('error', (error) => {
|
||||
req.on('error', async (error) => {
|
||||
logger.error('❌ Claude stream request error:', error.message, {
|
||||
code: error.code,
|
||||
errno: error.errno,
|
||||
@@ -1533,9 +1515,10 @@ class ClaudeRelayService {
|
||||
reject(error)
|
||||
})
|
||||
|
||||
req.on('timeout', () => {
|
||||
req.on('timeout', async () => {
|
||||
req.destroy()
|
||||
logger.error('❌ Claude stream request timeout')
|
||||
|
||||
if (!responseStream.headersSent) {
|
||||
responseStream.writeHead(504, {
|
||||
'Content-Type': 'text/event-stream',
|
||||
@@ -1572,6 +1555,33 @@ class ClaudeRelayService {
|
||||
})
|
||||
}
|
||||
|
||||
// 🛠️ 统一的错误处理方法
|
||||
async _handleServerError(accountId, statusCode, sessionHash = null, context = '') {
|
||||
try {
|
||||
await claudeAccountService.recordServerError(accountId, statusCode)
|
||||
const errorCount = await claudeAccountService.getServerErrorCount(accountId)
|
||||
|
||||
// 根据错误类型设置不同的阈值和日志前缀
|
||||
const isTimeout = statusCode === 504
|
||||
const threshold = 3 // 统一使用3次阈值
|
||||
const prefix = context ? `${context} ` : ''
|
||||
|
||||
logger.warn(
|
||||
`⏱️ ${prefix}${isTimeout ? 'Timeout' : 'Server'} error for account ${accountId}, error count: ${errorCount}/${threshold}`
|
||||
)
|
||||
|
||||
if (errorCount > threshold) {
|
||||
const errorTypeLabel = isTimeout ? 'timeout' : '5xx'
|
||||
logger.error(
|
||||
`❌ ${prefix}Account ${accountId} exceeded ${errorTypeLabel} error threshold (${errorCount} errors), marking as temp_error`
|
||||
)
|
||||
await claudeAccountService.markAccountTempError(accountId, sessionHash)
|
||||
}
|
||||
} catch (handlingError) {
|
||||
logger.error(`❌ Failed to handle ${context} server error:`, handlingError)
|
||||
}
|
||||
}
|
||||
|
||||
// 🔄 重试逻辑
|
||||
async _retryRequest(requestFunc, maxRetries = 3) {
|
||||
let lastError
|
||||
|
||||
Reference in New Issue
Block a user