mirror of
https://github.com/Wei-Shaw/claude-relay-service.git
synced 2026-01-22 16:43:35 +00:00
feat: 添加上游不稳定错误检测与账户临时不可用机制
## 背景
当上游 API(如 Anthropic、AWS Bedrock 等)出现临时故障时,服务会持续向故障
账户发送请求,导致用户体验下降。需要自动检测上游不稳定状态并临时排除故障账户。
## 改动内容
### 新增 unstableUpstreamHelper.js
- 检测多种上游不稳定错误模式
- 支持环境变量扩展检测规则
### 修改 unifiedClaudeScheduler.js
- 新增 markAccountTemporarilyUnavailable() 方法:标记账户临时不可用
- 新增 isAccountTemporarilyUnavailable() 方法:检查账户是否临时不可用
- 专属账户检查:claude-official、claude-console、bedrock 临时不可用时自动回退到池
- 池账户选择:跳过临时不可用的账户
### 修改 claudeRelayService.js
- _handleServerError() 方法增加临时不可用标记逻辑
- 5xx 错误时自动标记账户临时不可用(5分钟 TTL)
## 检测的状态码
| 分类 | 状态码 | 说明 |
|------|--------|------|
| 服务器错误 | 500-599 | 内部错误、服务不可用等 |
| 超时类 | 408 | 请求超时 |
| 连接类 | 499 | 客户端关闭请求 (Nginx) |
| 网关类 | 502, 503, 504 | 网关错误、服务不可用、网关超时(已包含在 500-599 范围内,此处显式列出) |
| CDN类 | 522 | Cloudflare 连接超时(同样包含在 500-599 范围内) |
| 语义类 | error.type = "server_error" | API 级别服务器错误 |
## 环境变量配置
- UNSTABLE_ERROR_TYPES: 额外的错误类型(逗号分隔)
- UNSTABLE_ERROR_KEYWORDS: 错误消息关键词(逗号分隔)
## Redis 键
- temp_unavailable:{accountType}:{accountId} - TTL 300秒
This commit is contained in:
77
src/utils/unstableUpstreamHelper.js
Normal file
77
src/utils/unstableUpstreamHelper.js
Normal file
@@ -0,0 +1,77 @@
|
||||
const logger = require('./logger')
|
||||
|
||||
/**
 * Split a comma-separated configuration value into normalized entries.
 *
 * Every entry is trimmed and lower-cased; entries that end up empty are dropped.
 *
 * @param {string|undefined} envValue - Raw value, e.g. "Overloaded_Error, timeout".
 * @returns {string[]} Normalized entries; an empty array when envValue is falsy.
 */
function parseList(envValue) {
  if (!envValue) {
    return []
  }

  const entries = []
  for (const rawEntry of envValue.split(',')) {
    const entry = rawEntry.trim().toLowerCase()
    if (entry) {
      entries.push(entry)
    }
  }
  return entries
}
|
||||
|
||||
// Detection tables, built once when the module is loaded.

// Status codes below 500 that are still treated as upstream instability, plus the
// gateway/CDN codes listed explicitly.
const unstableStatusCodes = new Set([
  408, // request timeout
  499, // client closed request (Nginx)
  502, // bad gateway
  503, // service unavailable
  504, // gateway timeout
  522 // Cloudflare connection timed out
])

// Operator-supplied extensions, both comma-separated and compared lower-cased.
const { UNSTABLE_ERROR_TYPES: extraTypesEnv, UNSTABLE_ERROR_KEYWORDS: extraKeywordsEnv } =
  process.env

// Additional error type/code values that should count as unstable.
const unstableTypes = new Set(parseList(extraTypesEnv))

// Substrings that mark an error message as unstable.
const unstableKeywords = parseList(extraKeywordsEnv)
|
||||
|
||||
/**
 * Reduce an upstream error body to a flat `{ type, code, message }` shape.
 *
 * Accepted inputs:
 * - falsy value: returns `{}`
 * - string: parsed as JSON and normalized again; text that is not JSON, or that
 *   parses to a bare number/boolean, is kept verbatim as the message
 * - `{ error: { ... } }`: fields are read from the nested error object
 * - `{ error: "text" }`: the text backfills whichever of type/code/message is missing
 * - anything else: top-level `type` / `code` / `message` are used
 *
 * `type` and `code` fall back to each other so callers can test either field.
 *
 * @param {*} payload - Raw error body (object, JSON string, plain text or empty).
 * @returns {{type?: *, code?: *, message?: *}} Normalized fields; any may be undefined.
 */
function normalizeErrorPayload(payload) {
  if (!payload) {
    return {}
  }

  if (typeof payload === 'string') {
    try {
      const parsed = JSON.parse(payload)
      // Only structured values (or a nested JSON string) are worth normalizing again.
      // A bare number/boolean such as "503" would otherwise lose its text entirely.
      if (parsed === null || typeof parsed === 'object' || typeof parsed === 'string') {
        return normalizeErrorPayload(parsed)
      }
    } catch (e) {
      // Not JSON: the raw text is used as the message below.
    }
    return { message: payload }
  }

  const { error } = payload

  if (error && typeof error === 'object') {
    return {
      type: error.type || error.error || error.code,
      code: error.code || error.error || error.type,
      message: error.message || error.msg || payload.message || error.error
    }
  }

  // Flat bodies such as { "error": "server_error" } carry the only usable
  // information in the string itself, so use it wherever a field is missing.
  if (error && typeof error === 'string') {
    return {
      type: payload.type || payload.code || error,
      code: payload.code || payload.type || error,
      message: payload.message || error
    }
  }

  return {
    type: payload.type || payload.code,
    code: payload.code || payload.type,
    message: payload.message || ''
  }
}
|
||||
|
||||
/**
 * Decide whether an upstream failure looks like transient instability.
 *
 * The status code is checked first: any finite status of 500 or above, or one
 * listed in `unstableStatusCodes`, is unstable regardless of the body. Otherwise
 * the body is normalized and considered unstable when its type or code is
 * `server_error`, when either is listed in `unstableTypes`, or when its message
 * contains one of `unstableKeywords`. All comparisons are case-insensitive.
 *
 * @param {number|string} statusCode - Upstream status; coerced with Number().
 * @param {*} payload - Error body, passed to normalizeErrorPayload().
 * @returns {boolean} true when the error should be treated as unstable.
 */
function isUnstableUpstreamError(statusCode, payload) {
  const status = Number(statusCode)
  if (Number.isFinite(status) && (status >= 500 || unstableStatusCodes.has(status))) {
    return true
  }

  const toLowerText = (value) => (value || '').toString().toLowerCase()

  const fields = normalizeErrorPayload(payload)
  const errorType = toLowerText(fields.type)
  const errorCode = toLowerText(fields.code)
  const errorText = toLowerText(fields.message)

  const isUnstableName = (name) => name === 'server_error' || unstableTypes.has(name)
  if (isUnstableName(errorType) || isUnstableName(errorCode)) {
    return true
  }

  // With no configured keywords, some() on the empty list yields false.
  return unstableKeywords.some((keyword) => errorText.includes(keyword))
}
|
||||
|
||||
/**
 * Emit a warning that an account hit an unstable upstream error.
 *
 * @param {string} accountLabel - Identifier of the account, used in the log line.
 * @param {number|string} statusCode - Upstream status that triggered the warning.
 * @returns {void}
 */
function logUnstable(accountLabel, statusCode) {
  const warning = `Detected unstable upstream error (${statusCode}) for account ${accountLabel}, marking temporarily unavailable`
  logger.warn(warning)
}
|
||||
|
||||
// Public API: the detection predicate and its companion log helper.
module.exports = { isUnstableUpstreamError, logUnstable }
|
||||
Reference in New Issue
Block a user