mirror of
https://github.com/Wei-Shaw/claude-relay-service.git
synced 2026-01-22 16:43:35 +00:00
Merge pull request #758 from IanShaw027/upstream-pr-temp-unavailable [skip ci]
feat: 添加上游不稳定错误检测与账户临时不可用机制
This commit is contained in:
@@ -1948,7 +1948,13 @@ class ClaudeRelayService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// 🛠️ 统一的错误处理方法
|
// 🛠️ 统一的错误处理方法
|
||||||
async _handleServerError(accountId, statusCode, _sessionHash = null, context = '') {
|
async _handleServerError(
|
||||||
|
accountId,
|
||||||
|
statusCode,
|
||||||
|
sessionHash = null,
|
||||||
|
context = '',
|
||||||
|
accountType = 'claude-official'
|
||||||
|
) {
|
||||||
try {
|
try {
|
||||||
await claudeAccountService.recordServerError(accountId, statusCode)
|
await claudeAccountService.recordServerError(accountId, statusCode)
|
||||||
const errorCount = await claudeAccountService.getServerErrorCount(accountId)
|
const errorCount = await claudeAccountService.getServerErrorCount(accountId)
|
||||||
@@ -1962,6 +1968,18 @@ class ClaudeRelayService {
|
|||||||
`⏱️ ${prefix}${isTimeout ? 'Timeout' : 'Server'} error for account ${accountId}, error count: ${errorCount}/${threshold}`
|
`⏱️ ${prefix}${isTimeout ? 'Timeout' : 'Server'} error for account ${accountId}, error count: ${errorCount}/${threshold}`
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// 标记账户为临时不可用(5分钟)
|
||||||
|
try {
|
||||||
|
await unifiedClaudeScheduler.markAccountTemporarilyUnavailable(
|
||||||
|
accountId,
|
||||||
|
accountType,
|
||||||
|
sessionHash,
|
||||||
|
300
|
||||||
|
)
|
||||||
|
} catch (markError) {
|
||||||
|
logger.error(`❌ Failed to mark account temporarily unavailable: ${accountId}`, markError)
|
||||||
|
}
|
||||||
|
|
||||||
if (errorCount > threshold) {
|
if (errorCount > threshold) {
|
||||||
const errorTypeLabel = isTimeout ? 'timeout' : '5xx'
|
const errorTypeLabel = isTimeout ? 'timeout' : '5xx'
|
||||||
// ⚠️ 只记录5xx/504告警,不再自动停止调度,避免上游抖动导致误停
|
// ⚠️ 只记录5xx/504告警,不再自动停止调度,避免上游抖动导致误停
|
||||||
|
|||||||
@@ -177,30 +177,41 @@ class UnifiedClaudeScheduler {
|
|||||||
// 普通专属账户
|
// 普通专属账户
|
||||||
const boundAccount = await redis.getClaudeAccount(apiKeyData.claudeAccountId)
|
const boundAccount = await redis.getClaudeAccount(apiKeyData.claudeAccountId)
|
||||||
if (boundAccount && boundAccount.isActive === 'true' && boundAccount.status !== 'error') {
|
if (boundAccount && boundAccount.isActive === 'true' && boundAccount.status !== 'error') {
|
||||||
const isRateLimited = await claudeAccountService.isAccountRateLimited(boundAccount.id)
|
// 检查是否临时不可用
|
||||||
if (isRateLimited) {
|
const isTempUnavailable = await this.isAccountTemporarilyUnavailable(
|
||||||
const rateInfo = await claudeAccountService.getAccountRateLimitInfo(boundAccount.id)
|
boundAccount.id,
|
||||||
const error = new Error('Dedicated Claude account is rate limited')
|
'claude-official'
|
||||||
error.code = 'CLAUDE_DEDICATED_RATE_LIMITED'
|
)
|
||||||
error.accountId = boundAccount.id
|
if (isTempUnavailable) {
|
||||||
error.rateLimitEndAt = rateInfo?.rateLimitEndAt || boundAccount.rateLimitEndAt || null
|
|
||||||
throw error
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!this._isSchedulable(boundAccount.schedulable)) {
|
|
||||||
logger.warn(
|
logger.warn(
|
||||||
`⚠️ Bound Claude OAuth account ${apiKeyData.claudeAccountId} is not schedulable (schedulable: ${boundAccount?.schedulable}), falling back to pool`
|
`⏱️ Bound Claude OAuth account ${boundAccount.id} is temporarily unavailable, falling back to pool`
|
||||||
)
|
)
|
||||||
} else {
|
} else {
|
||||||
if (isOpusRequest) {
|
const isRateLimited = await claudeAccountService.isAccountRateLimited(boundAccount.id)
|
||||||
await claudeAccountService.clearExpiredOpusRateLimit(boundAccount.id)
|
if (isRateLimited) {
|
||||||
|
const rateInfo = await claudeAccountService.getAccountRateLimitInfo(boundAccount.id)
|
||||||
|
const error = new Error('Dedicated Claude account is rate limited')
|
||||||
|
error.code = 'CLAUDE_DEDICATED_RATE_LIMITED'
|
||||||
|
error.accountId = boundAccount.id
|
||||||
|
error.rateLimitEndAt = rateInfo?.rateLimitEndAt || boundAccount.rateLimitEndAt || null
|
||||||
|
throw error
|
||||||
}
|
}
|
||||||
logger.info(
|
|
||||||
`🎯 Using bound dedicated Claude OAuth account: ${boundAccount.name} (${apiKeyData.claudeAccountId}) for API key ${apiKeyData.name}`
|
if (!this._isSchedulable(boundAccount.schedulable)) {
|
||||||
)
|
logger.warn(
|
||||||
return {
|
`⚠️ Bound Claude OAuth account ${apiKeyData.claudeAccountId} is not schedulable (schedulable: ${boundAccount?.schedulable}), falling back to pool`
|
||||||
accountId: apiKeyData.claudeAccountId,
|
)
|
||||||
accountType: 'claude-official'
|
} else {
|
||||||
|
if (isOpusRequest) {
|
||||||
|
await claudeAccountService.clearExpiredOpusRateLimit(boundAccount.id)
|
||||||
|
}
|
||||||
|
logger.info(
|
||||||
|
`🎯 Using bound dedicated Claude OAuth account: ${boundAccount.name} (${apiKeyData.claudeAccountId}) for API key ${apiKeyData.name}`
|
||||||
|
)
|
||||||
|
return {
|
||||||
|
accountId: apiKeyData.claudeAccountId,
|
||||||
|
accountType: 'claude-official'
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@@ -221,12 +232,23 @@ class UnifiedClaudeScheduler {
|
|||||||
boundConsoleAccount.status === 'active' &&
|
boundConsoleAccount.status === 'active' &&
|
||||||
this._isSchedulable(boundConsoleAccount.schedulable)
|
this._isSchedulable(boundConsoleAccount.schedulable)
|
||||||
) {
|
) {
|
||||||
logger.info(
|
// 检查是否临时不可用
|
||||||
`🎯 Using bound dedicated Claude Console account: ${boundConsoleAccount.name} (${apiKeyData.claudeConsoleAccountId}) for API key ${apiKeyData.name}`
|
const isTempUnavailable = await this.isAccountTemporarilyUnavailable(
|
||||||
|
boundConsoleAccount.id,
|
||||||
|
'claude-console'
|
||||||
)
|
)
|
||||||
return {
|
if (isTempUnavailable) {
|
||||||
accountId: apiKeyData.claudeConsoleAccountId,
|
logger.warn(
|
||||||
accountType: 'claude-console'
|
`⏱️ Bound Claude Console account ${boundConsoleAccount.id} is temporarily unavailable, falling back to pool`
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
logger.info(
|
||||||
|
`🎯 Using bound dedicated Claude Console account: ${boundConsoleAccount.name} (${apiKeyData.claudeConsoleAccountId}) for API key ${apiKeyData.name}`
|
||||||
|
)
|
||||||
|
return {
|
||||||
|
accountId: apiKeyData.claudeConsoleAccountId,
|
||||||
|
accountType: 'claude-console'
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
logger.warn(
|
logger.warn(
|
||||||
@@ -245,12 +267,23 @@ class UnifiedClaudeScheduler {
|
|||||||
boundBedrockAccountResult.data.isActive === true &&
|
boundBedrockAccountResult.data.isActive === true &&
|
||||||
this._isSchedulable(boundBedrockAccountResult.data.schedulable)
|
this._isSchedulable(boundBedrockAccountResult.data.schedulable)
|
||||||
) {
|
) {
|
||||||
logger.info(
|
// 检查是否临时不可用
|
||||||
`🎯 Using bound dedicated Bedrock account: ${boundBedrockAccountResult.data.name} (${apiKeyData.bedrockAccountId}) for API key ${apiKeyData.name}`
|
const isTempUnavailable = await this.isAccountTemporarilyUnavailable(
|
||||||
|
apiKeyData.bedrockAccountId,
|
||||||
|
'bedrock'
|
||||||
)
|
)
|
||||||
return {
|
if (isTempUnavailable) {
|
||||||
accountId: apiKeyData.bedrockAccountId,
|
logger.warn(
|
||||||
accountType: 'bedrock'
|
`⏱️ Bound Bedrock account ${apiKeyData.bedrockAccountId} is temporarily unavailable, falling back to pool`
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
logger.info(
|
||||||
|
`🎯 Using bound dedicated Bedrock account: ${boundBedrockAccountResult.data.name} (${apiKeyData.bedrockAccountId}) for API key ${apiKeyData.name}`
|
||||||
|
)
|
||||||
|
return {
|
||||||
|
accountId: apiKeyData.bedrockAccountId,
|
||||||
|
accountType: 'bedrock'
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
logger.warn(
|
logger.warn(
|
||||||
@@ -496,6 +529,18 @@ class UnifiedClaudeScheduler {
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 检查是否临时不可用
|
||||||
|
const isTempUnavailable = await this.isAccountTemporarilyUnavailable(
|
||||||
|
account.id,
|
||||||
|
'claude-official'
|
||||||
|
)
|
||||||
|
if (isTempUnavailable) {
|
||||||
|
logger.debug(
|
||||||
|
`⏭️ Skipping Claude Official account ${account.name} - temporarily unavailable`
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
// 检查是否被限流
|
// 检查是否被限流
|
||||||
const isRateLimited = await claudeAccountService.isAccountRateLimited(account.id)
|
const isRateLimited = await claudeAccountService.isAccountRateLimited(account.id)
|
||||||
if (isRateLimited) {
|
if (isRateLimited) {
|
||||||
@@ -584,6 +629,18 @@ class UnifiedClaudeScheduler {
|
|||||||
// 继续处理该账号
|
// 继续处理该账号
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 检查是否临时不可用
|
||||||
|
const isTempUnavailable = await this.isAccountTemporarilyUnavailable(
|
||||||
|
currentAccount.id,
|
||||||
|
'claude-console'
|
||||||
|
)
|
||||||
|
if (isTempUnavailable) {
|
||||||
|
logger.debug(
|
||||||
|
`⏭️ Skipping Claude Console account ${currentAccount.name} - temporarily unavailable`
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
// 检查是否被限流
|
// 检查是否被限流
|
||||||
const isRateLimited = await claudeConsoleAccountService.isAccountRateLimited(
|
const isRateLimited = await claudeConsoleAccountService.isAccountRateLimited(
|
||||||
currentAccount.id
|
currentAccount.id
|
||||||
@@ -682,7 +739,15 @@ class UnifiedClaudeScheduler {
|
|||||||
account.accountType === 'shared' &&
|
account.accountType === 'shared' &&
|
||||||
this._isSchedulable(account.schedulable)
|
this._isSchedulable(account.schedulable)
|
||||||
) {
|
) {
|
||||||
// 检查是否可调度
|
// 检查是否临时不可用
|
||||||
|
const isTempUnavailable = await this.isAccountTemporarilyUnavailable(
|
||||||
|
account.id,
|
||||||
|
'bedrock'
|
||||||
|
)
|
||||||
|
if (isTempUnavailable) {
|
||||||
|
logger.debug(`⏭️ Skipping Bedrock account ${account.name} - temporarily unavailable`)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
availableAccounts.push({
|
availableAccounts.push({
|
||||||
...account,
|
...account,
|
||||||
@@ -731,6 +796,13 @@ class UnifiedClaudeScheduler {
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 检查是否临时不可用
|
||||||
|
const isTempUnavailable = await this.isAccountTemporarilyUnavailable(account.id, 'ccr')
|
||||||
|
if (isTempUnavailable) {
|
||||||
|
logger.debug(`⏭️ Skipping CCR account ${account.name} - temporarily unavailable`)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
// 检查是否被限流
|
// 检查是否被限流
|
||||||
const isRateLimited = await ccrAccountService.isAccountRateLimited(account.id)
|
const isRateLimited = await ccrAccountService.isAccountRateLimited(account.id)
|
||||||
const isQuotaExceeded = await ccrAccountService.isAccountQuotaExceeded(account.id)
|
const isQuotaExceeded = await ccrAccountService.isAccountQuotaExceeded(account.id)
|
||||||
@@ -1099,6 +1171,42 @@ class UnifiedClaudeScheduler {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ⏱️ 标记账户为临时不可用状态(用于5xx等临时故障,默认5分钟后自动恢复)
|
||||||
|
async markAccountTemporarilyUnavailable(
|
||||||
|
accountId,
|
||||||
|
accountType,
|
||||||
|
sessionHash = null,
|
||||||
|
ttlSeconds = 300
|
||||||
|
) {
|
||||||
|
try {
|
||||||
|
const client = redis.getClientSafe()
|
||||||
|
const key = `temp_unavailable:${accountType}:${accountId}`
|
||||||
|
await client.setex(key, ttlSeconds, '1')
|
||||||
|
if (sessionHash) {
|
||||||
|
await this._deleteSessionMapping(sessionHash)
|
||||||
|
}
|
||||||
|
logger.warn(
|
||||||
|
`⏱️ Account ${accountId} (${accountType}) marked temporarily unavailable for ${ttlSeconds}s`
|
||||||
|
)
|
||||||
|
return { success: true }
|
||||||
|
} catch (error) {
|
||||||
|
logger.error(`❌ Failed to mark account temporarily unavailable: ${accountId}`, error)
|
||||||
|
return { success: false }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 🔍 检查账户是否临时不可用
|
||||||
|
async isAccountTemporarilyUnavailable(accountId, accountType) {
|
||||||
|
try {
|
||||||
|
const client = redis.getClientSafe()
|
||||||
|
const key = `temp_unavailable:${accountType}:${accountId}`
|
||||||
|
return (await client.exists(key)) === 1
|
||||||
|
} catch (error) {
|
||||||
|
logger.error(`❌ Failed to check temp unavailable status: ${accountId}`, error)
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// 🚫 标记账户为限流状态
|
// 🚫 标记账户为限流状态
|
||||||
async markAccountRateLimited(
|
async markAccountRateLimited(
|
||||||
accountId,
|
accountId,
|
||||||
|
|||||||
81
src/utils/unstableUpstreamHelper.js
Normal file
81
src/utils/unstableUpstreamHelper.js
Normal file
@@ -0,0 +1,81 @@
|
|||||||
|
const logger = require('./logger')
|
||||||
|
|
||||||
|
/**
 * Split a comma-separated setting into a list of normalized entries.
 *
 * Each entry is trimmed and lower-cased; entries that end up empty are dropped.
 *
 * @param {string|undefined} envValue - Raw comma-separated value; may be unset.
 * @returns {string[]} Normalized entries, or an empty array for a falsy input.
 */
function parseList(envValue) {
  if (!envValue) {
    return []
  }

  const entries = []
  for (const rawEntry of envValue.split(',')) {
    const entry = rawEntry.trim().toLowerCase()
    if (entry) {
      entries.push(entry)
    }
  }
  return entries
}
|
||||||
|
|
||||||
|
// HTTP statuses treated as unstable regardless of the response body. The
// non-5xx entries (408, 499) are the ones that add coverage, because
// isUnstableUpstreamError already accepts every status >= 500.
const unstableStatusCodes = new Set([408, 499, 502, 503, 504, 522])

// Optional comma-separated lists read once at module load. Entries are
// lower-cased by parseList, so the lookups against them compare lower-cased values.
// UNSTABLE_ERROR_KEYWORDS: substrings searched for in the error message.
const unstableKeywords = parseList(process.env.UNSTABLE_ERROR_KEYWORDS)
// UNSTABLE_ERROR_TYPES: exact error type/code values.
const unstableTypes = new Set(parseList(process.env.UNSTABLE_ERROR_TYPES))
|
||||||
|
|
||||||
|
/**
 * Reduce an upstream error body to a flat `{ type, code, message }` shape.
 *
 * Accepted inputs:
 *  - falsy value                    -> `{}`
 *  - string                         -> parsed as JSON when possible, otherwise the
 *                                      raw text becomes `message`
 *  - `{ error: { ... } }`           -> fields are read from the nested error object
 *  - `{ error: "text" }`            -> the text is used as the fallback for
 *                                      `type`, `code` and `message`, mirroring how
 *                                      a nested `error.error` string is treated
 *  - any other object               -> top-level `type` / `code` / `message`
 *
 * @param {*} payload - Raw error body (object, JSON string, plain string, or empty).
 * @returns {{type?: *, code?: *, message?: *}} Normalized fields; `type` and `code`
 *   fall back to each other when only one of them is present.
 */
function normalizeErrorPayload(payload) {
  if (!payload) {
    return {}
  }

  if (typeof payload === 'string') {
    let parsed
    try {
      parsed = JSON.parse(payload)
    } catch (e) {
      // Not JSON: the raw text is the only information available.
      return { message: payload }
    }
    // A body such as "502" or "true" is valid JSON but carries no fields;
    // keep the original text instead of discarding it.
    if (typeof parsed === 'number' || typeof parsed === 'boolean') {
      return { message: payload }
    }
    return normalizeErrorPayload(parsed)
  }

  const nested = payload.error
  if (nested && typeof nested === 'object') {
    return {
      type: nested.type || nested.error || nested.code,
      code: nested.code || nested.error || nested.type,
      message: nested.message || nested.msg || payload.message || nested.error
    }
  }

  // `{ "error": "some text" }` is a common flat error shape; without this
  // fallback the text would be lost and the message would come back empty.
  const errorText = typeof nested === 'string' && nested ? nested : undefined

  return {
    type: payload.type || payload.code || errorText,
    code: payload.code || payload.type || errorText,
    message: payload.message || errorText || ''
  }
}
|
||||||
|
|
||||||
|
/**
 * Decide whether an upstream failure should be classified as unstable.
 *
 * Checks, in order:
 *  1. the numeric status is >= 500 or listed in `unstableStatusCodes`;
 *  2. the normalized error type or code equals `server_error`;
 *  3. the normalized error type or code is listed in `unstableTypes`;
 *  4. the normalized message contains one of `unstableKeywords`.
 * All comparisons on type, code and message are done on lower-cased strings.
 *
 * @param {number|string} statusCode - Upstream HTTP status; coerced with `Number()`.
 * @param {*} payload - Error body, passed to `normalizeErrorPayload`.
 * @returns {boolean} True when any of the checks above matches.
 */
function isUnstableUpstreamError(statusCode, payload) {
  const status = Number(statusCode)
  if (Number.isFinite(status) && (status >= 500 || unstableStatusCodes.has(status))) {
    return true
  }

  const normalized = normalizeErrorPayload(payload)
  const [errType, errCode, errMessage] = [
    normalized.type,
    normalized.code,
    normalized.message
  ].map((field) => (field || '').toString().toLowerCase())

  const identifiers = [errType, errCode]
  if (identifiers.includes('server_error')) {
    return true
  }
  if (identifiers.some((identifier) => unstableTypes.has(identifier))) {
    return true
  }

  return unstableKeywords.length > 0 && unstableKeywords.some((kw) => errMessage.includes(kw))
}
|
||||||
|
|
||||||
|
/**
 * Emit a warning that an unstable upstream error was detected for an account.
 *
 * This function only writes the log line; it does not change any account state.
 *
 * @param {string} accountLabel - Account name or id to show in the log line.
 * @param {number|string} statusCode - Upstream status to show in the log line.
 */
function logUnstable(accountLabel, statusCode) {
  const warning = `Detected unstable upstream error (${statusCode}) for account ${accountLabel}, marking temporarily unavailable`
  logger.warn(warning)
}
|
||||||
|
|
||||||
|
// Public API of this module: the classifier and its companion log helper.
module.exports = { isUnstableUpstreamError, logUnstable }
|
||||||
Reference in New Issue
Block a user