diff --git a/.env.example b/.env.example
index c67b80a0..ef8a870b 100644
--- a/.env.example
+++ b/.env.example
@@ -156,7 +156,8 @@ ENABLE_CORS=true
TRUST_PROXY=true
# ⏱️ 上游错误自动暂停配置(秒)
-# UPSTREAM_ERROR_5XX_TTL_SECONDS=300 # 5xx错误暂停时间(默认5分钟)
+# UPSTREAM_ERROR_503_TTL_SECONDS=60 # 503错误暂停时间(默认60秒)
+# UPSTREAM_ERROR_5XX_TTL_SECONDS=300 # 500/502等5xx错误暂停时间(默认5分钟)
# UPSTREAM_ERROR_OVERLOAD_TTL_SECONDS=600 # 529过载暂停时间(默认10分钟)
# UPSTREAM_ERROR_AUTH_TTL_SECONDS=1800 # 401/403认证错误暂停时间(默认30分钟)
# UPSTREAM_ERROR_TIMEOUT_TTL_SECONDS=300 # 504超时暂停时间(默认5分钟)
diff --git a/config/config.example.js b/config/config.example.js
index dda050bc..57cdce1f 100644
--- a/config/config.example.js
+++ b/config/config.example.js
@@ -232,6 +232,8 @@ const config = {
// ⏱️ 上游错误自动暂停配置
upstreamError: {
+ serviceUnavailableTtlSeconds:
+ parseInt(process.env.UPSTREAM_ERROR_503_TTL_SECONDS) || 60, // 503错误暂停秒数
serverErrorTtlSeconds: parseInt(process.env.UPSTREAM_ERROR_5XX_TTL_SECONDS) || 300, // 5xx错误暂停秒数
overloadTtlSeconds: parseInt(process.env.UPSTREAM_ERROR_OVERLOAD_TTL_SECONDS) || 600, // 529过载暂停秒数
authErrorTtlSeconds: parseInt(process.env.UPSTREAM_ERROR_AUTH_TTL_SECONDS) || 1800, // 401/403认证错误暂停秒数
diff --git a/src/services/relay/claudeRelayService.js b/src/services/relay/claudeRelayService.js
index 7f336344..1dc6bc64 100644
--- a/src/services/relay/claudeRelayService.js
+++ b/src/services/relay/claudeRelayService.js
@@ -2886,13 +2886,14 @@ class ClaudeRelayService {
`⏱️ ${prefix}${isTimeout ? 'Timeout' : 'Server'} error for account ${accountId}, error count: ${errorCount}/${threshold}`
)
- // 标记账户为临时不可用(5分钟)
+ // 标记账户为临时不可用(TTL 由 upstreamError 配置决定)
try {
await unifiedClaudeScheduler.markAccountTemporarilyUnavailable(
accountId,
accountType,
sessionHash,
- 300
+ null,
+ statusCode
)
} catch (markError) {
logger.error(`❌ Failed to mark account temporarily unavailable: ${accountId}`, markError)
diff --git a/src/services/scheduler/unifiedClaudeScheduler.js b/src/services/scheduler/unifiedClaudeScheduler.js
index 93d0820b..a92998c2 100644
--- a/src/services/scheduler/unifiedClaudeScheduler.js
+++ b/src/services/scheduler/unifiedClaudeScheduler.js
@@ -1299,10 +1299,11 @@ class UnifiedClaudeScheduler {
accountId,
accountType,
sessionHash = null,
- ttlSeconds = 300
+ ttlSeconds = null,
+ statusCode = 500
) {
try {
- await upstreamErrorHelper.markTempUnavailable(accountId, accountType, 500, ttlSeconds)
+ await upstreamErrorHelper.markTempUnavailable(accountId, accountType, statusCode, ttlSeconds)
if (sessionHash) {
await this._deleteSessionMapping(sessionHash)
}
diff --git a/src/utils/upstreamErrorHelper.js b/src/utils/upstreamErrorHelper.js
index 57a2f271..838ab1fa 100644
--- a/src/utils/upstreamErrorHelper.js
+++ b/src/utils/upstreamErrorHelper.js
@@ -8,6 +8,7 @@ const ERROR_HISTORY_TTL = 3 * 24 * 60 * 60 // 3天
// 默认 TTL(秒)
const DEFAULT_TTL = {
server_error: 300, // 5xx: 5分钟
+ service_unavailable: 60, // 503: 1分钟(默认更短,避免短暂抖动导致长时间不可路由)
overload: 600, // 529: 10分钟
auth_error: 1800, // 401/403: 30分钟
timeout: 300, // 504/网络超时: 5分钟
@@ -29,7 +30,16 @@ const getConfig = () => {
const getTtlConfig = () => {
const config = getConfig()
+  // Reads an environment variable as a base-10 integer.
+  // Yields null unless the parsed value is a finite number greater than zero.
+  const parseEnvPositiveInt = (name) => {
+    const parsed = parseInt(process.env[name], 10)
+    if (!Number.isFinite(parsed) || parsed <= 0) {
+      return null
+    }
+    return parsed
+  }
+
return {
+ service_unavailable:
+ config.upstreamError?.serviceUnavailableTtlSeconds ??
+ parseEnvPositiveInt('UPSTREAM_ERROR_503_TTL_SECONDS') ??
+ DEFAULT_TTL.service_unavailable,
server_error: config.upstreamError?.serverErrorTtlSeconds ?? DEFAULT_TTL.server_error,
overload: config.upstreamError?.overloadTtlSeconds ?? DEFAULT_TTL.overload,
auth_error: config.upstreamError?.authErrorTtlSeconds ?? DEFAULT_TTL.auth_error,
@@ -52,6 +62,9 @@ const classifyError = (statusCode) => {
if (statusCode === 529) {
return 'overload'
}
+ if (statusCode === 503) {
+ return 'service_unavailable'
+ }
if (statusCode === 504) {
return 'timeout'
}
@@ -204,7 +217,13 @@ const markTempUnavailable = async (
}
const ttlConfig = getTtlConfig()
- const ttlSeconds = customTtl ?? ttlConfig[errorType]
+ const parsedCustomTtl = Number(customTtl)
+ const ttlSeconds =
+ Number.isFinite(parsedCustomTtl) && parsedCustomTtl > 0
+ ? Math.ceil(parsedCustomTtl)
+ : ttlConfig[errorType]
+ const markedAtIso = new Date().toISOString()
+ const expiresAtIso = new Date(Date.now() + ttlSeconds * 1000).toISOString()
const redis = getRedis()
const client = redis.getClientSafe()
@@ -215,18 +234,21 @@ const markTempUnavailable = async (
JSON.stringify({
statusCode,
errorType,
- markedAt: new Date().toISOString()
+ markedAt: markedAtIso,
+ ttlSeconds,
+ cooldownSeconds: ttlSeconds,
+ expiresAt: expiresAtIso
})
)
logger.warn(
- `⏱️ [UpstreamError] Account ${accountId} (${accountType}) marked temporarily unavailable for ${ttlSeconds}s (${statusCode} ${errorType})`
+ `⏱️ [UpstreamError] Account ${accountId} (${accountType}) marked temporarily unavailable for ${ttlSeconds}s (${statusCode} ${errorType}), recovers at ${expiresAtIso}`
)
// 异步记录错误历史,不阻塞主流程
recordErrorHistory(accountId, accountType, statusCode, errorType, context).catch(() => {})
- return { success: true, ttlSeconds, errorType }
+ return { success: true, ttlSeconds, errorType, expiresAt: expiresAtIso }
} catch (error) {
logger.error(
`❌ [UpstreamError] Failed to mark account ${accountId} temporarily unavailable:`,
@@ -242,7 +264,22 @@ const isTempUnavailable = async (accountId, accountType) => {
const redis = getRedis()
const client = redis.getClientSafe()
const key = `${TEMP_UNAVAILABLE_PREFIX}:${accountType}:${accountId}`
- return (await client.exists(key)) === 1
+ const ttl = await client.ttl(key)
+
+ if (ttl === -2) {
+ return false
+ }
+
+ if (ttl === -1) {
+ // 理论上该 key 必须带 TTL;如果无 TTL,自动清理以避免“永久不可用”
+ logger.warn(
+ `⚠️ [UpstreamError] Found temp_unavailable key without TTL for account ${accountId} (${accountType}), auto-clearing`
+ )
+ await client.del(key)
+ return false
+ }
+
+ return ttl > 0
} catch (error) {
logger.error(
`❌ [UpstreamError] Failed to check temp unavailable status for ${accountId}:`,
@@ -281,6 +318,7 @@ const getAllTempUnavailable = async () => {
pipeline.ttl(key)
}
const results = await pipeline.exec()
+ const cleanupPipeline = client.pipeline()
const statuses = {}
for (let i = 0; i < keys.length; i++) {
@@ -295,21 +333,40 @@ const getAllTempUnavailable = async () => {
continue
}
+ if (ttl === -1) {
+ // 自愈:清理无 TTL 的异常键,避免账户被永久阻塞
+ cleanupPipeline.del(key)
+ continue
+ }
+
try {
const data = JSON.parse(value)
const compositeKey = `${accountType}:${accountId}`
+ const cooldownSecondsRaw = Number(data.cooldownSeconds)
+ const ttlSecondsRaw = Number(data.ttlSeconds)
+ const configuredCooldownSeconds = Number.isFinite(cooldownSecondsRaw)
+ ? Math.max(0, Math.floor(cooldownSecondsRaw))
+ : Number.isFinite(ttlSecondsRaw)
+ ? Math.max(0, Math.floor(ttlSecondsRaw))
+ : null
+
statuses[compositeKey] = {
accountId,
accountType,
statusCode: data.statusCode,
errorType: data.errorType,
markedAt: data.markedAt,
- ttl: ttl > 0 ? ttl : 0
+ ttl: ttl > 0 ? ttl : 0,
+ remainingSeconds: ttl > 0 ? ttl : 0,
+ cooldownSeconds: configuredCooldownSeconds,
+ expiresAt: data.expiresAt || null
}
} catch {
// ignore parse errors
}
}
+
+ await cleanupPipeline.exec().catch(() => {})
return statuses
} catch (error) {
logger.error('❌ [UpstreamError] Failed to get all temp unavailable statuses:', error)
diff --git a/web/admin-spa/src/views/AccountsView.vue b/web/admin-spa/src/views/AccountsView.vue
index dc61685b..8c8c1867 100644
--- a/web/admin-spa/src/views/AccountsView.vue
+++ b/web/admin-spa/src/views/AccountsView.vue
@@ -756,11 +756,23 @@
>
临时暂停
- ({{ formatTempUnavailableTime(account.tempUnavailable.ttl) }})
+
+ ({{
+ formatTempUnavailableTime(
+ getTempUnavailableRemainingSeconds(account.tempUnavailable)
+ )
+ }}
+ /
+ {{
+ formatTempUnavailableTime(
+ getTempUnavailableCooldownSeconds(account.tempUnavailable)
+ )
+ }})
+
@@ -2328,6 +2340,38 @@ const platformToAccountType = (platform) => {
if (platform === 'azure_openai') return 'azure-openai'
return platform
}
+
+/**
+ * Maps an account's `platform` value to the account types that are tried, in order, when
+ * looking up its entry in the temp-unavailable status map. Status keys have the form
+ * `${accountType}:${accountId}`.
+ *
+ * The table is frozen so this shared module-level lookup cannot be reassigned or extended
+ * by accident (the freeze is shallow: it protects the platform keys, not the inner arrays).
+ */
+const TEMP_UNAVAILABLE_ACCOUNT_TYPE_ALIASES = Object.freeze({
+  // `claude` accounts are checked under both names; the first key that has a status wins.
+  claude: ['claude-official', 'claude'],
+  'claude-console': ['claude-console'],
+  bedrock: ['bedrock'],
+  gemini: ['gemini'],
+  'gemini-api': ['gemini-api'],
+  openai: ['openai'],
+  'openai-responses': ['openai-responses'],
+  ccr: ['ccr'],
+  droid: ['droid'],
+  // Both spellings of the Azure platform resolve to the hyphenated account type.
+  azure_openai: ['azure-openai'],
+  'azure-openai': ['azure-openai']
+})
+
+/**
+ * Finds the temp-unavailable status entry that belongs to an account.
+ *
+ * @param {Object} tempStatuses - Status map keyed by `${accountType}:${accountId}`.
+ * @param {Object} account - Account whose `platform` and `id` build the candidate keys.
+ * @returns {Object|null} The first truthy entry among the candidate keys, or null if none.
+ */
+const resolveTempUnavailableStatusForAccount = (tempStatuses, account) => {
+  if (!tempStatuses || !account) return null
+
+  // Platforms missing from the alias table are looked up under their own name.
+  const { platform, id } = account
+  const candidateTypes = TEMP_UNAVAILABLE_ACCOUNT_TYPE_ALIASES[platform] || [platform]
+
+  // Candidate keys are always strings, so `undefined` from find() can only mean "no match".
+  const candidateKeys = candidateTypes.map((accountType) => `${accountType}:${id}`)
+  const matchedKey = candidateKeys.find((key) => tempStatuses[key])
+
+  return matchedKey === undefined ? null : tempStatuses[matchedKey]
+}
+
const openErrorHistory = (account) => {
errorHistoryTarget.value = {
accountType: platformToAccountType(account.platform),
@@ -3437,23 +3481,7 @@ const loadAccounts = async (forceReload = false) => {
if (tempRes?.success && tempRes.data) {
const tempStatuses = tempRes.data
filteredAccounts = filteredAccounts.map((account) => {
- // 尝试匹配 accountType:accountId
- const platformTypeMap = {
- claude: 'claude-official',
- 'claude-console': 'claude-console',
- bedrock: 'bedrock',
- gemini: 'gemini',
- 'gemini-api': 'gemini-api',
- openai: 'openai',
- 'openai-responses': 'openai-responses',
- ccr: 'ccr',
- droid: 'droid',
- azure_openai: 'azure-openai',
- 'azure-openai': 'azure-openai'
- }
- const accountType = platformTypeMap[account.platform] || account.platform
- const key = `${accountType}:${account.id}`
- const tempStatus = tempStatuses[key]
+ const tempStatus = resolveTempUnavailableStatusForAccount(tempStatuses, account)
if (tempStatus) {
return { ...account, tempUnavailable: tempStatus }
}
@@ -3757,6 +3785,83 @@ const formatTempUnavailableTime = (seconds) => {
return `${secs}s`
}
+/**
+ * Coerces a value to a whole number that is never negative.
+ *
+ * @param {*} value - Anything `Number()` can coerce.
+ * @returns {number} The floored value when it is finite and greater than zero, otherwise 0.
+ */
+const toPositiveInteger = (value) => {
+  const numeric = Number(value)
+  if (!Number.isFinite(numeric) || numeric <= 0) {
+    return 0
+  }
+  return Math.floor(numeric)
+}
+
+/**
+ * Returns how many whole seconds of the pause are left for a temp-unavailable status.
+ *
+ * @param {Object} tempUnavailable - Status entry; `remainingSeconds` and `ttl` are read.
+ * @returns {number} Remaining seconds as a non-negative integer (0 when there is no status).
+ */
+const getTempUnavailableRemainingSeconds = (tempUnavailable) => {
+  if (!tempUnavailable) {
+    return 0
+  }
+
+  // A falsy `remainingSeconds` (missing or 0) defers to the `ttl` field.
+  const { remainingSeconds, ttl } = tempUnavailable
+  return toPositiveInteger(remainingSeconds ? remainingSeconds : ttl)
+}
+
+/**
+ * Returns the cooldown length recorded on a temp-unavailable status.
+ *
+ * @param {Object} tempUnavailable - Status entry; `cooldownSeconds` is read.
+ * @returns {number} Cooldown as a non-negative integer (0 when absent or invalid).
+ */
+const getTempUnavailableCooldownSeconds = (tempUnavailable) =>
+  tempUnavailable ? toPositiveInteger(tempUnavailable.cooldownSeconds) : 0
+
+/**
+ * Works out when a temp-unavailable status is expected to end.
+ *
+ * @param {Object} tempUnavailable - Status entry; `expiresAt`, `markedAt` and the cooldown are read.
+ * @returns {string} The stored `expiresAt` when it parses as a date; otherwise an ISO string
+ *   computed from `markedAt` plus the cooldown; otherwise an empty string.
+ */
+const getTempUnavailableRecoveryAt = (tempUnavailable) => {
+  if (!tempUnavailable) return ''
+
+  const { expiresAt, markedAt } = tempUnavailable
+
+  // Preferred source: the stored expiry, returned verbatim once it is known to be parseable.
+  if (expiresAt && !Number.isNaN(new Date(expiresAt).getTime())) {
+    return expiresAt
+  }
+
+  if (!markedAt) return ''
+
+  // Fallback: mark time plus the recorded cooldown length.
+  const markedAtMs = new Date(markedAt).getTime()
+  const cooldownSeconds = getTempUnavailableCooldownSeconds(tempUnavailable)
+  if (!Number.isNaN(markedAtMs) && cooldownSeconds > 0) {
+    return new Date(markedAtMs + cooldownSeconds * 1000).toISOString()
+  }
+
+  return ''
+}
+
+/**
+ * Formats the expected recovery time of a temp-unavailable status as `MM-DD HH:mm:ss`
+ * using the local time zone of the runtime.
+ *
+ * @param {Object} tempUnavailable - Status entry passed on to getTempUnavailableRecoveryAt.
+ * @returns {string} The formatted timestamp, or an empty string when no valid time is known.
+ */
+const formatTempUnavailableRecoveryAt = (tempUnavailable) => {
+  const recoveryAt = getTempUnavailableRecoveryAt(tempUnavailable)
+  if (!recoveryAt) return ''
+
+  const recoveryDate = new Date(recoveryAt)
+  if (Number.isNaN(recoveryDate.getTime())) return ''
+
+  // Every component is zero-padded to two digits; getMonth() is zero-based, hence the +1.
+  const pad = (part) => `${part}`.padStart(2, '0')
+  const datePart = [recoveryDate.getMonth() + 1, recoveryDate.getDate()]
+    .map((part) => pad(part))
+    .join('-')
+  const timePart = [recoveryDate.getHours(), recoveryDate.getMinutes(), recoveryDate.getSeconds()]
+    .map((part) => pad(part))
+    .join(':')
+
+  return `${datePart} ${timePart}`
+}
+
+/**
+ * Builds the one-line tooltip text for a temp-unavailable status.
+ *
+ * The text always starts with the error type (falling back to `upstream_error`) and the
+ * HTTP status when present. It is followed by the cooldown length, the remaining time and
+ * the expected recovery time, each only when a positive / non-empty value is available.
+ *
+ * @param {Object} tempUnavailable - Status entry to describe.
+ * @returns {string} Comma-joined description, or an empty string when there is no status.
+ */
+const getTempUnavailableTooltipContent = (tempUnavailable) => {
+  if (!tempUnavailable) return ''
+
+  const { statusCode, errorType } = tempUnavailable
+  const statusSuffix = statusCode ? ` (HTTP ${statusCode})` : ''
+  const headline = `${errorType || 'upstream_error'}${statusSuffix}`
+
+  const cooldownSeconds = getTempUnavailableCooldownSeconds(tempUnavailable)
+  const cooldownText =
+    cooldownSeconds > 0 ? `内部冷却 ${formatTempUnavailableTime(cooldownSeconds)}` : ''
+
+  const remainingSeconds = getTempUnavailableRemainingSeconds(tempUnavailable)
+  const remainingText =
+    remainingSeconds > 0 ? `剩余 ${formatTempUnavailableTime(remainingSeconds)}` : ''
+
+  const recoveryAtText = formatTempUnavailableRecoveryAt(tempUnavailable)
+  const recoveryText = recoveryAtText ? `预计恢复 ${recoveryAtText}` : ''
+
+  // Empty segments are dropped so the separators never double up.
+  return [headline, cooldownText, remainingText, recoveryText].filter(Boolean).join(',')
+}
+
// 检查账户是否被限流
const isAccountRateLimited = (account) => {
if (!account) return false
@@ -4488,12 +4593,25 @@ const getRoutingBlockReasons = (account) => {
}
if (account.tempUnavailable) {
- const ttl = Number.isFinite(account.tempUnavailable.ttl)
- ? formatTempUnavailableTime(account.tempUnavailable.ttl)
- : ''
+ const cooldownSeconds = getTempUnavailableCooldownSeconds(account.tempUnavailable)
+ const remainingSeconds = getTempUnavailableRemainingSeconds(account.tempUnavailable)
+ const recoveryAtText = formatTempUnavailableRecoveryAt(account.tempUnavailable)
+
+ const detailParts = []
+ if (cooldownSeconds > 0) {
+ detailParts.push(`内部冷却 ${formatTempUnavailableTime(cooldownSeconds)}`)
+ }
+ if (remainingSeconds > 0) {
+ detailParts.push(`剩余 ${formatTempUnavailableTime(remainingSeconds)}`)
+ }
+ if (recoveryAtText) {
+ detailParts.push(`预计恢复 ${recoveryAtText}`)
+ }
+
+ const detailText = detailParts.length > 0 ? `,${detailParts.join(',')}` : ''
const tempReason = account.tempUnavailable.errorType
- ? `临时暂停(${account.tempUnavailable.errorType}${account.tempUnavailable.statusCode ? ` / HTTP ${account.tempUnavailable.statusCode}` : ''}${ttl ? `,剩余 ${ttl}` : ''})`
- : `临时暂停${ttl ? `(剩余 ${ttl})` : ''}`
+ ? `临时暂停(${account.tempUnavailable.errorType}${account.tempUnavailable.statusCode ? ` / HTTP ${account.tempUnavailable.statusCode}` : ''}${detailText})`
+ : `临时暂停${detailParts.length > 0 ? `(${detailParts.join(',')})` : ''}`
reasons.push(tempReason)
}