mirror of
https://github.com/Wei-Shaw/sub2api.git
synced 2026-03-30 12:54:09 +00:00
Merge branch 'release/custom-0.1.79'
# Conflicts: # AGENTS.md # CLAUDE.md # backend/cmd/server/VERSION # backend/internal/handler/gateway_handler.go # backend/internal/handler/gemini_v1beta_handler.go # backend/internal/service/antigravity_gateway_service.go # backend/internal/service/antigravity_rate_limit_test.go # backend/internal/service/antigravity_single_account_retry_test.go # backend/internal/service/antigravity_smart_retry_test.go
This commit is contained in:
143
AGENTS.md
143
AGENTS.md
@@ -97,13 +97,22 @@ git push origin main
|
||||
|
||||
### 前置条件
|
||||
|
||||
- 本地已配置 SSH 别名 `clicodeplus` 连接到服务器
|
||||
- 服务器部署目录:`/root/sub2api`(正式)、`/root/sub2api-beta`(测试)
|
||||
- 服务器使用 Docker Compose 部署
|
||||
- 本地已配置 SSH 别名 `clicodeplus` 连接到生产服务器(运行服务)
|
||||
- 本地已配置 SSH 别名 `us-asaki-root` 连接到构建服务器(拉取代码、构建镜像)
|
||||
- 生产服务器部署目录:`/root/sub2api`(正式)、`/root/sub2api-beta`(测试)
|
||||
- 生产服务器使用 Docker Compose 部署
|
||||
- **镜像统一在构建服务器上构建**,避免生产服务器因编译占用 CPU/内存影响线上服务
|
||||
|
||||
### 服务器角色说明
|
||||
|
||||
| 服务器 | SSH 别名 | 职责 |
|
||||
|--------|----------|------|
|
||||
| 构建服务器 | `us-asaki-root` | 拉取代码、`docker build` 构建镜像 |
|
||||
| 生产服务器 | `clicodeplus` | 加载镜像、运行服务、部署验证 |
|
||||
|
||||
### 部署环境说明
|
||||
|
||||
| 环境 | 目录 | 端口 | 数据库 | 容器名 |
|
||||
| 环境 | 目录(生产服务器) | 端口 | 数据库 | 容器名 |
|
||||
|------|------|------|--------|--------|
|
||||
| 正式 | `/root/sub2api` | 8080 | `sub2api` | `sub2api` |
|
||||
| Beta | `/root/sub2api-beta` | 8084 | `beta` | `sub2api-beta` |
|
||||
@@ -155,26 +164,33 @@ git commit -m "chore: bump version to 0.1.69.2"
|
||||
git push origin release/custom-0.1.69
|
||||
```
|
||||
|
||||
#### 1. 服务器拉取代码
|
||||
#### 1. 构建服务器拉取代码
|
||||
|
||||
```bash
|
||||
ssh clicodeplus "cd /root/sub2api && git fetch fork && git checkout -B release/custom-0.1.69 fork/release/custom-0.1.69"
|
||||
ssh us-asaki-root "cd /root/sub2api && git fetch fork && git checkout -B release/custom-0.1.69 fork/release/custom-0.1.69"
|
||||
```
|
||||
|
||||
#### 2. 服务器构建镜像
|
||||
#### 2. 构建服务器构建镜像
|
||||
|
||||
```bash
|
||||
ssh clicodeplus "cd /root/sub2api && docker build --no-cache -t sub2api:latest -f Dockerfile ."
|
||||
ssh us-asaki-root "cd /root/sub2api && docker build --no-cache -t sub2api:latest -f Dockerfile ."
|
||||
```
|
||||
|
||||
#### 3. 更新镜像标签并重启服务
|
||||
#### 3. 传输镜像到生产服务器并加载
|
||||
|
||||
```bash
|
||||
# 导出镜像 → 通过管道传输 → 生产服务器加载
|
||||
ssh us-asaki-root "docker save sub2api:latest" | ssh clicodeplus "docker load"
|
||||
```
|
||||
|
||||
#### 4. 更新镜像标签并重启服务
|
||||
|
||||
```bash
|
||||
ssh clicodeplus "docker tag sub2api:latest weishaw/sub2api:latest"
|
||||
ssh clicodeplus "cd /root/sub2api/deploy && docker compose up -d --force-recreate sub2api"
|
||||
```
|
||||
|
||||
#### 4. 验证部署
|
||||
#### 5. 验证部署
|
||||
|
||||
```bash
|
||||
# 查看启动日志
|
||||
@@ -213,8 +229,8 @@ ssh clicodeplus "docker ps --format 'table {{.Names}}\t{{.Image}}\t{{.Ports}}' |
|
||||
### 首次部署步骤
|
||||
|
||||
```bash
|
||||
# 0) 进入服务器
|
||||
ssh clicodeplus
|
||||
# 0) 进入构建服务器
|
||||
ssh us-asaki-root
|
||||
|
||||
# 1) 克隆代码到新目录(示例使用你的 fork)
|
||||
cd /root
|
||||
@@ -222,7 +238,23 @@ git clone https://github.com/touwaeriol/sub2api.git sub2api-beta
|
||||
cd /root/sub2api-beta
|
||||
git checkout release/custom-0.1.71
|
||||
|
||||
# 2) 准备 beta 的 .env(敏感信息只写这里)
|
||||
# 2) 构建 beta 镜像
|
||||
docker build -t sub2api:beta -f Dockerfile .
|
||||
exit
|
||||
|
||||
# 3) 传输镜像到生产服务器
|
||||
ssh us-asaki-root "docker save sub2api:beta" | ssh clicodeplus "docker load"
|
||||
|
||||
# 4) 在生产服务器上准备 beta 环境
|
||||
ssh clicodeplus
|
||||
|
||||
# 克隆代码(仅用于 deploy 配置和版本号确认,不在此构建)
|
||||
cd /root
|
||||
git clone https://github.com/touwaeriol/sub2api.git sub2api-beta
|
||||
cd /root/sub2api-beta
|
||||
git checkout release/custom-0.1.71
|
||||
|
||||
# 5) 准备 beta 的 .env(敏感信息只写这里)
|
||||
cd /root/sub2api-beta/deploy
|
||||
|
||||
# 推荐:从现网 .env 复制,保证除 DB 名/用户/端口外完全一致
|
||||
@@ -233,7 +265,7 @@ perl -pi -e 's/^SERVER_PORT=.*/SERVER_PORT=8084/' ./.env
|
||||
perl -pi -e 's/^POSTGRES_USER=.*/POSTGRES_USER=beta/' ./.env
|
||||
perl -pi -e 's/^POSTGRES_DB=.*/POSTGRES_DB=beta/' ./.env
|
||||
|
||||
# 3) 写 compose override(避免与现网容器名冲突,镜像使用本地构建的 sub2api:beta)
|
||||
# 6) 写 compose override(避免与现网容器名冲突,镜像使用构建服务器传输的 sub2api:beta)
|
||||
cat > docker-compose.override.yml <<'YAML'
|
||||
services:
|
||||
sub2api:
|
||||
@@ -243,15 +275,11 @@ services:
|
||||
container_name: sub2api-beta-redis
|
||||
YAML
|
||||
|
||||
# 4) 构建 beta 镜像(基于当前代码)
|
||||
cd /root/sub2api-beta
|
||||
docker build -t sub2api:beta -f Dockerfile .
|
||||
|
||||
# 5) 启动 beta(独立 project,确保不影响现网)
|
||||
# 7) 启动 beta(独立 project,确保不影响现网)
|
||||
cd /root/sub2api-beta/deploy
|
||||
docker compose -p sub2api-beta --env-file .env -f docker-compose.yml -f docker-compose.override.yml up -d
|
||||
|
||||
# 6) 验证 beta
|
||||
# 8) 验证 beta
|
||||
curl -fsS http://127.0.0.1:8084/health
|
||||
docker logs sub2api-beta --tail 50
|
||||
```
|
||||
@@ -265,11 +293,20 @@ docker logs sub2api-beta --tail 50
|
||||
|
||||
注意:需要数据库侧已存在 `beta` 用户与 `beta` 数据库,并授予权限;否则容器会启动失败并不断重启。
|
||||
|
||||
### 更新 beta(拉代码 + 仅重建 beta 容器)
|
||||
### 更新 beta(构建服务器构建 + 传输 + 仅重启 beta 容器)
|
||||
|
||||
```bash
|
||||
# 1) 构建服务器拉取代码并构建镜像
|
||||
ssh us-asaki-root "set -e; cd /root/sub2api-beta && git fetch --all --tags && git checkout -f release/custom-0.1.71 && git reset --hard origin/release/custom-0.1.71"
|
||||
ssh us-asaki-root "cd /root/sub2api-beta && docker build -t sub2api:beta -f Dockerfile ."
|
||||
|
||||
# 2) 传输镜像到生产服务器
|
||||
ssh us-asaki-root "docker save sub2api:beta" | ssh clicodeplus "docker load"
|
||||
|
||||
# 3) 生产服务器同步代码(用于版本号确认和 deploy 配置)
|
||||
ssh clicodeplus "set -e; cd /root/sub2api-beta && git fetch --all --tags && git checkout -f release/custom-0.1.71 && git reset --hard origin/release/custom-0.1.71"
|
||||
ssh clicodeplus "cd /root/sub2api-beta && docker build -t sub2api:beta -f Dockerfile ."
|
||||
|
||||
# 4) 重启 beta 容器
|
||||
ssh clicodeplus "cd /root/sub2api-beta/deploy && docker compose -p sub2api-beta --env-file .env -f docker-compose.yml -f docker-compose.override.yml up -d --no-deps --force-recreate sub2api"
|
||||
ssh clicodeplus "curl -fsS http://127.0.0.1:8084/health"
|
||||
```
|
||||
@@ -284,7 +321,36 @@ ssh clicodeplus "cd /root/sub2api-beta/deploy && docker compose -p sub2api-beta
|
||||
|
||||
## 服务器首次部署
|
||||
|
||||
### 1. 克隆代码并配置远程仓库
|
||||
### 1. 构建服务器:克隆代码并配置远程仓库
|
||||
|
||||
```bash
|
||||
ssh us-asaki-root
|
||||
cd /root
|
||||
git clone https://github.com/Wei-Shaw/sub2api.git
|
||||
cd sub2api
|
||||
|
||||
# 添加 fork 仓库
|
||||
git remote add fork https://github.com/touwaeriol/sub2api.git
|
||||
```
|
||||
|
||||
### 2. 构建服务器:切换到定制分支并构建镜像
|
||||
|
||||
```bash
|
||||
git fetch fork
|
||||
git checkout -B release/custom-0.1.69 fork/release/custom-0.1.69
|
||||
|
||||
cd /root/sub2api
|
||||
docker build -t sub2api:latest -f Dockerfile .
|
||||
exit
|
||||
```
|
||||
|
||||
### 3. 传输镜像到生产服务器
|
||||
|
||||
```bash
|
||||
ssh us-asaki-root "docker save sub2api:latest" | ssh clicodeplus "docker load"
|
||||
```
|
||||
|
||||
### 4. 生产服务器:克隆代码并配置环境
|
||||
|
||||
```bash
|
||||
ssh clicodeplus
|
||||
@@ -294,42 +360,23 @@ cd sub2api
|
||||
|
||||
# 添加 fork 仓库
|
||||
git remote add fork https://github.com/touwaeriol/sub2api.git
|
||||
```
|
||||
|
||||
### 2. 切换到定制分支并配置环境
|
||||
|
||||
```bash
|
||||
git fetch fork
|
||||
git checkout -B release/custom-0.1.69 fork/release/custom-0.1.69
|
||||
|
||||
# 配置环境变量
|
||||
cd deploy
|
||||
cp .env.example .env
|
||||
vim .env # 配置 DATABASE_URL, REDIS_URL, JWT_SECRET 等
|
||||
```
|
||||
|
||||
### 3. 构建并启动
|
||||
### 5. 生产服务器:更新镜像标签并启动服务
|
||||
|
||||
```bash
|
||||
cd /root/sub2api
|
||||
docker build -t sub2api:latest -f Dockerfile .
|
||||
docker tag sub2api:latest weishaw/sub2api:latest
|
||||
cd deploy && docker compose up -d
|
||||
cd /root/sub2api/deploy && docker compose up -d
|
||||
```
|
||||
|
||||
### 6. 启动服务
|
||||
|
||||
```bash
|
||||
# 进入 deploy 目录
|
||||
cd deploy
|
||||
|
||||
# 启动所有服务(PostgreSQL、Redis、sub2api)
|
||||
docker compose up -d
|
||||
|
||||
# 查看服务状态
|
||||
docker compose ps
|
||||
```
|
||||
|
||||
### 7. 验证部署
|
||||
### 6. 验证部署
|
||||
|
||||
```bash
|
||||
# 查看应用日志
|
||||
@@ -342,7 +389,7 @@ curl http://localhost:8080/health
|
||||
cat /root/sub2api/backend/cmd/server/VERSION
|
||||
```
|
||||
|
||||
### 8. 常用运维命令
|
||||
### 7. 常用运维命令
|
||||
|
||||
```bash
|
||||
# 查看实时日志
|
||||
@@ -415,7 +462,7 @@ docker stats sub2api
|
||||
|
||||
## 注意事项
|
||||
|
||||
1. **前端必须打包进镜像**:使用 `docker build` 在服务器上构建,Dockerfile 会自动编译前端并 embed 到后端二进制中
|
||||
1. **前端必须打包进镜像**:使用 `docker build` 在构建服务器(`us-asaki-root`)上构建,Dockerfile 会自动编译前端并 embed 到后端二进制中,构建完成后通过 `docker save | docker load` 传输到生产服务器(`clicodeplus`)
|
||||
|
||||
2. **镜像标签**:docker-compose.yml 使用 `weishaw/sub2api:latest`,本地构建后需要 `docker tag` 覆盖
|
||||
|
||||
|
||||
143
CLAUDE.md
143
CLAUDE.md
@@ -97,13 +97,22 @@ git push origin main
|
||||
|
||||
### 前置条件
|
||||
|
||||
- 本地已配置 SSH 别名 `clicodeplus` 连接到服务器
|
||||
- 服务器部署目录:`/root/sub2api`(正式)、`/root/sub2api-beta`(测试)
|
||||
- 服务器使用 Docker Compose 部署
|
||||
- 本地已配置 SSH 别名 `clicodeplus` 连接到生产服务器(运行服务)
|
||||
- 本地已配置 SSH 别名 `us-asaki-root` 连接到构建服务器(拉取代码、构建镜像)
|
||||
- 生产服务器部署目录:`/root/sub2api`(正式)、`/root/sub2api-beta`(测试)
|
||||
- 生产服务器使用 Docker Compose 部署
|
||||
- **镜像统一在构建服务器上构建**,避免生产服务器因编译占用 CPU/内存影响线上服务
|
||||
|
||||
### 服务器角色说明
|
||||
|
||||
| 服务器 | SSH 别名 | 职责 |
|
||||
|--------|----------|------|
|
||||
| 构建服务器 | `us-asaki-root` | 拉取代码、`docker build` 构建镜像 |
|
||||
| 生产服务器 | `clicodeplus` | 加载镜像、运行服务、部署验证 |
|
||||
|
||||
### 部署环境说明
|
||||
|
||||
| 环境 | 目录 | 端口 | 数据库 | 容器名 |
|
||||
| 环境 | 目录(生产服务器) | 端口 | 数据库 | 容器名 |
|
||||
|------|------|------|--------|--------|
|
||||
| 正式 | `/root/sub2api` | 8080 | `sub2api` | `sub2api` |
|
||||
| Beta | `/root/sub2api-beta` | 8084 | `beta` | `sub2api-beta` |
|
||||
@@ -155,26 +164,33 @@ git commit -m "chore: bump version to 0.1.69.2"
|
||||
git push origin release/custom-0.1.69
|
||||
```
|
||||
|
||||
#### 1. 服务器拉取代码
|
||||
#### 1. 构建服务器拉取代码
|
||||
|
||||
```bash
|
||||
ssh clicodeplus "cd /root/sub2api && git fetch fork && git checkout -B release/custom-0.1.69 fork/release/custom-0.1.69"
|
||||
ssh us-asaki-root "cd /root/sub2api && git fetch fork && git checkout -B release/custom-0.1.69 fork/release/custom-0.1.69"
|
||||
```
|
||||
|
||||
#### 2. 服务器构建镜像
|
||||
#### 2. 构建服务器构建镜像
|
||||
|
||||
```bash
|
||||
ssh clicodeplus "cd /root/sub2api && docker build --no-cache -t sub2api:latest -f Dockerfile ."
|
||||
ssh us-asaki-root "cd /root/sub2api && docker build --no-cache -t sub2api:latest -f Dockerfile ."
|
||||
```
|
||||
|
||||
#### 3. 更新镜像标签并重启服务
|
||||
#### 3. 传输镜像到生产服务器并加载
|
||||
|
||||
```bash
|
||||
# 导出镜像 → 通过管道传输 → 生产服务器加载
|
||||
ssh us-asaki-root "docker save sub2api:latest" | ssh clicodeplus "docker load"
|
||||
```
|
||||
|
||||
#### 4. 更新镜像标签并重启服务
|
||||
|
||||
```bash
|
||||
ssh clicodeplus "docker tag sub2api:latest weishaw/sub2api:latest"
|
||||
ssh clicodeplus "cd /root/sub2api/deploy && docker compose up -d --force-recreate sub2api"
|
||||
```
|
||||
|
||||
#### 4. 验证部署
|
||||
#### 5. 验证部署
|
||||
|
||||
```bash
|
||||
# 查看启动日志
|
||||
@@ -213,8 +229,8 @@ ssh clicodeplus "docker ps --format 'table {{.Names}}\t{{.Image}}\t{{.Ports}}' |
|
||||
### 首次部署步骤
|
||||
|
||||
```bash
|
||||
# 0) 进入服务器
|
||||
ssh clicodeplus
|
||||
# 0) 进入构建服务器
|
||||
ssh us-asaki-root
|
||||
|
||||
# 1) 克隆代码到新目录(示例使用你的 fork)
|
||||
cd /root
|
||||
@@ -222,7 +238,23 @@ git clone https://github.com/touwaeriol/sub2api.git sub2api-beta
|
||||
cd /root/sub2api-beta
|
||||
git checkout release/custom-0.1.71
|
||||
|
||||
# 2) 准备 beta 的 .env(敏感信息只写这里)
|
||||
# 2) 构建 beta 镜像
|
||||
docker build -t sub2api:beta -f Dockerfile .
|
||||
exit
|
||||
|
||||
# 3) 传输镜像到生产服务器
|
||||
ssh us-asaki-root "docker save sub2api:beta" | ssh clicodeplus "docker load"
|
||||
|
||||
# 4) 在生产服务器上准备 beta 环境
|
||||
ssh clicodeplus
|
||||
|
||||
# 克隆代码(仅用于 deploy 配置和版本号确认,不在此构建)
|
||||
cd /root
|
||||
git clone https://github.com/touwaeriol/sub2api.git sub2api-beta
|
||||
cd /root/sub2api-beta
|
||||
git checkout release/custom-0.1.71
|
||||
|
||||
# 5) 准备 beta 的 .env(敏感信息只写这里)
|
||||
cd /root/sub2api-beta/deploy
|
||||
|
||||
# 推荐:从现网 .env 复制,保证除 DB 名/用户/端口外完全一致
|
||||
@@ -233,7 +265,7 @@ perl -pi -e 's/^SERVER_PORT=.*/SERVER_PORT=8084/' ./.env
|
||||
perl -pi -e 's/^POSTGRES_USER=.*/POSTGRES_USER=beta/' ./.env
|
||||
perl -pi -e 's/^POSTGRES_DB=.*/POSTGRES_DB=beta/' ./.env
|
||||
|
||||
# 3) 写 compose override(避免与现网容器名冲突,镜像使用本地构建的 sub2api:beta)
|
||||
# 6) 写 compose override(避免与现网容器名冲突,镜像使用构建服务器传输的 sub2api:beta)
|
||||
cat > docker-compose.override.yml <<'YAML'
|
||||
services:
|
||||
sub2api:
|
||||
@@ -243,15 +275,11 @@ services:
|
||||
container_name: sub2api-beta-redis
|
||||
YAML
|
||||
|
||||
# 4) 构建 beta 镜像(基于当前代码)
|
||||
cd /root/sub2api-beta
|
||||
docker build -t sub2api:beta -f Dockerfile .
|
||||
|
||||
# 5) 启动 beta(独立 project,确保不影响现网)
|
||||
# 7) 启动 beta(独立 project,确保不影响现网)
|
||||
cd /root/sub2api-beta/deploy
|
||||
docker compose -p sub2api-beta --env-file .env -f docker-compose.yml -f docker-compose.override.yml up -d
|
||||
|
||||
# 6) 验证 beta
|
||||
# 8) 验证 beta
|
||||
curl -fsS http://127.0.0.1:8084/health
|
||||
docker logs sub2api-beta --tail 50
|
||||
```
|
||||
@@ -265,11 +293,20 @@ docker logs sub2api-beta --tail 50
|
||||
|
||||
注意:需要数据库侧已存在 `beta` 用户与 `beta` 数据库,并授予权限;否则容器会启动失败并不断重启。
|
||||
|
||||
### 更新 beta(拉代码 + 仅重建 beta 容器)
|
||||
### 更新 beta(构建服务器构建 + 传输 + 仅重启 beta 容器)
|
||||
|
||||
```bash
|
||||
# 1) 构建服务器拉取代码并构建镜像
|
||||
ssh us-asaki-root "set -e; cd /root/sub2api-beta && git fetch --all --tags && git checkout -f release/custom-0.1.71 && git reset --hard origin/release/custom-0.1.71"
|
||||
ssh us-asaki-root "cd /root/sub2api-beta && docker build -t sub2api:beta -f Dockerfile ."
|
||||
|
||||
# 2) 传输镜像到生产服务器
|
||||
ssh us-asaki-root "docker save sub2api:beta" | ssh clicodeplus "docker load"
|
||||
|
||||
# 3) 生产服务器同步代码(用于版本号确认和 deploy 配置)
|
||||
ssh clicodeplus "set -e; cd /root/sub2api-beta && git fetch --all --tags && git checkout -f release/custom-0.1.71 && git reset --hard origin/release/custom-0.1.71"
|
||||
ssh clicodeplus "cd /root/sub2api-beta && docker build -t sub2api:beta -f Dockerfile ."
|
||||
|
||||
# 4) 重启 beta 容器
|
||||
ssh clicodeplus "cd /root/sub2api-beta/deploy && docker compose -p sub2api-beta --env-file .env -f docker-compose.yml -f docker-compose.override.yml up -d --no-deps --force-recreate sub2api"
|
||||
ssh clicodeplus "curl -fsS http://127.0.0.1:8084/health"
|
||||
```
|
||||
@@ -284,7 +321,36 @@ ssh clicodeplus "cd /root/sub2api-beta/deploy && docker compose -p sub2api-beta
|
||||
|
||||
## 服务器首次部署
|
||||
|
||||
### 1. 克隆代码并配置远程仓库
|
||||
### 1. 构建服务器:克隆代码并配置远程仓库
|
||||
|
||||
```bash
|
||||
ssh us-asaki-root
|
||||
cd /root
|
||||
git clone https://github.com/Wei-Shaw/sub2api.git
|
||||
cd sub2api
|
||||
|
||||
# 添加 fork 仓库
|
||||
git remote add fork https://github.com/touwaeriol/sub2api.git
|
||||
```
|
||||
|
||||
### 2. 构建服务器:切换到定制分支并构建镜像
|
||||
|
||||
```bash
|
||||
git fetch fork
|
||||
git checkout -B release/custom-0.1.69 fork/release/custom-0.1.69
|
||||
|
||||
cd /root/sub2api
|
||||
docker build -t sub2api:latest -f Dockerfile .
|
||||
exit
|
||||
```
|
||||
|
||||
### 3. 传输镜像到生产服务器
|
||||
|
||||
```bash
|
||||
ssh us-asaki-root "docker save sub2api:latest" | ssh clicodeplus "docker load"
|
||||
```
|
||||
|
||||
### 4. 生产服务器:克隆代码并配置环境
|
||||
|
||||
```bash
|
||||
ssh clicodeplus
|
||||
@@ -294,42 +360,23 @@ cd sub2api
|
||||
|
||||
# 添加 fork 仓库
|
||||
git remote add fork https://github.com/touwaeriol/sub2api.git
|
||||
```
|
||||
|
||||
### 2. 切换到定制分支并配置环境
|
||||
|
||||
```bash
|
||||
git fetch fork
|
||||
git checkout -B release/custom-0.1.69 fork/release/custom-0.1.69
|
||||
|
||||
# 配置环境变量
|
||||
cd deploy
|
||||
cp .env.example .env
|
||||
vim .env # 配置 DATABASE_URL, REDIS_URL, JWT_SECRET 等
|
||||
```
|
||||
|
||||
### 3. 构建并启动
|
||||
### 5. 生产服务器:更新镜像标签并启动服务
|
||||
|
||||
```bash
|
||||
cd /root/sub2api
|
||||
docker build -t sub2api:latest -f Dockerfile .
|
||||
docker tag sub2api:latest weishaw/sub2api:latest
|
||||
cd deploy && docker compose up -d
|
||||
cd /root/sub2api/deploy && docker compose up -d
|
||||
```
|
||||
|
||||
### 6. 启动服务
|
||||
|
||||
```bash
|
||||
# 进入 deploy 目录
|
||||
cd deploy
|
||||
|
||||
# 启动所有服务(PostgreSQL、Redis、sub2api)
|
||||
docker compose up -d
|
||||
|
||||
# 查看服务状态
|
||||
docker compose ps
|
||||
```
|
||||
|
||||
### 7. 验证部署
|
||||
### 6. 验证部署
|
||||
|
||||
```bash
|
||||
# 查看应用日志
|
||||
@@ -342,7 +389,7 @@ curl http://localhost:8080/health
|
||||
cat /root/sub2api/backend/cmd/server/VERSION
|
||||
```
|
||||
|
||||
### 8. 常用运维命令
|
||||
### 7. 常用运维命令
|
||||
|
||||
```bash
|
||||
# 查看实时日志
|
||||
@@ -415,7 +462,7 @@ docker stats sub2api
|
||||
|
||||
## 注意事项
|
||||
|
||||
1. **前端必须打包进镜像**:使用 `docker build` 在服务器上构建,Dockerfile 会自动编译前端并 embed 到后端二进制中
|
||||
1. **前端必须打包进镜像**:使用 `docker build` 在构建服务器(`us-asaki-root`)上构建,Dockerfile 会自动编译前端并 embed 到后端二进制中,构建完成后通过 `docker save | docker load` 传输到生产服务器(`clicodeplus`)
|
||||
|
||||
2. **镜像标签**:docker-compose.yml 使用 `weishaw/sub2api:latest`,本地构建后需要 `docker tag` 覆盖
|
||||
|
||||
|
||||
@@ -1 +1 @@
|
||||
0.1.78.1
|
||||
0.1.79.2
|
||||
|
||||
@@ -44,6 +44,8 @@ type ErrorPassthroughRule struct {
|
||||
PassthroughBody bool `json:"passthrough_body,omitempty"`
|
||||
// CustomMessage holds the value of the "custom_message" field.
|
||||
CustomMessage *string `json:"custom_message,omitempty"`
|
||||
// SkipMonitoring holds the value of the "skip_monitoring" field.
|
||||
SkipMonitoring bool `json:"skip_monitoring,omitempty"`
|
||||
// Description holds the value of the "description" field.
|
||||
Description *string `json:"description,omitempty"`
|
||||
selectValues sql.SelectValues
|
||||
@@ -56,7 +58,7 @@ func (*ErrorPassthroughRule) scanValues(columns []string) ([]any, error) {
|
||||
switch columns[i] {
|
||||
case errorpassthroughrule.FieldErrorCodes, errorpassthroughrule.FieldKeywords, errorpassthroughrule.FieldPlatforms:
|
||||
values[i] = new([]byte)
|
||||
case errorpassthroughrule.FieldEnabled, errorpassthroughrule.FieldPassthroughCode, errorpassthroughrule.FieldPassthroughBody:
|
||||
case errorpassthroughrule.FieldEnabled, errorpassthroughrule.FieldPassthroughCode, errorpassthroughrule.FieldPassthroughBody, errorpassthroughrule.FieldSkipMonitoring:
|
||||
values[i] = new(sql.NullBool)
|
||||
case errorpassthroughrule.FieldID, errorpassthroughrule.FieldPriority, errorpassthroughrule.FieldResponseCode:
|
||||
values[i] = new(sql.NullInt64)
|
||||
@@ -171,6 +173,12 @@ func (_m *ErrorPassthroughRule) assignValues(columns []string, values []any) err
|
||||
_m.CustomMessage = new(string)
|
||||
*_m.CustomMessage = value.String
|
||||
}
|
||||
case errorpassthroughrule.FieldSkipMonitoring:
|
||||
if value, ok := values[i].(*sql.NullBool); !ok {
|
||||
return fmt.Errorf("unexpected type %T for field skip_monitoring", values[i])
|
||||
} else if value.Valid {
|
||||
_m.SkipMonitoring = value.Bool
|
||||
}
|
||||
case errorpassthroughrule.FieldDescription:
|
||||
if value, ok := values[i].(*sql.NullString); !ok {
|
||||
return fmt.Errorf("unexpected type %T for field description", values[i])
|
||||
@@ -257,6 +265,9 @@ func (_m *ErrorPassthroughRule) String() string {
|
||||
builder.WriteString(*v)
|
||||
}
|
||||
builder.WriteString(", ")
|
||||
builder.WriteString("skip_monitoring=")
|
||||
builder.WriteString(fmt.Sprintf("%v", _m.SkipMonitoring))
|
||||
builder.WriteString(", ")
|
||||
if v := _m.Description; v != nil {
|
||||
builder.WriteString("description=")
|
||||
builder.WriteString(*v)
|
||||
|
||||
@@ -39,6 +39,8 @@ const (
|
||||
FieldPassthroughBody = "passthrough_body"
|
||||
// FieldCustomMessage holds the string denoting the custom_message field in the database.
|
||||
FieldCustomMessage = "custom_message"
|
||||
// FieldSkipMonitoring holds the string denoting the skip_monitoring field in the database.
|
||||
FieldSkipMonitoring = "skip_monitoring"
|
||||
// FieldDescription holds the string denoting the description field in the database.
|
||||
FieldDescription = "description"
|
||||
// Table holds the table name of the errorpassthroughrule in the database.
|
||||
@@ -61,6 +63,7 @@ var Columns = []string{
|
||||
FieldResponseCode,
|
||||
FieldPassthroughBody,
|
||||
FieldCustomMessage,
|
||||
FieldSkipMonitoring,
|
||||
FieldDescription,
|
||||
}
|
||||
|
||||
@@ -95,6 +98,8 @@ var (
|
||||
DefaultPassthroughCode bool
|
||||
// DefaultPassthroughBody holds the default value on creation for the "passthrough_body" field.
|
||||
DefaultPassthroughBody bool
|
||||
// DefaultSkipMonitoring holds the default value on creation for the "skip_monitoring" field.
|
||||
DefaultSkipMonitoring bool
|
||||
)
|
||||
|
||||
// OrderOption defines the ordering options for the ErrorPassthroughRule queries.
|
||||
@@ -155,6 +160,11 @@ func ByCustomMessage(opts ...sql.OrderTermOption) OrderOption {
|
||||
return sql.OrderByField(FieldCustomMessage, opts...).ToFunc()
|
||||
}
|
||||
|
||||
// BySkipMonitoring orders the results by the skip_monitoring field.
|
||||
func BySkipMonitoring(opts ...sql.OrderTermOption) OrderOption {
|
||||
return sql.OrderByField(FieldSkipMonitoring, opts...).ToFunc()
|
||||
}
|
||||
|
||||
// ByDescription orders the results by the description field.
|
||||
func ByDescription(opts ...sql.OrderTermOption) OrderOption {
|
||||
return sql.OrderByField(FieldDescription, opts...).ToFunc()
|
||||
|
||||
@@ -104,6 +104,11 @@ func CustomMessage(v string) predicate.ErrorPassthroughRule {
|
||||
return predicate.ErrorPassthroughRule(sql.FieldEQ(FieldCustomMessage, v))
|
||||
}
|
||||
|
||||
// SkipMonitoring applies equality check predicate on the "skip_monitoring" field. It's identical to SkipMonitoringEQ.
|
||||
func SkipMonitoring(v bool) predicate.ErrorPassthroughRule {
|
||||
return predicate.ErrorPassthroughRule(sql.FieldEQ(FieldSkipMonitoring, v))
|
||||
}
|
||||
|
||||
// Description applies equality check predicate on the "description" field. It's identical to DescriptionEQ.
|
||||
func Description(v string) predicate.ErrorPassthroughRule {
|
||||
return predicate.ErrorPassthroughRule(sql.FieldEQ(FieldDescription, v))
|
||||
@@ -544,6 +549,16 @@ func CustomMessageContainsFold(v string) predicate.ErrorPassthroughRule {
|
||||
return predicate.ErrorPassthroughRule(sql.FieldContainsFold(FieldCustomMessage, v))
|
||||
}
|
||||
|
||||
// SkipMonitoringEQ applies the EQ predicate on the "skip_monitoring" field.
|
||||
func SkipMonitoringEQ(v bool) predicate.ErrorPassthroughRule {
|
||||
return predicate.ErrorPassthroughRule(sql.FieldEQ(FieldSkipMonitoring, v))
|
||||
}
|
||||
|
||||
// SkipMonitoringNEQ applies the NEQ predicate on the "skip_monitoring" field.
|
||||
func SkipMonitoringNEQ(v bool) predicate.ErrorPassthroughRule {
|
||||
return predicate.ErrorPassthroughRule(sql.FieldNEQ(FieldSkipMonitoring, v))
|
||||
}
|
||||
|
||||
// DescriptionEQ applies the EQ predicate on the "description" field.
|
||||
func DescriptionEQ(v string) predicate.ErrorPassthroughRule {
|
||||
return predicate.ErrorPassthroughRule(sql.FieldEQ(FieldDescription, v))
|
||||
|
||||
@@ -172,6 +172,20 @@ func (_c *ErrorPassthroughRuleCreate) SetNillableCustomMessage(v *string) *Error
|
||||
return _c
|
||||
}
|
||||
|
||||
// SetSkipMonitoring sets the "skip_monitoring" field.
|
||||
func (_c *ErrorPassthroughRuleCreate) SetSkipMonitoring(v bool) *ErrorPassthroughRuleCreate {
|
||||
_c.mutation.SetSkipMonitoring(v)
|
||||
return _c
|
||||
}
|
||||
|
||||
// SetNillableSkipMonitoring sets the "skip_monitoring" field if the given value is not nil.
|
||||
func (_c *ErrorPassthroughRuleCreate) SetNillableSkipMonitoring(v *bool) *ErrorPassthroughRuleCreate {
|
||||
if v != nil {
|
||||
_c.SetSkipMonitoring(*v)
|
||||
}
|
||||
return _c
|
||||
}
|
||||
|
||||
// SetDescription sets the "description" field.
|
||||
func (_c *ErrorPassthroughRuleCreate) SetDescription(v string) *ErrorPassthroughRuleCreate {
|
||||
_c.mutation.SetDescription(v)
|
||||
@@ -249,6 +263,10 @@ func (_c *ErrorPassthroughRuleCreate) defaults() {
|
||||
v := errorpassthroughrule.DefaultPassthroughBody
|
||||
_c.mutation.SetPassthroughBody(v)
|
||||
}
|
||||
if _, ok := _c.mutation.SkipMonitoring(); !ok {
|
||||
v := errorpassthroughrule.DefaultSkipMonitoring
|
||||
_c.mutation.SetSkipMonitoring(v)
|
||||
}
|
||||
}
|
||||
|
||||
// check runs all checks and user-defined validators on the builder.
|
||||
@@ -287,6 +305,9 @@ func (_c *ErrorPassthroughRuleCreate) check() error {
|
||||
if _, ok := _c.mutation.PassthroughBody(); !ok {
|
||||
return &ValidationError{Name: "passthrough_body", err: errors.New(`ent: missing required field "ErrorPassthroughRule.passthrough_body"`)}
|
||||
}
|
||||
if _, ok := _c.mutation.SkipMonitoring(); !ok {
|
||||
return &ValidationError{Name: "skip_monitoring", err: errors.New(`ent: missing required field "ErrorPassthroughRule.skip_monitoring"`)}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -366,6 +387,10 @@ func (_c *ErrorPassthroughRuleCreate) createSpec() (*ErrorPassthroughRule, *sqlg
|
||||
_spec.SetField(errorpassthroughrule.FieldCustomMessage, field.TypeString, value)
|
||||
_node.CustomMessage = &value
|
||||
}
|
||||
if value, ok := _c.mutation.SkipMonitoring(); ok {
|
||||
_spec.SetField(errorpassthroughrule.FieldSkipMonitoring, field.TypeBool, value)
|
||||
_node.SkipMonitoring = value
|
||||
}
|
||||
if value, ok := _c.mutation.Description(); ok {
|
||||
_spec.SetField(errorpassthroughrule.FieldDescription, field.TypeString, value)
|
||||
_node.Description = &value
|
||||
@@ -608,6 +633,18 @@ func (u *ErrorPassthroughRuleUpsert) ClearCustomMessage() *ErrorPassthroughRuleU
|
||||
return u
|
||||
}
|
||||
|
||||
// SetSkipMonitoring sets the "skip_monitoring" field.
|
||||
func (u *ErrorPassthroughRuleUpsert) SetSkipMonitoring(v bool) *ErrorPassthroughRuleUpsert {
|
||||
u.Set(errorpassthroughrule.FieldSkipMonitoring, v)
|
||||
return u
|
||||
}
|
||||
|
||||
// UpdateSkipMonitoring sets the "skip_monitoring" field to the value that was provided on create.
|
||||
func (u *ErrorPassthroughRuleUpsert) UpdateSkipMonitoring() *ErrorPassthroughRuleUpsert {
|
||||
u.SetExcluded(errorpassthroughrule.FieldSkipMonitoring)
|
||||
return u
|
||||
}
|
||||
|
||||
// SetDescription sets the "description" field.
|
||||
func (u *ErrorPassthroughRuleUpsert) SetDescription(v string) *ErrorPassthroughRuleUpsert {
|
||||
u.Set(errorpassthroughrule.FieldDescription, v)
|
||||
@@ -888,6 +925,20 @@ func (u *ErrorPassthroughRuleUpsertOne) ClearCustomMessage() *ErrorPassthroughRu
|
||||
})
|
||||
}
|
||||
|
||||
// SetSkipMonitoring sets the "skip_monitoring" field.
|
||||
func (u *ErrorPassthroughRuleUpsertOne) SetSkipMonitoring(v bool) *ErrorPassthroughRuleUpsertOne {
|
||||
return u.Update(func(s *ErrorPassthroughRuleUpsert) {
|
||||
s.SetSkipMonitoring(v)
|
||||
})
|
||||
}
|
||||
|
||||
// UpdateSkipMonitoring sets the "skip_monitoring" field to the value that was provided on create.
|
||||
func (u *ErrorPassthroughRuleUpsertOne) UpdateSkipMonitoring() *ErrorPassthroughRuleUpsertOne {
|
||||
return u.Update(func(s *ErrorPassthroughRuleUpsert) {
|
||||
s.UpdateSkipMonitoring()
|
||||
})
|
||||
}
|
||||
|
||||
// SetDescription sets the "description" field.
|
||||
func (u *ErrorPassthroughRuleUpsertOne) SetDescription(v string) *ErrorPassthroughRuleUpsertOne {
|
||||
return u.Update(func(s *ErrorPassthroughRuleUpsert) {
|
||||
@@ -1337,6 +1388,20 @@ func (u *ErrorPassthroughRuleUpsertBulk) ClearCustomMessage() *ErrorPassthroughR
|
||||
})
|
||||
}
|
||||
|
||||
// SetSkipMonitoring sets the "skip_monitoring" field.
|
||||
func (u *ErrorPassthroughRuleUpsertBulk) SetSkipMonitoring(v bool) *ErrorPassthroughRuleUpsertBulk {
|
||||
return u.Update(func(s *ErrorPassthroughRuleUpsert) {
|
||||
s.SetSkipMonitoring(v)
|
||||
})
|
||||
}
|
||||
|
||||
// UpdateSkipMonitoring sets the "skip_monitoring" field to the value that was provided on create.
|
||||
func (u *ErrorPassthroughRuleUpsertBulk) UpdateSkipMonitoring() *ErrorPassthroughRuleUpsertBulk {
|
||||
return u.Update(func(s *ErrorPassthroughRuleUpsert) {
|
||||
s.UpdateSkipMonitoring()
|
||||
})
|
||||
}
|
||||
|
||||
// SetDescription sets the "description" field.
|
||||
func (u *ErrorPassthroughRuleUpsertBulk) SetDescription(v string) *ErrorPassthroughRuleUpsertBulk {
|
||||
return u.Update(func(s *ErrorPassthroughRuleUpsert) {
|
||||
|
||||
@@ -227,6 +227,20 @@ func (_u *ErrorPassthroughRuleUpdate) ClearCustomMessage() *ErrorPassthroughRule
|
||||
return _u
|
||||
}
|
||||
|
||||
// SetSkipMonitoring sets the "skip_monitoring" field.
|
||||
func (_u *ErrorPassthroughRuleUpdate) SetSkipMonitoring(v bool) *ErrorPassthroughRuleUpdate {
|
||||
_u.mutation.SetSkipMonitoring(v)
|
||||
return _u
|
||||
}
|
||||
|
||||
// SetNillableSkipMonitoring sets the "skip_monitoring" field if the given value is not nil.
|
||||
func (_u *ErrorPassthroughRuleUpdate) SetNillableSkipMonitoring(v *bool) *ErrorPassthroughRuleUpdate {
|
||||
if v != nil {
|
||||
_u.SetSkipMonitoring(*v)
|
||||
}
|
||||
return _u
|
||||
}
|
||||
|
||||
// SetDescription sets the "description" field.
|
||||
func (_u *ErrorPassthroughRuleUpdate) SetDescription(v string) *ErrorPassthroughRuleUpdate {
|
||||
_u.mutation.SetDescription(v)
|
||||
@@ -387,6 +401,9 @@ func (_u *ErrorPassthroughRuleUpdate) sqlSave(ctx context.Context) (_node int, e
|
||||
if _u.mutation.CustomMessageCleared() {
|
||||
_spec.ClearField(errorpassthroughrule.FieldCustomMessage, field.TypeString)
|
||||
}
|
||||
if value, ok := _u.mutation.SkipMonitoring(); ok {
|
||||
_spec.SetField(errorpassthroughrule.FieldSkipMonitoring, field.TypeBool, value)
|
||||
}
|
||||
if value, ok := _u.mutation.Description(); ok {
|
||||
_spec.SetField(errorpassthroughrule.FieldDescription, field.TypeString, value)
|
||||
}
|
||||
@@ -611,6 +628,20 @@ func (_u *ErrorPassthroughRuleUpdateOne) ClearCustomMessage() *ErrorPassthroughR
|
||||
return _u
|
||||
}
|
||||
|
||||
// SetSkipMonitoring sets the "skip_monitoring" field.
|
||||
func (_u *ErrorPassthroughRuleUpdateOne) SetSkipMonitoring(v bool) *ErrorPassthroughRuleUpdateOne {
|
||||
_u.mutation.SetSkipMonitoring(v)
|
||||
return _u
|
||||
}
|
||||
|
||||
// SetNillableSkipMonitoring sets the "skip_monitoring" field if the given value is not nil.
|
||||
func (_u *ErrorPassthroughRuleUpdateOne) SetNillableSkipMonitoring(v *bool) *ErrorPassthroughRuleUpdateOne {
|
||||
if v != nil {
|
||||
_u.SetSkipMonitoring(*v)
|
||||
}
|
||||
return _u
|
||||
}
|
||||
|
||||
// SetDescription sets the "description" field.
|
||||
func (_u *ErrorPassthroughRuleUpdateOne) SetDescription(v string) *ErrorPassthroughRuleUpdateOne {
|
||||
_u.mutation.SetDescription(v)
|
||||
@@ -801,6 +832,9 @@ func (_u *ErrorPassthroughRuleUpdateOne) sqlSave(ctx context.Context) (_node *Er
|
||||
if _u.mutation.CustomMessageCleared() {
|
||||
_spec.ClearField(errorpassthroughrule.FieldCustomMessage, field.TypeString)
|
||||
}
|
||||
if value, ok := _u.mutation.SkipMonitoring(); ok {
|
||||
_spec.SetField(errorpassthroughrule.FieldSkipMonitoring, field.TypeBool, value)
|
||||
}
|
||||
if value, ok := _u.mutation.Description(); ok {
|
||||
_spec.SetField(errorpassthroughrule.FieldDescription, field.TypeString, value)
|
||||
}
|
||||
|
||||
@@ -325,6 +325,7 @@ var (
|
||||
{Name: "response_code", Type: field.TypeInt, Nullable: true},
|
||||
{Name: "passthrough_body", Type: field.TypeBool, Default: true},
|
||||
{Name: "custom_message", Type: field.TypeString, Nullable: true, Size: 2147483647},
|
||||
{Name: "skip_monitoring", Type: field.TypeBool, Default: false},
|
||||
{Name: "description", Type: field.TypeString, Nullable: true, Size: 2147483647},
|
||||
}
|
||||
// ErrorPassthroughRulesTable holds the schema information for the "error_passthrough_rules" table.
|
||||
|
||||
@@ -5776,6 +5776,7 @@ type ErrorPassthroughRuleMutation struct {
|
||||
addresponse_code *int
|
||||
passthrough_body *bool
|
||||
custom_message *string
|
||||
skip_monitoring *bool
|
||||
description *string
|
||||
clearedFields map[string]struct{}
|
||||
done bool
|
||||
@@ -6503,6 +6504,42 @@ func (m *ErrorPassthroughRuleMutation) ResetCustomMessage() {
|
||||
delete(m.clearedFields, errorpassthroughrule.FieldCustomMessage)
|
||||
}
|
||||
|
||||
// SetSkipMonitoring sets the "skip_monitoring" field.
|
||||
func (m *ErrorPassthroughRuleMutation) SetSkipMonitoring(b bool) {
|
||||
m.skip_monitoring = &b
|
||||
}
|
||||
|
||||
// SkipMonitoring returns the value of the "skip_monitoring" field in the mutation.
|
||||
func (m *ErrorPassthroughRuleMutation) SkipMonitoring() (r bool, exists bool) {
|
||||
v := m.skip_monitoring
|
||||
if v == nil {
|
||||
return
|
||||
}
|
||||
return *v, true
|
||||
}
|
||||
|
||||
// OldSkipMonitoring returns the old "skip_monitoring" field's value of the ErrorPassthroughRule entity.
|
||||
// If the ErrorPassthroughRule object wasn't provided to the builder, the object is fetched from the database.
|
||||
// An error is returned if the mutation operation is not UpdateOne, or the database query fails.
|
||||
func (m *ErrorPassthroughRuleMutation) OldSkipMonitoring(ctx context.Context) (v bool, err error) {
|
||||
if !m.op.Is(OpUpdateOne) {
|
||||
return v, errors.New("OldSkipMonitoring is only allowed on UpdateOne operations")
|
||||
}
|
||||
if m.id == nil || m.oldValue == nil {
|
||||
return v, errors.New("OldSkipMonitoring requires an ID field in the mutation")
|
||||
}
|
||||
oldValue, err := m.oldValue(ctx)
|
||||
if err != nil {
|
||||
return v, fmt.Errorf("querying old value for OldSkipMonitoring: %w", err)
|
||||
}
|
||||
return oldValue.SkipMonitoring, nil
|
||||
}
|
||||
|
||||
// ResetSkipMonitoring resets all changes to the "skip_monitoring" field.
|
||||
func (m *ErrorPassthroughRuleMutation) ResetSkipMonitoring() {
|
||||
m.skip_monitoring = nil
|
||||
}
|
||||
|
||||
// SetDescription sets the "description" field.
|
||||
func (m *ErrorPassthroughRuleMutation) SetDescription(s string) {
|
||||
m.description = &s
|
||||
@@ -6586,7 +6623,7 @@ func (m *ErrorPassthroughRuleMutation) Type() string {
|
||||
// order to get all numeric fields that were incremented/decremented, call
|
||||
// AddedFields().
|
||||
func (m *ErrorPassthroughRuleMutation) Fields() []string {
|
||||
fields := make([]string, 0, 14)
|
||||
fields := make([]string, 0, 15)
|
||||
if m.created_at != nil {
|
||||
fields = append(fields, errorpassthroughrule.FieldCreatedAt)
|
||||
}
|
||||
@@ -6626,6 +6663,9 @@ func (m *ErrorPassthroughRuleMutation) Fields() []string {
|
||||
if m.custom_message != nil {
|
||||
fields = append(fields, errorpassthroughrule.FieldCustomMessage)
|
||||
}
|
||||
if m.skip_monitoring != nil {
|
||||
fields = append(fields, errorpassthroughrule.FieldSkipMonitoring)
|
||||
}
|
||||
if m.description != nil {
|
||||
fields = append(fields, errorpassthroughrule.FieldDescription)
|
||||
}
|
||||
@@ -6663,6 +6703,8 @@ func (m *ErrorPassthroughRuleMutation) Field(name string) (ent.Value, bool) {
|
||||
return m.PassthroughBody()
|
||||
case errorpassthroughrule.FieldCustomMessage:
|
||||
return m.CustomMessage()
|
||||
case errorpassthroughrule.FieldSkipMonitoring:
|
||||
return m.SkipMonitoring()
|
||||
case errorpassthroughrule.FieldDescription:
|
||||
return m.Description()
|
||||
}
|
||||
@@ -6700,6 +6742,8 @@ func (m *ErrorPassthroughRuleMutation) OldField(ctx context.Context, name string
|
||||
return m.OldPassthroughBody(ctx)
|
||||
case errorpassthroughrule.FieldCustomMessage:
|
||||
return m.OldCustomMessage(ctx)
|
||||
case errorpassthroughrule.FieldSkipMonitoring:
|
||||
return m.OldSkipMonitoring(ctx)
|
||||
case errorpassthroughrule.FieldDescription:
|
||||
return m.OldDescription(ctx)
|
||||
}
|
||||
@@ -6802,6 +6846,13 @@ func (m *ErrorPassthroughRuleMutation) SetField(name string, value ent.Value) er
|
||||
}
|
||||
m.SetCustomMessage(v)
|
||||
return nil
|
||||
case errorpassthroughrule.FieldSkipMonitoring:
|
||||
v, ok := value.(bool)
|
||||
if !ok {
|
||||
return fmt.Errorf("unexpected type %T for field %s", value, name)
|
||||
}
|
||||
m.SetSkipMonitoring(v)
|
||||
return nil
|
||||
case errorpassthroughrule.FieldDescription:
|
||||
v, ok := value.(string)
|
||||
if !ok {
|
||||
@@ -6963,6 +7014,9 @@ func (m *ErrorPassthroughRuleMutation) ResetField(name string) error {
|
||||
case errorpassthroughrule.FieldCustomMessage:
|
||||
m.ResetCustomMessage()
|
||||
return nil
|
||||
case errorpassthroughrule.FieldSkipMonitoring:
|
||||
m.ResetSkipMonitoring()
|
||||
return nil
|
||||
case errorpassthroughrule.FieldDescription:
|
||||
m.ResetDescription()
|
||||
return nil
|
||||
|
||||
@@ -326,6 +326,10 @@ func init() {
|
||||
errorpassthroughruleDescPassthroughBody := errorpassthroughruleFields[9].Descriptor()
|
||||
// errorpassthroughrule.DefaultPassthroughBody holds the default value on creation for the passthrough_body field.
|
||||
errorpassthroughrule.DefaultPassthroughBody = errorpassthroughruleDescPassthroughBody.Default.(bool)
|
||||
// errorpassthroughruleDescSkipMonitoring is the schema descriptor for skip_monitoring field.
|
||||
errorpassthroughruleDescSkipMonitoring := errorpassthroughruleFields[11].Descriptor()
|
||||
// errorpassthroughrule.DefaultSkipMonitoring holds the default value on creation for the skip_monitoring field.
|
||||
errorpassthroughrule.DefaultSkipMonitoring = errorpassthroughruleDescSkipMonitoring.Default.(bool)
|
||||
groupMixin := schema.Group{}.Mixin()
|
||||
groupMixinHooks1 := groupMixin[1].Hooks()
|
||||
group.Hooks[0] = groupMixinHooks1[0]
|
||||
|
||||
@@ -105,6 +105,12 @@ func (ErrorPassthroughRule) Fields() []ent.Field {
|
||||
Optional().
|
||||
Nillable(),
|
||||
|
||||
// skip_monitoring: 是否跳过运维监控记录
|
||||
// true: 匹配此规则的错误不会被记录到 ops_error_logs
|
||||
// false: 正常记录到运维监控(默认行为)
|
||||
field.Bool("skip_monitoring").
|
||||
Default(false),
|
||||
|
||||
// description: 规则描述,用于说明规则的用途
|
||||
field.Text("description").
|
||||
Optional().
|
||||
|
||||
@@ -32,6 +32,7 @@ type CreateErrorPassthroughRuleRequest struct {
|
||||
ResponseCode *int `json:"response_code"`
|
||||
PassthroughBody *bool `json:"passthrough_body"`
|
||||
CustomMessage *string `json:"custom_message"`
|
||||
SkipMonitoring *bool `json:"skip_monitoring"`
|
||||
Description *string `json:"description"`
|
||||
}
|
||||
|
||||
@@ -48,6 +49,7 @@ type UpdateErrorPassthroughRuleRequest struct {
|
||||
ResponseCode *int `json:"response_code"`
|
||||
PassthroughBody *bool `json:"passthrough_body"`
|
||||
CustomMessage *string `json:"custom_message"`
|
||||
SkipMonitoring *bool `json:"skip_monitoring"`
|
||||
Description *string `json:"description"`
|
||||
}
|
||||
|
||||
@@ -122,6 +124,9 @@ func (h *ErrorPassthroughHandler) Create(c *gin.Context) {
|
||||
} else {
|
||||
rule.PassthroughBody = true
|
||||
}
|
||||
if req.SkipMonitoring != nil {
|
||||
rule.SkipMonitoring = *req.SkipMonitoring
|
||||
}
|
||||
rule.ResponseCode = req.ResponseCode
|
||||
rule.CustomMessage = req.CustomMessage
|
||||
rule.Description = req.Description
|
||||
@@ -190,6 +195,7 @@ func (h *ErrorPassthroughHandler) Update(c *gin.Context) {
|
||||
ResponseCode: existing.ResponseCode,
|
||||
PassthroughBody: existing.PassthroughBody,
|
||||
CustomMessage: existing.CustomMessage,
|
||||
SkipMonitoring: existing.SkipMonitoring,
|
||||
Description: existing.Description,
|
||||
}
|
||||
|
||||
@@ -230,6 +236,9 @@ func (h *ErrorPassthroughHandler) Update(c *gin.Context) {
|
||||
if req.Description != nil {
|
||||
rule.Description = req.Description
|
||||
}
|
||||
if req.SkipMonitoring != nil {
|
||||
rule.SkipMonitoring = *req.SkipMonitoring
|
||||
}
|
||||
|
||||
// 确保切片不为 nil
|
||||
if rule.ErrorCodes == nil {
|
||||
|
||||
125
backend/internal/handler/failover_loop.go
Normal file
125
backend/internal/handler/failover_loop.go
Normal file
@@ -0,0 +1,125 @@
|
||||
package handler
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log"
|
||||
"time"
|
||||
|
||||
"github.com/Wei-Shaw/sub2api/internal/service"
|
||||
)
|
||||
|
||||
// TempUnscheduler 用于 HandleFailoverError 中同账号重试耗尽后的临时封禁。
|
||||
// GatewayService 隐式实现此接口。
|
||||
type TempUnscheduler interface {
|
||||
TempUnscheduleRetryableError(ctx context.Context, accountID int64, failoverErr *service.UpstreamFailoverError)
|
||||
}
|
||||
|
||||
// FailoverAction 表示 failover 错误处理后的下一步动作
|
||||
type FailoverAction int
|
||||
|
||||
const (
|
||||
// FailoverRetry 同账号重试(调用方应 continue 重新进入循环,不更换账号)
|
||||
FailoverRetry FailoverAction = iota
|
||||
// FailoverSwitch 切换账号(调用方应 continue 重新选择账号)
|
||||
FailoverSwitch
|
||||
// FailoverExhausted 切换次数耗尽(调用方应返回错误响应)
|
||||
FailoverExhausted
|
||||
// FailoverCanceled context 已取消(调用方应直接 return)
|
||||
FailoverCanceled
|
||||
)
|
||||
|
||||
const (
|
||||
// maxSameAccountRetries 同账号重试次数上限(针对 RetryableOnSameAccount 错误)
|
||||
maxSameAccountRetries = 2
|
||||
// sameAccountRetryDelay 同账号重试间隔
|
||||
sameAccountRetryDelay = 500 * time.Millisecond
|
||||
)
|
||||
|
||||
// FailoverState 跨循环迭代共享的 failover 状态
|
||||
type FailoverState struct {
|
||||
SwitchCount int
|
||||
MaxSwitches int
|
||||
FailedAccountIDs map[int64]struct{}
|
||||
SameAccountRetryCount map[int64]int
|
||||
LastFailoverErr *service.UpstreamFailoverError
|
||||
ForceCacheBilling bool
|
||||
hasBoundSession bool
|
||||
}
|
||||
|
||||
// NewFailoverState 创建 failover 状态
|
||||
func NewFailoverState(maxSwitches int, hasBoundSession bool) *FailoverState {
|
||||
return &FailoverState{
|
||||
MaxSwitches: maxSwitches,
|
||||
FailedAccountIDs: make(map[int64]struct{}),
|
||||
SameAccountRetryCount: make(map[int64]int),
|
||||
hasBoundSession: hasBoundSession,
|
||||
}
|
||||
}
|
||||
|
||||
// HandleFailoverError 处理 UpstreamFailoverError,返回下一步动作。
|
||||
// 包含:缓存计费判断、同账号重试、临时封禁、切换计数、Antigravity 延时。
|
||||
func (s *FailoverState) HandleFailoverError(
|
||||
ctx context.Context,
|
||||
gatewayService TempUnscheduler,
|
||||
accountID int64,
|
||||
platform string,
|
||||
failoverErr *service.UpstreamFailoverError,
|
||||
) FailoverAction {
|
||||
s.LastFailoverErr = failoverErr
|
||||
|
||||
// 缓存计费判断
|
||||
if needForceCacheBilling(s.hasBoundSession, failoverErr) {
|
||||
s.ForceCacheBilling = true
|
||||
}
|
||||
|
||||
// 同账号重试:对 RetryableOnSameAccount 的临时性错误,先在同一账号上重试
|
||||
if failoverErr.RetryableOnSameAccount && s.SameAccountRetryCount[accountID] < maxSameAccountRetries {
|
||||
s.SameAccountRetryCount[accountID]++
|
||||
log.Printf("Account %d: retryable error %d, same-account retry %d/%d",
|
||||
accountID, failoverErr.StatusCode, s.SameAccountRetryCount[accountID], maxSameAccountRetries)
|
||||
if !sleepWithContext(ctx, sameAccountRetryDelay) {
|
||||
return FailoverCanceled
|
||||
}
|
||||
return FailoverRetry
|
||||
}
|
||||
|
||||
// 同账号重试用尽,执行临时封禁
|
||||
if failoverErr.RetryableOnSameAccount {
|
||||
gatewayService.TempUnscheduleRetryableError(ctx, accountID, failoverErr)
|
||||
}
|
||||
|
||||
// 加入失败列表
|
||||
s.FailedAccountIDs[accountID] = struct{}{}
|
||||
|
||||
// 检查是否耗尽
|
||||
if s.SwitchCount >= s.MaxSwitches {
|
||||
return FailoverExhausted
|
||||
}
|
||||
|
||||
// 递增切换计数
|
||||
s.SwitchCount++
|
||||
log.Printf("Account %d: upstream error %d, switching account %d/%d",
|
||||
accountID, failoverErr.StatusCode, s.SwitchCount, s.MaxSwitches)
|
||||
|
||||
// Antigravity 平台换号线性递增延时
|
||||
if platform == service.PlatformAntigravity {
|
||||
if !sleepFailoverDelay(ctx, s.SwitchCount) {
|
||||
return FailoverCanceled
|
||||
}
|
||||
}
|
||||
|
||||
return FailoverSwitch
|
||||
}
|
||||
|
||||
// sleepWithContext 等待指定时长,返回 false 表示 context 已取消。
|
||||
func sleepWithContext(ctx context.Context, d time.Duration) bool {
|
||||
if d <= 0 {
|
||||
return true
|
||||
}
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return false
|
||||
case <-time.After(d):
|
||||
return true
|
||||
}
|
||||
}
|
||||
657
backend/internal/handler/failover_loop_test.go
Normal file
657
backend/internal/handler/failover_loop_test.go
Normal file
@@ -0,0 +1,657 @@
|
||||
package handler
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/Wei-Shaw/sub2api/internal/service"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Mock
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// mockTempUnscheduler 记录 TempUnscheduleRetryableError 的调用信息。
|
||||
type mockTempUnscheduler struct {
|
||||
calls []tempUnscheduleCall
|
||||
}
|
||||
|
||||
type tempUnscheduleCall struct {
|
||||
accountID int64
|
||||
failoverErr *service.UpstreamFailoverError
|
||||
}
|
||||
|
||||
func (m *mockTempUnscheduler) TempUnscheduleRetryableError(_ context.Context, accountID int64, failoverErr *service.UpstreamFailoverError) {
|
||||
m.calls = append(m.calls, tempUnscheduleCall{accountID: accountID, failoverErr: failoverErr})
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helper
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
func newTestFailoverErr(statusCode int, retryable, forceBilling bool) *service.UpstreamFailoverError {
|
||||
return &service.UpstreamFailoverError{
|
||||
StatusCode: statusCode,
|
||||
RetryableOnSameAccount: retryable,
|
||||
ForceCacheBilling: forceBilling,
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// NewFailoverState 测试
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
func TestNewFailoverState(t *testing.T) {
|
||||
t.Run("初始化字段正确", func(t *testing.T) {
|
||||
fs := NewFailoverState(5, true)
|
||||
require.Equal(t, 5, fs.MaxSwitches)
|
||||
require.Equal(t, 0, fs.SwitchCount)
|
||||
require.NotNil(t, fs.FailedAccountIDs)
|
||||
require.Empty(t, fs.FailedAccountIDs)
|
||||
require.NotNil(t, fs.SameAccountRetryCount)
|
||||
require.Empty(t, fs.SameAccountRetryCount)
|
||||
require.Nil(t, fs.LastFailoverErr)
|
||||
require.False(t, fs.ForceCacheBilling)
|
||||
require.True(t, fs.hasBoundSession)
|
||||
})
|
||||
|
||||
t.Run("无绑定会话", func(t *testing.T) {
|
||||
fs := NewFailoverState(3, false)
|
||||
require.Equal(t, 3, fs.MaxSwitches)
|
||||
require.False(t, fs.hasBoundSession)
|
||||
})
|
||||
|
||||
t.Run("零最大切换次数", func(t *testing.T) {
|
||||
fs := NewFailoverState(0, false)
|
||||
require.Equal(t, 0, fs.MaxSwitches)
|
||||
})
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// sleepWithContext 测试
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
func TestSleepWithContext(t *testing.T) {
|
||||
t.Run("零时长立即返回true", func(t *testing.T) {
|
||||
start := time.Now()
|
||||
ok := sleepWithContext(context.Background(), 0)
|
||||
require.True(t, ok)
|
||||
require.Less(t, time.Since(start), 50*time.Millisecond)
|
||||
})
|
||||
|
||||
t.Run("负时长立即返回true", func(t *testing.T) {
|
||||
start := time.Now()
|
||||
ok := sleepWithContext(context.Background(), -1*time.Second)
|
||||
require.True(t, ok)
|
||||
require.Less(t, time.Since(start), 50*time.Millisecond)
|
||||
})
|
||||
|
||||
t.Run("正常等待后返回true", func(t *testing.T) {
|
||||
start := time.Now()
|
||||
ok := sleepWithContext(context.Background(), 50*time.Millisecond)
|
||||
elapsed := time.Since(start)
|
||||
require.True(t, ok)
|
||||
require.GreaterOrEqual(t, elapsed, 40*time.Millisecond)
|
||||
require.Less(t, elapsed, 500*time.Millisecond)
|
||||
})
|
||||
|
||||
t.Run("已取消context立即返回false", func(t *testing.T) {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
cancel()
|
||||
|
||||
start := time.Now()
|
||||
ok := sleepWithContext(ctx, 5*time.Second)
|
||||
require.False(t, ok)
|
||||
require.Less(t, time.Since(start), 50*time.Millisecond)
|
||||
})
|
||||
|
||||
t.Run("等待期间context取消返回false", func(t *testing.T) {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
go func() {
|
||||
time.Sleep(30 * time.Millisecond)
|
||||
cancel()
|
||||
}()
|
||||
|
||||
start := time.Now()
|
||||
ok := sleepWithContext(ctx, 5*time.Second)
|
||||
elapsed := time.Since(start)
|
||||
require.False(t, ok)
|
||||
require.Less(t, elapsed, 500*time.Millisecond)
|
||||
})
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// HandleFailoverError — 基本切换流程
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
func TestHandleFailoverError_BasicSwitch(t *testing.T) {
|
||||
t.Run("非重试错误_非Antigravity_直接切换", func(t *testing.T) {
|
||||
mock := &mockTempUnscheduler{}
|
||||
fs := NewFailoverState(3, false)
|
||||
err := newTestFailoverErr(500, false, false)
|
||||
|
||||
action := fs.HandleFailoverError(context.Background(), mock, 100, "openai", err)
|
||||
|
||||
require.Equal(t, FailoverSwitch, action)
|
||||
require.Equal(t, 1, fs.SwitchCount)
|
||||
require.Contains(t, fs.FailedAccountIDs, int64(100))
|
||||
require.Equal(t, err, fs.LastFailoverErr)
|
||||
require.False(t, fs.ForceCacheBilling)
|
||||
require.Empty(t, mock.calls, "不应调用 TempUnschedule")
|
||||
})
|
||||
|
||||
t.Run("非重试错误_Antigravity_第一次切换无延迟", func(t *testing.T) {
|
||||
// switchCount 从 0→1 时,sleepFailoverDelay(ctx, 1) 的延时 = (1-1)*1s = 0
|
||||
mock := &mockTempUnscheduler{}
|
||||
fs := NewFailoverState(3, false)
|
||||
err := newTestFailoverErr(500, false, false)
|
||||
|
||||
start := time.Now()
|
||||
action := fs.HandleFailoverError(context.Background(), mock, 100, service.PlatformAntigravity, err)
|
||||
elapsed := time.Since(start)
|
||||
|
||||
require.Equal(t, FailoverSwitch, action)
|
||||
require.Equal(t, 1, fs.SwitchCount)
|
||||
require.Less(t, elapsed, 200*time.Millisecond, "第一次切换延迟应为 0")
|
||||
})
|
||||
|
||||
t.Run("非重试错误_Antigravity_第二次切换有1秒延迟", func(t *testing.T) {
|
||||
// switchCount 从 1→2 时,sleepFailoverDelay(ctx, 2) 的延时 = (2-1)*1s = 1s
|
||||
mock := &mockTempUnscheduler{}
|
||||
fs := NewFailoverState(3, false)
|
||||
fs.SwitchCount = 1 // 模拟已切换一次
|
||||
|
||||
err := newTestFailoverErr(500, false, false)
|
||||
start := time.Now()
|
||||
action := fs.HandleFailoverError(context.Background(), mock, 200, service.PlatformAntigravity, err)
|
||||
elapsed := time.Since(start)
|
||||
|
||||
require.Equal(t, FailoverSwitch, action)
|
||||
require.Equal(t, 2, fs.SwitchCount)
|
||||
require.GreaterOrEqual(t, elapsed, 800*time.Millisecond, "第二次切换延迟应约 1s")
|
||||
require.Less(t, elapsed, 3*time.Second)
|
||||
})
|
||||
|
||||
t.Run("连续切换直到耗尽", func(t *testing.T) {
|
||||
mock := &mockTempUnscheduler{}
|
||||
fs := NewFailoverState(2, false)
|
||||
|
||||
// 第一次切换:0→1
|
||||
err1 := newTestFailoverErr(500, false, false)
|
||||
action := fs.HandleFailoverError(context.Background(), mock, 100, "openai", err1)
|
||||
require.Equal(t, FailoverSwitch, action)
|
||||
require.Equal(t, 1, fs.SwitchCount)
|
||||
|
||||
// 第二次切换:1→2
|
||||
err2 := newTestFailoverErr(502, false, false)
|
||||
action = fs.HandleFailoverError(context.Background(), mock, 200, "openai", err2)
|
||||
require.Equal(t, FailoverSwitch, action)
|
||||
require.Equal(t, 2, fs.SwitchCount)
|
||||
|
||||
// 第三次已耗尽:SwitchCount(2) >= MaxSwitches(2)
|
||||
err3 := newTestFailoverErr(503, false, false)
|
||||
action = fs.HandleFailoverError(context.Background(), mock, 300, "openai", err3)
|
||||
require.Equal(t, FailoverExhausted, action)
|
||||
require.Equal(t, 2, fs.SwitchCount, "耗尽时不应继续递增")
|
||||
|
||||
// 验证失败账号列表
|
||||
require.Len(t, fs.FailedAccountIDs, 3)
|
||||
require.Contains(t, fs.FailedAccountIDs, int64(100))
|
||||
require.Contains(t, fs.FailedAccountIDs, int64(200))
|
||||
require.Contains(t, fs.FailedAccountIDs, int64(300))
|
||||
|
||||
// LastFailoverErr 应为最后一次的错误
|
||||
require.Equal(t, err3, fs.LastFailoverErr)
|
||||
})
|
||||
|
||||
t.Run("MaxSwitches为0时首次即耗尽", func(t *testing.T) {
|
||||
mock := &mockTempUnscheduler{}
|
||||
fs := NewFailoverState(0, false)
|
||||
err := newTestFailoverErr(500, false, false)
|
||||
|
||||
action := fs.HandleFailoverError(context.Background(), mock, 100, "openai", err)
|
||||
require.Equal(t, FailoverExhausted, action)
|
||||
require.Equal(t, 0, fs.SwitchCount)
|
||||
require.Contains(t, fs.FailedAccountIDs, int64(100))
|
||||
})
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// HandleFailoverError — 缓存计费 (ForceCacheBilling)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
func TestHandleFailoverError_CacheBilling(t *testing.T) {
|
||||
t.Run("hasBoundSession为true时设置ForceCacheBilling", func(t *testing.T) {
|
||||
mock := &mockTempUnscheduler{}
|
||||
fs := NewFailoverState(3, true) // hasBoundSession=true
|
||||
err := newTestFailoverErr(500, false, false)
|
||||
|
||||
fs.HandleFailoverError(context.Background(), mock, 100, "openai", err)
|
||||
require.True(t, fs.ForceCacheBilling)
|
||||
})
|
||||
|
||||
t.Run("failoverErr.ForceCacheBilling为true时设置", func(t *testing.T) {
|
||||
mock := &mockTempUnscheduler{}
|
||||
fs := NewFailoverState(3, false)
|
||||
err := newTestFailoverErr(500, false, true) // ForceCacheBilling=true
|
||||
|
||||
fs.HandleFailoverError(context.Background(), mock, 100, "openai", err)
|
||||
require.True(t, fs.ForceCacheBilling)
|
||||
})
|
||||
|
||||
t.Run("两者均为false时不设置", func(t *testing.T) {
|
||||
mock := &mockTempUnscheduler{}
|
||||
fs := NewFailoverState(3, false)
|
||||
err := newTestFailoverErr(500, false, false)
|
||||
|
||||
fs.HandleFailoverError(context.Background(), mock, 100, "openai", err)
|
||||
require.False(t, fs.ForceCacheBilling)
|
||||
})
|
||||
|
||||
t.Run("一旦设置不会被后续错误重置", func(t *testing.T) {
|
||||
mock := &mockTempUnscheduler{}
|
||||
fs := NewFailoverState(3, false)
|
||||
|
||||
// 第一次:ForceCacheBilling=true → 设置
|
||||
err1 := newTestFailoverErr(500, false, true)
|
||||
fs.HandleFailoverError(context.Background(), mock, 100, "openai", err1)
|
||||
require.True(t, fs.ForceCacheBilling)
|
||||
|
||||
// 第二次:ForceCacheBilling=false → 仍然保持 true
|
||||
err2 := newTestFailoverErr(502, false, false)
|
||||
fs.HandleFailoverError(context.Background(), mock, 200, "openai", err2)
|
||||
require.True(t, fs.ForceCacheBilling, "ForceCacheBilling 一旦设置不应被重置")
|
||||
})
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// HandleFailoverError — 同账号重试 (RetryableOnSameAccount)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
func TestHandleFailoverError_SameAccountRetry(t *testing.T) {
|
||||
t.Run("第一次重试返回FailoverRetry", func(t *testing.T) {
|
||||
mock := &mockTempUnscheduler{}
|
||||
fs := NewFailoverState(3, false)
|
||||
err := newTestFailoverErr(400, true, false)
|
||||
|
||||
start := time.Now()
|
||||
action := fs.HandleFailoverError(context.Background(), mock, 100, "openai", err)
|
||||
elapsed := time.Since(start)
|
||||
|
||||
require.Equal(t, FailoverRetry, action)
|
||||
require.Equal(t, 1, fs.SameAccountRetryCount[100])
|
||||
require.Equal(t, 0, fs.SwitchCount, "同账号重试不应增加切换计数")
|
||||
require.NotContains(t, fs.FailedAccountIDs, int64(100), "同账号重试不应加入失败列表")
|
||||
require.Empty(t, mock.calls, "同账号重试期间不应调用 TempUnschedule")
|
||||
// 验证等待了 sameAccountRetryDelay (500ms)
|
||||
require.GreaterOrEqual(t, elapsed, 400*time.Millisecond)
|
||||
require.Less(t, elapsed, 2*time.Second)
|
||||
})
|
||||
|
||||
t.Run("第二次重试仍返回FailoverRetry", func(t *testing.T) {
|
||||
mock := &mockTempUnscheduler{}
|
||||
fs := NewFailoverState(3, false)
|
||||
err := newTestFailoverErr(400, true, false)
|
||||
|
||||
// 第一次
|
||||
action := fs.HandleFailoverError(context.Background(), mock, 100, "openai", err)
|
||||
require.Equal(t, FailoverRetry, action)
|
||||
require.Equal(t, 1, fs.SameAccountRetryCount[100])
|
||||
|
||||
// 第二次
|
||||
action = fs.HandleFailoverError(context.Background(), mock, 100, "openai", err)
|
||||
require.Equal(t, FailoverRetry, action)
|
||||
require.Equal(t, 2, fs.SameAccountRetryCount[100])
|
||||
|
||||
require.Empty(t, mock.calls, "两次重试期间均不应调用 TempUnschedule")
|
||||
})
|
||||
|
||||
t.Run("第三次重试耗尽_触发TempUnschedule并切换", func(t *testing.T) {
|
||||
mock := &mockTempUnscheduler{}
|
||||
fs := NewFailoverState(3, false)
|
||||
err := newTestFailoverErr(400, true, false)
|
||||
|
||||
// 第一次、第二次重试
|
||||
fs.HandleFailoverError(context.Background(), mock, 100, "openai", err)
|
||||
fs.HandleFailoverError(context.Background(), mock, 100, "openai", err)
|
||||
require.Equal(t, 2, fs.SameAccountRetryCount[100])
|
||||
|
||||
// 第三次:重试已达到 maxSameAccountRetries(2),应切换账号
|
||||
action := fs.HandleFailoverError(context.Background(), mock, 100, "openai", err)
|
||||
require.Equal(t, FailoverSwitch, action)
|
||||
require.Equal(t, 1, fs.SwitchCount)
|
||||
require.Contains(t, fs.FailedAccountIDs, int64(100))
|
||||
|
||||
// 验证 TempUnschedule 被调用
|
||||
require.Len(t, mock.calls, 1)
|
||||
require.Equal(t, int64(100), mock.calls[0].accountID)
|
||||
require.Equal(t, err, mock.calls[0].failoverErr)
|
||||
})
|
||||
|
||||
t.Run("不同账号独立跟踪重试次数", func(t *testing.T) {
|
||||
mock := &mockTempUnscheduler{}
|
||||
fs := NewFailoverState(5, false)
|
||||
err := newTestFailoverErr(400, true, false)
|
||||
|
||||
// 账号 100 第一次重试
|
||||
action := fs.HandleFailoverError(context.Background(), mock, 100, "openai", err)
|
||||
require.Equal(t, FailoverRetry, action)
|
||||
require.Equal(t, 1, fs.SameAccountRetryCount[100])
|
||||
|
||||
// 账号 200 第一次重试(独立计数)
|
||||
action = fs.HandleFailoverError(context.Background(), mock, 200, "openai", err)
|
||||
require.Equal(t, FailoverRetry, action)
|
||||
require.Equal(t, 1, fs.SameAccountRetryCount[200])
|
||||
require.Equal(t, 1, fs.SameAccountRetryCount[100], "账号 100 的计数不应受影响")
|
||||
})
|
||||
|
||||
t.Run("重试耗尽后再次遇到同账号_直接切换", func(t *testing.T) {
|
||||
mock := &mockTempUnscheduler{}
|
||||
fs := NewFailoverState(5, false)
|
||||
err := newTestFailoverErr(400, true, false)
|
||||
|
||||
// 耗尽账号 100 的重试
|
||||
fs.HandleFailoverError(context.Background(), mock, 100, "openai", err)
|
||||
fs.HandleFailoverError(context.Background(), mock, 100, "openai", err)
|
||||
// 第三次: 重试耗尽 → 切换
|
||||
action := fs.HandleFailoverError(context.Background(), mock, 100, "openai", err)
|
||||
require.Equal(t, FailoverSwitch, action)
|
||||
|
||||
// 再次遇到账号 100,计数仍为 2,条件不满足 → 直接切换
|
||||
action = fs.HandleFailoverError(context.Background(), mock, 100, "openai", err)
|
||||
require.Equal(t, FailoverSwitch, action)
|
||||
require.Len(t, mock.calls, 2, "第二次耗尽也应调用 TempUnschedule")
|
||||
})
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// HandleFailoverError — TempUnschedule 调用验证
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
func TestHandleFailoverError_TempUnschedule(t *testing.T) {
|
||||
t.Run("非重试错误不调用TempUnschedule", func(t *testing.T) {
|
||||
mock := &mockTempUnscheduler{}
|
||||
fs := NewFailoverState(3, false)
|
||||
err := newTestFailoverErr(500, false, false) // RetryableOnSameAccount=false
|
||||
|
||||
fs.HandleFailoverError(context.Background(), mock, 100, "openai", err)
|
||||
require.Empty(t, mock.calls)
|
||||
})
|
||||
|
||||
t.Run("重试错误耗尽后调用TempUnschedule_传入正确参数", func(t *testing.T) {
|
||||
mock := &mockTempUnscheduler{}
|
||||
fs := NewFailoverState(3, false)
|
||||
err := newTestFailoverErr(502, true, false)
|
||||
|
||||
// 耗尽重试
|
||||
fs.HandleFailoverError(context.Background(), mock, 42, "openai", err)
|
||||
fs.HandleFailoverError(context.Background(), mock, 42, "openai", err)
|
||||
fs.HandleFailoverError(context.Background(), mock, 42, "openai", err)
|
||||
|
||||
require.Len(t, mock.calls, 1)
|
||||
require.Equal(t, int64(42), mock.calls[0].accountID)
|
||||
require.Equal(t, 502, mock.calls[0].failoverErr.StatusCode)
|
||||
require.True(t, mock.calls[0].failoverErr.RetryableOnSameAccount)
|
||||
})
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// HandleFailoverError — Context 取消
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
func TestHandleFailoverError_ContextCanceled(t *testing.T) {
|
||||
t.Run("同账号重试sleep期间context取消", func(t *testing.T) {
|
||||
mock := &mockTempUnscheduler{}
|
||||
fs := NewFailoverState(3, false)
|
||||
err := newTestFailoverErr(400, true, false)
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
cancel() // 立即取消
|
||||
|
||||
start := time.Now()
|
||||
action := fs.HandleFailoverError(ctx, mock, 100, "openai", err)
|
||||
elapsed := time.Since(start)
|
||||
|
||||
require.Equal(t, FailoverCanceled, action)
|
||||
require.Less(t, elapsed, 100*time.Millisecond, "应立即返回")
|
||||
// 重试计数仍应递增
|
||||
require.Equal(t, 1, fs.SameAccountRetryCount[100])
|
||||
})
|
||||
|
||||
t.Run("Antigravity延迟期间context取消", func(t *testing.T) {
|
||||
mock := &mockTempUnscheduler{}
|
||||
fs := NewFailoverState(3, false)
|
||||
fs.SwitchCount = 1 // 下一次 switchCount=2 → delay = 1s
|
||||
err := newTestFailoverErr(500, false, false)
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
cancel() // 立即取消
|
||||
|
||||
start := time.Now()
|
||||
action := fs.HandleFailoverError(ctx, mock, 100, service.PlatformAntigravity, err)
|
||||
elapsed := time.Since(start)
|
||||
|
||||
require.Equal(t, FailoverCanceled, action)
|
||||
require.Less(t, elapsed, 100*time.Millisecond, "应立即返回而非等待 1s")
|
||||
})
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// HandleFailoverError — FailedAccountIDs 跟踪
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
func TestHandleFailoverError_FailedAccountIDs(t *testing.T) {
|
||||
t.Run("切换时添加到失败列表", func(t *testing.T) {
|
||||
mock := &mockTempUnscheduler{}
|
||||
fs := NewFailoverState(3, false)
|
||||
|
||||
fs.HandleFailoverError(context.Background(), mock, 100, "openai", newTestFailoverErr(500, false, false))
|
||||
require.Contains(t, fs.FailedAccountIDs, int64(100))
|
||||
|
||||
fs.HandleFailoverError(context.Background(), mock, 200, "openai", newTestFailoverErr(502, false, false))
|
||||
require.Contains(t, fs.FailedAccountIDs, int64(200))
|
||||
require.Len(t, fs.FailedAccountIDs, 2)
|
||||
})
|
||||
|
||||
t.Run("耗尽时也添加到失败列表", func(t *testing.T) {
|
||||
mock := &mockTempUnscheduler{}
|
||||
fs := NewFailoverState(0, false)
|
||||
|
||||
action := fs.HandleFailoverError(context.Background(), mock, 100, "openai", newTestFailoverErr(500, false, false))
|
||||
require.Equal(t, FailoverExhausted, action)
|
||||
require.Contains(t, fs.FailedAccountIDs, int64(100))
|
||||
})
|
||||
|
||||
t.Run("同账号重试期间不添加到失败列表", func(t *testing.T) {
|
||||
mock := &mockTempUnscheduler{}
|
||||
fs := NewFailoverState(3, false)
|
||||
|
||||
action := fs.HandleFailoverError(context.Background(), mock, 100, "openai", newTestFailoverErr(400, true, false))
|
||||
require.Equal(t, FailoverRetry, action)
|
||||
require.NotContains(t, fs.FailedAccountIDs, int64(100))
|
||||
})
|
||||
|
||||
t.Run("同一账号多次切换不重复添加", func(t *testing.T) {
|
||||
mock := &mockTempUnscheduler{}
|
||||
fs := NewFailoverState(5, false)
|
||||
|
||||
fs.HandleFailoverError(context.Background(), mock, 100, "openai", newTestFailoverErr(500, false, false))
|
||||
fs.HandleFailoverError(context.Background(), mock, 100, "openai", newTestFailoverErr(500, false, false))
|
||||
require.Len(t, fs.FailedAccountIDs, 1, "map 天然去重")
|
||||
})
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// HandleFailoverError — LastFailoverErr 更新
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
func TestHandleFailoverError_LastFailoverErr(t *testing.T) {
|
||||
t.Run("每次调用都更新LastFailoverErr", func(t *testing.T) {
|
||||
mock := &mockTempUnscheduler{}
|
||||
fs := NewFailoverState(3, false)
|
||||
|
||||
err1 := newTestFailoverErr(500, false, false)
|
||||
fs.HandleFailoverError(context.Background(), mock, 100, "openai", err1)
|
||||
require.Equal(t, err1, fs.LastFailoverErr)
|
||||
|
||||
err2 := newTestFailoverErr(502, false, false)
|
||||
fs.HandleFailoverError(context.Background(), mock, 200, "openai", err2)
|
||||
require.Equal(t, err2, fs.LastFailoverErr)
|
||||
})
|
||||
|
||||
t.Run("同账号重试时也更新LastFailoverErr", func(t *testing.T) {
|
||||
mock := &mockTempUnscheduler{}
|
||||
fs := NewFailoverState(3, false)
|
||||
|
||||
err := newTestFailoverErr(400, true, false)
|
||||
fs.HandleFailoverError(context.Background(), mock, 100, "openai", err)
|
||||
require.Equal(t, err, fs.LastFailoverErr)
|
||||
})
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// HandleFailoverError — 综合集成场景
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
func TestHandleFailoverError_IntegrationScenario(t *testing.T) {
|
||||
t.Run("模拟完整failover流程_多账号混合重试与切换", func(t *testing.T) {
|
||||
mock := &mockTempUnscheduler{}
|
||||
fs := NewFailoverState(3, true) // hasBoundSession=true
|
||||
|
||||
// 1. 账号 100 遇到可重试错误,同账号重试 2 次
|
||||
retryErr := newTestFailoverErr(400, true, false)
|
||||
action := fs.HandleFailoverError(context.Background(), mock, 100, "openai", retryErr)
|
||||
require.Equal(t, FailoverRetry, action)
|
||||
require.True(t, fs.ForceCacheBilling, "hasBoundSession=true 应设置 ForceCacheBilling")
|
||||
|
||||
action = fs.HandleFailoverError(context.Background(), mock, 100, "openai", retryErr)
|
||||
require.Equal(t, FailoverRetry, action)
|
||||
|
||||
// 2. 账号 100 重试耗尽 → TempUnschedule + 切换
|
||||
action = fs.HandleFailoverError(context.Background(), mock, 100, "openai", retryErr)
|
||||
require.Equal(t, FailoverSwitch, action)
|
||||
require.Equal(t, 1, fs.SwitchCount)
|
||||
require.Len(t, mock.calls, 1)
|
||||
|
||||
// 3. 账号 200 遇到不可重试错误 → 直接切换
|
||||
switchErr := newTestFailoverErr(500, false, false)
|
||||
action = fs.HandleFailoverError(context.Background(), mock, 200, "openai", switchErr)
|
||||
require.Equal(t, FailoverSwitch, action)
|
||||
require.Equal(t, 2, fs.SwitchCount)
|
||||
|
||||
// 4. 账号 300 遇到不可重试错误 → 再切换
|
||||
action = fs.HandleFailoverError(context.Background(), mock, 300, "openai", switchErr)
|
||||
require.Equal(t, FailoverSwitch, action)
|
||||
require.Equal(t, 3, fs.SwitchCount)
|
||||
|
||||
// 5. 账号 400 → 已耗尽 (SwitchCount=3 >= MaxSwitches=3)
|
||||
action = fs.HandleFailoverError(context.Background(), mock, 400, "openai", switchErr)
|
||||
require.Equal(t, FailoverExhausted, action)
|
||||
|
||||
// 最终状态验证
|
||||
require.Equal(t, 3, fs.SwitchCount, "耗尽时不再递增")
|
||||
require.Len(t, fs.FailedAccountIDs, 4, "4个不同账号都在失败列表中")
|
||||
require.True(t, fs.ForceCacheBilling)
|
||||
require.Len(t, mock.calls, 1, "只有账号 100 触发了 TempUnschedule")
|
||||
})
|
||||
|
||||
t.Run("模拟Antigravity平台完整流程", func(t *testing.T) {
|
||||
mock := &mockTempUnscheduler{}
|
||||
fs := NewFailoverState(2, false)
|
||||
|
||||
err := newTestFailoverErr(500, false, false)
|
||||
|
||||
// 第一次切换:delay = 0s
|
||||
start := time.Now()
|
||||
action := fs.HandleFailoverError(context.Background(), mock, 100, service.PlatformAntigravity, err)
|
||||
elapsed := time.Since(start)
|
||||
require.Equal(t, FailoverSwitch, action)
|
||||
require.Less(t, elapsed, 200*time.Millisecond, "第一次切换延迟为 0")
|
||||
|
||||
// 第二次切换:delay = 1s
|
||||
start = time.Now()
|
||||
action = fs.HandleFailoverError(context.Background(), mock, 200, service.PlatformAntigravity, err)
|
||||
elapsed = time.Since(start)
|
||||
require.Equal(t, FailoverSwitch, action)
|
||||
require.GreaterOrEqual(t, elapsed, 800*time.Millisecond, "第二次切换延迟约 1s")
|
||||
|
||||
// 第三次:耗尽(无延迟,因为在检查延迟之前就返回了)
|
||||
start = time.Now()
|
||||
action = fs.HandleFailoverError(context.Background(), mock, 300, service.PlatformAntigravity, err)
|
||||
elapsed = time.Since(start)
|
||||
require.Equal(t, FailoverExhausted, action)
|
||||
require.Less(t, elapsed, 200*time.Millisecond, "耗尽时不应有延迟")
|
||||
})
|
||||
|
||||
t.Run("ForceCacheBilling通过错误标志设置", func(t *testing.T) {
|
||||
mock := &mockTempUnscheduler{}
|
||||
fs := NewFailoverState(3, false) // hasBoundSession=false
|
||||
|
||||
// 第一次:ForceCacheBilling=false
|
||||
err1 := newTestFailoverErr(500, false, false)
|
||||
fs.HandleFailoverError(context.Background(), mock, 100, "openai", err1)
|
||||
require.False(t, fs.ForceCacheBilling)
|
||||
|
||||
// 第二次:ForceCacheBilling=true(Antigravity 粘性会话切换)
|
||||
err2 := newTestFailoverErr(500, false, true)
|
||||
fs.HandleFailoverError(context.Background(), mock, 200, "openai", err2)
|
||||
require.True(t, fs.ForceCacheBilling, "错误标志应触发 ForceCacheBilling")
|
||||
|
||||
// 第三次:ForceCacheBilling=false,但状态仍保持 true
|
||||
err3 := newTestFailoverErr(500, false, false)
|
||||
fs.HandleFailoverError(context.Background(), mock, 300, "openai", err3)
|
||||
require.True(t, fs.ForceCacheBilling, "不应重置")
|
||||
})
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// HandleFailoverError — 边界条件
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
func TestHandleFailoverError_EdgeCases(t *testing.T) {
|
||||
t.Run("StatusCode为0的错误也能正常处理", func(t *testing.T) {
|
||||
mock := &mockTempUnscheduler{}
|
||||
fs := NewFailoverState(3, false)
|
||||
err := newTestFailoverErr(0, false, false)
|
||||
|
||||
action := fs.HandleFailoverError(context.Background(), mock, 100, "openai", err)
|
||||
require.Equal(t, FailoverSwitch, action)
|
||||
})
|
||||
|
||||
t.Run("AccountID为0也能正常跟踪", func(t *testing.T) {
|
||||
mock := &mockTempUnscheduler{}
|
||||
fs := NewFailoverState(3, false)
|
||||
err := newTestFailoverErr(500, true, false)
|
||||
|
||||
action := fs.HandleFailoverError(context.Background(), mock, 0, "openai", err)
|
||||
require.Equal(t, FailoverRetry, action)
|
||||
require.Equal(t, 1, fs.SameAccountRetryCount[0])
|
||||
})
|
||||
|
||||
t.Run("负AccountID也能正常跟踪", func(t *testing.T) {
|
||||
mock := &mockTempUnscheduler{}
|
||||
fs := NewFailoverState(3, false)
|
||||
err := newTestFailoverErr(500, true, false)
|
||||
|
||||
action := fs.HandleFailoverError(context.Background(), mock, -1, "openai", err)
|
||||
require.Equal(t, FailoverRetry, action)
|
||||
require.Equal(t, 1, fs.SameAccountRetryCount[-1])
|
||||
})
|
||||
|
||||
t.Run("空平台名称不触发Antigravity延迟", func(t *testing.T) {
|
||||
mock := &mockTempUnscheduler{}
|
||||
fs := NewFailoverState(3, false)
|
||||
fs.SwitchCount = 1
|
||||
err := newTestFailoverErr(500, false, false)
|
||||
|
||||
start := time.Now()
|
||||
action := fs.HandleFailoverError(context.Background(), mock, 100, "", err)
|
||||
elapsed := time.Since(start)
|
||||
|
||||
require.Equal(t, FailoverSwitch, action)
|
||||
require.Less(t, elapsed, 200*time.Millisecond, "空平台不应触发 Antigravity 延迟")
|
||||
})
|
||||
}
|
||||
@@ -232,12 +232,7 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
|
||||
hasBoundSession := sessionKey != "" && sessionBoundAccountID > 0
|
||||
|
||||
if platform == service.PlatformGemini {
|
||||
maxAccountSwitches := h.maxAccountSwitchesGemini
|
||||
switchCount := 0
|
||||
failedAccountIDs := make(map[int64]struct{})
|
||||
sameAccountRetryCount := make(map[int64]int) // 同账号重试计数
|
||||
var lastFailoverErr *service.UpstreamFailoverError
|
||||
var forceCacheBilling bool // 粘性会话切换时的缓存计费标记
|
||||
fs := NewFailoverState(h.maxAccountSwitchesGemini, hasBoundSession)
|
||||
|
||||
// 单账号分组提前设置 SingleAccountRetry 标记,让 Service 层首次 503 就不设模型限流标记。
|
||||
// 避免单账号分组收到 503 (MODEL_CAPACITY_EXHAUSTED) 时设 29s 限流,导致后续请求连续快速失败。
|
||||
@@ -247,27 +242,27 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
|
||||
}
|
||||
|
||||
for {
|
||||
selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), apiKey.GroupID, sessionKey, reqModel, failedAccountIDs, "") // Gemini 不使用会话限制
|
||||
selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), apiKey.GroupID, sessionKey, reqModel, fs.FailedAccountIDs, "") // Gemini 不使用会话限制
|
||||
if err != nil {
|
||||
if len(failedAccountIDs) == 0 {
|
||||
if len(fs.FailedAccountIDs) == 0 {
|
||||
h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available accounts: "+err.Error(), streamStarted)
|
||||
return
|
||||
}
|
||||
// Antigravity 单账号退避重试:分组内没有其他可用账号时,
|
||||
// 对 503 错误不直接返回,而是清除排除列表、等待退避后重试同一个账号。
|
||||
// 谷歌上游 503 (MODEL_CAPACITY_EXHAUSTED) 通常是暂时性的,等几秒就能恢复。
|
||||
if lastFailoverErr != nil && lastFailoverErr.StatusCode == http.StatusServiceUnavailable && switchCount <= maxAccountSwitches {
|
||||
if sleepAntigravitySingleAccountBackoff(c.Request.Context(), switchCount) {
|
||||
log.Printf("Antigravity single-account 503 retry: clearing failed accounts, retry %d/%d", switchCount, maxAccountSwitches)
|
||||
failedAccountIDs = make(map[int64]struct{})
|
||||
if fs.LastFailoverErr != nil && fs.LastFailoverErr.StatusCode == http.StatusServiceUnavailable && fs.SwitchCount <= fs.MaxSwitches {
|
||||
if sleepAntigravitySingleAccountBackoff(c.Request.Context(), fs.SwitchCount) {
|
||||
log.Printf("Antigravity single-account 503 retry: clearing failed accounts, retry %d/%d", fs.SwitchCount, fs.MaxSwitches)
|
||||
fs.FailedAccountIDs = make(map[int64]struct{})
|
||||
// 设置 context 标记,让 Service 层预检查等待限流过期而非直接切换
|
||||
ctx := context.WithValue(c.Request.Context(), ctxkey.SingleAccountRetry, true)
|
||||
c.Request = c.Request.WithContext(ctx)
|
||||
continue
|
||||
}
|
||||
}
|
||||
if lastFailoverErr != nil {
|
||||
h.handleFailoverExhausted(c, lastFailoverErr, service.PlatformGemini, streamStarted)
|
||||
if fs.LastFailoverErr != nil {
|
||||
h.handleFailoverExhausted(c, fs.LastFailoverErr, service.PlatformGemini, streamStarted)
|
||||
} else {
|
||||
h.handleFailoverExhaustedSimple(c, 502, streamStarted)
|
||||
}
|
||||
@@ -346,8 +341,8 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
|
||||
// 转发请求 - 根据账号平台分流
|
||||
var result *service.ForwardResult
|
||||
requestCtx := c.Request.Context()
|
||||
if switchCount > 0 {
|
||||
requestCtx = context.WithValue(requestCtx, ctxkey.AccountSwitchCount, switchCount)
|
||||
if fs.SwitchCount > 0 {
|
||||
requestCtx = context.WithValue(requestCtx, ctxkey.AccountSwitchCount, fs.SwitchCount)
|
||||
}
|
||||
if account.Platform == service.PlatformAntigravity {
|
||||
result, err = h.antigravityGatewayService.ForwardGemini(requestCtx, c, account, reqModel, "generateContent", reqStream, body, hasBoundSession)
|
||||
@@ -360,40 +355,16 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
|
||||
if err != nil {
|
||||
var failoverErr *service.UpstreamFailoverError
|
||||
if errors.As(err, &failoverErr) {
|
||||
lastFailoverErr = failoverErr
|
||||
if needForceCacheBilling(hasBoundSession, failoverErr) {
|
||||
forceCacheBilling = true
|
||||
}
|
||||
|
||||
// 同账号重试:对 RetryableOnSameAccount 的临时性错误,先在同一账号上重试
|
||||
if failoverErr.RetryableOnSameAccount && sameAccountRetryCount[account.ID] < maxSameAccountRetries {
|
||||
sameAccountRetryCount[account.ID]++
|
||||
log.Printf("Account %d: retryable error %d, same-account retry %d/%d",
|
||||
account.ID, failoverErr.StatusCode, sameAccountRetryCount[account.ID], maxSameAccountRetries)
|
||||
if !sleepSameAccountRetryDelay(c.Request.Context()) {
|
||||
return
|
||||
}
|
||||
action := fs.HandleFailoverError(c.Request.Context(), h.gatewayService, account.ID, account.Platform, failoverErr)
|
||||
switch action {
|
||||
case FailoverRetry, FailoverSwitch:
|
||||
continue
|
||||
}
|
||||
|
||||
// 同账号重试用尽,执行临时封禁并切换账号
|
||||
if failoverErr.RetryableOnSameAccount {
|
||||
h.gatewayService.TempUnscheduleRetryableError(c.Request.Context(), account.ID, failoverErr)
|
||||
}
|
||||
|
||||
failedAccountIDs[account.ID] = struct{}{}
|
||||
if switchCount >= maxAccountSwitches {
|
||||
h.handleFailoverExhausted(c, failoverErr, service.PlatformGemini, streamStarted)
|
||||
case FailoverExhausted:
|
||||
h.handleFailoverExhausted(c, fs.LastFailoverErr, service.PlatformGemini, streamStarted)
|
||||
return
|
||||
case FailoverCanceled:
|
||||
return
|
||||
}
|
||||
switchCount++
|
||||
log.Printf("Account %d: upstream error %d, switching account %d/%d", account.ID, failoverErr.StatusCode, switchCount, maxAccountSwitches)
|
||||
if account.Platform == service.PlatformAntigravity {
|
||||
if !sleepFailoverDelay(c.Request.Context(), switchCount) {
|
||||
return
|
||||
}
|
||||
}
|
||||
continue
|
||||
}
|
||||
// 错误响应已在Forward中处理,这里只记录日志
|
||||
log.Printf("Forward request failed: %v", err)
|
||||
@@ -421,7 +392,7 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
|
||||
}); err != nil {
|
||||
log.Printf("Record usage failed: %v", err)
|
||||
}
|
||||
}(result, account, userAgent, clientIP, forceCacheBilling)
|
||||
}(result, account, userAgent, clientIP, fs.ForceCacheBilling)
|
||||
return
|
||||
}
|
||||
}
|
||||
@@ -442,37 +413,32 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
|
||||
}
|
||||
|
||||
for {
|
||||
maxAccountSwitches := h.maxAccountSwitches
|
||||
switchCount := 0
|
||||
failedAccountIDs := make(map[int64]struct{})
|
||||
sameAccountRetryCount := make(map[int64]int) // 同账号重试计数
|
||||
var lastFailoverErr *service.UpstreamFailoverError
|
||||
fs := NewFailoverState(h.maxAccountSwitches, hasBoundSession)
|
||||
retryWithFallback := false
|
||||
var forceCacheBilling bool // 粘性会话切换时的缓存计费标记
|
||||
|
||||
for {
|
||||
// 选择支持该模型的账号
|
||||
selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), currentAPIKey.GroupID, sessionKey, reqModel, failedAccountIDs, parsedReq.MetadataUserID)
|
||||
selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), currentAPIKey.GroupID, sessionKey, reqModel, fs.FailedAccountIDs, parsedReq.MetadataUserID)
|
||||
if err != nil {
|
||||
if len(failedAccountIDs) == 0 {
|
||||
if len(fs.FailedAccountIDs) == 0 {
|
||||
h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available accounts: "+err.Error(), streamStarted)
|
||||
return
|
||||
}
|
||||
// Antigravity 单账号退避重试:分组内没有其他可用账号时,
|
||||
// 对 503 错误不直接返回,而是清除排除列表、等待退避后重试同一个账号。
|
||||
// 谷歌上游 503 (MODEL_CAPACITY_EXHAUSTED) 通常是暂时性的,等几秒就能恢复。
|
||||
if lastFailoverErr != nil && lastFailoverErr.StatusCode == http.StatusServiceUnavailable && switchCount <= maxAccountSwitches {
|
||||
if sleepAntigravitySingleAccountBackoff(c.Request.Context(), switchCount) {
|
||||
log.Printf("Antigravity single-account 503 retry: clearing failed accounts, retry %d/%d", switchCount, maxAccountSwitches)
|
||||
failedAccountIDs = make(map[int64]struct{})
|
||||
if fs.LastFailoverErr != nil && fs.LastFailoverErr.StatusCode == http.StatusServiceUnavailable && fs.SwitchCount <= fs.MaxSwitches {
|
||||
if sleepAntigravitySingleAccountBackoff(c.Request.Context(), fs.SwitchCount) {
|
||||
log.Printf("Antigravity single-account 503 retry: clearing failed accounts, retry %d/%d", fs.SwitchCount, fs.MaxSwitches)
|
||||
fs.FailedAccountIDs = make(map[int64]struct{})
|
||||
// 设置 context 标记,让 Service 层预检查等待限流过期而非直接切换
|
||||
ctx := context.WithValue(c.Request.Context(), ctxkey.SingleAccountRetry, true)
|
||||
c.Request = c.Request.WithContext(ctx)
|
||||
continue
|
||||
}
|
||||
}
|
||||
if lastFailoverErr != nil {
|
||||
h.handleFailoverExhausted(c, lastFailoverErr, platform, streamStarted)
|
||||
if fs.LastFailoverErr != nil {
|
||||
h.handleFailoverExhausted(c, fs.LastFailoverErr, platform, streamStarted)
|
||||
} else {
|
||||
h.handleFailoverExhaustedSimple(c, 502, streamStarted)
|
||||
}
|
||||
@@ -549,8 +515,8 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
|
||||
// 转发请求 - 根据账号平台分流
|
||||
var result *service.ForwardResult
|
||||
requestCtx := c.Request.Context()
|
||||
if switchCount > 0 {
|
||||
requestCtx = context.WithValue(requestCtx, ctxkey.AccountSwitchCount, switchCount)
|
||||
if fs.SwitchCount > 0 {
|
||||
requestCtx = context.WithValue(requestCtx, ctxkey.AccountSwitchCount, fs.SwitchCount)
|
||||
}
|
||||
if account.Platform == service.PlatformAntigravity && account.Type != service.AccountTypeAPIKey {
|
||||
result, err = h.antigravityGatewayService.Forward(requestCtx, c, account, body, hasBoundSession)
|
||||
@@ -598,40 +564,16 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
|
||||
}
|
||||
var failoverErr *service.UpstreamFailoverError
|
||||
if errors.As(err, &failoverErr) {
|
||||
lastFailoverErr = failoverErr
|
||||
if needForceCacheBilling(hasBoundSession, failoverErr) {
|
||||
forceCacheBilling = true
|
||||
}
|
||||
|
||||
// 同账号重试:对 RetryableOnSameAccount 的临时性错误,先在同一账号上重试
|
||||
if failoverErr.RetryableOnSameAccount && sameAccountRetryCount[account.ID] < maxSameAccountRetries {
|
||||
sameAccountRetryCount[account.ID]++
|
||||
log.Printf("Account %d: retryable error %d, same-account retry %d/%d",
|
||||
account.ID, failoverErr.StatusCode, sameAccountRetryCount[account.ID], maxSameAccountRetries)
|
||||
if !sleepSameAccountRetryDelay(c.Request.Context()) {
|
||||
return
|
||||
}
|
||||
action := fs.HandleFailoverError(c.Request.Context(), h.gatewayService, account.ID, account.Platform, failoverErr)
|
||||
switch action {
|
||||
case FailoverRetry, FailoverSwitch:
|
||||
continue
|
||||
}
|
||||
|
||||
// 同账号重试用尽,执行临时封禁并切换账号
|
||||
if failoverErr.RetryableOnSameAccount {
|
||||
h.gatewayService.TempUnscheduleRetryableError(c.Request.Context(), account.ID, failoverErr)
|
||||
}
|
||||
|
||||
failedAccountIDs[account.ID] = struct{}{}
|
||||
if switchCount >= maxAccountSwitches {
|
||||
h.handleFailoverExhausted(c, failoverErr, account.Platform, streamStarted)
|
||||
case FailoverExhausted:
|
||||
h.handleFailoverExhausted(c, fs.LastFailoverErr, account.Platform, streamStarted)
|
||||
return
|
||||
case FailoverCanceled:
|
||||
return
|
||||
}
|
||||
switchCount++
|
||||
log.Printf("Account %d: upstream error %d, switching account %d/%d", account.ID, failoverErr.StatusCode, switchCount, maxAccountSwitches)
|
||||
if account.Platform == service.PlatformAntigravity {
|
||||
if !sleepFailoverDelay(c.Request.Context(), switchCount) {
|
||||
return
|
||||
}
|
||||
}
|
||||
continue
|
||||
}
|
||||
// 错误响应已在Forward中处理,这里只记录日志
|
||||
log.Printf("Account %d: Forward request failed: %v", account.ID, err)
|
||||
@@ -659,7 +601,7 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
|
||||
}); err != nil {
|
||||
log.Printf("Record usage failed: %v", err)
|
||||
}
|
||||
}(result, account, userAgent, clientIP, forceCacheBilling)
|
||||
}(result, account, userAgent, clientIP, fs.ForceCacheBilling)
|
||||
return
|
||||
}
|
||||
if !retryWithFallback {
|
||||
@@ -899,23 +841,6 @@ func needForceCacheBilling(hasBoundSession bool, failoverErr *service.UpstreamFa
|
||||
return hasBoundSession || (failoverErr != nil && failoverErr.ForceCacheBilling)
|
||||
}
|
||||
|
||||
const (
|
||||
// maxSameAccountRetries 同账号重试次数上限(针对 RetryableOnSameAccount 错误)
|
||||
maxSameAccountRetries = 2
|
||||
// sameAccountRetryDelay 同账号重试间隔
|
||||
sameAccountRetryDelay = 500 * time.Millisecond
|
||||
)
|
||||
|
||||
// sleepSameAccountRetryDelay 同账号重试固定延时,返回 false 表示 context 已取消。
|
||||
func sleepSameAccountRetryDelay(ctx context.Context) bool {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return false
|
||||
case <-time.After(sameAccountRetryDelay):
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
// sleepFailoverDelay 账号切换线性递增延时:第1次0s、第2次1s、第3次2s…
|
||||
// 返回 false 表示 context 已取消。
|
||||
func sleepFailoverDelay(ctx context.Context, switchCount int) bool {
|
||||
@@ -971,6 +896,10 @@ func (h *GatewayHandler) handleFailoverExhausted(c *gin.Context, failoverErr *se
|
||||
msg = *rule.CustomMessage
|
||||
}
|
||||
|
||||
if rule.SkipMonitoring {
|
||||
c.Set(service.OpsSkipPassthroughKey, true)
|
||||
}
|
||||
|
||||
h.handleStreamingAwareError(c, respCode, "upstream_error", msg, streamStarted)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -321,11 +321,7 @@ func (h *GatewayHandler) GeminiV1BetaModels(c *gin.Context) {
|
||||
hasBoundSession := sessionKey != "" && sessionBoundAccountID > 0
|
||||
cleanedForUnknownBinding := false
|
||||
|
||||
maxAccountSwitches := h.maxAccountSwitchesGemini
|
||||
switchCount := 0
|
||||
failedAccountIDs := make(map[int64]struct{})
|
||||
var lastFailoverErr *service.UpstreamFailoverError
|
||||
var forceCacheBilling bool // 粘性会话切换时的缓存计费标记
|
||||
fs := NewFailoverState(h.maxAccountSwitchesGemini, hasBoundSession)
|
||||
|
||||
// 单账号分组提前设置 SingleAccountRetry 标记,让 Service 层首次 503 就不设模型限流标记。
|
||||
// 避免单账号分组收到 503 (MODEL_CAPACITY_EXHAUSTED) 时设 29s 限流,导致后续请求连续快速失败。
|
||||
@@ -335,26 +331,26 @@ func (h *GatewayHandler) GeminiV1BetaModels(c *gin.Context) {
|
||||
}
|
||||
|
||||
for {
|
||||
selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), apiKey.GroupID, sessionKey, modelName, failedAccountIDs, "") // Gemini 不使用会话限制
|
||||
selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), apiKey.GroupID, sessionKey, modelName, fs.FailedAccountIDs, "") // Gemini 不使用会话限制
|
||||
if err != nil {
|
||||
if len(failedAccountIDs) == 0 {
|
||||
if len(fs.FailedAccountIDs) == 0 {
|
||||
googleError(c, http.StatusServiceUnavailable, "No available Gemini accounts: "+err.Error())
|
||||
return
|
||||
}
|
||||
// Antigravity 单账号退避重试:分组内没有其他可用账号时,
|
||||
// 对 503 错误不直接返回,而是清除排除列表、等待退避后重试同一个账号。
|
||||
// 谷歌上游 503 (MODEL_CAPACITY_EXHAUSTED) 通常是暂时性的,等几秒就能恢复。
|
||||
if lastFailoverErr != nil && lastFailoverErr.StatusCode == http.StatusServiceUnavailable && switchCount <= maxAccountSwitches {
|
||||
if sleepAntigravitySingleAccountBackoff(c.Request.Context(), switchCount) {
|
||||
log.Printf("Antigravity single-account 503 retry: clearing failed accounts, retry %d/%d", switchCount, maxAccountSwitches)
|
||||
failedAccountIDs = make(map[int64]struct{})
|
||||
if fs.LastFailoverErr != nil && fs.LastFailoverErr.StatusCode == http.StatusServiceUnavailable && fs.SwitchCount <= fs.MaxSwitches {
|
||||
if sleepAntigravitySingleAccountBackoff(c.Request.Context(), fs.SwitchCount) {
|
||||
log.Printf("Antigravity single-account 503 retry: clearing failed accounts, retry %d/%d", fs.SwitchCount, fs.MaxSwitches)
|
||||
fs.FailedAccountIDs = make(map[int64]struct{})
|
||||
// 设置 context 标记,让 Service 层预检查等待限流过期而非直接切换
|
||||
ctx := context.WithValue(c.Request.Context(), ctxkey.SingleAccountRetry, true)
|
||||
c.Request = c.Request.WithContext(ctx)
|
||||
continue
|
||||
}
|
||||
}
|
||||
h.handleGeminiFailoverExhausted(c, lastFailoverErr)
|
||||
h.handleGeminiFailoverExhausted(c, fs.LastFailoverErr)
|
||||
return
|
||||
}
|
||||
account := selection.Account
|
||||
@@ -429,8 +425,8 @@ func (h *GatewayHandler) GeminiV1BetaModels(c *gin.Context) {
|
||||
// 5) forward (根据平台分流)
|
||||
var result *service.ForwardResult
|
||||
requestCtx := c.Request.Context()
|
||||
if switchCount > 0 {
|
||||
requestCtx = context.WithValue(requestCtx, ctxkey.AccountSwitchCount, switchCount)
|
||||
if fs.SwitchCount > 0 {
|
||||
requestCtx = context.WithValue(requestCtx, ctxkey.AccountSwitchCount, fs.SwitchCount)
|
||||
}
|
||||
if account.Platform == service.PlatformAntigravity && account.Type != service.AccountTypeAPIKey {
|
||||
result, err = h.antigravityGatewayService.ForwardGemini(requestCtx, c, account, modelName, action, stream, body, hasBoundSession)
|
||||
@@ -443,23 +439,16 @@ func (h *GatewayHandler) GeminiV1BetaModels(c *gin.Context) {
|
||||
if err != nil {
|
||||
var failoverErr *service.UpstreamFailoverError
|
||||
if errors.As(err, &failoverErr) {
|
||||
failedAccountIDs[account.ID] = struct{}{}
|
||||
lastFailoverErr = failoverErr
|
||||
if needForceCacheBilling(hasBoundSession, failoverErr) {
|
||||
forceCacheBilling = true
|
||||
}
|
||||
if switchCount >= maxAccountSwitches {
|
||||
h.handleGeminiFailoverExhausted(c, failoverErr)
|
||||
action := fs.HandleFailoverError(c.Request.Context(), h.gatewayService, account.ID, account.Platform, failoverErr)
|
||||
switch action {
|
||||
case FailoverRetry, FailoverSwitch:
|
||||
continue
|
||||
case FailoverExhausted:
|
||||
h.handleGeminiFailoverExhausted(c, fs.LastFailoverErr)
|
||||
return
|
||||
case FailoverCanceled:
|
||||
return
|
||||
}
|
||||
switchCount++
|
||||
log.Printf("Gemini account %d: upstream error %d, switching account %d/%d", account.ID, failoverErr.StatusCode, switchCount, maxAccountSwitches)
|
||||
if account.Platform == service.PlatformAntigravity {
|
||||
if !sleepFailoverDelay(c.Request.Context(), switchCount) {
|
||||
return
|
||||
}
|
||||
}
|
||||
continue
|
||||
}
|
||||
// ForwardNative already wrote the response
|
||||
log.Printf("Gemini native forward failed: %v", err)
|
||||
@@ -505,7 +494,7 @@ func (h *GatewayHandler) GeminiV1BetaModels(c *gin.Context) {
|
||||
}); err != nil {
|
||||
log.Printf("Record usage failed: %v", err)
|
||||
}
|
||||
}(result, account, userAgent, clientIP, forceCacheBilling)
|
||||
}(result, account, userAgent, clientIP, fs.ForceCacheBilling)
|
||||
return
|
||||
}
|
||||
}
|
||||
@@ -553,6 +542,10 @@ func (h *GatewayHandler) handleGeminiFailoverExhausted(c *gin.Context, failoverE
|
||||
msg = *rule.CustomMessage
|
||||
}
|
||||
|
||||
if rule.SkipMonitoring {
|
||||
c.Set(service.OpsSkipPassthroughKey, true)
|
||||
}
|
||||
|
||||
googleError(c, respCode, msg)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -354,6 +354,10 @@ func (h *OpenAIGatewayHandler) handleFailoverExhausted(c *gin.Context, failoverE
|
||||
msg = *rule.CustomMessage
|
||||
}
|
||||
|
||||
if rule.SkipMonitoring {
|
||||
c.Set(service.OpsSkipPassthroughKey, true)
|
||||
}
|
||||
|
||||
h.handleStreamingAwareError(c, respCode, "upstream_error", msg, streamStarted)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -537,6 +537,13 @@ func OpsErrorLoggerMiddleware(ops *service.OpsService) gin.HandlerFunc {
|
||||
// Store request headers/body only when an upstream error occurred to keep overhead minimal.
|
||||
entry.RequestHeadersJSON = extractOpsRetryRequestHeaders(c)
|
||||
|
||||
// Skip logging if a passthrough rule with skip_monitoring=true matched.
|
||||
if v, ok := c.Get(service.OpsSkipPassthroughKey); ok {
|
||||
if skip, _ := v.(bool); skip {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
enqueueOpsErrorLog(ops, entry, requestBody)
|
||||
return
|
||||
}
|
||||
@@ -544,6 +551,13 @@ func OpsErrorLoggerMiddleware(ops *service.OpsService) gin.HandlerFunc {
|
||||
body := w.buf.Bytes()
|
||||
parsed := parseOpsErrorResponse(body)
|
||||
|
||||
// Skip logging if a passthrough rule with skip_monitoring=true matched.
|
||||
if v, ok := c.Get(service.OpsSkipPassthroughKey); ok {
|
||||
if skip, _ := v.(bool); skip {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// Skip logging if the error should be filtered based on settings
|
||||
if shouldSkipOpsErrorLog(c.Request.Context(), ops, parsed.Message, string(body), c.Request.URL.Path) {
|
||||
return
|
||||
|
||||
@@ -18,6 +18,7 @@ type ErrorPassthroughRule struct {
|
||||
ResponseCode *int `json:"response_code"` // 自定义状态码(passthrough_code=false 时使用)
|
||||
PassthroughBody bool `json:"passthrough_body"` // 是否透传原始错误信息
|
||||
CustomMessage *string `json:"custom_message"` // 自定义错误信息(passthrough_body=false 时使用)
|
||||
SkipMonitoring bool `json:"skip_monitoring"` // 是否跳过运维监控记录
|
||||
Description *string `json:"description"` // 规则描述
|
||||
CreatedAt time.Time `json:"created_at"`
|
||||
UpdatedAt time.Time `json:"updated_at"`
|
||||
|
||||
@@ -54,7 +54,8 @@ func (r *errorPassthroughRepository) Create(ctx context.Context, rule *model.Err
|
||||
SetPriority(rule.Priority).
|
||||
SetMatchMode(rule.MatchMode).
|
||||
SetPassthroughCode(rule.PassthroughCode).
|
||||
SetPassthroughBody(rule.PassthroughBody)
|
||||
SetPassthroughBody(rule.PassthroughBody).
|
||||
SetSkipMonitoring(rule.SkipMonitoring)
|
||||
|
||||
if len(rule.ErrorCodes) > 0 {
|
||||
builder.SetErrorCodes(rule.ErrorCodes)
|
||||
@@ -90,7 +91,8 @@ func (r *errorPassthroughRepository) Update(ctx context.Context, rule *model.Err
|
||||
SetPriority(rule.Priority).
|
||||
SetMatchMode(rule.MatchMode).
|
||||
SetPassthroughCode(rule.PassthroughCode).
|
||||
SetPassthroughBody(rule.PassthroughBody)
|
||||
SetPassthroughBody(rule.PassthroughBody).
|
||||
SetSkipMonitoring(rule.SkipMonitoring)
|
||||
|
||||
// 处理可选字段
|
||||
if len(rule.ErrorCodes) > 0 {
|
||||
@@ -149,6 +151,7 @@ func (r *errorPassthroughRepository) toModel(e *ent.ErrorPassthroughRule) *model
|
||||
Platforms: e.Platforms,
|
||||
PassthroughCode: e.PassthroughCode,
|
||||
PassthroughBody: e.PassthroughBody,
|
||||
SkipMonitoring: e.SkipMonitoring,
|
||||
CreatedAt: e.CreatedAt,
|
||||
UpdatedAt: e.UpdatedAt,
|
||||
}
|
||||
|
||||
@@ -16,6 +16,7 @@ import (
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
@@ -40,13 +41,11 @@ const (
|
||||
antigravitySmartRetryMaxAttempts = 1 // 智能重试最大次数(仅重试 1 次,防止重复限流/长期等待)
|
||||
antigravityDefaultRateLimitDuration = 30 * time.Second // 默认限流时间(无 retryDelay 时使用)
|
||||
|
||||
// MODEL_CAPACITY_EXHAUSTED 专用常量
|
||||
// 容量不足是临时状态,所有账号共享容量池,与限流不同
|
||||
// - retryDelay < antigravityModelCapacityWaitThreshold: 按实际 retryDelay 等待后重试 1 次
|
||||
// - retryDelay >= antigravityModelCapacityWaitThreshold 或无 retryDelay: 等待 20s 后重试 1 次
|
||||
// - 重试仍为容量不足: 切换账号
|
||||
// - 重试遇到其他错误: 按实际错误码处理
|
||||
antigravityModelCapacityWaitThreshold = 20 * time.Second // 容量不足等待阈值
|
||||
// MODEL_CAPACITY_EXHAUSTED 专用重试参数
|
||||
// 模型容量不足时,所有账号共享同一容量池,切换账号无意义
|
||||
// 使用固定 1s 间隔重试,最多重试 60 次
|
||||
antigravityModelCapacityRetryMaxAttempts = 60
|
||||
antigravityModelCapacityRetryWait = 1 * time.Second
|
||||
|
||||
// Google RPC 状态和类型常量
|
||||
googleRPCStatusResourceExhausted = "RESOURCE_EXHAUSTED"
|
||||
@@ -68,6 +67,9 @@ const (
|
||||
// 单账号 503 退避重试:原地重试的总累计等待时间上限
|
||||
// 超过此上限将不再重试,直接返回 503
|
||||
antigravitySingleAccountSmartRetryTotalMaxWait = 30 * time.Second
|
||||
|
||||
// MODEL_CAPACITY_EXHAUSTED 全局去重:重试全部失败后的 cooldown 时间
|
||||
antigravityModelCapacityCooldown = 10 * time.Second
|
||||
)
|
||||
|
||||
// antigravityPassthroughErrorMessages 透传给客户端的错误消息白名单(小写)
|
||||
@@ -76,8 +78,15 @@ var antigravityPassthroughErrorMessages = []string{
|
||||
"prompt is too long",
|
||||
}
|
||||
|
||||
// MODEL_CAPACITY_EXHAUSTED 全局去重:避免多个并发请求同时对同一模型进行容量耗尽重试
|
||||
var (
|
||||
modelCapacityExhaustedMu sync.RWMutex
|
||||
modelCapacityExhaustedUntil = make(map[string]time.Time) // modelName -> cooldown until
|
||||
)
|
||||
|
||||
const (
|
||||
antigravityBillingModelEnv = "GATEWAY_ANTIGRAVITY_BILL_WITH_MAPPED_MODEL"
|
||||
antigravityForwardBaseURLEnv = "GATEWAY_ANTIGRAVITY_FORWARD_BASE_URL"
|
||||
antigravityFallbackSecondsEnv = "GATEWAY_ANTIGRAVITY_FALLBACK_COOLDOWN_SECONDS"
|
||||
)
|
||||
|
||||
@@ -139,6 +148,20 @@ type antigravityRetryLoopResult struct {
|
||||
resp *http.Response
|
||||
}
|
||||
|
||||
// resolveAntigravityForwardBaseURL 解析转发用 base URL。
|
||||
// 默认使用 daily(ForwardBaseURLs 的首个地址);当环境变量为 prod 时使用第二个地址。
|
||||
func resolveAntigravityForwardBaseURL() string {
|
||||
baseURLs := antigravity.ForwardBaseURLs()
|
||||
if len(baseURLs) == 0 {
|
||||
return ""
|
||||
}
|
||||
mode := strings.ToLower(strings.TrimSpace(os.Getenv(antigravityForwardBaseURLEnv)))
|
||||
if mode == "prod" && len(baseURLs) > 1 {
|
||||
return baseURLs[1]
|
||||
}
|
||||
return baseURLs[0]
|
||||
}
|
||||
|
||||
// smartRetryAction 智能重试的处理结果
|
||||
type smartRetryAction int
|
||||
|
||||
@@ -168,11 +191,6 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam
|
||||
// 判断是否触发智能重试
|
||||
shouldSmartRetry, shouldRateLimitModel, waitDuration, modelName, isModelCapacityExhausted := shouldTriggerAntigravitySmartRetry(p.account, respBody)
|
||||
|
||||
// MODEL_CAPACITY_EXHAUSTED: 独立处理
|
||||
if isModelCapacityExhausted {
|
||||
return s.handleModelCapacityExhaustedRetry(p, resp, respBody, baseURL, waitDuration, modelName)
|
||||
}
|
||||
|
||||
// 情况1: retryDelay >= 阈值,限流模型并切换账号
|
||||
if shouldRateLimitModel {
|
||||
// 单账号 503 退避重试模式:不设限流、不切换账号,改为原地等待+重试
|
||||
@@ -208,20 +226,48 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam
|
||||
}
|
||||
}
|
||||
|
||||
// 情况2: retryDelay < 阈值,智能重试(最多 antigravitySmartRetryMaxAttempts 次)
|
||||
// 情况2: retryDelay < 阈值(或 MODEL_CAPACITY_EXHAUSTED),智能重试
|
||||
if shouldSmartRetry {
|
||||
var lastRetryResp *http.Response
|
||||
var lastRetryBody []byte
|
||||
|
||||
for attempt := 1; attempt <= antigravitySmartRetryMaxAttempts; attempt++ {
|
||||
log.Printf("%s status=%d oauth_smart_retry attempt=%d/%d delay=%v model=%s account=%d",
|
||||
p.prefix, resp.StatusCode, attempt, antigravitySmartRetryMaxAttempts, waitDuration, modelName, p.account.ID)
|
||||
// MODEL_CAPACITY_EXHAUSTED 使用独立的重试参数(60 次,固定 1s 间隔)
|
||||
maxAttempts := antigravitySmartRetryMaxAttempts
|
||||
if isModelCapacityExhausted {
|
||||
maxAttempts = antigravityModelCapacityRetryMaxAttempts
|
||||
waitDuration = antigravityModelCapacityRetryWait
|
||||
|
||||
// 全局去重:如果其他 goroutine 已在重试同一模型且尚在 cooldown 中,直接返回 503
|
||||
if modelName != "" {
|
||||
modelCapacityExhaustedMu.RLock()
|
||||
cooldownUntil, exists := modelCapacityExhaustedUntil[modelName]
|
||||
modelCapacityExhaustedMu.RUnlock()
|
||||
if exists && time.Now().Before(cooldownUntil) {
|
||||
log.Printf("%s status=%d model_capacity_exhausted_dedup model=%s account=%d cooldown_until=%v (skip retry)",
|
||||
p.prefix, resp.StatusCode, modelName, p.account.ID, cooldownUntil.Format("15:04:05"))
|
||||
return &smartRetryResult{
|
||||
action: smartRetryActionBreakWithResp,
|
||||
resp: &http.Response{
|
||||
StatusCode: resp.StatusCode,
|
||||
Header: resp.Header.Clone(),
|
||||
Body: io.NopCloser(bytes.NewReader(respBody)),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for attempt := 1; attempt <= maxAttempts; attempt++ {
|
||||
log.Printf("%s status=%d oauth_smart_retry attempt=%d/%d delay=%v model=%s account=%d",
|
||||
p.prefix, resp.StatusCode, attempt, maxAttempts, waitDuration, modelName, p.account.ID)
|
||||
|
||||
timer := time.NewTimer(waitDuration)
|
||||
select {
|
||||
case <-p.ctx.Done():
|
||||
timer.Stop()
|
||||
log.Printf("%s status=context_canceled_during_smart_retry", p.prefix)
|
||||
return &smartRetryResult{action: smartRetryActionBreakWithResp, err: p.ctx.Err()}
|
||||
case <-time.After(waitDuration):
|
||||
case <-timer.C:
|
||||
}
|
||||
|
||||
// 智能重试:创建新请求
|
||||
@@ -241,13 +287,19 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam
|
||||
|
||||
retryResp, retryErr := p.httpUpstream.Do(retryReq, p.proxyURL, p.account.ID, p.account.Concurrency)
|
||||
if retryErr == nil && retryResp != nil && retryResp.StatusCode != http.StatusTooManyRequests && retryResp.StatusCode != http.StatusServiceUnavailable {
|
||||
log.Printf("%s status=%d smart_retry_success attempt=%d/%d", p.prefix, retryResp.StatusCode, attempt, antigravitySmartRetryMaxAttempts)
|
||||
log.Printf("%s status=%d smart_retry_success attempt=%d/%d", p.prefix, retryResp.StatusCode, attempt, maxAttempts)
|
||||
// 重试成功,清除 MODEL_CAPACITY_EXHAUSTED cooldown
|
||||
if isModelCapacityExhausted && modelName != "" {
|
||||
modelCapacityExhaustedMu.Lock()
|
||||
delete(modelCapacityExhaustedUntil, modelName)
|
||||
modelCapacityExhaustedMu.Unlock()
|
||||
}
|
||||
return &smartRetryResult{action: smartRetryActionBreakWithResp, resp: retryResp}
|
||||
}
|
||||
|
||||
// 网络错误时,继续重试
|
||||
if retryErr != nil || retryResp == nil {
|
||||
log.Printf("%s status=smart_retry_network_error attempt=%d/%d error=%v", p.prefix, attempt, antigravitySmartRetryMaxAttempts, retryErr)
|
||||
log.Printf("%s status=smart_retry_network_error attempt=%d/%d error=%v", p.prefix, attempt, maxAttempts, retryErr)
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -257,12 +309,12 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam
|
||||
}
|
||||
lastRetryResp = retryResp
|
||||
if retryResp != nil {
|
||||
lastRetryBody, _ = io.ReadAll(io.LimitReader(retryResp.Body, 2<<20))
|
||||
lastRetryBody, _ = io.ReadAll(io.LimitReader(retryResp.Body, 8<<10))
|
||||
_ = retryResp.Body.Close()
|
||||
}
|
||||
|
||||
// 解析新的重试信息,用于下次重试的等待时间
|
||||
if attempt < antigravitySmartRetryMaxAttempts && lastRetryBody != nil {
|
||||
// 解析新的重试信息,用于下次重试的等待时间(MODEL_CAPACITY_EXHAUSTED 使用固定循环,跳过)
|
||||
if !isModelCapacityExhausted && attempt < maxAttempts && lastRetryBody != nil {
|
||||
newShouldRetry, _, newWaitDuration, _, _ := shouldTriggerAntigravitySmartRetry(p.account, lastRetryBody)
|
||||
if newShouldRetry && newWaitDuration > 0 {
|
||||
waitDuration = newWaitDuration
|
||||
@@ -280,6 +332,27 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam
|
||||
retryBody = respBody
|
||||
}
|
||||
|
||||
// MODEL_CAPACITY_EXHAUSTED:模型容量不足,切换账号无意义
|
||||
// 直接返回上游错误响应,不设置模型限流,不切换账号
|
||||
if isModelCapacityExhausted {
|
||||
// 设置 cooldown,让后续请求快速失败,避免重复重试
|
||||
if modelName != "" {
|
||||
modelCapacityExhaustedMu.Lock()
|
||||
modelCapacityExhaustedUntil[modelName] = time.Now().Add(antigravityModelCapacityCooldown)
|
||||
modelCapacityExhaustedMu.Unlock()
|
||||
}
|
||||
log.Printf("%s status=%d smart_retry_exhausted_model_capacity attempts=%d model=%s account=%d body=%s (model capacity exhausted, not switching account)",
|
||||
p.prefix, resp.StatusCode, maxAttempts, modelName, p.account.ID, truncateForLog(retryBody, 200))
|
||||
return &smartRetryResult{
|
||||
action: smartRetryActionBreakWithResp,
|
||||
resp: &http.Response{
|
||||
StatusCode: resp.StatusCode,
|
||||
Header: resp.Header.Clone(),
|
||||
Body: io.NopCloser(bytes.NewReader(retryBody)),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// 单账号 503 退避重试模式:智能重试耗尽后不设限流、不切换账号,
|
||||
// 直接返回 503 让 Handler 层的单账号退避循环做最终处理。
|
||||
if resp.StatusCode == http.StatusServiceUnavailable && isSingleAccountRetry(p.ctx) {
|
||||
@@ -296,7 +369,7 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam
|
||||
}
|
||||
|
||||
log.Printf("%s status=%d smart_retry_exhausted attempts=%d model=%s account=%d upstream_retry_delay=%v body=%s (switch account)",
|
||||
p.prefix, resp.StatusCode, antigravitySmartRetryMaxAttempts, modelName, p.account.ID, rateLimitDuration, truncateForLog(retryBody, 200))
|
||||
p.prefix, resp.StatusCode, maxAttempts, modelName, p.account.ID, rateLimitDuration, truncateForLog(retryBody, 200))
|
||||
|
||||
resetAt := time.Now().Add(rateLimitDuration)
|
||||
if p.accountRepo != nil && modelName != "" {
|
||||
@@ -329,97 +402,6 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam
|
||||
return &smartRetryResult{action: smartRetryActionContinue}
|
||||
}
|
||||
|
||||
// handleModelCapacityExhaustedRetry 处理 MODEL_CAPACITY_EXHAUSTED 的重试逻辑
|
||||
// 策略:
|
||||
// - retryDelay < antigravityModelCapacityWaitThreshold: 按实际 retryDelay 等待后重试 1 次
|
||||
// - retryDelay >= antigravityModelCapacityWaitThreshold 或无 retryDelay: 等待 20s 后重试 1 次
|
||||
// - 重试成功: 直接返回
|
||||
// - 重试仍为 MODEL_CAPACITY_EXHAUSTED: 切换账号
|
||||
// - 重试遇到其他错误 (429 限流等): 返回该响应,让上层按实际错误码处理
|
||||
func (s *AntigravityGatewayService) handleModelCapacityExhaustedRetry(
|
||||
p antigravityRetryLoopParams, resp *http.Response, respBody []byte,
|
||||
baseURL string, retryDelay time.Duration, modelName string,
|
||||
) *smartRetryResult {
|
||||
// 确定等待时间
|
||||
waitDuration := retryDelay
|
||||
if retryDelay <= 0 || retryDelay >= antigravityModelCapacityWaitThreshold {
|
||||
// 无 retryDelay 或 >= 20s: 固定等待 20s
|
||||
waitDuration = antigravityModelCapacityWaitThreshold
|
||||
}
|
||||
|
||||
log.Printf("%s status=%d model_capacity_exhausted_retry delay=%v model=%s account=%d",
|
||||
p.prefix, resp.StatusCode, waitDuration, modelName, p.account.ID)
|
||||
|
||||
select {
|
||||
case <-p.ctx.Done():
|
||||
log.Printf("%s status=context_canceled_during_capacity_retry", p.prefix)
|
||||
return &smartRetryResult{action: smartRetryActionBreakWithResp, err: p.ctx.Err()}
|
||||
case <-time.After(waitDuration):
|
||||
}
|
||||
|
||||
retryReq, err := antigravity.NewAPIRequestWithURL(p.ctx, baseURL, p.action, p.accessToken, p.body)
|
||||
if err != nil {
|
||||
log.Printf("%s status=capacity_retry_request_build_failed error=%v", p.prefix, err)
|
||||
return &smartRetryResult{
|
||||
action: smartRetryActionBreakWithResp,
|
||||
resp: &http.Response{
|
||||
StatusCode: resp.StatusCode,
|
||||
Header: resp.Header.Clone(),
|
||||
Body: io.NopCloser(bytes.NewReader(respBody)),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
retryResp, retryErr := p.httpUpstream.Do(retryReq, p.proxyURL, p.account.ID, p.account.Concurrency)
|
||||
|
||||
// 网络错误: 切换账号
|
||||
if retryErr != nil || retryResp == nil {
|
||||
log.Printf("%s status=capacity_retry_network_error error=%v (switch account)",
|
||||
p.prefix, retryErr)
|
||||
return &smartRetryResult{
|
||||
action: smartRetryActionBreakWithResp,
|
||||
switchError: &AntigravityAccountSwitchError{
|
||||
OriginalAccountID: p.account.ID,
|
||||
RateLimitedModel: modelName,
|
||||
IsStickySession: p.isStickySession,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// 成功 (非 429/503): 直接返回
|
||||
if retryResp.StatusCode != http.StatusTooManyRequests && retryResp.StatusCode != http.StatusServiceUnavailable {
|
||||
log.Printf("%s status=%d model_capacity_retry_success", p.prefix, retryResp.StatusCode)
|
||||
return &smartRetryResult{action: smartRetryActionBreakWithResp, resp: retryResp}
|
||||
}
|
||||
|
||||
// 读取重试响应体,判断是否仍为容量不足
|
||||
retryBody, _ := io.ReadAll(io.LimitReader(retryResp.Body, 2<<20))
|
||||
_ = retryResp.Body.Close()
|
||||
|
||||
retryInfo := parseAntigravitySmartRetryInfo(retryBody)
|
||||
|
||||
// 不再是 MODEL_CAPACITY_EXHAUSTED(例如变成了 429 限流): 返回该响应让上层处理
|
||||
if retryInfo == nil || !retryInfo.IsModelCapacityExhausted {
|
||||
log.Printf("%s status=%d capacity_retry_got_different_error body=%s",
|
||||
p.prefix, retryResp.StatusCode, truncateForLog(retryBody, 200))
|
||||
retryResp.Body = io.NopCloser(bytes.NewReader(retryBody))
|
||||
return &smartRetryResult{action: smartRetryActionBreakWithResp, resp: retryResp}
|
||||
}
|
||||
|
||||
// 仍然是 MODEL_CAPACITY_EXHAUSTED: 切换账号
|
||||
log.Printf("%s status=%d model_capacity_exhausted_retry_failed model=%s account=%d (switch account)",
|
||||
p.prefix, resp.StatusCode, modelName, p.account.ID)
|
||||
|
||||
return &smartRetryResult{
|
||||
action: smartRetryActionBreakWithResp,
|
||||
switchError: &AntigravityAccountSwitchError{
|
||||
OriginalAccountID: p.account.ID,
|
||||
RateLimitedModel: modelName,
|
||||
IsStickySession: p.isStickySession,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// handleSingleAccountRetryInPlace 单账号 503 退避重试的原地重试逻辑。
|
||||
//
|
||||
// 在多账号场景下,收到 503 + 长 retryDelay(≥ 7s)时会设置模型限流 + 切换账号;
|
||||
@@ -471,11 +453,13 @@ func (s *AntigravityGatewayService) handleSingleAccountRetryInPlace(
|
||||
log.Printf("%s status=%d single_account_503_retry attempt=%d/%d delay=%v total_waited=%v model=%s account=%d",
|
||||
p.prefix, resp.StatusCode, attempt, antigravitySingleAccountSmartRetryMaxAttempts, waitDuration, totalWaited, modelName, p.account.ID)
|
||||
|
||||
timer := time.NewTimer(waitDuration)
|
||||
select {
|
||||
case <-p.ctx.Done():
|
||||
timer.Stop()
|
||||
log.Printf("%s status=context_canceled_during_single_account_retry", p.prefix)
|
||||
return &smartRetryResult{action: smartRetryActionBreakWithResp, err: p.ctx.Err()}
|
||||
case <-time.After(waitDuration):
|
||||
case <-timer.C:
|
||||
}
|
||||
totalWaited += waitDuration
|
||||
|
||||
@@ -509,7 +493,7 @@ func (s *AntigravityGatewayService) handleSingleAccountRetryInPlace(
|
||||
_ = lastRetryResp.Body.Close()
|
||||
}
|
||||
lastRetryResp = retryResp
|
||||
lastRetryBody, _ = io.ReadAll(io.LimitReader(retryResp.Body, 2<<20))
|
||||
lastRetryBody, _ = io.ReadAll(io.LimitReader(retryResp.Body, 8<<10))
|
||||
_ = retryResp.Body.Close()
|
||||
|
||||
// 解析新的重试信息,更新下次等待时间
|
||||
@@ -570,10 +554,11 @@ func (s *AntigravityGatewayService) antigravityRetryLoop(p antigravityRetryLoopP
|
||||
}
|
||||
}
|
||||
|
||||
availableURLs := antigravity.DefaultURLAvailability.GetAvailableURLs()
|
||||
if len(availableURLs) == 0 {
|
||||
availableURLs = antigravity.BaseURLs
|
||||
baseURL := resolveAntigravityForwardBaseURL()
|
||||
if baseURL == "" {
|
||||
return nil, errors.New("no antigravity forward base url configured")
|
||||
}
|
||||
availableURLs := []string{baseURL}
|
||||
|
||||
var resp *http.Response
|
||||
var usedBaseURL string
|
||||
@@ -1011,11 +996,11 @@ func (s *AntigravityGatewayService) TestConnection(ctx context.Context, account
|
||||
proxyURL = account.Proxy.URL()
|
||||
}
|
||||
|
||||
// URL fallback 循环
|
||||
availableURLs := antigravity.DefaultURLAvailability.GetAvailableURLs()
|
||||
if len(availableURLs) == 0 {
|
||||
availableURLs = antigravity.BaseURLs // 所有 URL 都不可用时,重试所有
|
||||
baseURL := resolveAntigravityForwardBaseURL()
|
||||
if baseURL == "" {
|
||||
return nil, errors.New("no antigravity forward base url configured")
|
||||
}
|
||||
availableURLs := []string{baseURL}
|
||||
|
||||
var lastErr error
|
||||
for urlIdx, baseURL := range availableURLs {
|
||||
@@ -1480,7 +1465,7 @@ func (s *AntigravityGatewayService) Forward(ctx context.Context, c *gin.Context,
|
||||
break
|
||||
}
|
||||
|
||||
retryBody, _ := io.ReadAll(io.LimitReader(retryResp.Body, 2<<20))
|
||||
retryBody, _ := io.ReadAll(io.LimitReader(retryResp.Body, 8<<10))
|
||||
_ = retryResp.Body.Close()
|
||||
if retryResp.StatusCode == http.StatusTooManyRequests {
|
||||
retryBaseURL := ""
|
||||
@@ -2287,10 +2272,12 @@ func sleepAntigravityBackoffWithContext(ctx context.Context, attempt int) bool {
|
||||
sleepFor = 0
|
||||
}
|
||||
|
||||
timer := time.NewTimer(sleepFor)
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
timer.Stop()
|
||||
return false
|
||||
case <-time.After(sleepFor):
|
||||
case <-timer.C:
|
||||
return true
|
||||
}
|
||||
}
|
||||
@@ -2337,7 +2324,7 @@ func antigravityFallbackCooldownSeconds() (time.Duration, bool) {
|
||||
type antigravitySmartRetryInfo struct {
|
||||
RetryDelay time.Duration // 重试延迟时间
|
||||
ModelName string // 限流的模型名称(如 "claude-sonnet-4-5")
|
||||
IsModelCapacityExhausted bool // 是否为 MODEL_CAPACITY_EXHAUSTED(503 容量不足,与 429 限流处理策略不同)
|
||||
IsModelCapacityExhausted bool // 是否为模型容量不足(MODEL_CAPACITY_EXHAUSTED)
|
||||
}
|
||||
|
||||
// parseAntigravitySmartRetryInfo 解析 Google RPC RetryInfo 和 ErrorInfo 信息
|
||||
@@ -2446,9 +2433,8 @@ func parseAntigravitySmartRetryInfo(body []byte) *antigravitySmartRetryInfo {
|
||||
return nil
|
||||
}
|
||||
|
||||
// MODEL_CAPACITY_EXHAUSTED: retryDelay 可以为 0(由调用方决定默认等待策略)
|
||||
// RATE_LIMIT_EXCEEDED: 无 retryDelay 时使用默认限流时间
|
||||
if retryDelay <= 0 && !hasModelCapacityExhausted {
|
||||
// 如果上游未提供 retryDelay,使用默认限流时间
|
||||
if retryDelay <= 0 {
|
||||
retryDelay = antigravityDefaultRateLimitDuration
|
||||
}
|
||||
|
||||
@@ -2461,11 +2447,11 @@ func parseAntigravitySmartRetryInfo(body []byte) *antigravitySmartRetryInfo {
|
||||
|
||||
// shouldTriggerAntigravitySmartRetry 判断是否应该触发智能重试
|
||||
// 返回:
|
||||
// - shouldRetry: 是否应该智能重试(retryDelay < antigravityRateLimitThreshold)
|
||||
// - shouldRateLimitModel: 是否应该限流模型(retryDelay >= antigravityRateLimitThreshold)
|
||||
// - waitDuration: 等待时间(智能重试时使用,shouldRateLimitModel=true 时为限流时长)
|
||||
// - shouldRetry: 是否应该智能重试(retryDelay < antigravityRateLimitThreshold,或 MODEL_CAPACITY_EXHAUSTED)
|
||||
// - shouldRateLimitModel: 是否应该限流模型并切换账号(仅 RATE_LIMIT_EXCEEDED 且 retryDelay >= 阈值)
|
||||
// - waitDuration: 等待时间
|
||||
// - modelName: 限流的模型名称
|
||||
// - isModelCapacityExhausted: 是否为 MODEL_CAPACITY_EXHAUSTED(需要独立处理)
|
||||
// - isModelCapacityExhausted: 是否为模型容量不足(MODEL_CAPACITY_EXHAUSTED)
|
||||
func shouldTriggerAntigravitySmartRetry(account *Account, respBody []byte) (shouldRetry bool, shouldRateLimitModel bool, waitDuration time.Duration, modelName string, isModelCapacityExhausted bool) {
|
||||
if account.Platform != PlatformAntigravity {
|
||||
return false, false, 0, "", false
|
||||
@@ -2476,11 +2462,13 @@ func shouldTriggerAntigravitySmartRetry(account *Account, respBody []byte) (shou
|
||||
return false, false, 0, "", false
|
||||
}
|
||||
|
||||
// MODEL_CAPACITY_EXHAUSTED: 独立处理,不走 7s 阈值判断
|
||||
// MODEL_CAPACITY_EXHAUSTED(模型容量不足):所有账号共享同一模型容量池
|
||||
// 切换账号无意义,使用固定 1s 间隔重试
|
||||
if info.IsModelCapacityExhausted {
|
||||
return true, false, info.RetryDelay, info.ModelName, true
|
||||
return true, false, antigravityModelCapacityRetryWait, info.ModelName, true
|
||||
}
|
||||
|
||||
// RATE_LIMIT_EXCEEDED(账号级限流):
|
||||
// retryDelay >= 阈值:直接限流模型,不重试
|
||||
// 注意:如果上游未提供 retryDelay,parseAntigravitySmartRetryInfo 已设置为默认 30s
|
||||
if info.RetryDelay >= antigravityRateLimitThreshold {
|
||||
@@ -2519,8 +2507,9 @@ type handleModelRateLimitResult struct {
|
||||
|
||||
// handleModelRateLimit 处理模型级限流(在原有逻辑之前调用)
|
||||
// 仅处理 429/503,解析模型名和 retryDelay
|
||||
// - retryDelay < antigravityRateLimitThreshold: 返回 ShouldRetry=true,由调用方等待后重试
|
||||
// - retryDelay >= antigravityRateLimitThreshold: 设置模型限流 + 清除粘性会话 + 返回 SwitchError
|
||||
// - MODEL_CAPACITY_EXHAUSTED: 返回 Handled=true(实际重试由 handleSmartRetry 处理)
|
||||
// - RATE_LIMIT_EXCEEDED + retryDelay < 阈值: 返回 ShouldRetry=true,由调用方等待后重试
|
||||
// - RATE_LIMIT_EXCEEDED + retryDelay >= 阈值: 设置模型限流 + 清除粘性会话 + 返回 SwitchError
|
||||
func (s *AntigravityGatewayService) handleModelRateLimit(p *handleModelRateLimitParams) *handleModelRateLimitResult {
|
||||
if p.statusCode != 429 && p.statusCode != 503 {
|
||||
return &handleModelRateLimitResult{Handled: false}
|
||||
@@ -2531,13 +2520,17 @@ func (s *AntigravityGatewayService) handleModelRateLimit(p *handleModelRateLimit
|
||||
return &handleModelRateLimitResult{Handled: false}
|
||||
}
|
||||
|
||||
// MODEL_CAPACITY_EXHAUSTED: 容量不足由 handleSmartRetry 独立处理,此处仅标记已处理
|
||||
// 不设置模型限流(容量不足是临时的,不等同于限流)
|
||||
// MODEL_CAPACITY_EXHAUSTED:模型容量不足,所有账号共享同一容量池
|
||||
// 切换账号无意义,不设置模型限流(实际重试由 handleSmartRetry 处理)
|
||||
if info.IsModelCapacityExhausted {
|
||||
return &handleModelRateLimitResult{Handled: true}
|
||||
log.Printf("%s status=%d model_capacity_exhausted model=%s (not switching account, retry handled by smart retry)",
|
||||
p.prefix, p.statusCode, info.ModelName)
|
||||
return &handleModelRateLimitResult{
|
||||
Handled: true,
|
||||
}
|
||||
}
|
||||
|
||||
// < antigravityRateLimitThreshold: 等待后重试
|
||||
// RATE_LIMIT_EXCEEDED: < antigravityRateLimitThreshold: 等待后重试
|
||||
if info.RetryDelay < antigravityRateLimitThreshold {
|
||||
log.Printf("%s status=%d model_rate_limit_wait model=%s wait=%v",
|
||||
p.prefix, p.statusCode, info.ModelName, info.RetryDelay)
|
||||
@@ -2548,7 +2541,7 @@ func (s *AntigravityGatewayService) handleModelRateLimit(p *handleModelRateLimit
|
||||
}
|
||||
}
|
||||
|
||||
// >= antigravityRateLimitThreshold: 设置限流 + 清除粘性会话 + 切换账号
|
||||
// RATE_LIMIT_EXCEEDED: >= antigravityRateLimitThreshold: 设置限流 + 清除粘性会话 + 切换账号
|
||||
s.setModelRateLimitAndClearSession(p, info)
|
||||
|
||||
return &handleModelRateLimitResult{
|
||||
@@ -3321,6 +3314,21 @@ func (s *AntigravityGatewayService) writeMappedClaudeError(c *gin.Context, accou
|
||||
log.Printf("[antigravity-Forward] upstream_error status=%d body=%s", upstreamStatus, truncateForLog(body, maxBytes))
|
||||
}
|
||||
|
||||
// 检查错误透传规则
|
||||
if ptStatus, ptErrType, ptErrMsg, matched := applyErrorPassthroughRule(
|
||||
c, account.Platform, upstreamStatus, body,
|
||||
0, "", "",
|
||||
); matched {
|
||||
c.JSON(ptStatus, gin.H{
|
||||
"type": "error",
|
||||
"error": gin.H{"type": ptErrType, "message": ptErrMsg},
|
||||
})
|
||||
if upstreamMsg == "" {
|
||||
return fmt.Errorf("upstream error: %d", upstreamStatus)
|
||||
}
|
||||
return fmt.Errorf("upstream error: %d message=%s", upstreamStatus, upstreamMsg)
|
||||
}
|
||||
|
||||
var statusCode int
|
||||
var errType, errMsg string
|
||||
|
||||
|
||||
@@ -86,7 +86,9 @@ func (s *stubAntigravityAccountRepo) SetModelRateLimit(ctx context.Context, id i
|
||||
return nil
|
||||
}
|
||||
|
||||
func TestAntigravityRetryLoop_URLFallback_UsesLatestSuccess(t *testing.T) {
|
||||
func TestAntigravityRetryLoop_NoURLFallback_UsesConfiguredBaseURL(t *testing.T) {
|
||||
t.Setenv(antigravityForwardBaseURLEnv, "")
|
||||
|
||||
oldBaseURLs := append([]string(nil), antigravity.BaseURLs...)
|
||||
oldAvailability := antigravity.DefaultURLAvailability
|
||||
defer func() {
|
||||
@@ -131,15 +133,16 @@ func TestAntigravityRetryLoop_URLFallback_UsesLatestSuccess(t *testing.T) {
|
||||
require.NotNil(t, result)
|
||||
require.NotNil(t, result.resp)
|
||||
defer func() { _ = result.resp.Body.Close() }()
|
||||
require.Equal(t, http.StatusOK, result.resp.StatusCode)
|
||||
require.False(t, handleErrorCalled)
|
||||
require.Len(t, upstream.calls, 2)
|
||||
require.True(t, strings.HasPrefix(upstream.calls[0], base1))
|
||||
require.True(t, strings.HasPrefix(upstream.calls[1], base2))
|
||||
require.Equal(t, http.StatusTooManyRequests, result.resp.StatusCode)
|
||||
require.True(t, handleErrorCalled)
|
||||
require.Len(t, upstream.calls, antigravityMaxRetries)
|
||||
for _, callURL := range upstream.calls {
|
||||
require.True(t, strings.HasPrefix(callURL, base1))
|
||||
}
|
||||
|
||||
available := antigravity.DefaultURLAvailability.GetAvailableURLs()
|
||||
require.NotEmpty(t, available)
|
||||
require.Equal(t, base2, available[0])
|
||||
require.Equal(t, base1, available[0])
|
||||
}
|
||||
|
||||
// TestHandleUpstreamError_429_ModelRateLimit 测试 429 模型限流场景
|
||||
@@ -189,13 +192,13 @@ func TestHandleUpstreamError_429_NonModelRateLimit(t *testing.T) {
|
||||
}
|
||||
|
||||
// TestHandleUpstreamError_503_ModelCapacityExhausted 测试 503 模型容量不足场景
|
||||
// MODEL_CAPACITY_EXHAUSTED 标记 Handled 但不设模型限流(由 handleSmartRetry 独立处理)
|
||||
// MODEL_CAPACITY_EXHAUSTED 时应等待重试,不切换账号
|
||||
func TestHandleUpstreamError_503_ModelCapacityExhausted(t *testing.T) {
|
||||
repo := &stubAntigravityAccountRepo{}
|
||||
svc := &AntigravityGatewayService{accountRepo: repo}
|
||||
account := &Account{ID: 3, Name: "acc-3", Platform: PlatformAntigravity}
|
||||
|
||||
// 503 + MODEL_CAPACITY_EXHAUSTED → 标记已处理,不设模型限流
|
||||
// 503 + MODEL_CAPACITY_EXHAUSTED → 等待重试,不切换账号
|
||||
body := []byte(`{
|
||||
"error": {
|
||||
"status": "UNAVAILABLE",
|
||||
@@ -208,10 +211,12 @@ func TestHandleUpstreamError_503_ModelCapacityExhausted(t *testing.T) {
|
||||
|
||||
result := svc.handleUpstreamError(context.Background(), "[test]", account, http.StatusServiceUnavailable, http.Header{}, body, "gemini-3-pro-high", 0, "", false)
|
||||
|
||||
// 应该标记已处理,但不设模型限流
|
||||
// MODEL_CAPACITY_EXHAUSTED 应该标记为已处理,不切换账号,不设置模型限流
|
||||
// 实际重试由 handleSmartRetry 处理
|
||||
require.NotNil(t, result)
|
||||
require.True(t, result.Handled)
|
||||
require.Nil(t, result.SwitchError, "MODEL_CAPACITY_EXHAUSTED should not trigger switch error in handleModelRateLimit")
|
||||
require.False(t, result.ShouldRetry, "MODEL_CAPACITY_EXHAUSTED should not trigger retry from handleModelRateLimit path")
|
||||
require.Nil(t, result.SwitchError, "MODEL_CAPACITY_EXHAUSTED should not trigger account switch")
|
||||
require.Empty(t, repo.modelRateLimitCalls, "MODEL_CAPACITY_EXHAUSTED should not set model rate limit")
|
||||
}
|
||||
|
||||
@@ -300,11 +305,12 @@ func TestParseGeminiRateLimitResetTime_QuotaResetDelay_RoundsUp(t *testing.T) {
|
||||
|
||||
func TestParseAntigravitySmartRetryInfo(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
body string
|
||||
expectedDelay time.Duration
|
||||
expectedModel string
|
||||
expectedNil bool
|
||||
name string
|
||||
body string
|
||||
expectedDelay time.Duration
|
||||
expectedModel string
|
||||
expectedNil bool
|
||||
expectedIsModelCapacityExhausted bool
|
||||
}{
|
||||
{
|
||||
name: "valid complete response with RATE_LIMIT_EXCEEDED",
|
||||
@@ -367,8 +373,9 @@ func TestParseAntigravitySmartRetryInfo(t *testing.T) {
|
||||
"message": "No capacity available for model gemini-3-pro-high on the server"
|
||||
}
|
||||
}`,
|
||||
expectedDelay: 39 * time.Second,
|
||||
expectedModel: "gemini-3-pro-high",
|
||||
expectedDelay: 39 * time.Second,
|
||||
expectedModel: "gemini-3-pro-high",
|
||||
expectedIsModelCapacityExhausted: true,
|
||||
},
|
||||
{
|
||||
name: "503 UNAVAILABLE without MODEL_CAPACITY_EXHAUSTED - should return nil",
|
||||
@@ -479,6 +486,9 @@ func TestParseAntigravitySmartRetryInfo(t *testing.T) {
|
||||
if result.ModelName != tt.expectedModel {
|
||||
t.Errorf("ModelName = %q, want %q", result.ModelName, tt.expectedModel)
|
||||
}
|
||||
if result.IsModelCapacityExhausted != tt.expectedIsModelCapacityExhausted {
|
||||
t.Errorf("IsModelCapacityExhausted = %v, want %v", result.IsModelCapacityExhausted, tt.expectedIsModelCapacityExhausted)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -490,14 +500,14 @@ func TestShouldTriggerAntigravitySmartRetry(t *testing.T) {
|
||||
apiKeyAccount := &Account{Type: AccountTypeAPIKey}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
account *Account
|
||||
body string
|
||||
expectedShouldRetry bool
|
||||
expectedShouldRateLimit bool
|
||||
expectedCapacityExhaust bool
|
||||
minWait time.Duration
|
||||
modelName string
|
||||
name string
|
||||
account *Account
|
||||
body string
|
||||
expectedShouldRetry bool
|
||||
expectedShouldRateLimit bool
|
||||
expectedIsModelCapacityExhausted bool
|
||||
minWait time.Duration
|
||||
modelName string
|
||||
}{
|
||||
{
|
||||
name: "OAuth account with short delay (< 7s) - smart retry",
|
||||
@@ -611,14 +621,14 @@ func TestShouldTriggerAntigravitySmartRetry(t *testing.T) {
|
||||
]
|
||||
}
|
||||
}`,
|
||||
expectedShouldRetry: true,
|
||||
expectedShouldRateLimit: false,
|
||||
expectedCapacityExhaust: true,
|
||||
minWait: 39 * time.Second,
|
||||
modelName: "gemini-3-pro-high",
|
||||
expectedShouldRetry: true,
|
||||
expectedShouldRateLimit: false,
|
||||
expectedIsModelCapacityExhausted: true,
|
||||
minWait: 1 * time.Second,
|
||||
modelName: "gemini-3-pro-high",
|
||||
},
|
||||
{
|
||||
name: "503 UNAVAILABLE with MODEL_CAPACITY_EXHAUSTED - no retryDelay - use default rate limit",
|
||||
name: "503 UNAVAILABLE with MODEL_CAPACITY_EXHAUSTED - no retryDelay - use fixed wait",
|
||||
account: oauthAccount,
|
||||
body: `{
|
||||
"error": {
|
||||
@@ -630,11 +640,11 @@ func TestShouldTriggerAntigravitySmartRetry(t *testing.T) {
|
||||
"message": "No capacity available for model gemini-2.5-flash on the server"
|
||||
}
|
||||
}`,
|
||||
expectedShouldRetry: true,
|
||||
expectedShouldRateLimit: false,
|
||||
expectedCapacityExhaust: true,
|
||||
minWait: 0, // 无 retryDelay,由 handleModelCapacityExhaustedRetry 决定默认 20s
|
||||
modelName: "gemini-2.5-flash",
|
||||
expectedShouldRetry: true,
|
||||
expectedShouldRateLimit: false,
|
||||
expectedIsModelCapacityExhausted: true,
|
||||
minWait: 1 * time.Second,
|
||||
modelName: "gemini-2.5-flash",
|
||||
},
|
||||
{
|
||||
name: "429 RESOURCE_EXHAUSTED with RATE_LIMIT_EXCEEDED - no retryDelay - use default rate limit",
|
||||
@@ -658,26 +668,21 @@ func TestShouldTriggerAntigravitySmartRetry(t *testing.T) {
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
shouldRetry, shouldRateLimit, wait, model, isCapacityExhausted := shouldTriggerAntigravitySmartRetry(tt.account, []byte(tt.body))
|
||||
shouldRetry, shouldRateLimit, wait, model, isModelCapacityExhausted := shouldTriggerAntigravitySmartRetry(tt.account, []byte(tt.body))
|
||||
if shouldRetry != tt.expectedShouldRetry {
|
||||
t.Errorf("shouldRetry = %v, want %v", shouldRetry, tt.expectedShouldRetry)
|
||||
}
|
||||
if shouldRateLimit != tt.expectedShouldRateLimit {
|
||||
t.Errorf("shouldRateLimit = %v, want %v", shouldRateLimit, tt.expectedShouldRateLimit)
|
||||
}
|
||||
if isCapacityExhausted != tt.expectedCapacityExhaust {
|
||||
t.Errorf("isCapacityExhausted = %v, want %v", isCapacityExhausted, tt.expectedCapacityExhaust)
|
||||
if isModelCapacityExhausted != tt.expectedIsModelCapacityExhausted {
|
||||
t.Errorf("isModelCapacityExhausted = %v, want %v", isModelCapacityExhausted, tt.expectedIsModelCapacityExhausted)
|
||||
}
|
||||
if shouldRetry && !isCapacityExhausted {
|
||||
if shouldRetry {
|
||||
if wait < tt.minWait {
|
||||
t.Errorf("wait = %v, want >= %v", wait, tt.minWait)
|
||||
}
|
||||
}
|
||||
if isCapacityExhausted && tt.minWait > 0 {
|
||||
if wait < tt.minWait {
|
||||
t.Errorf("capacity exhausted wait = %v, want >= %v", wait, tt.minWait)
|
||||
}
|
||||
}
|
||||
if shouldRateLimit && tt.minWait > 0 {
|
||||
if wait < tt.minWait {
|
||||
t.Errorf("rate limit wait = %v, want >= %v", wait, tt.minWait)
|
||||
@@ -925,6 +930,22 @@ func TestIsAntigravityAccountSwitchError(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveAntigravityForwardBaseURL_DefaultDaily(t *testing.T) {
|
||||
t.Setenv(antigravityForwardBaseURLEnv, "")
|
||||
|
||||
oldBaseURLs := append([]string(nil), antigravity.BaseURLs...)
|
||||
defer func() {
|
||||
antigravity.BaseURLs = oldBaseURLs
|
||||
}()
|
||||
|
||||
prodURL := "https://prod.test"
|
||||
dailyURL := "https://daily.test"
|
||||
antigravity.BaseURLs = []string{dailyURL, prodURL}
|
||||
|
||||
resolved := resolveAntigravityForwardBaseURL()
|
||||
require.Equal(t, dailyURL, resolved)
|
||||
}
|
||||
|
||||
func TestAntigravityAccountSwitchError_Error(t *testing.T) {
|
||||
err := &AntigravityAccountSwitchError{
|
||||
OriginalAccountID: 789,
|
||||
|
||||
@@ -142,46 +142,25 @@ func TestHandleSmartRetry_503_LongDelay_SingleAccountRetry_RetryInPlace(t *testi
|
||||
}
|
||||
|
||||
// TestHandleSmartRetry_503_LongDelay_NoSingleAccountRetry_StillSwitches
|
||||
// 对照组:503 + retryDelay >= 7s + 无 SingleAccountRetry 标记 + MODEL_CAPACITY_EXHAUSTED
|
||||
// → 走 handleModelCapacityExhaustedRetry:等待后重试 1 次,重试仍失败则切换账号
|
||||
// 对照组:503 + retryDelay >= 7s + 无 SingleAccountRetry 标记
|
||||
// → 照常设模型限流 + 切换账号
|
||||
func TestHandleSmartRetry_503_LongDelay_NoSingleAccountRetry_StillSwitches(t *testing.T) {
|
||||
// 重试也返回 503 + MODEL_CAPACITY_EXHAUSTED,触发切换账号
|
||||
retryRespBody := `{
|
||||
"error": {
|
||||
"code": 503,
|
||||
"status": "UNAVAILABLE",
|
||||
"details": [
|
||||
{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro-high"}, "reason": "MODEL_CAPACITY_EXHAUSTED"},
|
||||
{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "39s"}
|
||||
]
|
||||
}
|
||||
}`
|
||||
retryResp := &http.Response{
|
||||
StatusCode: http.StatusServiceUnavailable,
|
||||
Header: http.Header{},
|
||||
Body: io.NopCloser(strings.NewReader(retryRespBody)),
|
||||
}
|
||||
upstream := &mockSmartRetryUpstream{
|
||||
responses: []*http.Response{retryResp},
|
||||
errors: []error{nil},
|
||||
}
|
||||
|
||||
repo := &stubAntigravityAccountRepo{}
|
||||
account := &Account{
|
||||
ID: 2,
|
||||
Name: "acc-multi",
|
||||
Type: AccountTypeOAuth,
|
||||
Platform: PlatformAntigravity,
|
||||
Concurrency: 1,
|
||||
ID: 2,
|
||||
Name: "acc-multi",
|
||||
Type: AccountTypeOAuth,
|
||||
Platform: PlatformAntigravity,
|
||||
}
|
||||
|
||||
// 503 + 39s >= 7s 阈值
|
||||
// 503 + 39s >= 7s 阈值(使用 RATE_LIMIT_EXCEEDED 而非 MODEL_CAPACITY_EXHAUSTED,
|
||||
// 因为 MODEL_CAPACITY_EXHAUSTED 走独立的重试路径,不触发 shouldRateLimitModel)
|
||||
respBody := []byte(`{
|
||||
"error": {
|
||||
"code": 503,
|
||||
"status": "UNAVAILABLE",
|
||||
"status": "RESOURCE_EXHAUSTED",
|
||||
"details": [
|
||||
{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro-high"}, "reason": "MODEL_CAPACITY_EXHAUSTED"},
|
||||
{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro-high"}, "reason": "RATE_LIMIT_EXCEEDED"},
|
||||
{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "39s"}
|
||||
]
|
||||
}
|
||||
@@ -193,14 +172,13 @@ func TestHandleSmartRetry_503_LongDelay_NoSingleAccountRetry_StillSwitches(t *te
|
||||
}
|
||||
|
||||
params := antigravityRetryLoopParams{
|
||||
ctx: context.Background(), // 关键:无单账号标记
|
||||
prefix: "[test]",
|
||||
account: account,
|
||||
accessToken: "token",
|
||||
action: "generateContent",
|
||||
body: []byte(`{"input":"test"}`),
|
||||
httpUpstream: upstream,
|
||||
accountRepo: repo,
|
||||
ctx: context.Background(), // 关键:无单账号标记
|
||||
prefix: "[test]",
|
||||
account: account,
|
||||
accessToken: "token",
|
||||
action: "generateContent",
|
||||
body: []byte(`{"input":"test"}`),
|
||||
accountRepo: repo,
|
||||
handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
|
||||
return nil
|
||||
},
|
||||
@@ -213,12 +191,13 @@ func TestHandleSmartRetry_503_LongDelay_NoSingleAccountRetry_StillSwitches(t *te
|
||||
|
||||
require.NotNil(t, result)
|
||||
require.Equal(t, smartRetryActionBreakWithResp, result.action)
|
||||
// MODEL_CAPACITY_EXHAUSTED 重试仍失败 → 切换账号
|
||||
require.NotNil(t, result.switchError, "should return switchError after capacity retry fails")
|
||||
// 对照:多账号模式返回 switchError
|
||||
require.NotNil(t, result.switchError, "multi-account mode should return switchError for 503")
|
||||
require.Nil(t, result.resp, "should not return resp when switchError is set")
|
||||
|
||||
// 验证做了一次重试
|
||||
require.Len(t, upstream.calls, 1, "should have made one capacity retry attempt")
|
||||
// 对照:多账号模式应设模型限流
|
||||
require.Len(t, repo.modelRateLimitCalls, 1,
|
||||
"multi-account mode SHOULD set model rate limit")
|
||||
}
|
||||
|
||||
// TestHandleSmartRetry_429_LongDelay_SingleAccountRetry_StillSwitches
|
||||
@@ -276,15 +255,13 @@ func TestHandleSmartRetry_429_LongDelay_SingleAccountRetry_StillSwitches(t *test
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// 4. handleSmartRetry + 503 + 短延迟 + MODEL_CAPACITY_EXHAUSTED
|
||||
// 不论单账号/多账号,都走 handleModelCapacityExhaustedRetry
|
||||
// 4. handleSmartRetry + 503 + 短延迟 + SingleAccountRetry → 智能重试耗尽后不设限流
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// TestHandleSmartRetry_503_ShortDelay_SingleAccountRetry_NoRateLimit
|
||||
// 503 + retryDelay < 7s + SingleAccountRetry + MODEL_CAPACITY_EXHAUSTED
|
||||
// → 走 handleModelCapacityExhaustedRetry,重试失败后切换账号,不设限流
|
||||
// 503 + retryDelay < 7s + SingleAccountRetry → 智能重试耗尽后直接返回 503,不设限流
|
||||
func TestHandleSmartRetry_503_ShortDelay_SingleAccountRetry_NoRateLimit(t *testing.T) {
|
||||
// 重试也返回 503 + MODEL_CAPACITY_EXHAUSTED
|
||||
// 智能重试也返回 503
|
||||
failRespBody := `{
|
||||
"error": {
|
||||
"code": 503,
|
||||
@@ -307,11 +284,10 @@ func TestHandleSmartRetry_503_ShortDelay_SingleAccountRetry_NoRateLimit(t *testi
|
||||
|
||||
repo := &stubAntigravityAccountRepo{}
|
||||
account := &Account{
|
||||
ID: 4,
|
||||
Name: "acc-short-503",
|
||||
Type: AccountTypeOAuth,
|
||||
Platform: PlatformAntigravity,
|
||||
Concurrency: 1,
|
||||
ID: 4,
|
||||
Name: "acc-short-503",
|
||||
Type: AccountTypeOAuth,
|
||||
Platform: PlatformAntigravity,
|
||||
}
|
||||
|
||||
// 0.1s < 7s 阈值
|
||||
@@ -352,57 +328,54 @@ func TestHandleSmartRetry_503_ShortDelay_SingleAccountRetry_NoRateLimit(t *testi
|
||||
|
||||
require.NotNil(t, result)
|
||||
require.Equal(t, smartRetryActionBreakWithResp, result.action)
|
||||
// MODEL_CAPACITY_EXHAUSTED 重试失败 → 切换账号
|
||||
require.NotNil(t, result.switchError, "should return switchError after capacity retry fails")
|
||||
// 关键断言:单账号 503 模式下,智能重试耗尽后直接返回 503 响应,不切换
|
||||
require.NotNil(t, result.resp, "should return 503 response directly for single account mode")
|
||||
require.Equal(t, http.StatusServiceUnavailable, result.resp.StatusCode)
|
||||
require.Nil(t, result.switchError, "should NOT switch account in single account mode")
|
||||
|
||||
// 关键断言:不设模型限流(capacity exhausted 不设限流)
|
||||
// 关键断言:不设模型限流
|
||||
require.Len(t, repo.modelRateLimitCalls, 0,
|
||||
"should NOT set model rate limit for MODEL_CAPACITY_EXHAUSTED")
|
||||
|
||||
// 验证做了一次重试
|
||||
require.Len(t, upstream.calls, 1, "should have made one capacity retry attempt")
|
||||
"should NOT set model rate limit for 503 in single account mode")
|
||||
}
|
||||
|
||||
// TestHandleSmartRetry_503_ShortDelay_NoSingleAccountRetry_SetsRateLimit
|
||||
// 对照组:503 + retryDelay < 7s + 无 SingleAccountRetry + MODEL_CAPACITY_EXHAUSTED
|
||||
// → 走 handleModelCapacityExhaustedRetry,重试仍失败则切换账号
|
||||
// 对照组:503 + retryDelay < 7s + 无 SingleAccountRetry → 智能重试耗尽后照常设限流
|
||||
// 使用 RATE_LIMIT_EXCEEDED 而非 MODEL_CAPACITY_EXHAUSTED,因为后者走独立的 60 次重试路径
|
||||
func TestHandleSmartRetry_503_ShortDelay_NoSingleAccountRetry_SetsRateLimit(t *testing.T) {
|
||||
// 重试也返回 503 + MODEL_CAPACITY_EXHAUSTED
|
||||
retryRespBody := `{
|
||||
failRespBody := `{
|
||||
"error": {
|
||||
"code": 503,
|
||||
"status": "UNAVAILABLE",
|
||||
"status": "RESOURCE_EXHAUSTED",
|
||||
"details": [
|
||||
{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-flash"}, "reason": "MODEL_CAPACITY_EXHAUSTED"},
|
||||
{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-flash"}, "reason": "RATE_LIMIT_EXCEEDED"},
|
||||
{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "0.1s"}
|
||||
]
|
||||
}
|
||||
}`
|
||||
retryResp := &http.Response{
|
||||
failResp := &http.Response{
|
||||
StatusCode: http.StatusServiceUnavailable,
|
||||
Header: http.Header{},
|
||||
Body: io.NopCloser(strings.NewReader(retryRespBody)),
|
||||
Body: io.NopCloser(strings.NewReader(failRespBody)),
|
||||
}
|
||||
upstream := &mockSmartRetryUpstream{
|
||||
responses: []*http.Response{retryResp},
|
||||
responses: []*http.Response{failResp},
|
||||
errors: []error{nil},
|
||||
}
|
||||
|
||||
repo := &stubAntigravityAccountRepo{}
|
||||
account := &Account{
|
||||
ID: 5,
|
||||
Name: "acc-multi-503",
|
||||
Type: AccountTypeOAuth,
|
||||
Platform: PlatformAntigravity,
|
||||
Concurrency: 1,
|
||||
ID: 5,
|
||||
Name: "acc-multi-503",
|
||||
Type: AccountTypeOAuth,
|
||||
Platform: PlatformAntigravity,
|
||||
}
|
||||
|
||||
respBody := []byte(`{
|
||||
"error": {
|
||||
"code": 503,
|
||||
"status": "UNAVAILABLE",
|
||||
"status": "RESOURCE_EXHAUSTED",
|
||||
"details": [
|
||||
{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-flash"}, "reason": "MODEL_CAPACITY_EXHAUSTED"},
|
||||
{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-flash"}, "reason": "RATE_LIMIT_EXCEEDED"},
|
||||
{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "0.1s"}
|
||||
]
|
||||
}
|
||||
@@ -434,15 +407,11 @@ func TestHandleSmartRetry_503_ShortDelay_NoSingleAccountRetry_SetsRateLimit(t *t
|
||||
|
||||
require.NotNil(t, result)
|
||||
require.Equal(t, smartRetryActionBreakWithResp, result.action)
|
||||
// MODEL_CAPACITY_EXHAUSTED 重试仍失败 → 切换账号
|
||||
require.NotNil(t, result.switchError, "should return switchError after capacity retry fails")
|
||||
|
||||
// handleModelCapacityExhaustedRetry 不设模型限流(容量不足是全局状态,不适合限流单个模型)
|
||||
require.Len(t, repo.modelRateLimitCalls, 0,
|
||||
"handleModelCapacityExhaustedRetry should NOT set model rate limit")
|
||||
|
||||
// 验证做了一次重试
|
||||
require.Len(t, upstream.calls, 1, "should have made one capacity retry attempt")
|
||||
// 对照:多账号模式应返回 switchError
|
||||
require.NotNil(t, result.switchError, "multi-account mode should return switchError for 503")
|
||||
// 对照:多账号模式应设模型限流
|
||||
require.Len(t, repo.modelRateLimitCalls, 1,
|
||||
"multi-account mode should set model rate limit")
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
@@ -9,7 +9,6 @@ import (
|
||||
"net/http"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
@@ -295,20 +294,9 @@ func TestHandleSmartRetry_ShortDelay_SmartRetryFailed_ReturnsSwitchError(t *test
|
||||
require.Len(t, upstream.calls, 1, "should have made one retry call (max attempts)")
|
||||
}
|
||||
|
||||
// TestHandleSmartRetry_503_ModelCapacityExhausted_ShortDelay_RetrySuccess
|
||||
// 503 MODEL_CAPACITY_EXHAUSTED + retryDelay < 20s → 按实际 retryDelay 等待后重试 1 次,成功返回
|
||||
func TestHandleSmartRetry_503_ModelCapacityExhausted_ShortDelay_RetrySuccess(t *testing.T) {
|
||||
// 重试成功的响应
|
||||
successResp := &http.Response{
|
||||
StatusCode: http.StatusOK,
|
||||
Header: http.Header{},
|
||||
Body: io.NopCloser(strings.NewReader(`{"ok":true}`)),
|
||||
}
|
||||
upstream := &mockSmartRetryUpstream{
|
||||
responses: []*http.Response{successResp},
|
||||
errors: []error{nil},
|
||||
}
|
||||
|
||||
// TestHandleSmartRetry_503_ModelCapacityExhausted_RetrySuccess 测试 503 MODEL_CAPACITY_EXHAUSTED 重试成功
|
||||
// MODEL_CAPACITY_EXHAUSTED 使用固定 1s 间隔重试,不切换账号
|
||||
func TestHandleSmartRetry_503_ModelCapacityExhausted_RetrySuccess(t *testing.T) {
|
||||
repo := &stubAntigravityAccountRepo{}
|
||||
account := &Account{
|
||||
ID: 3,
|
||||
@@ -317,85 +305,7 @@ func TestHandleSmartRetry_503_ModelCapacityExhausted_ShortDelay_RetrySuccess(t *
|
||||
Platform: PlatformAntigravity,
|
||||
}
|
||||
|
||||
// 503 + MODEL_CAPACITY_EXHAUSTED + 0.5s < 20s 阈值 → 按实际 retryDelay 重试 1 次
|
||||
respBody := []byte(`{
|
||||
"error": {
|
||||
"code": 503,
|
||||
"status": "UNAVAILABLE",
|
||||
"details": [
|
||||
{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro-high"}, "reason": "MODEL_CAPACITY_EXHAUSTED"},
|
||||
{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "0.5s"}
|
||||
],
|
||||
"message": "No capacity available for model gemini-3-pro-high on the server"
|
||||
}
|
||||
}`)
|
||||
resp := &http.Response{
|
||||
StatusCode: http.StatusServiceUnavailable,
|
||||
Header: http.Header{},
|
||||
Body: io.NopCloser(bytes.NewReader(respBody)),
|
||||
}
|
||||
|
||||
params := antigravityRetryLoopParams{
|
||||
ctx: context.Background(),
|
||||
prefix: "[test]",
|
||||
account: account,
|
||||
accessToken: "token",
|
||||
action: "generateContent",
|
||||
body: []byte(`{"input":"test"}`),
|
||||
httpUpstream: upstream,
|
||||
accountRepo: repo,
|
||||
handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
|
||||
return nil
|
||||
},
|
||||
}
|
||||
|
||||
availableURLs := []string{"https://ag-1.test"}
|
||||
|
||||
svc := &AntigravityGatewayService{}
|
||||
result := svc.handleSmartRetry(params, resp, respBody, "https://ag-1.test", 0, availableURLs)
|
||||
|
||||
require.NotNil(t, result)
|
||||
require.Equal(t, smartRetryActionBreakWithResp, result.action)
|
||||
require.NotNil(t, result.resp)
|
||||
require.Equal(t, http.StatusOK, result.resp.StatusCode, "should return success after retry")
|
||||
require.Nil(t, result.switchError, "should not switch account on success")
|
||||
require.Empty(t, repo.modelRateLimitCalls, "should not set model rate limit for capacity exhausted")
|
||||
}
|
||||
|
||||
// TestHandleSmartRetry_503_ModelCapacityExhausted_LongDelay_SwitchAccount
|
||||
// 503 MODEL_CAPACITY_EXHAUSTED + retryDelay >= 20s → 等待 20s 后重试 1 次,仍失败则切换账号
|
||||
func TestHandleSmartRetry_503_ModelCapacityExhausted_LongDelay_SwitchAccount(t *testing.T) {
|
||||
// 重试仍然返回容量不足
|
||||
capacityBody := `{
|
||||
"error": {
|
||||
"code": 503,
|
||||
"status": "UNAVAILABLE",
|
||||
"details": [
|
||||
{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro-high"}, "reason": "MODEL_CAPACITY_EXHAUSTED"},
|
||||
{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "30s"}
|
||||
]
|
||||
}
|
||||
}`
|
||||
upstream := &mockSmartRetryUpstream{
|
||||
responses: []*http.Response{
|
||||
{
|
||||
StatusCode: 503,
|
||||
Header: http.Header{},
|
||||
Body: io.NopCloser(strings.NewReader(capacityBody)),
|
||||
},
|
||||
},
|
||||
errors: []error{nil},
|
||||
}
|
||||
|
||||
repo := &stubAntigravityAccountRepo{}
|
||||
account := &Account{
|
||||
ID: 3,
|
||||
Name: "acc-3",
|
||||
Type: AccountTypeOAuth,
|
||||
Platform: PlatformAntigravity,
|
||||
}
|
||||
|
||||
// 503 + MODEL_CAPACITY_EXHAUSTED + 39s >= 20s 阈值
|
||||
// 503 + MODEL_CAPACITY_EXHAUSTED + 39s(上游 retryDelay 应被忽略,使用固定 1s)
|
||||
respBody := []byte(`{
|
||||
"error": {
|
||||
"code": 503,
|
||||
@@ -408,24 +318,28 @@ func TestHandleSmartRetry_503_ModelCapacityExhausted_LongDelay_SwitchAccount(t *
|
||||
}
|
||||
}`)
|
||||
resp := &http.Response{
|
||||
StatusCode: 503,
|
||||
StatusCode: http.StatusServiceUnavailable,
|
||||
Header: http.Header{},
|
||||
Body: io.NopCloser(bytes.NewReader(respBody)),
|
||||
}
|
||||
|
||||
// context 超时短于 20s 等待,验证 context 取消时正确返回
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond)
|
||||
defer cancel()
|
||||
// mock: 第 1 次重试返回 200 成功
|
||||
upstream := &mockSmartRetryUpstream{
|
||||
responses: []*http.Response{
|
||||
{StatusCode: http.StatusOK, Header: http.Header{}, Body: io.NopCloser(strings.NewReader(`{"ok":true}`))},
|
||||
},
|
||||
errors: []error{nil},
|
||||
}
|
||||
|
||||
params := antigravityRetryLoopParams{
|
||||
ctx: ctx,
|
||||
ctx: context.Background(),
|
||||
prefix: "[test]",
|
||||
account: account,
|
||||
accessToken: "token",
|
||||
action: "generateContent",
|
||||
body: []byte(`{"input":"test"}`),
|
||||
httpUpstream: upstream,
|
||||
accountRepo: repo,
|
||||
httpUpstream: upstream,
|
||||
isStickySession: true,
|
||||
handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
|
||||
return nil
|
||||
@@ -439,8 +353,67 @@ func TestHandleSmartRetry_503_ModelCapacityExhausted_LongDelay_SwitchAccount(t *
|
||||
|
||||
require.NotNil(t, result)
|
||||
require.Equal(t, smartRetryActionBreakWithResp, result.action)
|
||||
// context 超时会导致提前返回
|
||||
require.Empty(t, repo.modelRateLimitCalls, "should not set model rate limit for capacity exhausted")
|
||||
require.NotNil(t, result.resp, "should return successful response")
|
||||
require.Equal(t, http.StatusOK, result.resp.StatusCode)
|
||||
require.Nil(t, result.err)
|
||||
require.Nil(t, result.switchError, "MODEL_CAPACITY_EXHAUSTED should not return switchError")
|
||||
|
||||
// 不应设置模型限流
|
||||
require.Empty(t, repo.modelRateLimitCalls, "MODEL_CAPACITY_EXHAUSTED should not set model rate limit")
|
||||
require.Len(t, upstream.calls, 1, "should have made one retry call before success")
|
||||
}
|
||||
|
||||
// TestHandleSmartRetry_503_ModelCapacityExhausted_ContextCancel 测试 MODEL_CAPACITY_EXHAUSTED 上下文取消
|
||||
func TestHandleSmartRetry_503_ModelCapacityExhausted_ContextCancel(t *testing.T) {
|
||||
repo := &stubAntigravityAccountRepo{}
|
||||
account := &Account{
|
||||
ID: 3,
|
||||
Name: "acc-3",
|
||||
Type: AccountTypeOAuth,
|
||||
Platform: PlatformAntigravity,
|
||||
}
|
||||
|
||||
respBody := []byte(`{
|
||||
"error": {
|
||||
"code": 503,
|
||||
"status": "UNAVAILABLE",
|
||||
"details": [
|
||||
{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro-high"}, "reason": "MODEL_CAPACITY_EXHAUSTED"},
|
||||
{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "39s"}
|
||||
]
|
||||
}
|
||||
}`)
|
||||
resp := &http.Response{
|
||||
StatusCode: http.StatusServiceUnavailable,
|
||||
Header: http.Header{},
|
||||
Body: io.NopCloser(bytes.NewReader(respBody)),
|
||||
}
|
||||
|
||||
// 立即取消上下文,验证重试循环能正确退出
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
cancel()
|
||||
|
||||
params := antigravityRetryLoopParams{
|
||||
ctx: ctx,
|
||||
prefix: "[test]",
|
||||
account: account,
|
||||
accessToken: "token",
|
||||
action: "generateContent",
|
||||
body: []byte(`{"input":"test"}`),
|
||||
accountRepo: repo,
|
||||
handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
|
||||
return nil
|
||||
},
|
||||
}
|
||||
|
||||
svc := &AntigravityGatewayService{}
|
||||
result := svc.handleSmartRetry(params, resp, respBody, "https://ag-1.test", 0, []string{"https://ag-1.test"})
|
||||
|
||||
require.NotNil(t, result)
|
||||
require.Equal(t, smartRetryActionBreakWithResp, result.action)
|
||||
require.Error(t, result.err, "should return context error")
|
||||
require.Nil(t, result.switchError, "should not return switchError on context cancel")
|
||||
require.Empty(t, repo.modelRateLimitCalls, "should not set model rate limit on context cancel")
|
||||
}
|
||||
|
||||
// TestHandleSmartRetry_NonAntigravityAccount_ContinuesDefaultLogic 测试非 Antigravity 平台账号走默认逻辑
|
||||
@@ -1216,21 +1189,21 @@ func TestHandleSmartRetry_ShortDelay_NetworkError_StickySession_ClearsSession(t
|
||||
require.Equal(t, "sticky-net-error", cache.deleteCalls[0].sessionHash)
|
||||
}
|
||||
|
||||
// TestHandleSmartRetry_ShortDelay_503_StickySession_FailedRetry_SwitchesAccount
|
||||
// 503 + 短延迟 + 容量不足 + 重试失败 → 切换账号(不设模型限流)
|
||||
func TestHandleSmartRetry_ShortDelay_503_StickySession_FailedRetry_SwitchesAccount(t *testing.T) {
|
||||
// TestHandleSmartRetry_ShortDelay_503_StickySession_FailedRetry_ClearsSession
|
||||
// 429 + 短延迟 + 粘性会话 + 重试失败 → 清除粘性绑定
|
||||
func TestHandleSmartRetry_ShortDelay_503_StickySession_FailedRetry_ClearsSession(t *testing.T) {
|
||||
failRespBody := `{
|
||||
"error": {
|
||||
"code": 503,
|
||||
"status": "UNAVAILABLE",
|
||||
"code": 429,
|
||||
"status": "RESOURCE_EXHAUSTED",
|
||||
"details": [
|
||||
{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro"}, "reason": "MODEL_CAPACITY_EXHAUSTED"},
|
||||
{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro"}, "reason": "RATE_LIMIT_EXCEEDED"},
|
||||
{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "0.5s"}
|
||||
]
|
||||
}
|
||||
}`
|
||||
failResp := &http.Response{
|
||||
StatusCode: http.StatusServiceUnavailable,
|
||||
StatusCode: http.StatusTooManyRequests,
|
||||
Header: http.Header{},
|
||||
Body: io.NopCloser(strings.NewReader(failRespBody)),
|
||||
}
|
||||
@@ -1240,6 +1213,7 @@ func TestHandleSmartRetry_ShortDelay_503_StickySession_FailedRetry_SwitchesAccou
|
||||
}
|
||||
|
||||
repo := &stubAntigravityAccountRepo{}
|
||||
cache := &stubSmartRetryCache{}
|
||||
account := &Account{
|
||||
ID: 16,
|
||||
Name: "acc-16",
|
||||
@@ -1249,16 +1223,16 @@ func TestHandleSmartRetry_ShortDelay_503_StickySession_FailedRetry_SwitchesAccou
|
||||
|
||||
respBody := []byte(`{
|
||||
"error": {
|
||||
"code": 503,
|
||||
"status": "UNAVAILABLE",
|
||||
"code": 429,
|
||||
"status": "RESOURCE_EXHAUSTED",
|
||||
"details": [
|
||||
{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro"}, "reason": "MODEL_CAPACITY_EXHAUSTED"},
|
||||
{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro"}, "reason": "RATE_LIMIT_EXCEEDED"},
|
||||
{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "0.5s"}
|
||||
]
|
||||
}
|
||||
}`)
|
||||
resp := &http.Response{
|
||||
StatusCode: http.StatusServiceUnavailable,
|
||||
StatusCode: http.StatusTooManyRequests,
|
||||
Header: http.Header{},
|
||||
Body: io.NopCloser(bytes.NewReader(respBody)),
|
||||
}
|
||||
@@ -1282,15 +1256,21 @@ func TestHandleSmartRetry_ShortDelay_503_StickySession_FailedRetry_SwitchesAccou
|
||||
|
||||
availableURLs := []string{"https://ag-1.test"}
|
||||
|
||||
svc := &AntigravityGatewayService{}
|
||||
svc := &AntigravityGatewayService{cache: cache}
|
||||
result := svc.handleSmartRetry(params, resp, respBody, "https://ag-1.test", 0, availableURLs)
|
||||
|
||||
require.NotNil(t, result)
|
||||
require.NotNil(t, result.switchError, "should switch account after capacity retry exhausted")
|
||||
require.NotNil(t, result.switchError)
|
||||
require.True(t, result.switchError.IsStickySession)
|
||||
|
||||
// MODEL_CAPACITY_EXHAUSTED 不应设置模型限流
|
||||
require.Empty(t, repo.modelRateLimitCalls, "should not set model rate limit for capacity exhausted")
|
||||
// 验证粘性绑定被清除
|
||||
require.Len(t, cache.deleteCalls, 1)
|
||||
require.Equal(t, int64(77), cache.deleteCalls[0].groupID)
|
||||
require.Equal(t, "sticky-503-short", cache.deleteCalls[0].sessionHash)
|
||||
|
||||
// 验证模型限流已设置
|
||||
require.Len(t, repo.modelRateLimitCalls, 1)
|
||||
require.Equal(t, "gemini-3-pro", repo.modelRateLimitCalls[0].modelKey)
|
||||
}
|
||||
|
||||
// TestAntigravityRetryLoop_SmartRetryFailed_StickySession_SwitchErrorPropagates
|
||||
|
||||
@@ -61,6 +61,11 @@ func applyErrorPassthroughRule(
|
||||
errMsg = *rule.CustomMessage
|
||||
}
|
||||
|
||||
// 命中 skip_monitoring 时在 context 中标记,供 ops_error_logger 跳过记录。
|
||||
if rule.SkipMonitoring {
|
||||
c.Set(OpsSkipPassthroughKey, true)
|
||||
}
|
||||
|
||||
// 与现有 failover 场景保持一致:命中规则时统一返回 upstream_error。
|
||||
errType = "upstream_error"
|
||||
return status, errType, errMsg, true
|
||||
|
||||
@@ -194,6 +194,63 @@ func TestGeminiWriteGeminiMappedError_AppliesRuleFor422(t *testing.T) {
|
||||
assert.Equal(t, "Gemini上游失败", errField["message"])
|
||||
}
|
||||
|
||||
func TestApplyErrorPassthroughRule_SkipMonitoringSetsContextKey(t *testing.T) {
|
||||
gin.SetMode(gin.TestMode)
|
||||
rec := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(rec)
|
||||
|
||||
rule := newNonFailoverPassthroughRule(http.StatusBadRequest, "prompt is too long", http.StatusBadRequest, "上下文超限")
|
||||
rule.SkipMonitoring = true
|
||||
|
||||
ruleSvc := &ErrorPassthroughService{}
|
||||
ruleSvc.setLocalCache([]*model.ErrorPassthroughRule{rule})
|
||||
BindErrorPassthroughService(c, ruleSvc)
|
||||
|
||||
_, _, _, matched := applyErrorPassthroughRule(
|
||||
c,
|
||||
PlatformAnthropic,
|
||||
http.StatusBadRequest,
|
||||
[]byte(`{"error":{"message":"prompt is too long"}}`),
|
||||
http.StatusBadGateway,
|
||||
"upstream_error",
|
||||
"Upstream request failed",
|
||||
)
|
||||
|
||||
assert.True(t, matched)
|
||||
v, exists := c.Get(OpsSkipPassthroughKey)
|
||||
assert.True(t, exists, "OpsSkipPassthroughKey should be set when skip_monitoring=true")
|
||||
boolVal, ok := v.(bool)
|
||||
assert.True(t, ok, "value should be a bool")
|
||||
assert.True(t, boolVal)
|
||||
}
|
||||
|
||||
func TestApplyErrorPassthroughRule_NoSkipMonitoringDoesNotSetContextKey(t *testing.T) {
|
||||
gin.SetMode(gin.TestMode)
|
||||
rec := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(rec)
|
||||
|
||||
rule := newNonFailoverPassthroughRule(http.StatusBadRequest, "prompt is too long", http.StatusBadRequest, "上下文超限")
|
||||
rule.SkipMonitoring = false
|
||||
|
||||
ruleSvc := &ErrorPassthroughService{}
|
||||
ruleSvc.setLocalCache([]*model.ErrorPassthroughRule{rule})
|
||||
BindErrorPassthroughService(c, ruleSvc)
|
||||
|
||||
_, _, _, matched := applyErrorPassthroughRule(
|
||||
c,
|
||||
PlatformAnthropic,
|
||||
http.StatusBadRequest,
|
||||
[]byte(`{"error":{"message":"prompt is too long"}}`),
|
||||
http.StatusBadGateway,
|
||||
"upstream_error",
|
||||
"Upstream request failed",
|
||||
)
|
||||
|
||||
assert.True(t, matched)
|
||||
_, exists := c.Get(OpsSkipPassthroughKey)
|
||||
assert.False(t, exists, "OpsSkipPassthroughKey should NOT be set when skip_monitoring=false")
|
||||
}
|
||||
|
||||
func newNonFailoverPassthroughRule(statusCode int, keyword string, respCode int, customMessage string) *model.ErrorPassthroughRule {
|
||||
return &model.ErrorPassthroughRule{
|
||||
ID: 1,
|
||||
|
||||
@@ -45,10 +45,20 @@ type ErrorPassthroughService struct {
|
||||
cache ErrorPassthroughCache
|
||||
|
||||
// 本地内存缓存,用于快速匹配
|
||||
localCache []*model.ErrorPassthroughRule
|
||||
localCache []*cachedPassthroughRule
|
||||
localCacheMu sync.RWMutex
|
||||
}
|
||||
|
||||
// cachedPassthroughRule 预计算的规则缓存,避免运行时重复 ToLower
|
||||
type cachedPassthroughRule struct {
|
||||
*model.ErrorPassthroughRule
|
||||
lowerKeywords []string // 预计算的小写关键词
|
||||
lowerPlatforms []string // 预计算的小写平台
|
||||
errorCodeSet map[int]struct{} // 预计算的 error code set
|
||||
}
|
||||
|
||||
const maxBodyMatchLen = 8 << 10 // 8KB,错误信息不会在 8KB 之后才出现
|
||||
|
||||
// NewErrorPassthroughService 创建错误透传规则服务
|
||||
func NewErrorPassthroughService(
|
||||
repo ErrorPassthroughRepository,
|
||||
@@ -150,17 +160,19 @@ func (s *ErrorPassthroughService) MatchRule(platform string, statusCode int, bod
|
||||
return nil
|
||||
}
|
||||
|
||||
bodyStr := strings.ToLower(string(body))
|
||||
lowerPlatform := strings.ToLower(platform)
|
||||
var bodyLower string // 延迟初始化,只在需要关键词匹配时计算
|
||||
var bodyLowerDone bool
|
||||
|
||||
for _, rule := range rules {
|
||||
if !rule.Enabled {
|
||||
continue
|
||||
}
|
||||
if !s.platformMatches(rule, platform) {
|
||||
if !s.platformMatchesCached(rule, lowerPlatform) {
|
||||
continue
|
||||
}
|
||||
if s.ruleMatches(rule, statusCode, bodyStr) {
|
||||
return rule
|
||||
if s.ruleMatchesOptimized(rule, statusCode, body, &bodyLower, &bodyLowerDone) {
|
||||
return rule.ErrorPassthroughRule
|
||||
}
|
||||
}
|
||||
|
||||
@@ -168,7 +180,7 @@ func (s *ErrorPassthroughService) MatchRule(platform string, statusCode int, bod
|
||||
}
|
||||
|
||||
// getCachedRules 获取缓存的规则列表(按优先级排序)
|
||||
func (s *ErrorPassthroughService) getCachedRules() []*model.ErrorPassthroughRule {
|
||||
func (s *ErrorPassthroughService) getCachedRules() []*cachedPassthroughRule {
|
||||
s.localCacheMu.RLock()
|
||||
rules := s.localCache
|
||||
s.localCacheMu.RUnlock()
|
||||
@@ -223,17 +235,39 @@ func (s *ErrorPassthroughService) reloadRulesFromDB(ctx context.Context) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// setLocalCache 设置本地缓存
|
||||
// setLocalCache 设置本地缓存,预计算小写值和 set 以避免运行时重复计算
|
||||
func (s *ErrorPassthroughService) setLocalCache(rules []*model.ErrorPassthroughRule) {
|
||||
cached := make([]*cachedPassthroughRule, len(rules))
|
||||
for i, r := range rules {
|
||||
cr := &cachedPassthroughRule{ErrorPassthroughRule: r}
|
||||
if len(r.Keywords) > 0 {
|
||||
cr.lowerKeywords = make([]string, len(r.Keywords))
|
||||
for j, kw := range r.Keywords {
|
||||
cr.lowerKeywords[j] = strings.ToLower(kw)
|
||||
}
|
||||
}
|
||||
if len(r.Platforms) > 0 {
|
||||
cr.lowerPlatforms = make([]string, len(r.Platforms))
|
||||
for j, p := range r.Platforms {
|
||||
cr.lowerPlatforms[j] = strings.ToLower(p)
|
||||
}
|
||||
}
|
||||
if len(r.ErrorCodes) > 0 {
|
||||
cr.errorCodeSet = make(map[int]struct{}, len(r.ErrorCodes))
|
||||
for _, code := range r.ErrorCodes {
|
||||
cr.errorCodeSet[code] = struct{}{}
|
||||
}
|
||||
}
|
||||
cached[i] = cr
|
||||
}
|
||||
|
||||
// 按优先级排序
|
||||
sorted := make([]*model.ErrorPassthroughRule, len(rules))
|
||||
copy(sorted, rules)
|
||||
sort.Slice(sorted, func(i, j int) bool {
|
||||
return sorted[i].Priority < sorted[j].Priority
|
||||
sort.Slice(cached, func(i, j int) bool {
|
||||
return cached[i].Priority < cached[j].Priority
|
||||
})
|
||||
|
||||
s.localCacheMu.Lock()
|
||||
s.localCache = sorted
|
||||
s.localCache = cached
|
||||
s.localCacheMu.Unlock()
|
||||
}
|
||||
|
||||
@@ -273,62 +307,79 @@ func (s *ErrorPassthroughService) invalidateAndNotify(ctx context.Context) {
|
||||
}
|
||||
}
|
||||
|
||||
// platformMatches 检查平台是否匹配
|
||||
func (s *ErrorPassthroughService) platformMatches(rule *model.ErrorPassthroughRule, platform string) bool {
|
||||
// 如果没有配置平台限制,则匹配所有平台
|
||||
if len(rule.Platforms) == 0 {
|
||||
// ensureBodyLower 延迟初始化 body 的小写版本,只做一次转换,限制 8KB
|
||||
func ensureBodyLower(body []byte, bodyLower *string, done *bool) string {
|
||||
if *done {
|
||||
return *bodyLower
|
||||
}
|
||||
b := body
|
||||
if len(b) > maxBodyMatchLen {
|
||||
b = b[:maxBodyMatchLen]
|
||||
}
|
||||
*bodyLower = strings.ToLower(string(b))
|
||||
*done = true
|
||||
return *bodyLower
|
||||
}
|
||||
|
||||
// platformMatchesCached 使用预计算的小写平台检查是否匹配
|
||||
func (s *ErrorPassthroughService) platformMatchesCached(rule *cachedPassthroughRule, lowerPlatform string) bool {
|
||||
if len(rule.lowerPlatforms) == 0 {
|
||||
return true
|
||||
}
|
||||
|
||||
platform = strings.ToLower(platform)
|
||||
for _, p := range rule.Platforms {
|
||||
if strings.ToLower(p) == platform {
|
||||
for _, p := range rule.lowerPlatforms {
|
||||
if p == lowerPlatform {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// ruleMatches 检查规则是否匹配
|
||||
func (s *ErrorPassthroughService) ruleMatches(rule *model.ErrorPassthroughRule, statusCode int, bodyLower string) bool {
|
||||
hasErrorCodes := len(rule.ErrorCodes) > 0
|
||||
hasKeywords := len(rule.Keywords) > 0
|
||||
// ruleMatchesOptimized 优化的规则匹配,支持短路和延迟 body 转换
|
||||
func (s *ErrorPassthroughService) ruleMatchesOptimized(rule *cachedPassthroughRule, statusCode int, body []byte, bodyLower *string, bodyLowerDone *bool) bool {
|
||||
hasErrorCodes := len(rule.errorCodeSet) > 0
|
||||
hasKeywords := len(rule.lowerKeywords) > 0
|
||||
|
||||
// 如果没有配置任何条件,不匹配
|
||||
if !hasErrorCodes && !hasKeywords {
|
||||
return false
|
||||
}
|
||||
|
||||
codeMatch := !hasErrorCodes || s.containsInt(rule.ErrorCodes, statusCode)
|
||||
keywordMatch := !hasKeywords || s.containsAnyKeyword(bodyLower, rule.Keywords)
|
||||
codeMatch := !hasErrorCodes || s.containsIntSet(rule.errorCodeSet, statusCode)
|
||||
|
||||
if rule.MatchMode == model.MatchModeAll {
|
||||
// "all" 模式:所有配置的条件都必须满足
|
||||
return codeMatch && keywordMatch
|
||||
// "all" 模式:所有配置的条件都必须满足,短路
|
||||
if hasErrorCodes && !codeMatch {
|
||||
return false
|
||||
}
|
||||
if hasKeywords {
|
||||
return s.containsAnyKeywordCached(ensureBodyLower(body, bodyLower, bodyLowerDone), rule.lowerKeywords)
|
||||
}
|
||||
return codeMatch
|
||||
}
|
||||
|
||||
// "any" 模式:任一条件满足即可
|
||||
// "any" 模式:任一条件满足即可,短路
|
||||
if hasErrorCodes && hasKeywords {
|
||||
return codeMatch || keywordMatch
|
||||
if codeMatch {
|
||||
return true
|
||||
}
|
||||
return s.containsAnyKeywordCached(ensureBodyLower(body, bodyLower, bodyLowerDone), rule.lowerKeywords)
|
||||
}
|
||||
return codeMatch && keywordMatch
|
||||
// 只配置了一种条件
|
||||
if hasKeywords {
|
||||
return s.containsAnyKeywordCached(ensureBodyLower(body, bodyLower, bodyLowerDone), rule.lowerKeywords)
|
||||
}
|
||||
return codeMatch
|
||||
}
|
||||
|
||||
// containsInt 检查切片是否包含指定整数
|
||||
func (s *ErrorPassthroughService) containsInt(slice []int, val int) bool {
|
||||
for _, v := range slice {
|
||||
if v == val {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// containsAnyKeyword 检查字符串是否包含任一关键词(不区分大小写)
|
||||
func (s *ErrorPassthroughService) containsAnyKeyword(bodyLower string, keywords []string) bool {
|
||||
for _, kw := range keywords {
|
||||
if strings.Contains(bodyLower, strings.ToLower(kw)) {
|
||||
// containsIntSet 使用 map 查找替代线性扫描
|
||||
func (s *ErrorPassthroughService) containsIntSet(set map[int]struct{}, val int) bool {
|
||||
_, ok := set[val]
|
||||
return ok
|
||||
}
|
||||
|
||||
// containsAnyKeywordCached 使用预计算的小写关键词检查匹配
|
||||
func (s *ErrorPassthroughService) containsAnyKeywordCached(bodyLower string, lowerKeywords []string) bool {
|
||||
for _, kw := range lowerKeywords {
|
||||
if strings.Contains(bodyLower, kw) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
@@ -145,32 +145,58 @@ func newTestService(rules []*model.ErrorPassthroughRule) *ErrorPassthroughServic
|
||||
return svc
|
||||
}
|
||||
|
||||
// newCachedRuleForTest 从 model.ErrorPassthroughRule 创建 cachedPassthroughRule(测试用)
|
||||
func newCachedRuleForTest(rule *model.ErrorPassthroughRule) *cachedPassthroughRule {
|
||||
cr := &cachedPassthroughRule{ErrorPassthroughRule: rule}
|
||||
if len(rule.Keywords) > 0 {
|
||||
cr.lowerKeywords = make([]string, len(rule.Keywords))
|
||||
for j, kw := range rule.Keywords {
|
||||
cr.lowerKeywords[j] = strings.ToLower(kw)
|
||||
}
|
||||
}
|
||||
if len(rule.Platforms) > 0 {
|
||||
cr.lowerPlatforms = make([]string, len(rule.Platforms))
|
||||
for j, p := range rule.Platforms {
|
||||
cr.lowerPlatforms[j] = strings.ToLower(p)
|
||||
}
|
||||
}
|
||||
if len(rule.ErrorCodes) > 0 {
|
||||
cr.errorCodeSet = make(map[int]struct{}, len(rule.ErrorCodes))
|
||||
for _, code := range rule.ErrorCodes {
|
||||
cr.errorCodeSet[code] = struct{}{}
|
||||
}
|
||||
}
|
||||
return cr
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// 测试 ruleMatches 核心匹配逻辑
|
||||
// 测试 ruleMatchesOptimized 核心匹配逻辑
|
||||
// =============================================================================
|
||||
|
||||
func TestRuleMatches_NoConditions(t *testing.T) {
|
||||
// 没有配置任何条件时,不应该匹配
|
||||
svc := newTestService(nil)
|
||||
rule := &model.ErrorPassthroughRule{
|
||||
rule := newCachedRuleForTest(&model.ErrorPassthroughRule{
|
||||
Enabled: true,
|
||||
ErrorCodes: []int{},
|
||||
Keywords: []string{},
|
||||
MatchMode: model.MatchModeAny,
|
||||
}
|
||||
})
|
||||
|
||||
assert.False(t, svc.ruleMatches(rule, 422, "some error message"),
|
||||
var bodyLower string
|
||||
var bodyLowerDone bool
|
||||
assert.False(t, svc.ruleMatchesOptimized(rule, 422, []byte("some error message"), &bodyLower, &bodyLowerDone),
|
||||
"没有配置条件时不应该匹配")
|
||||
}
|
||||
|
||||
func TestRuleMatches_OnlyErrorCodes_AnyMode(t *testing.T) {
|
||||
svc := newTestService(nil)
|
||||
rule := &model.ErrorPassthroughRule{
|
||||
rule := newCachedRuleForTest(&model.ErrorPassthroughRule{
|
||||
Enabled: true,
|
||||
ErrorCodes: []int{422, 400},
|
||||
Keywords: []string{},
|
||||
MatchMode: model.MatchModeAny,
|
||||
}
|
||||
})
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
@@ -186,7 +212,9 @@ func TestRuleMatches_OnlyErrorCodes_AnyMode(t *testing.T) {
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := svc.ruleMatches(rule, tt.statusCode, tt.body)
|
||||
var bodyLower string
|
||||
var bodyLowerDone bool
|
||||
result := svc.ruleMatchesOptimized(rule, tt.statusCode, []byte(tt.body), &bodyLower, &bodyLowerDone)
|
||||
assert.Equal(t, tt.expected, result)
|
||||
})
|
||||
}
|
||||
@@ -194,12 +222,12 @@ func TestRuleMatches_OnlyErrorCodes_AnyMode(t *testing.T) {
|
||||
|
||||
func TestRuleMatches_OnlyKeywords_AnyMode(t *testing.T) {
|
||||
svc := newTestService(nil)
|
||||
rule := &model.ErrorPassthroughRule{
|
||||
rule := newCachedRuleForTest(&model.ErrorPassthroughRule{
|
||||
Enabled: true,
|
||||
ErrorCodes: []int{},
|
||||
Keywords: []string{"context limit", "model not supported"},
|
||||
MatchMode: model.MatchModeAny,
|
||||
}
|
||||
})
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
@@ -210,16 +238,14 @@ func TestRuleMatches_OnlyKeywords_AnyMode(t *testing.T) {
|
||||
{"关键词匹配 context limit", 500, "error: context limit reached", true},
|
||||
{"关键词匹配 model not supported", 400, "the model not supported here", true},
|
||||
{"关键词不匹配", 422, "some other error", false},
|
||||
// 注意:ruleMatches 接收的 body 参数应该是已经转换为小写的
|
||||
// 实际使用时,MatchRule 会先将 body 转换为小写再传给 ruleMatches
|
||||
{"关键词大小写 - 输入已小写", 500, "context limit exceeded", true},
|
||||
{"关键词大小写 - 自动转换", 500, "Context Limit exceeded", true},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
// 模拟 MatchRule 的行为:先转换为小写
|
||||
bodyLower := strings.ToLower(tt.body)
|
||||
result := svc.ruleMatches(rule, tt.statusCode, bodyLower)
|
||||
var bodyLower string
|
||||
var bodyLowerDone bool
|
||||
result := svc.ruleMatchesOptimized(rule, tt.statusCode, []byte(tt.body), &bodyLower, &bodyLowerDone)
|
||||
assert.Equal(t, tt.expected, result)
|
||||
})
|
||||
}
|
||||
@@ -228,12 +254,12 @@ func TestRuleMatches_OnlyKeywords_AnyMode(t *testing.T) {
|
||||
func TestRuleMatches_BothConditions_AnyMode(t *testing.T) {
|
||||
// any 模式:错误码 OR 关键词
|
||||
svc := newTestService(nil)
|
||||
rule := &model.ErrorPassthroughRule{
|
||||
rule := newCachedRuleForTest(&model.ErrorPassthroughRule{
|
||||
Enabled: true,
|
||||
ErrorCodes: []int{422, 400},
|
||||
Keywords: []string{"context limit"},
|
||||
MatchMode: model.MatchModeAny,
|
||||
}
|
||||
})
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
@@ -274,7 +300,9 @@ func TestRuleMatches_BothConditions_AnyMode(t *testing.T) {
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := svc.ruleMatches(rule, tt.statusCode, tt.body)
|
||||
var bodyLower string
|
||||
var bodyLowerDone bool
|
||||
result := svc.ruleMatchesOptimized(rule, tt.statusCode, []byte(tt.body), &bodyLower, &bodyLowerDone)
|
||||
assert.Equal(t, tt.expected, result, tt.reason)
|
||||
})
|
||||
}
|
||||
@@ -283,12 +311,12 @@ func TestRuleMatches_BothConditions_AnyMode(t *testing.T) {
|
||||
func TestRuleMatches_BothConditions_AllMode(t *testing.T) {
|
||||
// all 模式:错误码 AND 关键词
|
||||
svc := newTestService(nil)
|
||||
rule := &model.ErrorPassthroughRule{
|
||||
rule := newCachedRuleForTest(&model.ErrorPassthroughRule{
|
||||
Enabled: true,
|
||||
ErrorCodes: []int{422, 400},
|
||||
Keywords: []string{"context limit"},
|
||||
MatchMode: model.MatchModeAll,
|
||||
}
|
||||
})
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
@@ -329,14 +357,16 @@ func TestRuleMatches_BothConditions_AllMode(t *testing.T) {
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := svc.ruleMatches(rule, tt.statusCode, tt.body)
|
||||
var bodyLower string
|
||||
var bodyLowerDone bool
|
||||
result := svc.ruleMatchesOptimized(rule, tt.statusCode, []byte(tt.body), &bodyLower, &bodyLowerDone)
|
||||
assert.Equal(t, tt.expected, result, tt.reason)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// 测试 platformMatches 平台匹配逻辑
|
||||
// 测试 platformMatchesCached 平台匹配逻辑
|
||||
// =============================================================================
|
||||
|
||||
func TestPlatformMatches(t *testing.T) {
|
||||
@@ -394,10 +424,10 @@ func TestPlatformMatches(t *testing.T) {
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
rule := &model.ErrorPassthroughRule{
|
||||
rule := newCachedRuleForTest(&model.ErrorPassthroughRule{
|
||||
Platforms: tt.rulePlatforms,
|
||||
}
|
||||
result := svc.platformMatches(rule, tt.requestPlatform)
|
||||
})
|
||||
result := svc.platformMatchesCached(rule, strings.ToLower(tt.requestPlatform))
|
||||
assert.Equal(t, tt.expected, result)
|
||||
})
|
||||
}
|
||||
|
||||
@@ -20,6 +20,10 @@ const (
|
||||
// retry the specific upstream attempt (not just the client request).
|
||||
// This value is sanitized+trimmed before being persisted.
|
||||
OpsUpstreamRequestBodyKey = "ops_upstream_request_body"
|
||||
|
||||
// OpsSkipPassthroughKey 由 applyErrorPassthroughRule 在命中 skip_monitoring=true 的规则时设置。
|
||||
// ops_error_logger 中间件检查此 key,为 true 时跳过错误记录。
|
||||
OpsSkipPassthroughKey = "ops_skip_passthrough"
|
||||
)
|
||||
|
||||
func setOpsUpstreamError(c *gin.Context, upstreamStatusCode int, upstreamMessage, upstreamDetail string) {
|
||||
@@ -103,6 +107,37 @@ func appendOpsUpstreamError(c *gin.Context, ev OpsUpstreamErrorEvent) {
|
||||
evCopy := ev
|
||||
existing = append(existing, &evCopy)
|
||||
c.Set(OpsUpstreamErrorsKey, existing)
|
||||
|
||||
checkSkipMonitoringForUpstreamEvent(c, &evCopy)
|
||||
}
|
||||
|
||||
// checkSkipMonitoringForUpstreamEvent checks whether the upstream error event
|
||||
// matches a passthrough rule with skip_monitoring=true and, if so, sets the
|
||||
// OpsSkipPassthroughKey on the context. This ensures intermediate retry /
|
||||
// failover errors (which never go through the final applyErrorPassthroughRule
|
||||
// path) can still suppress ops_error_logs recording.
|
||||
func checkSkipMonitoringForUpstreamEvent(c *gin.Context, ev *OpsUpstreamErrorEvent) {
|
||||
if ev.UpstreamStatusCode == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
svc := getBoundErrorPassthroughService(c)
|
||||
if svc == nil {
|
||||
return
|
||||
}
|
||||
|
||||
// Use the best available body representation for keyword matching.
|
||||
// Even when body is empty, MatchRule can still match rules that only
|
||||
// specify ErrorCodes (no Keywords), so we always call it.
|
||||
body := ev.Detail
|
||||
if body == "" {
|
||||
body = ev.Message
|
||||
}
|
||||
|
||||
rule := svc.MatchRule(ev.Platform, ev.UpstreamStatusCode, []byte(body))
|
||||
if rule != nil && rule.SkipMonitoring {
|
||||
c.Set(OpsSkipPassthroughKey, true)
|
||||
}
|
||||
}
|
||||
|
||||
func marshalOpsUpstreamErrors(events []*OpsUpstreamErrorEvent) *string {
|
||||
|
||||
@@ -0,0 +1,4 @@
|
||||
-- Add skip_monitoring field to error_passthrough_rules table
|
||||
-- When true, errors matching this rule will not be recorded in ops_error_logs
|
||||
ALTER TABLE error_passthrough_rules
|
||||
ADD COLUMN IF NOT EXISTS skip_monitoring BOOLEAN NOT NULL DEFAULT false;
|
||||
@@ -21,6 +21,7 @@ export interface ErrorPassthroughRule {
|
||||
response_code: number | null
|
||||
passthrough_body: boolean
|
||||
custom_message: string | null
|
||||
skip_monitoring: boolean
|
||||
description: string | null
|
||||
created_at: string
|
||||
updated_at: string
|
||||
@@ -41,6 +42,7 @@ export interface CreateRuleRequest {
|
||||
response_code?: number | null
|
||||
passthrough_body?: boolean
|
||||
custom_message?: string | null
|
||||
skip_monitoring?: boolean
|
||||
description?: string | null
|
||||
}
|
||||
|
||||
@@ -59,6 +61,7 @@ export interface UpdateRuleRequest {
|
||||
response_code?: number | null
|
||||
passthrough_body?: boolean
|
||||
custom_message?: string | null
|
||||
skip_monitoring?: boolean
|
||||
description?: string | null
|
||||
}
|
||||
|
||||
|
||||
@@ -148,6 +148,16 @@
|
||||
{{ rule.passthrough_body ? t('admin.errorPassthrough.passthrough') : t('admin.errorPassthrough.custom') }}
|
||||
</span>
|
||||
</div>
|
||||
<div v-if="rule.skip_monitoring" class="flex items-center gap-1">
|
||||
<Icon
|
||||
name="checkCircle"
|
||||
size="xs"
|
||||
class="text-yellow-500"
|
||||
/>
|
||||
<span class="text-gray-600 dark:text-gray-400">
|
||||
{{ t('admin.errorPassthrough.skipMonitoring') }}
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
</td>
|
||||
<td class="px-3 py-2">
|
||||
@@ -366,6 +376,19 @@
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Skip Monitoring -->
|
||||
<div class="flex items-center gap-1.5">
|
||||
<input
|
||||
type="checkbox"
|
||||
v-model="form.skip_monitoring"
|
||||
class="h-3.5 w-3.5 rounded border-gray-300 text-yellow-600 focus:ring-yellow-500"
|
||||
/>
|
||||
<span class="text-xs font-medium text-gray-700 dark:text-gray-300">
|
||||
{{ t('admin.errorPassthrough.form.skipMonitoring') }}
|
||||
</span>
|
||||
</div>
|
||||
<p class="input-hint text-xs -mt-3">{{ t('admin.errorPassthrough.form.skipMonitoringHint') }}</p>
|
||||
|
||||
<!-- Enabled -->
|
||||
<div class="flex items-center gap-1.5">
|
||||
<input
|
||||
@@ -453,6 +476,7 @@ const form = reactive({
|
||||
response_code: null as number | null,
|
||||
passthrough_body: true,
|
||||
custom_message: null as string | null,
|
||||
skip_monitoring: false,
|
||||
description: null as string | null
|
||||
})
|
||||
|
||||
@@ -497,6 +521,7 @@ const resetForm = () => {
|
||||
form.response_code = null
|
||||
form.passthrough_body = true
|
||||
form.custom_message = null
|
||||
form.skip_monitoring = false
|
||||
form.description = null
|
||||
errorCodesInput.value = ''
|
||||
keywordsInput.value = ''
|
||||
@@ -520,6 +545,7 @@ const handleEdit = (rule: ErrorPassthroughRule) => {
|
||||
form.response_code = rule.response_code
|
||||
form.passthrough_body = rule.passthrough_body
|
||||
form.custom_message = rule.custom_message
|
||||
form.skip_monitoring = rule.skip_monitoring
|
||||
form.description = rule.description
|
||||
errorCodesInput.value = rule.error_codes.join(', ')
|
||||
keywordsInput.value = rule.keywords.join('\n')
|
||||
@@ -575,6 +601,7 @@ const handleSubmit = async () => {
|
||||
response_code: form.passthrough_code ? null : form.response_code,
|
||||
passthrough_body: form.passthrough_body,
|
||||
custom_message: form.passthrough_body ? null : form.custom_message,
|
||||
skip_monitoring: form.skip_monitoring,
|
||||
description: form.description?.trim() || null
|
||||
}
|
||||
|
||||
|
||||
@@ -3353,6 +3353,7 @@ export default {
|
||||
custom: 'Custom',
|
||||
code: 'Code',
|
||||
body: 'Body',
|
||||
skipMonitoring: 'Skip Monitoring',
|
||||
|
||||
// Columns
|
||||
columns: {
|
||||
@@ -3397,6 +3398,8 @@ export default {
|
||||
passthroughBody: 'Passthrough upstream error message',
|
||||
customMessage: 'Custom error message',
|
||||
customMessagePlaceholder: 'Error message to return to client...',
|
||||
skipMonitoring: 'Skip monitoring',
|
||||
skipMonitoringHint: 'When enabled, errors matching this rule will not be recorded in ops monitoring',
|
||||
enabled: 'Enable this rule'
|
||||
},
|
||||
|
||||
|
||||
@@ -3527,6 +3527,7 @@ export default {
|
||||
custom: '自定义',
|
||||
code: '状态码',
|
||||
body: '消息体',
|
||||
skipMonitoring: '跳过监控',
|
||||
|
||||
// Columns
|
||||
columns: {
|
||||
@@ -3571,6 +3572,8 @@ export default {
|
||||
passthroughBody: '透传上游错误信息',
|
||||
customMessage: '自定义错误信息',
|
||||
customMessagePlaceholder: '返回给客户端的错误信息...',
|
||||
skipMonitoring: '跳过运维监控记录',
|
||||
skipMonitoringHint: '开启后,匹配此规则的错误不会被记录到运维监控中',
|
||||
enabled: '启用此规则'
|
||||
},
|
||||
|
||||
|
||||
Reference in New Issue
Block a user