📝 Add docstrings to feat/accurate-user-search

Docstrings generation was requested by @HenryXiaoYang. * https://github.com/QuantumNous/new-api/pull/2535#issuecomment-3693274850 The following files were modified: * `controller/user.go` * `model/user.go`
feat: support first bind update password (#2520 )
2026-03-30 18:24:01 +00:00 · 2025-12-26 19:31:52 +00:00 · 2025-12-26 13:59:56 +08:00 · 2025-12-26 13:58:44 +08:00 · 2025-12-26 13:57:56 +08:00 · 2025-12-26 13:56:30 +08:00
223 changed files with 27065 additions and 2680 deletions
--- a/.env.example
+++ b/.env.example
@@ -9,6 +9,14 @@
 # ENABLE_PPROF=true
 # 启用调试模式
 # DEBUG=true
+# Pyroscope 配置
+# PYROSCOPE_URL=http://localhost:4040
+# PYROSCOPE_APP_NAME=new-api
+# PYROSCOPE_BASIC_AUTH_USER=your-user
+# PYROSCOPE_BASIC_AUTH_PASSWORD=your-password
+# PYROSCOPE_MUTEX_RATE=5
+# PYROSCOPE_BLOCK_RATE=5
+# HOSTNAME=your-hostname

 # 数据库相关配置
 # 数据库连接字符串
@@ -63,10 +71,13 @@
 # 是否统计图片token
 # GET_MEDIA_TOKEN=true
 # 是否在非流（stream=false）情况下统计图片token
-# GET_MEDIA_TOKEN_NOT_STREAM=true
+# GET_MEDIA_TOKEN_NOT_STREAM=false
 # 设置 Dify 渠道是否输出工作流和节点信息到客户端
 # DIFY_DEBUG=true

+# LinuxDo相关配置
+LINUX_DO_TOKEN_ENDPOINT=https://connect.linux.do/oauth2/token
+LINUX_DO_USER_ENDPOINT=https://connect.linux.do/api/user

 # 节点类型
 # 如果是主节点则为master
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -22,6 +22,10 @@ jobs:
        uses: actions/checkout@v3
        with:
          fetch-depth: 0
+      - name: Determine Version
+        run: |
+          VERSION=$(git describe --tags)
+          echo "VERSION=$VERSION" >> $GITHUB_ENV
      - uses: oven-sh/setup-bun@v2
        with:
          bun-version: latest
@@ -31,7 +35,7 @@ jobs:
        run: |
          cd web
          bun install
-          DISABLE_ESLINT_PLUGIN='true' VITE_REACT_APP_VERSION=$(git describe --tags) bun run build
+          DISABLE_ESLINT_PLUGIN='true' VITE_REACT_APP_VERSION=$VERSION bun run build
          cd ..
      - name: Set up Go
        uses: actions/setup-go@v3
@@ -40,13 +44,11 @@ jobs:
      - name: Build Backend (amd64)
        run: |
          go mod download
-          VERSION=$(git describe --tags)
          go build -ldflags "-s -w -X 'new-api/common.Version=$VERSION' -extldflags '-static'" -o new-api-$VERSION
      - name: Build Backend (arm64)
        run: |
          sudo apt-get update
          DEBIAN_FRONTEND=noninteractive sudo apt-get install -y gcc-aarch64-linux-gnu
-          VERSION=$(git describe --tags)
          CC=aarch64-linux-gnu-gcc CGO_ENABLED=1 GOOS=linux GOARCH=arm64 go build -ldflags "-s -w -X 'new-api/common.Version=$VERSION' -extldflags '-static'" -o new-api-arm64-$VERSION
      - name: Release
        uses: softprops/action-gh-release@v2
@@ -65,6 +67,10 @@ jobs:
        uses: actions/checkout@v3
        with:
          fetch-depth: 0
+      - name: Determine Version
+        run: |
+          VERSION=$(git describe --tags)
+          echo "VERSION=$VERSION" >> $GITHUB_ENV
      - uses: oven-sh/setup-bun@v2
        with:
          bun-version: latest
@@ -75,7 +81,7 @@ jobs:
        run: |
          cd web
          bun install
-          DISABLE_ESLINT_PLUGIN='true' VITE_REACT_APP_VERSION=$(git describe --tags) bun run build
+          DISABLE_ESLINT_PLUGIN='true' VITE_REACT_APP_VERSION=$VERSION bun run build
          cd ..
      - name: Set up Go
        uses: actions/setup-go@v3
@@ -84,7 +90,6 @@ jobs:
      - name: Build Backend
        run: |
          go mod download
-          VERSION=$(git describe --tags)
          go build -ldflags "-X 'new-api/common.Version=$VERSION'" -o new-api-macos-$VERSION
      - name: Release
        uses: softprops/action-gh-release@v2
@@ -105,6 +110,10 @@ jobs:
        uses: actions/checkout@v3
        with:
          fetch-depth: 0
+      - name: Determine Version
+        run: |
+          VERSION=$(git describe --tags)
+          echo "VERSION=$VERSION" >> $GITHUB_ENV
      - uses: oven-sh/setup-bun@v2
        with:
          bun-version: latest
@@ -114,7 +123,7 @@ jobs:
        run: |
          cd web
          bun install
-          DISABLE_ESLINT_PLUGIN='true' VITE_REACT_APP_VERSION=$(git describe --tags) bun run build
+          DISABLE_ESLINT_PLUGIN='true' VITE_REACT_APP_VERSION=$VERSION bun run build
          cd ..
      - name: Set up Go
        uses: actions/setup-go@v3
@@ -123,7 +132,6 @@ jobs:
      - name: Build Backend
        run: |
          go mod download
-          VERSION=$(git describe --tags)
          go build -ldflags "-s -w -X 'new-api/common.Version=$VERSION'" -o new-api-$VERSION.exe
      - name: Release
        uses: softprops/action-gh-release@v2
@@ -132,5 +140,3 @@ jobs:
          files: new-api-*.exe
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-
-
--- a/.gitignore
+++ b/.gitignore
@@ -16,6 +16,11 @@ new-api
 tiktoken_cache
 .eslintcache
 .gocache
+.gomodcache/
+.cache
+web/bun.lock

 electron/node_modules
 electron/dist
+data/
+.gomodcache/
--- a/9
+++ b/9
@@ -14,7 +14,7 @@ ENV GO111MODULE=on CGO_ENABLED=0
 ARG TARGETOS
 ARG TARGETARCH
 ENV GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH:-amd64}
-
+ENV GOEXPERIMENT=greenteagc

 WORKDIR /build

@@ -25,10 +25,11 @@ COPY . .
 COPY --from=builder /build/dist ./web/dist
 RUN go build -ldflags "-s -w -X 'github.com/QuantumNous/new-api/common.Version=$(cat VERSION)'" -o new-api

-FROM alpine
+FROM debian:bookworm-slim

-RUN apk upgrade --no-cache \
-    && apk add --no-cache ca-certificates tzdata \
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends ca-certificates tzdata libasan8 wget \
+    && rm -rf /var/lib/apt/lists/* \
    && update-ca-certificates

 COPY --from=builder2 /build/new-api /
--- a/README.en.md
+++ b/README.en.md
@@ -1,19 +1,17 @@
-<p align="right">
-   <a href="./README.md">中文</a> | <strong>English</strong> | <a href="./README.fr.md">Français</a> | <a href="./README.ja.md">日本語</a>
-</p>
-
-> [!NOTE]
-> **MT (Machine Translation)**: This document is machine translated. For the most accurate information, please refer to the [Chinese version](./README.md).
-
 <div align="center">

 ![new-api](/web/public/logo.png)

 # New API

-🍥 Next-Generation Large Model Gateway and AI Asset Management System
+🍥 **Next-Generation Large Model Gateway and AI Asset Management System**

-<a href="https://trendshift.io/repositories/8227" target="_blank"><img src="https://trendshift.io/api/badge/repositories/8227" alt="Calcium-Ion%2Fnew-api | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
+<p align="center">
+  <a href="./README.md">中文</a> | 
+  <strong>English</strong> | 
+  <a href="./README.fr.md">Français</a> | 
+  <a href="./README.ja.md">日本語</a>
+</p>

 <p align="center">
  <a href="https://raw.githubusercontent.com/Calcium-Ion/new-api/main/LICENSE">
@@ -32,6 +30,21 @@
    <img src="https://goreportcard.com/badge/github.com/Calcium-Ion/new-api" alt="GoReportCard">
  </a>
 </p>
+
+<p align="center">
+  <a href="https://trendshift.io/repositories/8227" target="_blank">
+    <img src="https://trendshift.io/api/badge/repositories/8227" alt="Calcium-Ion%2Fnew-api | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/>
+  </a>
+</p>
+
+<p align="center">
+  <a href="#-quick-start">Quick Start</a> •
+  <a href="#-key-features">Key Features</a> •
+  <a href="#-deployment">Deployment</a> •
+  <a href="#-documentation">Documentation</a> •
+  <a href="#-help-support">Help</a>
+</p>
+
 </div>

 ## 📝 Project Description
@@ -40,186 +53,405 @@
 > This is an open-source project developed based on [One API](https://github.com/songquanpeng/one-api)

 > [!IMPORTANT]  
-> - This project is for personal learning purposes only, with no guarantee of stability or technical support.
-> - Users must comply with OpenAI's [Terms of Use](https://openai.com/policies/terms-of-use) and **applicable laws and regulations**, and must not use it for illegal purposes.
+> - This project is for personal learning purposes only, with no guarantee of stability or technical support
+> - Users must comply with OpenAI's [Terms of Use](https://openai.com/policies/terms-of-use) and **applicable laws and regulations**, and must not use it for illegal purposes
 > - According to the [《Interim Measures for the Management of Generative Artificial Intelligence Services》](http://www.cac.gov.cn/2023-07/13/c_1690898327029107.htm), please do not provide any unregistered generative AI services to the public in China.

-<h2>🤝 Trusted Partners</h2>
-<p id="premium-sponsors">&nbsp;</p>
-<p align="center"><strong>No particular order</strong></p>
+---
+
+## 🤝 Trusted Partners
+
 <p align="center">
-  <a href="https://www.cherry-ai.com/" target=_blank><img
-    src="./docs/images/cherry-studio.png" alt="Cherry Studio" height="120"
-  /></a>
-  <a href="https://bda.pku.edu.cn/" target=_blank><img
-    src="./docs/images/pku.png" alt="Peking University" height="120"
-  /></a>
-  <a href="https://www.compshare.cn/?ytag=GPU_yy_gh_newapi" target=_blank><img
-    src="./docs/images/ucloud.png" alt="UCloud" height="120"
-  /></a>
-  <a href="https://www.aliyun.com/" target=_blank><img
-    src="./docs/images/aliyun.png" alt="Alibaba Cloud" height="120"
-  /></a>
-  <a href="https://io.net/" target=_blank><img
-    src="./docs/images/io-net.png" alt="IO.NET" height="120"
-  /></a>
+  <em>No particular order</em>
 </p>
-<p>&nbsp;</p>

-## 📚 Documentation
+<p align="center">
+  <a href="https://www.cherry-ai.com/" target="_blank">
+    <img src="./docs/images/cherry-studio.png" alt="Cherry Studio" height="80" />
+  </a>
+  <a href="https://bda.pku.edu.cn/" target="_blank">
+    <img src="./docs/images/pku.png" alt="Peking University" height="80" />
+  </a>
+  <a href="https://www.compshare.cn/?ytag=GPU_yy_gh_newapi" target="_blank">
+    <img src="./docs/images/ucloud.png" alt="UCloud" height="80" />
+  </a>
+  <a href="https://www.aliyun.com/" target="_blank">
+    <img src="./docs/images/aliyun.png" alt="Alibaba Cloud" height="80" />
+  </a>
+  <a href="https://io.net/" target="_blank">
+    <img src="./docs/images/io-net.png" alt="IO.NET" height="80" />
+  </a>
+</p>

-For detailed documentation, please visit our official Wiki: [https://docs.newapi.pro/](https://docs.newapi.pro/)
+---

-You can also access the AI-generated DeepWiki:
-[![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/QuantumNous/new-api)
+## 🙏 Special Thanks

-## ✨ Key Features
+<p align="center">
+  <a href="https://www.jetbrains.com/?from=new-api" target="_blank">
+    <img src="https://resources.jetbrains.com/storage/products/company/brand/logos/jb_beam.png" alt="JetBrains Logo" width="120" />
+  </a>
+</p>

-New API offers a wide range of features, please refer to [Features Introduction](https://docs.newapi.pro/wiki/features-introduction) for details:
+<p align="center">
+  <strong>Thanks to <a href="https://www.jetbrains.com/?from=new-api">JetBrains</a> for providing free open-source development license for this project</strong>
+</p>

-1. 🎨 Brand new UI interface
-2. 🌍 Multi-language support
-3. 💰 Online recharge functionality, currently supports EPay and Stripe
-4. 🔍 Support for querying usage quotas with keys (works with [neko-api-key-tool](https://github.com/Calcium-Ion/neko-api-key-tool))
-5. 🔄 Compatible with the original One API database
-6. 💵 Support for pay-per-use model pricing
-7. ⚖️ Support for weighted random channel selection
-8. 📈 Data dashboard (console)
-9. 🔒 Token grouping and model restrictions
-10. 🤖 Support for more authorization login methods (LinuxDO, Telegram, OIDC)
-11. 🔄 Support for Rerank models (Cohere and Jina), [API Documentation](https://docs.newapi.pro/api/jinaai-rerank)
-12. ⚡ Support for OpenAI Realtime API (including Azure channels), [API Documentation](https://docs.newapi.pro/api/openai-realtime)
-13. ⚡ Support for **OpenAI Responses** format, [API Documentation](https://docs.newapi.pro/api/openai-responses)
-14. ⚡ Support for **Claude Messages** format, [API Documentation](https://docs.newapi.pro/api/anthropic-chat)
-15. ⚡ Support for **Google Gemini** format, [API Documentation](https://docs.newapi.pro/api/google-gemini-chat/)
-16. 🧠 Support for setting reasoning effort through model name suffixes:
-    1. OpenAI o-series models
-        - Add `-high` suffix for high reasoning effort (e.g.: `o3-mini-high`)
-        - Add `-medium` suffix for medium reasoning effort (e.g.: `o3-mini-medium`)
-        - Add `-low` suffix for low reasoning effort (e.g.: `o3-mini-low`)
-    2. Claude thinking models
-        - Add `-thinking` suffix to enable thinking mode (e.g.: `claude-3-7-sonnet-20250219-thinking`)
-17. 🔄 Thinking-to-content functionality
-18. 🔄 Model rate limiting for users
-19. 🔄 Request format conversion functionality, supporting the following three format conversions:
-    1. OpenAI Chat Completions => Claude Messages
-    2. Claude Messages => OpenAI Chat Completions (can be used for Claude Code to call third-party models)
-    3. OpenAI Chat Completions => Gemini Chat
-20. 💰 Cache billing support, which allows billing at a set ratio when cache is hit:
-    1. Set the `Prompt Cache Ratio` option in `System Settings-Operation Settings`
-    2. Set `Prompt Cache Ratio` in the channel, range 0-1, e.g., setting to 0.5 means billing at 50% when cache is hit
-    3. Supported channels:
-        - [x] OpenAI
-        - [x] Azure
-        - [x] DeepSeek
-        - [x] Claude
+---

-## Model Support
+## 🚀 Quick Start

-This version supports multiple models, please refer to [API Documentation-Relay Interface](https://docs.newapi.pro/api) for details:
+### Using Docker Compose (Recommended)

-1. Third-party models **gpts** (gpt-4-gizmo-*)
-2. Third-party channel [Midjourney-Proxy(Plus)](https://github.com/novicezk/midjourney-proxy) interface, [API Documentation](https://docs.newapi.pro/api/midjourney-proxy-image)
-3. Third-party channel [Suno API](https://github.com/Suno-API/Suno-API) interface, [API Documentation](https://docs.newapi.pro/api/suno-music)
-4. Custom channels, supporting full call address input
-5. Rerank models ([Cohere](https://cohere.ai/) and [Jina](https://jina.ai/)), [API Documentation](https://docs.newapi.pro/api/jinaai-rerank)
-6. Claude Messages format, [API Documentation](https://docs.newapi.pro/api/anthropic-chat)
-7. Google Gemini format, [API Documentation](https://docs.newapi.pro/api/google-gemini-chat/)
-8. Dify, currently only supports chatflow
-9. For more interfaces, please refer to [API Documentation](https://docs.newapi.pro/api)
-
-## Environment Variable Configuration
-
-For detailed configuration instructions, please refer to [Installation Guide-Environment Variables Configuration](https://docs.newapi.pro/installation/environment-variables):
-
- `GENERATE_DEFAULT_TOKEN`: Whether to generate initial tokens for newly registered users, default is `false`
- `STREAMING_TIMEOUT`: Streaming response timeout, default is 300 seconds
- `DIFY_DEBUG`: Whether to output workflow and node information for Dify channels, default is `true`
- `GET_MEDIA_TOKEN`: Whether to count image tokens, default is `true`
- `GET_MEDIA_TOKEN_NOT_STREAM`: Whether to count image tokens in non-streaming cases, default is `true`
- `UPDATE_TASK`: Whether to update asynchronous tasks (Midjourney, Suno), default is `true`
- `GEMINI_VISION_MAX_IMAGE_NUM`: Maximum number of images for Gemini models, default is `16`
- `MAX_FILE_DOWNLOAD_MB`: Maximum file download size in MB, default is `20`
- `CRYPTO_SECRET`: Encryption key used for encrypting Redis database content
- `AZURE_DEFAULT_API_VERSION`: Azure channel default API version, default is `2025-04-01-preview`
- `NOTIFICATION_LIMIT_DURATION_MINUTE`: Notification limit duration, default is `10` minutes
- `NOTIFY_LIMIT_COUNT`: Maximum number of user notifications within the specified duration, default is `2`
- `ERROR_LOG_ENABLED=true`: Whether to record and display error logs, default is `false`
-
-## Deployment
-
-For detailed deployment guides, please refer to [Installation Guide-Deployment Methods](https://docs.newapi.pro/installation):
-
-> [!TIP]
-> Latest Docker image: `calciumion/new-api:latest`
-
-### Multi-machine Deployment Considerations
- Environment variable `SESSION_SECRET` must be set, otherwise login status will be inconsistent across multiple machines
- If sharing Redis, `CRYPTO_SECRET` must be set, otherwise Redis content cannot be accessed across multiple machines
-
-### Deployment Requirements
- Local database (default): SQLite (Docker deployment must mount the `/data` directory)
- Remote database: MySQL version >= 5.7.8, PgSQL version >= 9.6
-
-### Deployment Methods
-
-#### Using BaoTa Panel Docker Feature
-Install BaoTa Panel (version **9.2.0** or above), find **New-API** in the application store and install it.
-[Tutorial with images](./docs/BT.md)
-
-#### Using Docker Compose (Recommended)
-```shell
-# Download the project
-git clone https://github.com/Calcium-Ion/new-api.git
+```bash
+# Clone the project
+git clone https://github.com/QuantumNous/new-api.git
 cd new-api
-# Edit docker-compose.yml as needed
-# Start
+
+# Edit docker-compose.yml configuration
+nano docker-compose.yml
+
+# Start the service
 docker-compose up -d
 ```

-#### Using Docker Image Directly
-```shell
-# Using SQLite
-docker run --name new-api -d --restart always -p 3000:3000 -e TZ=Asia/Shanghai -v /home/ubuntu/data/new-api:/data calciumion/new-api:latest
+<details>
+<summary><strong>Using Docker Commands</strong></summary>
+
+```bash
+# Pull the latest image
+docker pull calciumion/new-api:latest
+
+# Using SQLite (default)
+docker run --name new-api -d --restart always \
+  -p 3000:3000 \
+  -e TZ=Asia/Shanghai \
+  -v ./data:/data \
+  calciumion/new-api:latest

 # Using MySQL
-docker run --name new-api -d --restart always -p 3000:3000 -e SQL_DSN="root:123456@tcp(localhost:3306)/oneapi" -e TZ=Asia/Shanghai -v /home/ubuntu/data/new-api:/data calciumion/new-api:latest
+docker run --name new-api -d --restart always \
+  -p 3000:3000 \
+  -e SQL_DSN="root:123456@tcp(localhost:3306)/oneapi" \
+  -e TZ=Asia/Shanghai \
+  -v ./data:/data \
+  calciumion/new-api:latest
 ```

-## Channel Retry and Cache
-Channel retry functionality has been implemented, you can set the number of retries in `Settings->Operation Settings->General Settings->Failure Retry Count`, **recommended to enable caching** functionality.
+> **💡 Tip:** `-v ./data:/data` will save data in the `data` folder of the current directory, you can also change it to an absolute path like `-v /your/custom/path:/data`

-### Cache Configuration Method
-1. `REDIS_CONN_STRING`: Set Redis as cache
-2. `MEMORY_CACHE_ENABLED`: Enable memory cache (no need to set manually if Redis is set)
+</details>

-## API Documentation
+---

-For detailed API documentation, please refer to [API Documentation](https://docs.newapi.pro/api):
+🎉 After deployment is complete, visit `http://localhost:3000` to start using!

- [Chat API (Chat Completions)](https://docs.newapi.pro/api/openai-chat)
- [Response API (Responses)](https://docs.newapi.pro/api/openai-responses)
- [Image API (Image)](https://docs.newapi.pro/api/openai-image)
- [Rerank API (Rerank)](https://docs.newapi.pro/api/jinaai-rerank)
- [Realtime Chat API (Realtime)](https://docs.newapi.pro/api/openai-realtime)
- [Claude Chat API](https://docs.newapi.pro/api/anthropic-chat)
- [Google Gemini Chat API](https://docs.newapi.pro/api/google-gemini-chat)
+📖 For more deployment methods, please refer to [Deployment Guide](https://docs.newapi.pro/en/docs/installation)

-## Related Projects
- [One API](https://github.com/songquanpeng/one-api): Original project
- [Midjourney-Proxy](https://github.com/novicezk/midjourney-proxy): Midjourney interface support
- [neko-api-key-tool](https://github.com/Calcium-Ion/neko-api-key-tool): Query usage quota with key
+---

-Other projects based on New API:
- [new-api-horizon](https://github.com/Calcium-Ion/new-api-horizon): High-performance optimized version of New API
+## 📚 Documentation

-## Help and Support
+<div align="center">

-If you have any questions, please refer to [Help and Support](https://docs.newapi.pro/support):
- [Community Interaction](https://docs.newapi.pro/support/community-interaction)
- [Issue Feedback](https://docs.newapi.pro/support/feedback-issues)
- [FAQ](https://docs.newapi.pro/support/faq)
+### 📖 [Official Documentation](https://docs.newapi.pro/en/docs) | [![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/QuantumNous/new-api)
+
+</div>
+
+**Quick Navigation:**
+
+| Category | Link |
+|------|------|
+| 🚀 Deployment Guide | [Installation Documentation](https://docs.newapi.pro/en/docs/installation) |
+| ⚙️ Environment Configuration | [Environment Variables](https://docs.newapi.pro/en/docs/installation/config-maintenance/environment-variables) |
+| 📡 API Documentation | [API Documentation](https://docs.newapi.pro/en/docs/api) |
+| ❓ FAQ | [FAQ](https://docs.newapi.pro/en/docs/support/faq) |
+| 💬 Community Interaction | [Communication Channels](https://docs.newapi.pro/en/docs/support/community-interaction) |
+
+---
+
+## ✨ Key Features
+
+> For detailed features, please refer to [Features Introduction](https://docs.newapi.pro/en/docs/guide/wiki/basic-concepts/features-introduction)
+
+### 🎨 Core Functions
+
+| Feature | Description |
+|------|------|
+| 🎨 New UI | Modern user interface design |
+| 🌍 Multi-language | Supports Chinese, English, French, Japanese |
+| 🔄 Data Compatibility | Fully compatible with the original One API database |
+| 📈 Data Dashboard | Visual console and statistical analysis |
+| 🔒 Permission Management | Token grouping, model restrictions, user management |
+
+### 💰 Payment and Billing
+
+- ✅ Online recharge (EPay, Stripe)
+- ✅ Pay-per-use model pricing
+- ✅ Cache billing support (OpenAI, Azure, DeepSeek, Claude, Qwen and all supported models)
+- ✅ Flexible billing policy configuration
+
+### 🔐 Authorization and Security
+
+- 😈 Discord authorization login
+- 🤖 LinuxDO authorization login
+- 📱 Telegram authorization login
+- 🔑 OIDC unified authentication
+
+### 🚀 Advanced Features
+
+**API Format Support:**
+- ⚡ [OpenAI Responses](https://docs.newapi.pro/en/docs/api/ai-model/chat/openai/create-response)
+- ⚡ [OpenAI Realtime API](https://docs.newapi.pro/en/docs/api/ai-model/realtime/create-realtime-session) (including Azure)
+- ⚡ [Claude Messages](https://docs.newapi.pro/en/docs/api/ai-model/chat/create-message)
+- ⚡ [Google Gemini](https://doc.newapi.pro/en/api/google-gemini-chat)
+- 🔄 [Rerank Models](https://docs.newapi.pro/en/docs/api/ai-model/rerank/create-rerank) (Cohere, Jina)
+
+**Intelligent Routing:**
+- ⚖️ Channel weighted random
+- 🔄 Automatic retry on failure
+- 🚦 User-level model rate limiting
+
+**Format Conversion:**
+- 🔄 OpenAI ⇄ Claude Messages
+- 🔄 OpenAI ⇄ Gemini Chat
+- 🔄 Thinking-to-content functionality
+
+**Reasoning Effort Support:**
+
+<details>
+<summary>View detailed configuration</summary>
+
+**OpenAI series models:**
+- `o3-mini-high` - High reasoning effort
+- `o3-mini-medium` - Medium reasoning effort
+- `o3-mini-low` - Low reasoning effort
+- `gpt-5-high` - High reasoning effort
+- `gpt-5-medium` - Medium reasoning effort
+- `gpt-5-low` - Low reasoning effort
+
+**Claude thinking models:**
+- `claude-3-7-sonnet-20250219-thinking` - Enable thinking mode
+
+**Google Gemini series models:**
+- `gemini-2.5-flash-thinking` - Enable thinking mode
+- `gemini-2.5-flash-nothinking` - Disable thinking mode
+- `gemini-2.5-pro-thinking` - Enable thinking mode
+- `gemini-2.5-pro-thinking-128` - Enable thinking mode with thinking budget of 128 tokens
+- You can also append `-low`, `-medium`, or `-high` to any Gemini model name to request the corresponding reasoning effort (no extra thinking-budget suffix needed).
+
+</details>
+
+---
+
+## 🤖 Model Support
+
+> For details, please refer to [API Documentation - Relay Interface](https://docs.newapi.pro/en/docs/api)
+
+| Model Type | Description | Documentation |
+|---------|------|------|
+| 🤖 OpenAI GPTs | gpt-4-gizmo-* series | - |
+| 🎨 Midjourney-Proxy | [Midjourney-Proxy(Plus)](https://github.com/novicezk/midjourney-proxy) | [Documentation](https://doc.newapi.pro/en/api/midjourney-proxy-image) |
+| 🎵 Suno-API | [Suno API](https://github.com/Suno-API/Suno-API) | [Documentation](https://doc.newapi.pro/en/api/suno-music) |
+| 🔄 Rerank | Cohere, Jina | [Documentation](https://docs.newapi.pro/en/docs/api/ai-model/rerank/create-rerank) |
+| 💬 Claude | Messages format | [Documentation](https://docs.newapi.pro/en/docs/api/ai-model/chat/create-message) |
+| 🌐 Gemini | Google Gemini format | [Documentation](https://doc.newapi.pro/en/api/google-gemini-chat) |
+| 🔧 Dify | ChatFlow mode | - |
+| 🎯 Custom | Supports complete call address | - |
+
+### 📡 Supported Interfaces
+
+<details>
+<summary>View complete interface list</summary>
+
+- [Chat Interface (Chat Completions)](https://docs.newapi.pro/en/docs/api/ai-model/chat/openai/create-chat-completion)
+- [Response Interface (Responses)](https://docs.newapi.pro/en/docs/api/ai-model/chat/openai/create-response)
+- [Image Interface (Image)](https://docs.newapi.pro/en/docs/api/ai-model/images/openai/v1-images-generations--post)
+- [Audio Interface (Audio)](https://docs.newapi.pro/en/docs/api/ai-model/audio/openai/create-transcription)
+- [Video Interface (Video)](https://docs.newapi.pro/en/docs/api/ai-model/videos/create-video-generation)
+- [Embedding Interface (Embeddings)](https://docs.newapi.pro/en/docs/api/ai-model/embeddings/create-embedding)
+- [Rerank Interface (Rerank)](https://docs.newapi.pro/en/docs/api/ai-model/rerank/create-rerank)
+- [Realtime Conversation (Realtime)](https://docs.newapi.pro/en/docs/api/ai-model/realtime/create-realtime-session)
+- [Claude Chat](https://docs.newapi.pro/en/docs/api/ai-model/chat/create-message)
+- [Google Gemini Chat](https://doc.newapi.pro/en/api/google-gemini-chat)
+
+</details>
+
+---
+
+## 🚢 Deployment
+
+> [!TIP]
+> **Latest Docker image:** `calciumion/new-api:latest`
+
+### 📋 Deployment Requirements
+
+| Component | Requirement |
+|------|------|
+| **Local database** | SQLite (Docker must mount `/data` directory)|
+| **Remote database** | MySQL ≥ 5.7.8 or PostgreSQL ≥ 9.6 |
+| **Container engine** | Docker / Docker Compose |
+
+### ⚙️ Environment Variable Configuration
+
+<details>
+<summary>Common environment variable configuration</summary>
+
+| Variable Name | Description | Default Value |
+|--------|------|--------|
+| `SESSION_SECRET` | Session secret (required for multi-machine deployment) | - |
+| `CRYPTO_SECRET` | Encryption secret (required for Redis) | - |
+| `SQL_DSN` | Database connection string | - |
+| `REDIS_CONN_STRING` | Redis connection string | - |
+| `STREAMING_TIMEOUT` | Streaming timeout (seconds) | `300` |
+| `STREAM_SCANNER_MAX_BUFFER_MB` | Max per-line buffer (MB) for the stream scanner; increase when upstream sends huge image/base64 payloads | `64` |
+| `MAX_REQUEST_BODY_MB` | Max request body size (MB, counted **after decompression**; prevents huge requests/zip bombs from exhausting memory). Exceeding it returns `413` | `32` |
+| `AZURE_DEFAULT_API_VERSION` | Azure API version | `2025-04-01-preview` |
+| `ERROR_LOG_ENABLED` | Error log switch | `false` |
+| `PYROSCOPE_URL` | Pyroscope server address | - |
+| `PYROSCOPE_APP_NAME` | Pyroscope application name | `new-api` |
+| `PYROSCOPE_BASIC_AUTH_USER` | Pyroscope basic auth user | - |
+| `PYROSCOPE_BASIC_AUTH_PASSWORD` | Pyroscope basic auth password | - |
+| `PYROSCOPE_MUTEX_RATE` | Pyroscope mutex sampling rate | `5` |
+| `PYROSCOPE_BLOCK_RATE` | Pyroscope block sampling rate | `5` |
+| `HOSTNAME` | Hostname tag for Pyroscope | `new-api` |
+
+📖 **Complete configuration:** [Environment Variables Documentation](https://docs.newapi.pro/en/docs/installation/config-maintenance/environment-variables)
+
+</details>
+
+### 🔧 Deployment Methods
+
+<details>
+<summary><strong>Method 1: Docker Compose (Recommended)</strong></summary>
+
+```bash
+# Clone the project
+git clone https://github.com/QuantumNous/new-api.git
+cd new-api
+
+# Edit configuration
+nano docker-compose.yml
+
+# Start service
+docker-compose up -d
+```
+
+</details>
+
+<details>
+<summary><strong>Method 2: Docker Commands</strong></summary>
+
+**Using SQLite:**
+```bash
+docker run --name new-api -d --restart always \
+  -p 3000:3000 \
+  -e TZ=Asia/Shanghai \
+  -v ./data:/data \
+  calciumion/new-api:latest
+```
+
+**Using MySQL:**
+```bash
+docker run --name new-api -d --restart always \
+  -p 3000:3000 \
+  -e SQL_DSN="root:123456@tcp(localhost:3306)/oneapi" \
+  -e TZ=Asia/Shanghai \
+  -v ./data:/data \
+  calciumion/new-api:latest
+```
+
+> **💡 Path explanation:** 
+> - `./data:/data` - Relative path, data saved in the data folder of the current directory
+> - You can also use absolute path, e.g.: `/your/custom/path:/data`
+
+</details>
+
+<details>
+<summary><strong>Method 3: BaoTa Panel</strong></summary>
+
+1. Install BaoTa Panel (≥ 9.2.0 version)
+2. Search for **New-API** in the application store
+3. One-click installation
+
+📖 [Tutorial with images](./docs/BT.md)
+
+</details>
+
+### ⚠️ Multi-machine Deployment Considerations
+
+> [!WARNING]
+> - **Must set** `SESSION_SECRET` - Otherwise login status inconsistent
+> - **Shared Redis must set** `CRYPTO_SECRET` - Otherwise data cannot be decrypted
+
+### 🔄 Channel Retry and Cache
+
+**Retry configuration:** `Settings → Operation Settings → General Settings → Failure Retry Count`
+
+**Cache configuration:**
+- `REDIS_CONN_STRING`: Redis cache (recommended)
+- `MEMORY_CACHE_ENABLED`: Memory cache
+
+---
+
+## 🔗 Related Projects
+
+### Upstream Projects
+
+| Project | Description |
+|------|------|
+| [One API](https://github.com/songquanpeng/one-api) | Original project base |
+| [Midjourney-Proxy](https://github.com/novicezk/midjourney-proxy) | Midjourney interface support |
+
+### Supporting Tools
+
+| Project | Description |
+|------|------|
+| [neko-api-key-tool](https://github.com/Calcium-Ion/neko-api-key-tool) | Key quota query tool |
+| [new-api-horizon](https://github.com/Calcium-Ion/new-api-horizon) | New API high-performance optimized version |
+
+---
+
+## 💬 Help Support
+
+### 📖 Documentation Resources
+
+| Resource | Link |
+|------|------|
+| 📘 FAQ | [FAQ](https://docs.newapi.pro/en/docs/support/faq) |
+| 💬 Community Interaction | [Communication Channels](https://docs.newapi.pro/en/docs/support/community-interaction) |
+| 🐛 Issue Feedback | [Issue Feedback](https://docs.newapi.pro/en/docs/support/feedback-issues) |
+| 📚 Complete Documentation | [Official Documentation](https://docs.newapi.pro/en/docs) |
+
+### 🤝 Contribution Guide
+
+Welcome all forms of contribution!
+
+- 🐛 Report Bugs
+- 💡 Propose New Features
+- 📝 Improve Documentation
+- 🔧 Submit Code
+
+---

 ## 🌟 Star History

+<div align="center">
+
 [![Star History Chart](https://api.star-history.com/svg?repos=Calcium-Ion/new-api&type=Date)](https://star-history.com/#Calcium-Ion/new-api&Date)
+
+</div>
+
+---
+
+<div align="center">
+
+### 💖 Thank you for using New API
+
+If this project is helpful to you, welcome to give us a ⭐️ Star！
+
+**[Official Documentation](https://docs.newapi.pro/en/docs)** • **[Issue Feedback](https://github.com/Calcium-Ion/new-api/issues)** • **[Latest Release](https://github.com/Calcium-Ion/new-api/releases)**
+
+<sub>Built with ❤️ by QuantumNous</sub>
+
+</div>
--- a/README.fr.md
+++ b/README.fr.md
@@ -1,19 +1,17 @@
-<p align="right">
-   <a href="./README.md">中文</a> | <a href="./README.en.md">English</a> | <strong>Français</strong> | <a href="./README.ja.md">日本語</a>
-</p>
-
-> [!NOTE]
-> **MT (Traduction Automatique)**: Ce document est traduit automatiquement. Pour les informations les plus précises, veuillez vous référer à la [version chinoise](./README.md).
-
 <div align="center">

 ![new-api](/web/public/logo.png)

 # New API

-🍥 Passerelle de modèles étendus de nouvelle génération et système de gestion d'actifs d'IA
+🍥 **Passerelle de modèles étendus de nouvelle génération et système de gestion d'actifs d'IA**

-<a href="https://trendshift.io/repositories/8227" target="_blank"><img src="https://trendshift.io/api/badge/repositories/8227" alt="Calcium-Ion%2Fnew-api | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
+<p align="center">
+  <a href="./README.md">中文</a> | 
+  <a href="./README.en.md">English</a> | 
+  <strong>Français</strong> | 
+  <a href="./README.ja.md">日本語</a>
+</p>

 <p align="center">
  <a href="https://raw.githubusercontent.com/Calcium-Ion/new-api/main/LICENSE">
@@ -32,194 +30,422 @@
    <img src="https://goreportcard.com/badge/github.com/Calcium-Ion/new-api" alt="GoReportCard">
  </a>
 </p>
+
+<p align="center">
+  <a href="https://trendshift.io/repositories/8227" target="_blank">
+    <img src="https://trendshift.io/api/badge/repositories/8227" alt="Calcium-Ion%2Fnew-api | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/>
+  </a>
+</p>
+
+<p align="center">
+  <a href="#-démarrage-rapide">Démarrage rapide</a> •
+  <a href="#-fonctionnalités-clés">Fonctionnalités clés</a> •
+  <a href="#-déploiement">Déploiement</a> •
+  <a href="#-documentation">Documentation</a> •
+  <a href="#-aide-support">Aide</a>
+</p>
+
 </div>

 ## 📝 Description du projet

-> [!NOTE]
+> [!NOTE]  
 > Il s'agit d'un projet open-source développé sur la base de [One API](https://github.com/songquanpeng/one-api)

-> [!IMPORTANT]
+> [!IMPORTANT]  
 > - Ce projet est uniquement destiné à des fins d'apprentissage personnel, sans garantie de stabilité ni de support technique.
 > - Les utilisateurs doivent se conformer aux [Conditions d'utilisation](https://openai.com/policies/terms-of-use) d'OpenAI et aux **lois et réglementations applicables**, et ne doivent pas l'utiliser à des fins illégales.
 > - Conformément aux [《Mesures provisoires pour la gestion des services d'intelligence artificielle générative》](http://www.cac.gov.cn/2023-07/13/c_1690898327029107.htm), veuillez ne fournir aucun service d'IA générative non enregistré au public en Chine.

-<h2>🤝 Partenaires de confiance</h2>
-<p id="premium-sponsors">&nbsp;</p>
-<p align="center"><strong>Sans ordre particulier</strong></p>
+---
+
+## 🤝 Partenaires de confiance
+
 <p align="center">
-  <a href="https://www.cherry-ai.com/" target=_blank><img
-    src="./docs/images/cherry-studio.png" alt="Cherry Studio" height="120"
-  /></a>
-  <a href="https://bda.pku.edu.cn/" target=_blank><img
-    src="./docs/images/pku.png" alt="Université de Pékin" height="120"
-  /></a>
-  <a href="https://www.compshare.cn/?ytag=GPU_yy_gh_newapi" target=_blank><img
-    src="./docs/images/ucloud.png" alt="UCloud" height="120"
-  /></a>
-  <a href="https://www.aliyun.com/" target=_blank><img
-    src="./docs/images/aliyun.png" alt="Alibaba Cloud" height="120"
-  /></a>
-  <a href="https://io.net/" target=_blank><img
-    src="./docs/images/io-net.png" alt="IO.NET" height="120"
-  /></a>
+  <em>Sans ordre particulier</em>
 </p>
-<p>&nbsp;</p>

-## 📚 Documentation
+<p align="center">
+  <a href="https://www.cherry-ai.com/" target="_blank">
+    <img src="./docs/images/cherry-studio.png" alt="Cherry Studio" height="80" />
+  </a>
+  <a href="https://bda.pku.edu.cn/" target="_blank">
+    <img src="./docs/images/pku.png" alt="Université de Pékin" height="80" />
+  </a>
+  <a href="https://www.compshare.cn/?ytag=GPU_yy_gh_newapi" target="_blank">
+    <img src="./docs/images/ucloud.png" alt="UCloud" height="80" />
+  </a>
+  <a href="https://www.aliyun.com/" target="_blank">
+    <img src="./docs/images/aliyun.png" alt="Alibaba Cloud" height="80" />
+  </a>
+  <a href="https://io.net/" target="_blank">
+    <img src="./docs/images/io-net.png" alt="IO.NET" height="80" />
+  </a>
+</p>

-Pour une documentation détaillée, veuillez consulter notre Wiki officiel : [https://docs.newapi.pro/](https://docs.newapi.pro/)
+---

-Vous pouvez également accéder au DeepWiki généré par l'IA :
-[![Demander à DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/QuantumNous/new-api)
+## 🙏 Remerciements spéciaux

-## ✨ Fonctionnalités clés
+<p align="center">
+  <a href="https://www.jetbrains.com/?from=new-api" target="_blank">
+    <img src="https://resources.jetbrains.com/storage/products/company/brand/logos/jb_beam.png" alt="JetBrains Logo" width="120" />
+  </a>
+</p>

-New API offre un large éventail de fonctionnalités, veuillez vous référer à [Présentation des fonctionnalités](https://docs.newapi.pro/wiki/features-introduction) pour plus de détails :
+<p align="center">
+  <strong>Merci à <a href="https://www.jetbrains.com/?from=new-api">JetBrains</a> pour avoir fourni une licence de développement open-source gratuite pour ce projet</strong>
+</p>

-1. 🎨 Nouvelle interface utilisateur
-2. 🌍 Prise en charge multilingue
-3. 💰 Fonctionnalité de recharge en ligne, prend actuellement en charge EPay et Stripe
-4. 🔍 Prise en charge de la recherche de quotas d'utilisation avec des clés (fonctionne avec [neko-api-key-tool](https://github.com/Calcium-Ion/neko-api-key-tool))
-5. 🔄 Compatible avec la base de données originale de One API
-6. 💵 Prise en charge de la tarification des modèles de paiement à l'utilisation
-7. ⚖️ Prise en charge de la sélection aléatoire pondérée des canaux
-8. 📈 Tableau de bord des données (console)
-9. 🔒 Regroupement de jetons et restrictions de modèles
-10. 🤖 Prise en charge de plus de méthodes de connexion par autorisation (LinuxDO, Telegram, OIDC)
-11. 🔄 Prise en charge des modèles Rerank (Cohere et Jina), [Documentation de l'API](https://docs.newapi.pro/api/jinaai-rerank)
-12. ⚡ Prise en charge de l'API OpenAI Realtime (y compris les canaux Azure), [Documentation de l'API](https://docs.newapi.pro/api/openai-realtime)
-13. ⚡ Prise en charge du format **OpenAI Responses**, [Documentation de l'API](https://docs.newapi.pro/api/openai-responses)
-14. ⚡ Prise en charge du format **Claude Messages**, [Documentation de l'API](https://docs.newapi.pro/api/anthropic-chat)
-15. ⚡ Prise en charge du format **Google Gemini**, [Documentation de l'API](https://docs.newapi.pro/api/google-gemini-chat/)
-16. 🧠 Prise en charge de la définition de l'effort de raisonnement via les suffixes de nom de modèle :
-    1. Modèles de la série o d'OpenAI
-        - Ajouter le suffixe `-high` pour un effort de raisonnement élevé (par exemple : `o3-mini-high`)
-        - Ajouter le suffixe `-medium` pour un effort de raisonnement moyen (par exemple : `o3-mini-medium`)
-        - Ajouter le suffixe `-low` pour un effort de raisonnement faible (par exemple : `o3-mini-low`)
-    2. Modèles de pensée de Claude
-        - Ajouter le suffixe `-thinking` pour activer le mode de pensée (par exemple : `claude-3-7-sonnet-20250219-thinking`)
-17. 🔄 Fonctionnalité de la pensée au contenu
-18. 🔄 Limitation du débit du modèle pour les utilisateurs
-19. 🔄 Fonctionnalité de conversion de format de requête, prenant en charge les trois conversions de format suivantes :
-    1. OpenAI Chat Completions => Claude Messages
-    2. Claude Messages => OpenAI Chat Completions (peut être utilisé pour Claude Code pour appeler des modèles tiers)
-    3. OpenAI Chat Completions => Gemini Chat
-20. 💰 Prise en charge de la facturation du cache, qui permet de facturer à un ratio défini lorsque le cache est atteint :
-    1. Définir l'option `Ratio de cache d'invite` dans `Paramètres système->Paramètres de fonctionnement`
-    2. Définir le `Ratio de cache d'invite` dans le canal, plage de 0 à 1, par exemple, le définir sur 0,5 signifie facturer à 50 % lorsque le cache est atteint
-    3. Canaux pris en charge :
-        - [x] OpenAI
-        - [x] Azure
-        - [x] DeepSeek
-        - [x] Claude
+---

-## Prise en charge des modèles
+## 🚀 Démarrage rapide

-Cette version prend en charge plusieurs modèles, veuillez vous référer à [Documentation de l'API-Interface de relais](https://docs.newapi.pro/api) pour plus de détails :
+### Utilisation de Docker Compose (recommandé)

-1. Modèles tiers **gpts** (gpt-4-gizmo-*)
-2. Canal tiers [Midjourney-Proxy(Plus)](https://github.com/novicezk/midjourney-proxy), [Documentation de l'API](https://docs.newapi.pro/api/midjourney-proxy-image)
-3. Canal tiers [Suno API](https://github.com/Suno-API/Suno-API), [Documentation de l'API](https://docs.newapi.pro/api/suno-music)
-4. Canaux personnalisés, prenant en charge la saisie complète de l'adresse d'appel
-5. Modèles Rerank ([Cohere](https://cohere.ai/) et [Jina](https://jina.ai/)), [Documentation de l'API](https://docs.newapi.pro/api/jinaai-rerank)
-6. Format de messages Claude, [Documentation de l'API](https://docs.newapi.pro/api/anthropic-chat)
-7. Format Google Gemini, [Documentation de l'API](https://docs.newapi.pro/api/google-gemini-chat/)
-8. Dify, ne prend actuellement en charge que chatflow
-9. Pour plus d'interfaces, veuillez vous référer à la [Documentation de l'API](https://docs.newapi.pro/api)
-
-## Configuration des variables d'environnement
-
-Pour des instructions de configuration détaillées, veuillez vous référer à [Guide d'installation-Configuration des variables d'environnement](https://docs.newapi.pro/installation/environment-variables) :
-
- `GENERATE_DEFAULT_TOKEN` : S'il faut générer des jetons initiaux pour les utilisateurs nouvellement enregistrés, la valeur par défaut est `false`
- `STREAMING_TIMEOUT` : Délai d'expiration de la réponse en streaming, la valeur par défaut est de 300 secondes
- `DIFY_DEBUG` : S'il faut afficher les informations sur le flux de travail et les nœuds pour les canaux Dify, la valeur par défaut est `true`
- `GET_MEDIA_TOKEN` : S'il faut compter les jetons d'image, la valeur par défaut est `true`
- `GET_MEDIA_TOKEN_NOT_STREAM` : S'il faut compter les jetons d'image dans les cas sans streaming, la valeur par défaut est `true`
- `UPDATE_TASK` : S'il faut mettre à jour les tâches asynchrones (Midjourney, Suno), la valeur par défaut est `true`
- `GEMINI_VISION_MAX_IMAGE_NUM` : Nombre maximum d'images pour les modèles Gemini, la valeur par défaut est `16`
- `MAX_FILE_DOWNLOAD_MB` : Taille maximale de téléchargement de fichier en Mo, la valeur par défaut est `20`
- `CRYPTO_SECRET` : Clé de chiffrement utilisée pour chiffrer le contenu de la base de données Redis
- `AZURE_DEFAULT_API_VERSION` : Version de l'API par défaut du canal Azure, la valeur par défaut est `2025-04-01-preview`
- `NOTIFICATION_LIMIT_DURATION_MINUTE` : Durée de la limite de notification, la valeur par défaut est de `10` minutes
- `NOTIFY_LIMIT_COUNT` : Nombre maximal de notifications utilisateur dans la durée spécifiée, la valeur par défaut est `2`
- `ERROR_LOG_ENABLED=true` : S'il faut enregistrer et afficher les journaux d'erreurs, la valeur par défaut est `false`
-
-## Déploiement
-
-Pour des guides de déploiement détaillés, veuillez vous référer à [Guide d'installation-Méthodes de déploiement](https://docs.newapi.pro/installation) :
-
-> [!TIP]
-> Dernière image Docker : `calciumion/new-api:latest`
-
-### Considérations sur le déploiement multi-machines
- La variable d'environnement `SESSION_SECRET` doit être définie, sinon l'état de connexion sera incohérent sur plusieurs machines
- Si vous partagez Redis, `CRYPTO_SECRET` doit être défini, sinon le contenu de Redis ne pourra pas être consulté sur plusieurs machines
-
-### Exigences de déploiement
- Base de données locale (par défaut) : SQLite (le déploiement Docker doit monter le répertoire `/data`)
- Base de données distante : MySQL version >= 5.7.8, PgSQL version >= 9.6
-
-### Méthodes de déploiement
-
-#### Utilisation de la fonctionnalité Docker du panneau BaoTa
-Installez le panneau BaoTa (version **9.2.0** ou supérieure), recherchez **New-API** dans le magasin d'applications et installez-le.
-[Tutoriel avec des images](./docs/BT.md)
-
-#### Utilisation de Docker Compose (recommandé)
-```shell
-# Télécharger le projet
-git clone https://github.com/Calcium-Ion/new-api.git
+```bash
+# Cloner le projet
+git clone https://github.com/QuantumNous/new-api.git
 cd new-api
-# Modifier docker-compose.yml si nécessaire
-# Démarrer
+
+# Modifier la configuration docker-compose.yml
+nano docker-compose.yml
+
+# Démarrer le service
 docker-compose up -d
 ```

-#### Utilisation directe de l'image Docker
-```shell
-# Utilisation de SQLite
-docker run --name new-api -d --restart always -p 3000:3000 -e TZ=Asia/Shanghai -v /home/ubuntu/data/new-api:/data calciumion/new-api:latest
+<details>
+<summary><strong>Utilisation des commandes Docker</strong></summary>
+
+```bash
+# Tirer la dernière image
+docker pull calciumion/new-api:latest
+
+# Utilisation de SQLite (par défaut)
+docker run --name new-api -d --restart always \
+  -p 3000:3000 \
+  -e TZ=Asia/Shanghai \
+  -v ./data:/data \
+  calciumion/new-api:latest

 # Utilisation de MySQL
-docker run --name new-api -d --restart always -p 3000:3000 -e SQL_DSN="root:123456@tcp(localhost:3306)/oneapi" -e TZ=Asia/Shanghai -v /home/ubuntu/data/new-api:/data calciumion/new-api:latest
+docker run --name new-api -d --restart always \
+  -p 3000:3000 \
+  -e SQL_DSN="root:123456@tcp(localhost:3306)/oneapi" \
+  -e TZ=Asia/Shanghai \
+  -v ./data:/data \
+  calciumion/new-api:latest
 ```

-## Nouvelle tentative de canal et cache
-La fonctionnalité de nouvelle tentative de canal a été implémentée, vous pouvez définir le nombre de tentatives dans `Paramètres->Paramètres de fonctionnement->Paramètres généraux->Nombre de tentatives en cas d'échec`, **recommandé d'activer la fonctionnalité de mise en cache**.
+> **💡 Astuce:** `-v ./data:/data` sauvegardera les données dans le dossier `data` du répertoire actuel, vous pouvez également le changer en chemin absolu comme `-v /your/custom/path:/data`

-### Méthode de configuration du cache
-1. `REDIS_CONN_STRING` : Définir Redis comme cache
-2. `MEMORY_CACHE_ENABLED` : Activer le cache mémoire (pas besoin de le définir manuellement si Redis est défini)
+</details>

-## Documentation de l'API
+---

-Pour une documentation détaillée de l'API, veuillez vous référer à [Documentation de l'API](https://docs.newapi.pro/api) :
+🎉 Après le déploiement, visitez `http://localhost:3000` pour commencer à utiliser!

- [API de discussion (Chat Completions)](https://docs.newapi.pro/api/openai-chat)
- [API de réponse (Responses)](https://docs.newapi.pro/api/openai-responses)
- [API d'image (Image)](https://docs.newapi.pro/api/openai-image)
- [API de rerank (Rerank)](https://docs.newapi.pro/api/jinaai-rerank)
- [API de discussion en temps réel (Realtime)](https://docs.newapi.pro/api/openai-realtime)
- [API de discussion Claude](https://docs.newapi.pro/api/anthropic-chat)
- [API de discussion Google Gemini](https://docs.newapi.pro/api/google-gemini-chat)
+📖 Pour plus de méthodes de déploiement, veuillez vous référer à [Guide de déploiement](https://docs.newapi.pro/en/docs/installation)

-## Projets connexes
- [One API](https://github.com/songquanpeng/one-api) : Projet original
- [Midjourney-Proxy](https://github.com/novicezk/midjourney-proxy) : Prise en charge de l'interface Midjourney
- [neko-api-key-tool](https://github.com/Calcium-Ion/neko-api-key-tool) : Interroger le quota d'utilisation avec une clé
+---

-Autres projets basés sur New API :
- [new-api-horizon](https://github.com/Calcium-Ion/new-api-horizon) : Version optimisée hautes performances de New API
+## 📚 Documentation

-## Aide et support
+<div align="center">

-Si vous avez des questions, veuillez vous référer à [Aide et support](https://docs.newapi.pro/support) :
- [Interaction avec la communauté](https://docs.newapi.pro/support/community-interaction)
- [Commentaires sur les problèmes](https://docs.newapi.pro/support/feedback-issues)
- [FAQ](https://docs.newapi.pro/support/faq)
+### 📖 [Documentation officielle](https://docs.newapi.pro/en/docs) | [![Demander à DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/QuantumNous/new-api)
+
+</div>
+
+**Navigation rapide:**
+
+| Catégorie | Lien |
+|------|------|
+| 🚀 Guide de déploiement | [Documentation d'installation](https://docs.newapi.pro/en/docs/installation) |
+| ⚙️ Configuration de l'environnement | [Variables d'environnement](https://docs.newapi.pro/en/docs/installation/config-maintenance/environment-variables) |
+| 📡 Documentation de l'API | [Documentation de l'API](https://docs.newapi.pro/en/docs/api) |
+| ❓ FAQ | [FAQ](https://docs.newapi.pro/en/docs/support/faq) |
+| 💬 Interaction avec la communauté | [Canaux de communication](https://docs.newapi.pro/en/docs/support/community-interaction) |
+
+---
+
+## ✨ Fonctionnalités clés
+
+> Pour les fonctionnalités détaillées, veuillez vous référer à [Présentation des fonctionnalités](https://docs.newapi.pro/en/docs/guide/wiki/basic-concepts/features-introduction) |
+
+### 🎨 Fonctions principales
+
+| Fonctionnalité | Description |
+|------|------|
+| 🎨 Nouvelle interface utilisateur | Conception d'interface utilisateur moderne |
+| 🌍 Multilingue | Prend en charge le chinois, l'anglais, le français, le japonais |
+| 🔄 Compatibilité des données | Complètement compatible avec la base de données originale de One API |
+| 📈 Tableau de bord des données | Console visuelle et analyse statistique |
+| 🔒 Gestion des permissions | Regroupement de jetons, restrictions de modèles, gestion des utilisateurs |
+
+### 💰 Paiement et facturation
+
+- ✅ Recharge en ligne (EPay, Stripe)
+- ✅ Tarification des modèles de paiement à l'utilisation
+- ✅ Prise en charge de la facturation du cache (OpenAI, Azure, DeepSeek, Claude, Qwen et tous les modèles pris en charge)
+- ✅ Configuration flexible des politiques de facturation
+
+### 🔐 Autorisation et sécurité
+
+- 🤖 Connexion par autorisation LinuxDO
+- 📱 Connexion par autorisation Telegram
+- 🔑 Authentification unifiée OIDC
+
+### 🚀 Fonctionnalités avancées
+
+**Prise en charge des formats d'API:**
+- ⚡ [OpenAI Responses](https://docs.newapi.pro/en/docs/api/ai-model/chat/openai/create-response)
+- ⚡ [OpenAI Realtime API](https://docs.newapi.pro/en/docs/api/ai-model/realtime/create-realtime-session) (y compris Azure)
+- ⚡ [Claude Messages](https://docs.newapi.pro/en/docs/api/ai-model/chat/create-message)
+- ⚡ [Google Gemini](https://doc.newapi.pro/en/api/google-gemini-chat)
+- 🔄 [Modèles Rerank](https://docs.newapi.pro/en/docs/api/ai-model/rerank/create-rerank) (Cohere, Jina)
+
+**Routage intelligent:**
+- ⚖️ Sélection aléatoire pondérée des canaux
+- 🔄 Nouvelle tentative automatique en cas d'échec
+- 🚦 Limitation du débit du modèle pour les utilisateurs
+
+**Conversion de format:**
+- 🔄 OpenAI ⇄ Claude Messages
+- 🔄 OpenAI ⇄ Gemini Chat
+- 🔄 Fonctionnalité de la pensée au contenu
+
+**Prise en charge de l'effort de raisonnement:**
+
+<details>
+<summary>Voir la configuration détaillée</summary>
+
+**Modèles de la série o d'OpenAI:**
+- `o3-mini-high` - Effort de raisonnement élevé
+- `o3-mini-medium` - Effort de raisonnement moyen
+- `o3-mini-low` - Effort de raisonnement faible
+
+**Modèles de pensée de Claude:**
+- `claude-3-7-sonnet-20250219-thinking` - Activer le mode de pensée
+
+**Modèles de la série Google Gemini:**
+- `gemini-2.5-flash-thinking` - Activer le mode de pensée
+- `gemini-2.5-flash-nothinking` - Désactiver le mode de pensée
+- `gemini-2.5-pro-thinking` - Activer le mode de pensée
+- `gemini-2.5-pro-thinking-128` - Activer le mode de pensée avec budget de pensée de 128 tokens
+- Vous pouvez également ajouter les suffixes `-low`, `-medium` ou `-high` aux modèles Gemini pour fixer le niveau d’effort de raisonnement (sans suffixe de budget supplémentaire).
+
+</details>
+
+---
+
+## 🤖 Prise en charge des modèles
+
+> Pour les détails, veuillez vous référer à [Documentation de l'API - Interface de relais](https://docs.newapi.pro/en/docs/api)
+
+| Type de modèle | Description | Documentation |
+|---------|------|------|
+| 🤖 OpenAI GPTs | série gpt-4-gizmo-* | - |
+| 🎨 Midjourney-Proxy | [Midjourney-Proxy(Plus)](https://github.com/novicezk/midjourney-proxy) | [Documentation](https://doc.newapi.pro/en/api/midjourney-proxy-image) |
+| 🎵 Suno-API | [Suno API](https://github.com/Suno-API/Suno-API) | [Documentation](https://doc.newapi.pro/en/api/suno-music) |
+| 🔄 Rerank | Cohere, Jina | [Documentation](https://docs.newapi.pro/en/docs/api/ai-model/rerank/create-rerank) |
+| 💬 Claude | Format Messages | [Documentation](https://docs.newapi.pro/en/docs/api/ai-model/chat/create-message) |
+| 🌐 Gemini | Format Google Gemini | [Documentation](https://doc.newapi.pro/en/api/google-gemini-chat) |
+| 🔧 Dify | Mode ChatFlow | - |
+| 🎯 Personnalisé | Prise en charge de l'adresse d'appel complète | - |
+
+### 📡 Interfaces prises en charge
+
+<details>
+<summary>Voir la liste complète des interfaces</summary>
+
+- [Interface de discussion (Chat Completions)](https://docs.newapi.pro/en/docs/api/ai-model/chat/openai/create-chat-completion)
+- [Interface de réponse (Responses)](https://docs.newapi.pro/en/docs/api/ai-model/chat/openai/create-response)
+- [Interface d'image (Image)](https://docs.newapi.pro/en/docs/api/ai-model/images/openai/v1-images-generations--post)
+- [Interface audio (Audio)](https://docs.newapi.pro/en/docs/api/ai-model/audio/openai/create-transcription)
+- [Interface vidéo (Video)](https://docs.newapi.pro/en/docs/api/ai-model/videos/create-video-generation)
+- [Interface d'incorporation (Embeddings)](https://docs.newapi.pro/en/docs/api/ai-model/embeddings/create-embedding)
+- [Interface de rerank (Rerank)](https://docs.newapi.pro/en/docs/api/ai-model/rerank/create-rerank)
+- [Conversation en temps réel (Realtime)](https://docs.newapi.pro/en/docs/api/ai-model/realtime/create-realtime-session)
+- [Discussion Claude](https://docs.newapi.pro/en/docs/api/ai-model/chat/create-message)
+- [Discussion Google Gemini](https://doc.newapi.pro/en/api/google-gemini-chat)
+
+</details>
+
+---
+
+## 🚢 Déploiement
+
+> [!TIP]
+> **Dernière image Docker:** `calciumion/new-api:latest`
+
+### 📋 Exigences de déploiement
+
+| Composant | Exigence |
+|------|------|
+| **Base de données locale** | SQLite (Docker doit monter le répertoire `/data`)|
+| **Base de données distante | MySQL ≥ 5.7.8 ou PostgreSQL ≥ 9.6 |
+| **Moteur de conteneur** | Docker / Docker Compose |
+
+### ⚙️ Configuration des variables d'environnement
+
+<details>
+<summary>Configuration courante des variables d'environnement</summary>
+
+| Nom de variable | Description | Valeur par défaut |
+|--------|------|--------|
+| `SESSION_SECRET` | Secret de session (requis pour le déploiement multi-machines) |
+| `CRYPTO_SECRET` | Secret de chiffrement (requis pour Redis) | - |
+| `SQL_DSN` | Chaine de connexion à la base de données | - |
+| `REDIS_CONN_STRING` | Chaine de connexion Redis | - |
+| `STREAMING_TIMEOUT` | Délai d'expiration du streaming (secondes) | `300` |
+| `STREAM_SCANNER_MAX_BUFFER_MB` | Taille max du buffer par ligne (Mo) pour le scanner SSE ; à augmenter quand les sorties image/base64 sont très volumineuses (ex. images 4K) | `64` |
+| `MAX_REQUEST_BODY_MB` | Taille maximale du corps de requête (Mo, comptée **après décompression** ; évite les requêtes énormes/zip bombs qui saturent la mémoire). Dépassement ⇒ `413` | `32` |
+| `AZURE_DEFAULT_API_VERSION` | Version de l'API Azure | `2025-04-01-preview` |
+| `ERROR_LOG_ENABLED` | Interrupteur du journal d'erreurs | `false` |
+| `PYROSCOPE_URL` | Adresse du serveur Pyroscope | - |
+| `PYROSCOPE_APP_NAME` | Nom de l'application Pyroscope | `new-api` |
+| `PYROSCOPE_BASIC_AUTH_USER` | Utilisateur Basic Auth Pyroscope | - |
+| `PYROSCOPE_BASIC_AUTH_PASSWORD` | Mot de passe Basic Auth Pyroscope | - |
+| `PYROSCOPE_MUTEX_RATE` | Taux d'échantillonnage mutex Pyroscope | `5` |
+| `PYROSCOPE_BLOCK_RATE` | Taux d'échantillonnage block Pyroscope | `5` |
+| `HOSTNAME` | Nom d'hôte tagué pour Pyroscope | `new-api` |
+
+📖 **Configuration complète:** [Documentation des variables d'environnement](https://docs.newapi.pro/en/docs/installation/config-maintenance/environment-variables)
+
+</details>
+
+### 🔧 Méthodes de déploiement
+
+<details>
+<summary><strong>Méthode 1: Docker Compose (recommandé)</strong></summary>
+
+```bash
+# Cloner le projet
+git clone https://github.com/QuantumNous/new-api.git
+cd new-api
+
+# Modifier la configuration
+nano docker-compose.yml
+
+# Démarrer le service
+docker-compose up -d
+```
+
+</details>
+
+<details>
+<summary><strong>Méthode 2: Commandes Docker</strong></summary>
+
+**Utilisation de SQLite:**
+```bash
+docker run --name new-api -d --restart always \
+  -p 3000:3000 \
+  -e TZ=Asia/Shanghai \
+  -v ./data:/data \
+  calciumion/new-api:latest
+```
+
+**Utilisation de MySQL:**
+```bash
+docker run --name new-api -d --restart always \
+  -p 3000:3000 \
+  -e SQL_DSN="root:123456@tcp(localhost:3306)/oneapi" \
+  -e TZ=Asia/Shanghai \
+  -v ./data:/data \
+  calciumion/new-api:latest
+```
+
+> **💡 Explication du chemin:** 
+> - `./data:/data` - Chemin relatif, données sauvegardées dans le dossier data du répertoire actuel
+> - Vous pouvez également utiliser un chemin absolu, par exemple : `/your/custom/path:/data`
+
+</details>
+
+<details>
+<summary><strong>Méthode 3: Panneau BaoTa</strong></summary>
+
+1. Installez le panneau BaoTa (version **9.2.0** ou supérieure), recherchez **New-API** dans le magasin d'applications et installez-le.
+2. Recherchez **New-API** dans le magasin d'applications et installez-le.
+
+📖 [Tutoriel avec des images](./docs/BT.md)
+
+</details>
+
+### ⚠️ Considérations sur le déploiement multi-machines
+
+> [!WARNING]
+> - **Doit définir** `SESSION_SECRET` - Sinon l'état de connexion sera incohérent sur plusieurs machines
+> - **Redis partagé doit définir** `CRYPTO_SECRET` - Sinon les données ne pourront pas être déchiffrées
+
+### 🔄 Nouvelle tentative de canal et cache
+
+**Configuration de la nouvelle tentative:** `Paramètres → Paramètres de fonctionnement → Paramètres généraux → Nombre de tentatives en cas d'échec`
+
+**Configuration du cache:**
+- `REDIS_CONN_STRING`: Cache Redis (recommandé)
+- `MEMORY_CACHE_ENABLED`: Cache mémoire
+
+---
+
+## 🔗 Projets connexes
+
+### Projets en amont
+
+| Projet | Description |
+|------|------|
+| [One API](https://github.com/songquanpeng/one-api) | Base du projet original |
+| [Midjourney-Proxy](https://github.com/novicezk/midjourney-proxy) | Prise en charge de l'interface Midjourney |
+
+### Outils d'accompagnement
+
+| Projet | Description |
+|------|------|
+| [neko-api-key-tool](https://github.com/Calcium-Ion/neko-api-key-tool) | Outil de recherche de quota d'utilisation avec une clé |
+
+---
+
+## 💬 Aide et support
+
+### 📖 Ressources de documentation
+
+| Ressource | Lien |
+|------|------|
+| 📘 FAQ | [FAQ](https://docs.newapi.pro/en/docs/support/faq) |
+| 💬 Interaction avec la communauté | [Canaux de communication](https://docs.newapi.pro/en/docs/support/community-interaction) |
+| 🐛 Commentaires sur les problèmes | [Commentaires sur les problèmes](https://docs.newapi.pro/en/docs/support/feedback-issues) |
+| 📚 Documentation complète | [Documentation officielle](https://docs.newapi.pro/en/docs) |
+
+### 🤝 Guide de contribution
+
+Bienvenue à toutes les formes de contribution!
+
+- 🐛 Signaler des bogues
+- 💡 Proposer de nouvelles fonctionnalités
+- 📝 Améliorer la documentation
+- 🔧 Soumettre du code
+
+---

 ## 🌟 Historique des étoiles

-[![Graphique de l'historique des étoiles](https://api.star-history.com/svg?repos=Calcium-Ion/new-api&type=Date)](https://star-history.com/#Calcium-Ion/new-api&Date)
+<div align="center">
+
+[![Graphique de l'historique des étoiles](https://api.star-history.com/svg?repos=Calcium-Ion/new-api&type=Date)](https://star-history.com/#Calcium-Ion/new-api&Date)
+
+</div>
+
+---
+
+<div align="center">
+
+### 💖 Merci d'utiliser New API
+
+Si ce projet vous est utile, bienvenue à nous donner une ⭐️ Étoile！
+
+**[Documentation officielle](https://docs.newapi.pro/en/docs)** • **[Commentaires sur les problèmes](https://github.com/Calcium-Ion/new-api/issues)** • **[Dernière version](https://github.com/Calcium-Ion/new-api/releases)**
+
+<sub>Construit avec ❤️ par QuantumNous</sub>
+
+</div>
--- a/README.ja.md
+++ b/README.ja.md
@@ -1,19 +1,17 @@
-<p align="right">
-   <a href="./README.md">中文</a> | <a href="./README.en.md">English</a> | <a href="./README.fr.md">Français</a> | <strong>日本語</strong>
-</p>
-
-> [!NOTE]
-> **MT（機械翻訳）**: この文書は機械翻訳されています。最も正確な情報については、[中国語版](./README.md)を参照してください。
-
 <div align="center">

 ![new-api](/web/public/logo.png)

 # New API

-🍥次世代大規模モデルゲートウェイとAI資産管理システム
+🍥 **次世代大規模モデルゲートウェイとAI資産管理システム**

-<a href="https://trendshift.io/repositories/8227" target="_blank"><img src="https://trendshift.io/api/badge/repositories/8227" alt="Calcium-Ion%2Fnew-api | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
+<p align="center">
+  <a href="./README.md">中文</a> | 
+  <a href="./README.en.md">English</a> | 
+  <a href="./README.fr.md">Français</a> | 
+  <strong>日本語</strong>
+</p>

 <p align="center">
  <a href="https://raw.githubusercontent.com/Calcium-Ion/new-api/main/LICENSE">
@@ -32,6 +30,21 @@
    <img src="https://goreportcard.com/badge/github.com/Calcium-Ion/new-api" alt="GoReportCard">
  </a>
 </p>
+
+<p align="center">
+  <a href="https://trendshift.io/repositories/8227" target="_blank">
+    <img src="https://trendshift.io/api/badge/repositories/8227" alt="Calcium-Ion%2Fnew-api | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/>
+  </a>
+</p>
+
+<p align="center">
+  <a href="#-クイックスタート">クイックスタート</a> •
+  <a href="#-主な機能">主な機能</a> •
+  <a href="#-デプロイ">デプロイ</a> •
+  <a href="#-ドキュメント">ドキュメント</a> •
+  <a href="#-ヘルプサポート">ヘルプ</a>
+</p>
+
 </div>

 ## 📝 プロジェクト説明
@@ -44,183 +57,404 @@
 > - ユーザーは、OpenAIの[利用規約](https://openai.com/policies/terms-of-use)および**法律法規**を遵守する必要があり、違法な目的で使用してはいけません。
 > - [《生成式人工智能服务管理暂行办法》](http://www.cac.gov.cn/2023-07/13/c_1690898327029107.htm)の要求に従い、中国地域の公衆に未登録の生成式AI サービスを提供しないでください。

-<h2>🤝 信頼できるパートナー</h2>
-<p id="premium-sponsors">&nbsp;</p>
-<p align="center"><strong>順不同</strong></p>
+---
+
+## 🤝 信頼できるパートナー
+
 <p align="center">
-  <a href="https://www.cherry-ai.com/" target=_blank><img
-    src="./docs/images/cherry-studio.png" alt="Cherry Studio" height="120"
-  /></a>
-  <a href="https://bda.pku.edu.cn/" target=_blank><img
-    src="./docs/images/pku.png" alt="北京大学" height="120"
-  /></a>
-  <a href="https://www.compshare.cn/?ytag=GPU_yy_gh_newapi" target=_blank><img
-    src="./docs/images/ucloud.png" alt="UCloud 優刻得" height="120"
-  /></a>
-  <a href="https://www.aliyun.com/" target=_blank><img
-    src="./docs/images/aliyun.png" alt="Alibaba Cloud" height="120"
-  /></a>
-  <a href="https://io.net/" target=_blank><img
-    src="./docs/images/io-net.png" alt="IO.NET" height="120"
-  /></a>
+  <em>順不同</em>
 </p>
-<p>&nbsp;</p>

-## 📚 ドキュメント
+<p align="center">
+  <a href="https://www.cherry-ai.com/" target="_blank">
+    <img src="./docs/images/cherry-studio.png" alt="Cherry Studio" height="80" />
+  </a>
+  <a href="https://bda.pku.edu.cn/" target="_blank">
+    <img src="./docs/images/pku.png" alt="北京大学" height="80" />
+  </a>
+  <a href="https://www.compshare.cn/?ytag=GPU_yy_gh_newapi" target="_blank">
+    <img src="./docs/images/ucloud.png" alt="UCloud 優刻得" height="80" />
+  </a>
+  <a href="https://www.aliyun.com/" target="_blank">
+    <img src="./docs/images/aliyun.png" alt="Alibaba Cloud" height="80" />
+  </a>
+  <a href="https://io.net/" target="_blank">
+    <img src="./docs/images/io-net.png" alt="IO.NET" height="80" />
+  </a>
+</p>

-詳細なドキュメントは公式Wikiをご覧ください：[https://docs.newapi.pro/](https://docs.newapi.pro/)
+---

-AIが生成したDeepWikiにもアクセスできます：
-[![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/QuantumNous/new-api)
+## 🙏 特別な感謝

-## ✨ 主な機能
+<p align="center">
+  <a href="https://www.jetbrains.com/?from=new-api" target="_blank">
+    <img src="https://resources.jetbrains.com/storage/products/company/brand/logos/jb_beam.png" alt="JetBrains Logo" width="120" />
+  </a>
+</p>

-New APIは豊富な機能を提供しています。詳細な機能については[機能説明](https://docs.newapi.pro/wiki/features-introduction)を参照してください：
+<p align="center">
+  <strong>感謝 <a href="https://www.jetbrains.com/?from=new-api">JetBrains</a> が本プロジェクトに無料のオープンソース開発ライセンスを提供してくれたことに感謝します</strong>
+</p>

-1. 🎨 全く新しいUIインターフェース
-2. 🌍 多言語サポート
-3. 💰 オンラインチャージ機能をサポート、現在EPayとStripeをサポート
-4. 🔍 キーによる使用量クォータの照会をサポート（[neko-api-key-tool](https://github.com/Calcium-Ion/neko-api-key-tool)と連携）
-5. 🔄 オリジナルのOne APIデータベースと互換性あり
-6. 💵 モデルの従量課金をサポート
-7. ⚖️ チャネルの重み付けランダムをサポート
-8. 📈 データダッシュボード（コンソール）
-9. 🔒 トークングループ化、モデル制限
-10. 🤖 より多くの認証ログイン方法をサポート（LinuxDO、Telegram、OIDC）
-11. 🔄 Rerankモデルをサポート（CohereとJina）、[API ドキュメント](https://docs.newapi.pro/api/jinaai-rerank)
-12. ⚡ OpenAI Realtime APIをサポート（Azureチャネルを含む）、[APIドキュメント](https://docs.newapi.pro/api/openai-realtime)
-13. ⚡ **OpenAI Responses**形式をサポート、[APIドキュメント](https://docs.newapi.pro/api/openai-responses)
-14. ⚡ **Claude Messages**形式をサポート、[APIドキュメント](https://docs.newapi.pro/api/anthropic-chat)
-15. ⚡ **Google Gemini**形式をサポート、[APIドキュメント](https://docs.newapi.pro/api/google-gemini-chat/)
-16. 🧠 モデル名のサフィックスを通じてreasoning effortを設定することをサポート：
-    1. OpenAI oシリーズモデル
-        - `-high`サフィックスを追加してhigh reasoning effortに設定（例：`o3-mini-high`）
-        - `-medium`サフィックスを追加してmedium reasoning effortに設定（例：`o3-mini-medium`）
-        - `-low`サフィックスを追加してlow reasoning effortに設定（例：`o3-mini-low`）
-    2. Claude思考モデル
-        - `-thinking`サフィックスを追加して思考モードを有効にする（例：`claude-3-7-sonnet-20250219-thinking`）
-17. 🔄 思考からコンテンツへの機能
-18. 🔄 ユーザーに対するモデルレート制限機能
-19. 🔄 リクエストフォーマット変換機能、以下の3つのフォーマット変換をサポート：
-    1. OpenAI Chat Completions => Claude Messages
-    2. Claude Messages => OpenAI Chat Completions（Claude Codeがサードパーティモデルを呼び出す際に使用可能）
-    3. OpenAI Chat Completions => Gemini Chat
-20. 💰 キャッシュ課金サポート、有効にするとキャッシュがヒットした際に設定された比率で課金できます：
-    1. `システム設定-運営設定`で`プロンプトキャッシュ倍率`オプションを設定
-    2. チャネルで`プロンプトキャッシュ倍率`を設定、範囲は0-1、例えば0.5に設定するとキャッシュがヒットした際に50%で課金
-    3. サポートされているチャネル：
-        - [x] OpenAI
-        - [x] Azure
-        - [x] DeepSeek
-        - [x] Claude
+---

-## モデルサポート
+## 🚀 クイックスタート

-このバージョンは複数のモデルをサポートしています。詳細は[APIドキュメント-中継インターフェース](https://docs.newapi.pro/api)を参照してください：
+### Docker Composeを使用（推奨）

-1. サードパーティモデル **gpts**（gpt-4-gizmo-*）
-2. サードパーティチャネル[Midjourney-Proxy(Plus)](https://github.com/novicezk/midjourney-proxy)インターフェース、[APIドキュメント](https://docs.newapi.pro/api/midjourney-proxy-image)
-3. サードパーティチャネル[Suno API](https://github.com/Suno-API/Suno-API)インターフェース、[APIドキュメント](https://docs.newapi.pro/api/suno-music)
-4. カスタムチャネル、完全な呼び出しアドレスの入力をサポート
-5. Rerankモデル（[Cohere](https://cohere.ai/)と[Jina](https://jina.ai/)）、[APIドキュメント](https://docs.newapi.pro/api/jinaai-rerank)
-6. Claude Messages形式、[APIドキュメント](https://docs.newapi.pro/api/anthropic-chat)
-7. Google Gemini形式、[APIドキュメント](https://docs.newapi.pro/api/google-gemini-chat/)
-8. Dify、現在はchatflowのみをサポート
-9. その他のインターフェースについては[APIドキュメント](https://docs.newapi.pro/api)を参照してください
-
-## 環境変数設定
-
-詳細な設定説明については[インストールガイド-環境変数設定](https://docs.newapi.pro/installation/environment-variables)を参照してください：
-
- `GENERATE_DEFAULT_TOKEN`：新規登録ユーザーに初期トークンを生成するかどうか、デフォルトは`false`
- `STREAMING_TIMEOUT`：ストリーミング応答のタイムアウト時間、デフォルトは300秒
- `DIFY_DEBUG`：Difyチャネルがワークフローとノード情報を出力するかどうか、デフォルトは`true`
- `GET_MEDIA_TOKEN`：画像トークンを統計するかどうか、デフォルトは`true`
- `GET_MEDIA_TOKEN_NOT_STREAM`：非ストリーミングの場合に画像トークンを統計するかどうか、デフォルトは`true`
- `UPDATE_TASK`：非同期タスク（Midjourney、Suno）を更新するかどうか、デフォルトは`true`
- `GEMINI_VISION_MAX_IMAGE_NUM`：Geminiモデルの最大画像数、デフォルトは`16`
- `MAX_FILE_DOWNLOAD_MB`: 最大ファイルダウンロードサイズ、単位MB、デフォルトは`20`
- `CRYPTO_SECRET`：暗号化キー、Redisデータベースの内容を暗号化するために使用
- `AZURE_DEFAULT_API_VERSION`：Azureチャネルのデフォルトのバージョン、デフォルトは`2025-04-01-preview`
- `NOTIFICATION_LIMIT_DURATION_MINUTE`：メールなどの通知制限の継続時間、デフォルトは`10`分
- `NOTIFY_LIMIT_COUNT`：指定された継続時間内のユーザー通知の最大数、デフォルトは`2`
- `ERROR_LOG_ENABLED=true`: エラーログを記録して表示するかどうか、デフォルトは`false`
-
-## デプロイ
-
-詳細なデプロイガイドについては[インストールガイド-デプロイ方法](https://docs.newapi.pro/installation)を参照してください：
-
-> [!TIP]
-> 最新のDockerイメージ：`calciumion/new-api:latest`  
-
-### マルチマシンデプロイの注意事項
- 環境変数`SESSION_SECRET`を設定する必要があります。そうしないとマルチマシンデプロイ時にログイン状態が不一致になります
- Redisを共有する場合、`CRYPTO_SECRET`を設定する必要があります。そうしないとマルチマシンデプロイ時にRedisの内容を取得できません
-
-### デプロイ要件
- ローカルデータベース（デフォルト）：SQLite（Dockerデプロイの場合は`/data`ディレクトリをマウントする必要があります）
- リモートデータベース：MySQLバージョン >= 5.7.8、PgSQLバージョン >= 9.6
-
-### デプロイ方法
-
-#### 宝塔パネルのDocker機能を使用してデプロイ
-宝塔パネル（**9.2.0バージョン**以上）をインストールし、アプリケーションストアで**New-API**を見つけてインストールします。
-[画像付きチュートリアル](./docs/BT.md)
-
-#### Docker Composeを使用してデプロイ（推奨）
-```shell
-# プロジェクトをダウンロード
-git clone https://github.com/Calcium-Ion/new-api.git
+```bash
+# プロジェクトをクローン
+git clone https://github.com/QuantumNous/new-api.git
 cd new-api
-# 必要に応じてdocker-compose.ymlを編集
-# 起動
+
+# docker-compose.yml 設定を編集
+nano docker-compose.yml
+
+# サービスを起動
 docker-compose up -d
 ```

-#### Dockerイメージを直接使用
-```shell
-# SQLiteを使用
-docker run --name new-api -d --restart always -p 3000:3000 -e TZ=Asia/Shanghai -v /home/ubuntu/data/new-api:/data calciumion/new-api:latest
+<details>
+<summary><strong>Dockerコマンドを使用</strong></summary>
+
+```bash
+# 最新のイメージをプル
+docker pull calciumion/new-api:latest
+
+# SQLiteを使用（デフォルト）
+docker run --name new-api -d --restart always \
+  -p 3000:3000 \
+  -e TZ=Asia/Shanghai \
+  -v ./data:/data \
+  calciumion/new-api:latest

 # MySQLを使用
-docker run --name new-api -d --restart always -p 3000:3000 -e SQL_DSN="root:123456@tcp(localhost:3306)/oneapi" -e TZ=Asia/Shanghai -v /home/ubuntu/data/new-api:/data calciumion/new-api:latest
+docker run --name new-api -d --restart always \
+  -p 3000:3000 \
+  -e SQL_DSN="root:123456@tcp(localhost:3306)/oneapi" \
+  -e TZ=Asia/Shanghai \
+  -v ./data:/data \
+  calciumion/new-api:latest
 ```

-## チャネルリトライとキャッシュ
-チャネルリトライ機能はすでに実装されており、`設定->運営設定->一般設定->失敗リトライ回数`でリトライ回数を設定できます。**キャッシュ機能を有効にすることを推奨します**。
+> **💡 ヒント:** `-v ./data:/data` は現在のディレクトリの `data` フォルダにデータを保存します。絶対パスに変更することもできます：`-v /your/custom/path:/data`

-### キャッシュ設定方法
-1. `REDIS_CONN_STRING`：Redisをキャッシュとして設定
-2. `MEMORY_CACHE_ENABLED`：メモリキャッシュを有効にする（Redisを設定した場合は手動設定不要）
+</details>

-## APIドキュメント
+---

-詳細なAPIドキュメントについては[APIドキュメント](https://docs.newapi.pro/api)を参照してください：
+🎉 デプロイが完了したら、`http://localhost:3000` にアクセスして使用を開始してください！

- [チャットインターフェース（Chat Completions）](https://docs.newapi.pro/api/openai-chat)
- [レスポンスインターフェース（Responses）](https://docs.newapi.pro/api/openai-responses)
- [画像インターフェース（Image）](https://docs.newapi.pro/api/openai-image)
- [再ランク付けインターフェース（Rerank）](https://docs.newapi.pro/api/jinaai-rerank)
- [リアルタイム対話インターフェース（Realtime）](https://docs.newapi.pro/api/openai-realtime)
- [Claudeチャットインターフェース](https://docs.newapi.pro/api/anthropic-chat)
- [Google Geminiチャットインターフェース](https://docs.newapi.pro/api/google-gemini-chat)
+📖 その他のデプロイ方法については[デプロイガイド](https://docs.newapi.pro/ja/docs/installation)を参照してください。

-## 関連プロジェクト
- [One API](https://github.com/songquanpeng/one-api)：オリジナルプロジェクト
- [Midjourney-Proxy](https://github.com/novicezk/midjourney-proxy)：Midjourneyインターフェースサポート
- [neko-api-key-tool](https://github.com/Calcium-Ion/neko-api-key-tool)：キーを使用して使用量クォータを照会
+---

-New APIベースのその他のプロジェクト：
- [new-api-horizon](https://github.com/Calcium-Ion/new-api-horizon)：New API高性能最適化版
+## 📚 ドキュメント

-## ヘルプサポート
+<div align="center">

-問題がある場合は、[ヘルプサポート](https://docs.newapi.pro/support)を参照してください：
- [コミュニティ交流](https://docs.newapi.pro/support/community-interaction)
- [問題のフィードバック](https://docs.newapi.pro/support/feedback-issues)
- [よくある質問](https://docs.newapi.pro/support/faq)
+### 📖 [公式ドキュメント](https://docs.newapi.pro/ja/docs) | [![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/QuantumNous/new-api)

-## 🌟 Star History
+</div>

-[![Star History Chart](https://api.star-history.com/svg?repos=Calcium-Ion/new-api&type=Date)](https://star-history.com/#Calcium-Ion/new-api&Date)
+**クイックナビゲーション:**

+| カテゴリ | リンク |
+|------|------|
+| 🚀 デプロイガイド | [インストールドキュメント](https://docs.newapi.pro/ja/docs/installation) |
+| ⚙️ 環境設定 | [環境変数](https://docs.newapi.pro/ja/docs/installation/config-maintenance/environment-variables) |
+| 📡 APIドキュメント | [APIドキュメント](https://docs.newapi.pro/ja/docs/api) |
+| ❓ よくある質問 | [FAQ](https://docs.newapi.pro/ja/docs/support/faq) |
+| 💬 コミュニティ交流 | [交流チャネル](https://docs.newapi.pro/ja/docs/support/community-interaction) |
+
+---
+
+## ✨ 主な機能
+
+> 詳細な機能については[機能説明](https://docs.newapi.pro/ja/docs/guide/wiki/basic-concepts/features-introduction)を参照してください。
+
+### 🎨 コア機能
+
+| 機能 | 説明 |
+|------|------|
+| 🎨 新しいUI | モダンなユーザーインターフェースデザイン |
+| 🌍 多言語 | 中国語、英語、フランス語、日本語をサポート |
+| 🔄 データ互換性 | オリジナルのOne APIデータベースと完全に互換性あり |
+| 📈 データダッシュボード | ビジュアルコンソールと統計分析 |
+| 🔒 権限管理 | トークングループ化、モデル制限、ユーザー管理 |
+
+### 💰 支払いと課金
+
+- ✅ オンライン充電（EPay、Stripe）
+- ✅ モデルの従量課金
+- ✅ キャッシュ課金サポート（OpenAI、Azure、DeepSeek、Claude、Qwenなどすべてのサポートされているモデル）
+- ✅ 柔軟な課金ポリシー設定
+
+### 🔐 認証とセキュリティ
+
+- 🤖 LinuxDO認証ログイン
+- 📱 Telegram認証ログイン
+- 🔑 OIDC統一認証
+
+
+
+### 🚀 高度な機能
+
+**APIフォーマットサポート:**
+- ⚡ [OpenAI Responses](https://docs.newapi.pro/ja/docs/api/ai-model/chat/openai/create-response)
+- ⚡ [OpenAI Realtime API](https://docs.newapi.pro/ja/docs/api/ai-model/realtime/create-realtime-session)（Azureを含む）
+- ⚡ [Claude Messages](https://docs.newapi.pro/ja/docs/api/ai-model/chat/create-message)
+- ⚡ [Google Gemini](https://doc.newapi.pro/ja/api/google-gemini-chat)
+- 🔄 [Rerankモデル](https://docs.newapi.pro/ja/docs/api/ai-model/rerank/create-rerank)
+- ⚡ [OpenAI Realtime API](https://docs.newapi.pro/ja/docs/api/ai-model/realtime/create-realtime-session)
+- ⚡ [Claude Messages](https://docs.newapi.pro/ja/docs/api/ai-model/chat/create-message)
+- ⚡ [Google Gemini](https://doc.newapi.pro/ja/api/google-gemini-chat)
+- 🔄 [Rerankモデル](https://docs.newapi.pro/ja/docs/api/ai-model/rerank/create-rerank)（Cohere、Jina）
+
+**インテリジェントルーティング:**
+- ⚖️ チャネル重み付けランダム
+- 🔄 失敗自動リトライ
+- 🚦 ユーザーレベルモデルレート制限
+
+**フォーマット変換:**
+- 🔄 OpenAI ⇄ Claude Messages
+- 🔄 OpenAI ⇄ Gemini Chat
+- 🔄 思考からコンテンツへの機能
+
+**Reasoning Effort サポート:**
+
+<details>
+<summary>詳細設定を表示</summary>
+
+**OpenAIシリーズモデル:**
+- `o3-mini-high` - 高思考努力
+- `o3-mini-medium` - 中思考努力
+- `o3-mini-low` - 低思考努力
+- `gpt-5-high` - 高思考努力
+- `gpt-5-medium` - 中思考努力
+- `gpt-5-low` - 低思考努力
+
+**Claude思考モデル:**
+- `claude-3-7-sonnet-20250219-thinking` - 思考モードを有効にする
+
+**Google Geminiシリーズモデル:**
+- `gemini-2.5-flash-thinking` - 思考モードを有効にする
+- `gemini-2.5-flash-nothinking` - 思考モードを無効にする
+- `gemini-2.5-pro-thinking` - 思考モードを有効にする
+- `gemini-2.5-pro-thinking-128` - 思考モードを有効にし、思考予算を128トークンに設定する
+- Gemini モデル名の末尾に `-low` / `-medium` / `-high` を付けることで推論強度を直接指定できます（追加の思考予算サフィックスは不要です）。
+
+</details>
+
+---
+
+## 🤖 モデルサポート
+
+> 詳細については[APIドキュメント - 中継インターフェース](https://docs.newapi.pro/ja/docs/api)
+
+| モデルタイプ | 説明 | ドキュメント |
+|---------|------|------|
+| 🤖 OpenAI GPTs | gpt-4-gizmo-* シリーズ | - |
+| 🎨 Midjourney-Proxy | [Midjourney-Proxy(Plus)](https://github.com/novicezk/midjourney-proxy) | [ドキュメント](https://doc.newapi.pro/ja/api/midjourney-proxy-image) |
+| 🎵 Suno-API | [Suno API](https://github.com/Suno-API/Suno-API) | [ドキュメント](https://doc.newapi.pro/ja/api/suno-music) |
+| 🔄 Rerank | Cohere、Jina | [ドキュメント](https://docs.newapi.pro/ja/docs/api/ai-model/rerank/create-rerank) |
+| 💬 Claude | Messagesフォーマット | [ドキュメント](https://docs.newapi.pro/ja/docs/api/ai-model/chat/create-message) |
+| 🌐 Gemini | Google Geminiフォーマット | [ドキュメント](https://doc.newapi.pro/ja/api/google-gemini-chat) |
+| 🔧 Dify | ChatFlowモード | - |
+| 🎯 カスタム | 完全な呼び出しアドレスの入力をサポート | - |
+
+### 📡 サポートされているインターフェース
+
+<details>
+<summary>完全なインターフェースリストを表示</summary>
+
+- [チャットインターフェース (Chat Completions)](https://docs.newapi.pro/ja/docs/api/ai-model/chat/openai/create-chat-completion)
+- [レスポンスインターフェース (Responses)](https://docs.newapi.pro/ja/docs/api/ai-model/chat/openai/create-response)
+- [イメージインターフェース (Image)](https://docs.newapi.pro/ja/docs/api/ai-model/images/openai/v1-images-generations--post)
+- [オーディオインターフェース (Audio)](https://docs.newapi.pro/ja/docs/api/ai-model/audio/openai/create-transcription)
+- [ビデオインターフェース (Video)](https://docs.newapi.pro/ja/docs/api/ai-model/videos/create-video-generation)
+- [エンベッドインターフェース (Embeddings)](https://docs.newapi.pro/ja/docs/api/ai-model/embeddings/create-embedding)
+- [再ランク付けインターフェース (Rerank)](https://docs.newapi.pro/ja/docs/api/ai-model/rerank/create-rerank)
+- [リアルタイム対話インターフェース (Realtime)](https://docs.newapi.pro/ja/docs/api/ai-model/realtime/create-realtime-session)
+- [Claudeチャット](https://docs.newapi.pro/ja/docs/api/ai-model/chat/create-message)
+- [Google Geminiチャット](https://doc.newapi.pro/ja/api/google-gemini-chat)
+
+</details>
+
+---
+
+## 🚢 デプロイ
+
+> [!TIP]
+> **最新のDockerイメージ:** `calciumion/new-api:latest`
+
+### 📋 デプロイ要件
+
+| コンポーネント | 要件 |
+|------|------|
+| **ローカルデータベース** | SQLite（Dockerは `/data` ディレクトリをマウントする必要があります）|
+| **リモートデータベース** | MySQL ≥ 5.7.8 または PostgreSQL ≥ 9.6 |
+| **コンテナエンジン** | Docker / Docker Compose |
+
+### ⚙️ 環境変数設定
+
+<details>
+<summary>一般的な環境変数設定</summary>
+
+| 変数名 | 説明 | デフォルト値 |
+|--------|------|--------|
+| `SESSION_SECRET` | セッションシークレット（マルチマシンデプロイに必須） | - |
+| `CRYPTO_SECRET` | 暗号化シークレット（Redisに必須） | - |
+| `SQL_DSN** | データベース接続文字列 | - |
+| `REDIS_CONN_STRING` | Redis接続文字列 | - |
+| `STREAMING_TIMEOUT` | ストリーミング応答のタイムアウト時間（秒） | `300` |
+| `STREAM_SCANNER_MAX_BUFFER_MB` | ストリームスキャナの1行あたりバッファ上限（MB）。4K画像など巨大なbase64 `data:` ペイロードを扱う場合は値を増加させてください | `64` |
+| `MAX_REQUEST_BODY_MB` | リクエストボディ最大サイズ（MB、**解凍後**に計測。巨大リクエスト/zip bomb によるメモリ枯渇を防止）。超過時は `413` | `32` |
+| `AZURE_DEFAULT_API_VERSION` | Azure APIバージョン | `2025-04-01-preview` |
+| `ERROR_LOG_ENABLED` | エラーログスイッチ | `false` |
+| `PYROSCOPE_URL` | Pyroscopeサーバーのアドレス | - |
+| `PYROSCOPE_APP_NAME` | Pyroscopeアプリ名 | `new-api` |
+| `PYROSCOPE_BASIC_AUTH_USER` | Pyroscope Basic Authユーザー | - |
+| `PYROSCOPE_BASIC_AUTH_PASSWORD` | Pyroscope Basic Authパスワード | - |
+| `PYROSCOPE_MUTEX_RATE` | Pyroscope mutexサンプリング率 | `5` |
+| `PYROSCOPE_BLOCK_RATE` | Pyroscope blockサンプリング率 | `5` |
+| `HOSTNAME` | Pyroscope用のホスト名タグ | `new-api` |
+
+📖 **完全な設定:** [環境変数ドキュメント](https://docs.newapi.pro/ja/docs/installation/config-maintenance/environment-variables)
+
+</details>
+
+### 🔧 デプロイ方法
+
+<details>
+<summary><strong>方法 1: Docker Compose（推奨）</strong></summary>
+
+```bash
+# プロジェクトをクローン
+git clone https://github.com/QuantumNous/new-api.git
+cd new-api
+
+# 設定を編集
+nano docker-compose.yml
+
+# サービスを起動
+docker-compose up -d
+```
+
+</details>
+
+<details>
+<summary><strong>方法 2: Dockerコマンド</strong></summary>
+
+**SQLiteを使用:**
+```bash
+docker run --name new-api -d --restart always \
+  -p 3000:3000 \
+  -e TZ=Asia/Shanghai \
+  -v ./data:/data \
+  calciumion/new-api:latest
+```
+
+**MySQLを使用:**
+```bash
+docker run --name new-api -d --restart always \
+  -p 3000:3000 \
+  -e SQL_DSN="root:123456@tcp(localhost:3306)/oneapi" \
+  -e TZ=Asia/Shanghai \
+  -v ./data:/data \
+  calciumion/new-api:latest
+```
+
+> **💡 パス説明:** 
+> - `./data:/data` - 相対パス、データは現在のディレクトリのdataフォルダに保存されます
+> - 絶対パスを使用することもできます：`/your/custom/path:/data`
+
+</details>
+
+<details>
+<summary><strong>方法 3: 宝塔パネル</strong></summary>
+
+1. 宝塔パネル（**9.2.0バージョン**以上）をインストールし、アプリケーションストアで**New-API**を検索してインストールします。
+
+📖 [画像付きチュートリアル](./docs/BT.md)
+
+</details>
+
+### ⚠️ マルチマシンデプロイの注意事項
+
+> [!WARNING]
+> - **必ず設定する必要があります** `SESSION_SECRET` - そうしないとマルチマシンデプロイ時にログイン状態が不一致になります
+> - **共有Redisは必ず設定する必要があります** `CRYPTO_SECRET` - そうしないとデータを復号化できません
+
+### 🔄 チャネルリトライとキャッシュ
+
+**リトライ設定:** `設定 → 運営設定 → 一般設定 → 失敗リトライ回数`
+
+**キャッシュ設定:**
+- `REDIS_CONN_STRING`：Redisキャッシュ（推奨）
+- `MEMORY_CACHE_ENABLED`：メモリキャッシュ
+
+---
+
+## 🔗 関連プロジェクト
+
+### 上流プロジェクト
+
+| プロジェクト | 説明 |
+|------|------|
+| [One API](https://github.com/songquanpeng/one-api) | オリジナルプロジェクトベース |
+| [Midjourney-Proxy](https://github.com/novicezk/midjourney-proxy) | Midjourneyインターフェースサポート |
+
+### 補助ツール
+
+| プロジェクト | 説明 |
+|------|------|
+| [neko-api-key-tool](https://github.com/Calcium-Ion/neko-api-key-tool) | キー使用量クォータ照会ツール |
+| [new-api-horizon](https://github.com/Calcium-Ion/new-api-horizon) | New API高性能最適化版 |
+
+---
+
+## 💬 ヘルプサポート
+
+### 📖 ドキュメントリソース
+
+| リソース | リンク |
+|------|------|
+| 📘 よくある質問 | [FAQ](https://docs.newapi.pro/ja/docs/support/faq) |
+| 💬 コミュニティ交流 | [交流チャネル](https://docs.newapi.pro/ja/docs/support/community-interaction) |
+| 🐛 問題のフィードバック | [問題フィードバック](https://docs.newapi.pro/ja/docs/support/feedback-issues) |
+| 📚 完全なドキュメント | [公式ドキュメント](https://docs.newapi.pro/ja/docs) |
+
+### 🤝 貢献ガイド
+
+あらゆる形の貢献を歓迎します！
+
+- 🐛 バグを報告する
+- 💡 新しい機能を提案する
+- 📝 ドキュメントを改善する
+- 🔧 コードを提出する
+
+---
+
+## 🌟 スター履歴
+
+<div align="center">
+
+[![スター履歴チャート](https://api.star-history.com/svg?repos=Calcium-Ion/new-api&type=Date)](https://star-history.com/#Calcium-Ion/new-api&Date)
+
+</div>
+
+---
+
+<div align="center">
+
+### 💖 New APIをご利用いただきありがとうございます
+
+このプロジェクトがあなたのお役に立てたなら、ぜひ ⭐️ スターをください！
+
+**[公式ドキュメント](https://docs.newapi.pro/ja/docs)** • **[問題フィードバック](https://github.com/Calcium-Ion/new-api/issues)** • **[最新リリース](https://github.com/Calcium-Ion/new-api/releases)**
+
+<sub>❤️ で構築された QuantumNous</sub>
+
+</div>
--- a/README.md
+++ b/README.md
@@ -1,15 +1,17 @@
-<p align="right">
-   <strong>中文</strong> | <a href="./README.en.md">English</a> | <a href="./README.fr.md">Français</a> | <a href="./README.ja.md">日本語</a>
-</p>
 <div align="center">

 ![new-api](/web/public/logo.png)

 # New API

-🍥新一代大模型网关与AI资产管理系统
+🍥 **新一代大模型网关与AI资产管理系统**

-<a href="https://trendshift.io/repositories/8227" target="_blank"><img src="https://trendshift.io/api/badge/repositories/8227" alt="Calcium-Ion%2Fnew-api | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
+<p align="center">
+  <strong>中文</strong> | 
+  <a href="./README.en.md">English</a> | 
+  <a href="./README.fr.md">Français</a> | 
+  <a href="./README.ja.md">日本語</a>
+</p>

 <p align="center">
  <a href="https://raw.githubusercontent.com/Calcium-Ion/new-api/main/LICENSE">
@@ -28,200 +30,429 @@
    <img src="https://goreportcard.com/badge/github.com/Calcium-Ion/new-api" alt="GoReportCard">
  </a>
 </p>
+
+<p align="center">
+  <a href="https://trendshift.io/repositories/8227" target="_blank">
+    <img src="https://trendshift.io/api/badge/repositories/8227" alt="Calcium-Ion%2Fnew-api | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/>
+  </a>
+</p>
+
+<p align="center">
+  <a href="#-快速开始">快速开始</a> •
+  <a href="#-主要特性">主要特性</a> •
+  <a href="#-部署">部署</a> •
+  <a href="#-文档">文档</a> •
+  <a href="#-帮助支持">帮助</a>
+</p>
+
 </div>

 ## 📝 项目说明

 > [!NOTE]  
-> 本项目为开源项目，在[One API](https://github.com/songquanpeng/one-api)的基础上进行二次开发
+> 本项目为开源项目，在 [One API](https://github.com/songquanpeng/one-api) 的基础上进行二次开发

 > [!IMPORTANT]  
-> - 本项目仅供个人学习使用，不保证稳定性，且不提供任何技术支持。
-> - 使用者必须在遵循 OpenAI 的[使用条款](https://openai.com/policies/terms-of-use)以及**法律法规**的情况下使用，不得用于非法用途。
-> - 根据[《生成式人工智能服务管理暂行办法》](http://www.cac.gov.cn/2023-07/13/c_1690898327029107.htm)的要求，请勿对中国地区公众提供一切未经备案的生成式人工智能服务。
+> - 本项目仅供个人学习使用，不保证稳定性，且不提供任何技术支持
+> - 使用者必须在遵循 OpenAI 的 [使用条款](https://openai.com/policies/terms-of-use) 以及**法律法规**的情况下使用，不得用于非法用途
+> - 根据 [《生成式人工智能服务管理暂行办法》](http://www.cac.gov.cn/2023-07/13/c_1690898327029107.htm) 的要求，请勿对中国地区公众提供一切未经备案的生成式人工智能服务
+
+---
+
+## 🤝 我们信任的合作伙伴

-<h2>🤝 我们信任的合作伙伴</h2>
-<p id="premium-sponsors">&nbsp;</p>
-<p align="center"><strong>排名不分先后</strong></p>
 <p align="center">
-  <a href="https://www.cherry-ai.com/" target=_blank><img
-    src="./docs/images/cherry-studio.png" alt="Cherry Studio" height="120"
-  /></a>
-  <a href="https://bda.pku.edu.cn/" target=_blank><img
-    src="./docs/images/pku.png" alt="北京大学" height="120"
-  /></a>
-  <a href="https://www.compshare.cn/?ytag=GPU_yy_gh_newapi" target=_blank><img
-    src="./docs/images/ucloud.png" alt="UCloud 优刻得" height="120"
-  /></a>
-  <a href="https://www.aliyun.com/" target=_blank><img
-    src="./docs/images/aliyun.png" alt="阿里云" height="120"
-  /></a>
-  <a href="https://io.net/" target=_blank><img
-    src="./docs/images/io-net.png" alt="IO.NET" height="120"
-  /></a>
+  <em>排名不分先后</em>
 </p>
-<p>&nbsp;</p>
+
+<p align="center">
+  <a href="https://www.cherry-ai.com/" target="_blank">
+    <img src="./docs/images/cherry-studio.png" alt="Cherry Studio" height="80" />
+  </a>
+  <a href="https://bda.pku.edu.cn/" target="_blank">
+    <img src="./docs/images/pku.png" alt="北京大学" height="80" />
+  </a>
+  <a href="https://www.compshare.cn/?ytag=GPU_yy_gh_newapi" target="_blank">
+    <img src="./docs/images/ucloud.png" alt="UCloud 优刻得" height="80" />
+  </a>
+  <a href="https://www.aliyun.com/" target="_blank">
+    <img src="./docs/images/aliyun.png" alt="阿里云" height="80" />
+  </a>
+  <a href="https://io.net/" target="_blank">
+    <img src="./docs/images/io-net.png" alt="IO.NET" height="80" />
+  </a>
+</p>
+
+---
+
+## 🙏 特别鸣谢
+
+<p align="center">
+  <a href="https://www.jetbrains.com/?from=new-api" target="_blank">
+    <img src="https://resources.jetbrains.com/storage/products/company/brand/logos/jb_beam.png" alt="JetBrains Logo" width="120" />
+  </a>
+</p>
+
+<p align="center">
+  <strong>感谢 <a href="https://www.jetbrains.com/?from=new-api">JetBrains</a> 为本项目提供免费的开源开发许可证</strong>
+</p>
+
+---
+
+## 🚀 快速开始
+
+### 使用 Docker Compose（推荐）
+
+```bash
+# 克隆项目
+git clone https://github.com/QuantumNous/new-api.git
+cd new-api
+
+# 编辑 docker-compose.yml 配置
+nano docker-compose.yml
+
+# 启动服务
+docker-compose up -d
+```
+
+<details>
+<summary><strong>使用 Docker 命令</strong></summary>
+
+```bash
+# 拉取最新镜像
+docker pull calciumion/new-api:latest
+
+# 使用 SQLite（默认）
+docker run --name new-api -d --restart always \
+  -p 3000:3000 \
+  -e TZ=Asia/Shanghai \
+  -v ./data:/data \
+  calciumion/new-api:latest
+
+# 使用 MySQL
+docker run --name new-api -d --restart always \
+  -p 3000:3000 \
+  -e SQL_DSN="root:123456@tcp(localhost:3306)/oneapi" \
+  -e TZ=Asia/Shanghai \
+  -v ./data:/data \
+  calciumion/new-api:latest
+```
+
+> **💡 提示：** `-v ./data:/data` 会将数据保存在当前目录的 `data` 文件夹中，你也可以改为绝对路径如 `-v /your/custom/path:/data`
+
+</details>
+
+---
+
+🎉 部署完成后，访问 `http://localhost:3000` 即可使用！
+
+📖 更多部署方式请参考 [部署指南](https://docs.newapi.pro/zh/docs/installation)
+
+---

 ## 📚 文档

-详细文档请访问我们的官方Wiki：[https://docs.newapi.pro/](https://docs.newapi.pro/)
+<div align="center">

-也可访问AI生成的DeepWiki:
-[![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/QuantumNous/new-api)
+### 📖 [官方文档](https://docs.newapi.pro/zh/docs) | [![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/QuantumNous/new-api)
+
+</div>
+
+**快速导航：**
+
+| 分类 | 链接 |
+|------|------|
+| 🚀 部署指南 | [安装文档](https://docs.newapi.pro/zh/docs/installation) |
+| ⚙️ 环境配置 | [环境变量](https://docs.newapi.pro/zh/docs/installation/config-maintenance/environment-variables) |
+| 📡 接口文档 | [API 文档](https://docs.newapi.pro/zh/docs/api) |
+| ❓ 常见问题 | [FAQ](https://docs.newapi.pro/zh/docs/support/faq) |
+| 💬 社区交流 | [交流渠道](https://docs.newapi.pro/zh/docs/support/community-interaction) |
+
+---

 ## ✨ 主要特性

-New API提供了丰富的功能，详细特性请参考[特性说明](https://docs.newapi.pro/wiki/features-introduction)：
+> 详细特性请参考 [特性说明](https://docs.newapi.pro/zh/docs/guide/wiki/basic-concepts/features-introduction)

-1. 🎨 全新的UI界面
-2. 🌍 多语言支持
-3. 💰 支持在线充值功能，当前支持易支付和Stripe
-4. 🔍 支持用key查询使用额度（配合[neko-api-key-tool](https://github.com/Calcium-Ion/neko-api-key-tool)）
-5. 🔄 兼容原版One API的数据库
-6. 💵 支持模型按次数收费
-7. ⚖️ 支持渠道加权随机
-8. 📈 数据看板（控制台）
-9. 🔒 令牌分组、模型限制
-10. 🤖 支持更多授权登陆方式（LinuxDO,Telegram、OIDC）
-11. 🔄 支持Rerank模型（Cohere和Jina），[接口文档](https://docs.newapi.pro/api/jinaai-rerank)
-12. ⚡ 支持OpenAI Realtime API（包括Azure渠道），[接口文档](https://docs.newapi.pro/api/openai-realtime)
-13. ⚡ 支持 **OpenAI Responses** 格式，[接口文档](https://docs.newapi.pro/api/openai-responses)
-14. ⚡ 支持 **Claude Messages** 格式，[接口文档](https://docs.newapi.pro/api/anthropic-chat)
-15. ⚡ 支持 **Google Gemini** 格式，[接口文档](https://docs.newapi.pro/api/google-gemini-chat/)
-16. 🧠 支持通过模型名称后缀设置 reasoning effort：
-    1. OpenAI o系列模型
-        - 添加后缀 `-high` 设置为 high reasoning effort (例如: `o3-mini-high`)
-        - 添加后缀 `-medium` 设置为 medium reasoning effort (例如: `o3-mini-medium`)
-        - 添加后缀 `-low` 设置为 low reasoning effort (例如: `o3-mini-low`)
-    2. Claude 思考模型
-        - 添加后缀 `-thinking` 启用思考模式 (例如: `claude-3-7-sonnet-20250219-thinking`)
-17. 🔄 思考转内容功能
-18. 🔄 针对用户的模型限流功能
-19. 🔄 请求格式转换功能，支持以下三种格式转换：
-    1. OpenAI Chat Completions => Claude Messages （OpenAI格式调用Claude模型）
-    2. Clade Messages => OpenAI Chat Completions (可用于Claude Code调用第三方模型)
-    3. OpenAI Chat Completions => Gemini Chat （OpenAI格式调用Gemini模型）
-20. 💰 缓存计费支持，开启后可以在缓存命中时按照设定的比例计费：
-    1. 在 `系统设置-运营设置` 中设置 `提示缓存倍率` 选项
-    2. 在渠道中设置 `提示缓存倍率`，范围 0-1，例如设置为 0.5 表示缓存命中时按照 50% 计费
-    3. 支持的渠道：
-        - [x] OpenAI
-        - [x] Azure
-        - [x] DeepSeek
-        - [x] Claude
+### 🎨 核心功能

-## 模型支持
+| 特性 | 说明 |
+|------|------|
+| 🎨 全新 UI | 现代化的用户界面设计 |
+| 🌍 多语言 | 支持中文、英文、法语、日语 |
+| 🔄 数据兼容 | 完全兼容原版 One API 数据库 |
+| 📈 数据看板 | 可视化控制台与统计分析 |
+| 🔒 权限管理 | 令牌分组、模型限制、用户管理 |

-此版本支持多种模型，详情请参考[接口文档-中继接口](https://docs.newapi.pro/api)：
+### 💰 支付与计费

-1. 第三方模型 **gpts** （gpt-4-gizmo-*）
-2. 第三方渠道[Midjourney-Proxy(Plus)](https://github.com/novicezk/midjourney-proxy)接口，[接口文档](https://docs.newapi.pro/api/midjourney-proxy-image)
-3. 第三方渠道[Suno API](https://github.com/Suno-API/Suno-API)接口，[接口文档](https://docs.newapi.pro/api/suno-music)
-4. 自定义渠道，支持填入完整调用地址
-5. Rerank模型（[Cohere](https://cohere.ai/)和[Jina](https://jina.ai/)），[接口文档](https://docs.newapi.pro/api/jinaai-rerank)
-6. Claude Messages 格式，[接口文档](https://docs.newapi.pro/api/anthropic-chat)
-7. Google Gemini格式，[接口文档](https://docs.newapi.pro/api/google-gemini-chat/)
-8. Dify，当前仅支持chatflow
-9. 更多接口请参考[接口文档](https://docs.newapi.pro/api)
+- ✅ 在线充值（易支付、Stripe）
+- ✅ 模型按次数收费
+- ✅ 缓存计费支持（OpenAI、Azure、DeepSeek、Claude、Qwen等所有支持的模型）
+- ✅ 灵活的计费策略配置

-## 环境变量配置
+### 🔐 授权与安全

-详细配置说明请参考[安装指南-环境变量配置](https://docs.newapi.pro/installation/environment-variables)：
+- 😈 Discord 授权登录
+- 🤖 LinuxDO 授权登录
+- 📱 Telegram 授权登录
+- 🔑 OIDC 统一认证
+- 🔍 Key 查询使用额度（配合 [neko-api-key-tool](https://github.com/Calcium-Ion/neko-api-key-tool)）

- `GENERATE_DEFAULT_TOKEN`：是否为新注册用户生成初始令牌，默认为 `false`
- `STREAMING_TIMEOUT`：流式回复超时时间，默认300秒
- `DIFY_DEBUG`：Dify渠道是否输出工作流和节点信息，默认 `true`
- `GET_MEDIA_TOKEN`：是否统计图片token，默认 `true`
- `GET_MEDIA_TOKEN_NOT_STREAM`：非流情况下是否统计图片token，默认 `true`
- `UPDATE_TASK`：是否更新异步任务（Midjourney、Suno），默认 `true`
- `GEMINI_VISION_MAX_IMAGE_NUM`：Gemini模型最大图片数量，默认 `16`
- `MAX_FILE_DOWNLOAD_MB`: 最大文件下载大小，单位MB，默认 `20`
- `CRYPTO_SECRET`：加密密钥，用于加密Redis数据库内容
- `AZURE_DEFAULT_API_VERSION`：Azure渠道默认API版本，默认 `2025-04-01-preview`
- `NOTIFICATION_LIMIT_DURATION_MINUTE`：邮件等通知限制持续时间，默认 `10`分钟
- `NOTIFY_LIMIT_COUNT`：用户通知在指定持续时间内的最大数量，默认 `2`
- `ERROR_LOG_ENABLED=true`: 是否记录并显示错误日志，默认`false`
+### 🚀 高级功能

-## 部署
+**API 格式支持：**
+- ⚡ [OpenAI Responses](https://docs.newapi.pro/zh/docs/api/ai-model/chat/openai/create-response)
+- ⚡ [OpenAI Realtime API](https://docs.newapi.pro/zh/docs/api/ai-model/realtime/create-realtime-session)（含 Azure）
+- ⚡ [Claude Messages](https://docs.newapi.pro/zh/docs/api/ai-model/chat/create-message)
+- ⚡ [Google Gemini](https://doc.newapi.pro/api/google-gemini-chat)
+- 🔄 [Rerank 模型](https://docs.newapi.pro/zh/docs/api/ai-model/rerank/create-rerank)（Cohere、Jina）

-详细部署指南请参考[安装指南-部署方式](https://docs.newapi.pro/installation)：
+**智能路由：**
+- ⚖️ 渠道加权随机
+- 🔄 失败自动重试
+- 🚦 用户级别模型限流
+
+**格式转换：**
+- 🔄 OpenAI ⇄ Claude Messages
+- 🔄 OpenAI ⇄ Gemini Chat
+- 🔄 思考转内容功能
+
+**Reasoning Effort 支持：**
+
+<details>
+<summary>查看详细配置</summary>
+
+**OpenAI 系列模型：**
+- `o3-mini-high` - High reasoning effort
+- `o3-mini-medium` - Medium reasoning effort
+- `o3-mini-low` - Low reasoning effort
+- `gpt-5-high` - High reasoning effort
+- `gpt-5-medium` - Medium reasoning effort
+- `gpt-5-low` - Low reasoning effort
+
+**Claude 思考模型：**
+- `claude-3-7-sonnet-20250219-thinking` - 启用思考模式
+
+**Google Gemini 系列模型：**
+- `gemini-2.5-flash-thinking` - 启用思考模式
+- `gemini-2.5-flash-nothinking` - 禁用思考模式
+- `gemini-2.5-pro-thinking` - 启用思考模式
+- `gemini-2.5-pro-thinking-128` - 启用思考模式，并设置思考预算为128tokens
+- 也可以直接在 Gemini 模型名称后追加 `-low` / `-medium` / `-high` 来控制思考力度（无需再设置思考预算后缀）
+
+</details>
+
+---
+
+## 🤖 模型支持
+
+> 详情请参考 [接口文档 - 中继接口](https://docs.newapi.pro/zh/docs/api)
+
+| 模型类型 | 说明 | 文档 |
+|---------|------|------|
+| 🤖 OpenAI GPTs | gpt-4-gizmo-* 系列 | - |
+| 🎨 Midjourney-Proxy | [Midjourney-Proxy(Plus)](https://github.com/novicezk/midjourney-proxy) | [文档](https://doc.newapi.pro/api/midjourney-proxy-image) |
+| 🎵 Suno-API | [Suno API](https://github.com/Suno-API/Suno-API) | [文档](https://doc.newapi.pro/api/suno-music) |
+| 🔄 Rerank | Cohere、Jina | [文档](https://docs.newapi.pro/zh/docs/api/ai-model/rerank/create-rerank) |
+| 💬 Claude | Messages 格式 | [文档](https://docs.newapi.pro/zh/docs/api/ai-model/chat/create-message) |
+| 🌐 Gemini | Google Gemini 格式 | [文档](https://doc.newapi.pro/api/google-gemini-chat) |
+| 🔧 Dify | ChatFlow 模式 | - |
+| 🎯 自定义 | 支持完整调用地址 | - |
+
+### 📡 支持的接口
+
+<details>
+<summary>查看完整接口列表</summary>
+
+- [聊天接口 (Chat Completions)](https://docs.newapi.pro/zh/docs/api/ai-model/chat/openai/create-chat-completion)
+- [响应接口 (Responses)](https://docs.newapi.pro/zh/docs/api/ai-model/chat/openai/create-response)
+- [图像接口 (Image)](https://docs.newapi.pro/zh/docs/api/ai-model/images/openai/v1-images-generations--post)
+- [音频接口 (Audio)](https://docs.newapi.pro/zh/docs/api/ai-model/audio/openai/create-transcription)
+- [视频接口 (Video)](https://docs.newapi.pro/zh/docs/api/ai-model/videos/create-video-generation)
+- [嵌入接口 (Embeddings)](https://docs.newapi.pro/zh/docs/api/ai-model/embeddings/create-embedding)
+- [重排序接口 (Rerank)](https://docs.newapi.pro/zh/docs/api/ai-model/rerank/create-rerank)
+- [实时对话 (Realtime)](https://docs.newapi.pro/zh/docs/api/ai-model/realtime/create-realtime-session)
+- [Claude 聊天](https://docs.newapi.pro/zh/docs/api/ai-model/chat/create-message)
+- [Google Gemini 聊天](https://doc.newapi.pro/api/google-gemini-chat)
+
+</details>
+
+---
+
+## 🚢 部署

 > [!TIP]
-> 最新版Docker镜像：`calciumion/new-api:latest`  
+> **最新版 Docker 镜像：** `calciumion/new-api:latest`

-### 多机部署注意事项
- 必须设置环境变量 `SESSION_SECRET`，否则会导致多机部署时登录状态不一致
- 如果公用Redis，必须设置 `CRYPTO_SECRET`，否则会导致多机部署时Redis内容无法获取
+### 📋 部署要求

-### 部署要求
- 本地数据库（默认）：SQLite（Docker部署必须挂载`/data`目录）
- 远程数据库：MySQL版本 >= 5.7.8，PgSQL版本 >= 9.6
+| 组件 | 要求 |
+|------|------|
+| **本地数据库** | SQLite（Docker 需挂载 `/data` 目录）|
+| **远程数据库** | MySQL ≥ 5.7.8 或 PostgreSQL ≥ 9.6 |
+| **容器引擎** | Docker / Docker Compose |

-### 部署方式
+### ⚙️ 环境变量配置

-#### 使用宝塔面板Docker功能部署
-安装宝塔面板（**9.2.0版本**及以上），在应用商店中找到**New-API**安装即可。
-[图文教程](./docs/BT.md)
+<details>
+<summary>常用环境变量配置</summary>

-#### 使用Docker Compose部署（推荐）
-```shell
-# 下载项目源码
+| 变量名 | 说明                                                           | 默认值 |
+|--------|--------------------------------------------------------------|--------|
+| `SESSION_SECRET` | 会话密钥（多机部署必须）                                                 | - |
+| `CRYPTO_SECRET` | 加密密钥（Redis 必须）                                               | - |
+| `SQL_DSN` | 数据库连接字符串                                                     | - |
+| `REDIS_CONN_STRING` | Redis 连接字符串                                                  | - |
+| `STREAMING_TIMEOUT` | 流式超时时间（秒）                                                    | `300` |
+| `STREAM_SCANNER_MAX_BUFFER_MB` | 流式扫描器单行最大缓冲（MB），图像生成等超大 `data:` 片段（如 4K 图片 base64）需适当调大 | `64` |
+| `MAX_REQUEST_BODY_MB` | 请求体最大大小（MB，**解压后**计；防止超大请求/zip bomb 导致内存暴涨），超过将返回 `413` | `32` |
+| `AZURE_DEFAULT_API_VERSION` | Azure API 版本                                                 | `2025-04-01-preview` |
+| `ERROR_LOG_ENABLED` | 错误日志开关                                                       | `false` |
+| `PYROSCOPE_URL` | Pyroscope 服务地址                                            | - |
+| `PYROSCOPE_APP_NAME` | Pyroscope 应用名                                        | `new-api` |
+| `PYROSCOPE_BASIC_AUTH_USER` | Pyroscope Basic Auth 用户名                        | - |
+| `PYROSCOPE_BASIC_AUTH_PASSWORD` | Pyroscope Basic Auth 密码                  | - |
+| `PYROSCOPE_MUTEX_RATE` | Pyroscope mutex 采样率                               | `5` |
+| `PYROSCOPE_BLOCK_RATE` | Pyroscope block 采样率                               | `5` |
+| `HOSTNAME` | Pyroscope 标签里的主机名                                          | `new-api` |
+
+📖 **完整配置：** [环境变量文档](https://docs.newapi.pro/zh/docs/installation/config-maintenance/environment-variables)
+
+</details>
+
+### 🔧 部署方式
+
+<details>
+<summary><strong>方式 1：Docker Compose（推荐）</strong></summary>
+
+```bash
+# 克隆项目
 git clone https://github.com/QuantumNous/new-api.git
-
-# 进入项目目录
 cd new-api

-# 根据需要编辑 docker-compose.yml 文件
-# 使用nano编辑器
+# 编辑配置
 nano docker-compose.yml
-# 或使用vim编辑器
-# vim docker-compose.yml

+# 启动服务
+docker-compose up -d
 ```

-#### 直接使用Docker镜像
-```shell
-# 使用SQLite
-docker run --name new-api -d --restart always -p 3000:3000 -e TZ=Asia/Shanghai -v /home/ubuntu/data/new-api:/data calciumion/new-api:latest
+</details>

-# 使用MySQL
-docker run --name new-api -d --restart always -p 3000:3000 -e SQL_DSN="root:123456@tcp(localhost:3306)/oneapi" -e TZ=Asia/Shanghai -v /home/ubuntu/data/new-api:/data calciumion/new-api:latest
+<details>
+<summary><strong>方式 2：Docker 命令</strong></summary>
+
+**使用 SQLite：**
+```bash
+docker run --name new-api -d --restart always \
+  -p 3000:3000 \
+  -e TZ=Asia/Shanghai \
+  -v ./data:/data \
+  calciumion/new-api:latest
 ```

-## 渠道重试与缓存
-渠道重试功能已经实现，可以在`设置->运营设置->通用设置->失败重试次数`设置重试次数，**建议开启缓存**功能。
+**使用 MySQL：**
+```bash
+docker run --name new-api -d --restart always \
+  -p 3000:3000 \
+  -e SQL_DSN="root:123456@tcp(localhost:3306)/oneapi" \
+  -e TZ=Asia/Shanghai \
+  -v ./data:/data \
+  calciumion/new-api:latest
+```

-### 缓存设置方法
-1. `REDIS_CONN_STRING`：设置Redis作为缓存
-2. `MEMORY_CACHE_ENABLED`：启用内存缓存（设置了Redis则无需手动设置）
+> **💡 路径说明：** 
+> - `./data:/data` - 相对路径，数据保存在当前目录的 data 文件夹
+> - 也可使用绝对路径，如：`/your/custom/path:/data`

-## 接口文档
+</details>

-详细接口文档请参考[接口文档](https://docs.newapi.pro/api)：
+<details>
+<summary><strong>方式 3：宝塔面板</strong></summary>

- [聊天接口（Chat Completions）](https://docs.newapi.pro/api/openai-chat)
- [响应接口 （Responses）](https://docs.newapi.pro/api/openai-responses)
- [图像接口（Image）](https://docs.newapi.pro/api/openai-image)
- [重排序接口（Rerank）](https://docs.newapi.pro/api/jinaai-rerank)
- [实时对话接口（Realtime）](https://docs.newapi.pro/api/openai-realtime)
- [Claude聊天接口](https://docs.newapi.pro/api/anthropic-chat)
- [Google Gemini聊天接口](https://docs.newapi.pro/api/google-gemini-chat)
+1. 安装宝塔面板（≥ 9.2.0 版本）
+2. 在应用商店搜索 **New-API**
+3. 一键安装

-## 相关项目
- [One API](https://github.com/songquanpeng/one-api)：原版项目
- [Midjourney-Proxy](https://github.com/novicezk/midjourney-proxy)：Midjourney接口支持
- [neko-api-key-tool](https://github.com/Calcium-Ion/neko-api-key-tool)：用key查询使用额度
+📖 [图文教程](./docs/BT.md)

-其他基于New API的项目：
- [new-api-horizon](https://github.com/Calcium-Ion/new-api-horizon)：New API高性能优化版
+</details>

-## 帮助支持
+### ⚠️ 多机部署注意事项

-如有问题，请参考[帮助支持](https://docs.newapi.pro/support)：
- [社区交流](https://docs.newapi.pro/support/community-interaction)
- [反馈问题](https://docs.newapi.pro/support/feedback-issues)
- [常见问题](https://docs.newapi.pro/support/faq)
+> [!WARNING]
+> - **必须设置** `SESSION_SECRET` - 否则登录状态不一致
+> - **公用 Redis 必须设置** `CRYPTO_SECRET` - 否则数据无法解密
+
+### 🔄 渠道重试与缓存
+
+**重试配置：** `设置 → 运营设置 → 通用设置 → 失败重试次数`
+
+**缓存配置：**
+- `REDIS_CONN_STRING`：Redis 缓存（推荐）
+- `MEMORY_CACHE_ENABLED`：内存缓存
+
+---
+
+## 🔗 相关项目
+
+### 上游项目
+
+| 项目 | 说明 |
+|------|------|
+| [One API](https://github.com/songquanpeng/one-api) | 原版项目基础 |
+| [Midjourney-Proxy](https://github.com/novicezk/midjourney-proxy) | Midjourney 接口支持 |
+
+### 配套工具
+
+| 项目 | 说明 |
+|------|------|
+| [neko-api-key-tool](https://github.com/Calcium-Ion/neko-api-key-tool) | Key 额度查询工具 |
+| [new-api-horizon](https://github.com/Calcium-Ion/new-api-horizon) | New API 高性能优化版 |
+
+---
+
+## 💬 帮助支持
+
+### 📖 文档资源
+
+| 资源 | 链接 |
+|------|------|
+| 📘 常见问题 | [FAQ](https://docs.newapi.pro/zh/docs/support/faq) |
+| 💬 社区交流 | [交流渠道](https://docs.newapi.pro/zh/docs/support/community-interaction) |
+| 🐛 反馈问题 | [问题反馈](https://docs.newapi.pro/zh/docs/support/feedback-issues) |
+| 📚 完整文档 | [官方文档](https://docs.newapi.pro/zh/docs) |
+
+### 🤝 贡献指南
+
+欢迎各种形式的贡献！
+
+- 🐛 报告 Bug
+- 💡 提出新功能
+- 📝 改进文档
+- 🔧 提交代码
+
+---

 ## 🌟 Star History

+<div align="center">
+
 [![Star History Chart](https://api.star-history.com/svg?repos=Calcium-Ion/new-api&type=Date)](https://star-history.com/#Calcium-Ion/new-api&Date)
+
+</div>
+
+---
+
+<div align="center">
+
+### 💖 感谢使用 New API
+
+如果这个项目对你有帮助，欢迎给我们一个 ⭐️ Star！
+
+**[官方文档](https://docs.newapi.pro/zh/docs)** • **[问题反馈](https://github.com/Calcium-Ion/new-api/issues)** • **[最新发布](https://github.com/Calcium-Ion/new-api/releases)**
+
+<sub>Built with ❤️ by QuantumNous</sub>
+
+</div>
--- a/common/api_type.go
+++ b/common/api_type.go
@@ -71,6 +71,8 @@ func ChannelType2APIType(channelType int) (int, bool) {
 		apiType = constant.APITypeSubmodel
 	case constant.ChannelTypeMiniMax:
 		apiType = constant.APITypeMiniMax
+	case constant.ChannelTypeReplicate:
+		apiType = constant.APITypeReplicate
 	}
 	if apiType == -1 {
 		return constant.APITypeOpenAI, false
--- a/common/audio.go
+++ b/common/audio.go
@@ -71,15 +71,66 @@ func getMP3Duration(r io.Reader) (float64, error) {

 // getWAVDuration 解析 WAV 文件头以获取时长。
 func getWAVDuration(r io.ReadSeeker) (float64, error) {
+	// 1. 强制复位指针
+	r.Seek(0, io.SeekStart)
+
 	dec := wav.NewDecoder(r)
+
+	// IsValidFile 会读取 fmt 块
 	if !dec.IsValidFile() {
 		return 0, errors.New("invalid wav file")
 	}
-	d, err := dec.Duration()
-	if err != nil {
-		return 0, errors.Wrap(err, "failed to get wav duration")
+
+	// 尝试寻找 data 块
+	if err := dec.FwdToPCM(); err != nil {
+		return 0, errors.Wrap(err, "failed to find PCM data chunk")
 	}
-	return d.Seconds(), nil
+
+	pcmSize := int64(dec.PCMSize)
+
+	// 如果读出来的 Size 是 0，尝试用文件大小反推
+	if pcmSize == 0 {
+		// 获取文件总大小
+		currentPos, _ := r.Seek(0, io.SeekCurrent) // 当前通常在 data chunk header 之后
+		endPos, _ := r.Seek(0, io.SeekEnd)
+		fileSize := endPos
+
+		// 恢复位置（虽然如果不继续读也没关系）
+		r.Seek(currentPos, io.SeekStart)
+
+		// 数据区大小 ≈ 文件总大小 - 当前指针位置(即Header大小)
+		// 注意：FwdToPCM 成功后，CurrentPos 应该刚好指向 Data 区数据的开始
+		// 或者是 Data Chunk ID + Size 之后。
+		// WAV Header 一般 44 字节。
+		if fileSize > 44 {
+			// 如果 FwdToPCM 成功，Reader 应该位于 data 块的数据起始处
+			// 所以剩余的所有字节理论上都是音频数据
+			pcmSize = fileSize - currentPos
+
+			// 简单的兜底：如果算出来还是负数或0，强制按文件大小-44计算
+			if pcmSize <= 0 {
+				pcmSize = fileSize - 44
+			}
+		}
+	}
+
+	numChans := int64(dec.NumChans)
+	bitDepth := int64(dec.BitDepth)
+	sampleRate := float64(dec.SampleRate)
+
+	if sampleRate == 0 || numChans == 0 || bitDepth == 0 {
+		return 0, errors.New("invalid wav header metadata")
+	}
+
+	bytesPerFrame := numChans * (bitDepth / 8)
+	if bytesPerFrame == 0 {
+		return 0, errors.New("invalid byte depth calculation")
+	}
+
+	totalFrames := pcmSize / bytesPerFrame
+
+	durationSeconds := float64(totalFrames) / sampleRate
+	return durationSeconds, nil
 }

 // getFLACDuration 解析 FLAC 文件的 STREAMINFO 块。
--- a/common/constants.go
+++ b/common/constants.go
@@ -121,6 +121,9 @@ var BatchUpdateInterval int

 var RelayTimeout int // unit is second

+var RelayMaxIdleConns int
+var RelayMaxIdleConnsPerHost int
+
 var GeminiSafetySetting string

 // https://docs.cohere.com/docs/safety-modes Type; NONE/CONTEXTUAL/STRICT
@@ -159,14 +162,15 @@ var (
 	GlobalWebRateLimitNum      int
 	GlobalWebRateLimitDuration int64

+	CriticalRateLimitEnable   bool
+	CriticalRateLimitNum            = 20
+	CriticalRateLimitDuration int64 = 20 * 60
+
 	UploadRateLimitNum            = 10
 	UploadRateLimitDuration int64 = 60

 	DownloadRateLimitNum            = 10
 	DownloadRateLimitDuration int64 = 60
-
-	CriticalRateLimitNum            = 20
-	CriticalRateLimitDuration int64 = 20 * 60
 )

 var RateLimitKeyExpirationDuration = 20 * time.Minute
--- a/common/email.go
+++ b/common/email.go
@@ -32,7 +32,7 @@ func SendEmail(subject string, receiver string, content string) error {
 	}
 	encodedSubject := fmt.Sprintf("=?UTF-8?B?%s?=", base64.StdEncoding.EncodeToString([]byte(subject)))
 	mail := []byte(fmt.Sprintf("To: %s\r\n"+
-		"From: %s<%s>\r\n"+
+		"From: %s <%s>\r\n"+
 		"Subject: %s\r\n"+
 		"Date: %s\r\n"+
 		"Message-ID: %s\r\n"+ // 添加 Message-ID 头
--- a/common/embed-file-system.go
+++ b/common/embed-file-system.go
@@ -4,6 +4,7 @@ import (
 	"embed"
 	"io/fs"
 	"net/http"
+	"os"

 	"github.com/gin-contrib/static"
 )
@@ -14,7 +15,7 @@ type embedFileSystem struct {
 	http.FileSystem
 }

-func (e embedFileSystem) Exists(prefix string, path string) bool {
+func (e *embedFileSystem) Exists(prefix string, path string) bool {
 	_, err := e.Open(path)
 	if err != nil {
 		return false
@@ -22,12 +23,21 @@ func (e embedFileSystem) Exists(prefix string, path string) bool {
 	return true
 }

+func (e *embedFileSystem) Open(name string) (http.File, error) {
+	if name == "/" {
+		// This will make sure the index page goes to NoRouter handler,
+		// which will use the replaced index bytes with analytic codes.
+		return nil, os.ErrNotExist
+	}
+	return e.FileSystem.Open(name)
+}
+
 func EmbedFolder(fsEmbed embed.FS, targetPath string) static.ServeFileSystem {
 	efs, err := fs.Sub(fsEmbed, targetPath)
 	if err != nil {
 		panic(err)
 	}
-	return embedFileSystem{
+	return &embedFileSystem{
 		FileSystem: http.FS(efs),
 	}
 }
--- a/common/gin.go
+++ b/common/gin.go
@@ -2,7 +2,9 @@ package common

 import (
 	"bytes"
+	"fmt"
 	"io"
+	"mime"
 	"mime/multipart"
 	"net/http"
 	"net/url"
@@ -10,24 +12,61 @@ import (
 	"time"

 	"github.com/QuantumNous/new-api/constant"
+	"github.com/pkg/errors"

 	"github.com/gin-gonic/gin"
 )

 const KeyRequestBody = "key_request_body"

-func GetRequestBody(c *gin.Context) ([]byte, error) {
-	requestBody, _ := c.Get(KeyRequestBody)
-	if requestBody != nil {
-		return requestBody.([]byte), nil
+var ErrRequestBodyTooLarge = errors.New("request body too large")
+
+func IsRequestBodyTooLargeError(err error) bool {
+	if err == nil {
+		return false
 	}
-	requestBody, err := io.ReadAll(c.Request.Body)
+	if errors.Is(err, ErrRequestBodyTooLarge) {
+		return true
+	}
+	var mbe *http.MaxBytesError
+	return errors.As(err, &mbe)
+}
+
+func GetRequestBody(c *gin.Context) ([]byte, error) {
+	cached, exists := c.Get(KeyRequestBody)
+	if exists && cached != nil {
+		if b, ok := cached.([]byte); ok {
+			return b, nil
+		}
+	}
+	maxMB := constant.MaxRequestBodyMB
+	if maxMB < 0 {
+		// no limit
+		body, err := io.ReadAll(c.Request.Body)
+		_ = c.Request.Body.Close()
+		if err != nil {
+			return nil, err
+		}
+		c.Set(KeyRequestBody, body)
+		return body, nil
+	}
+	maxBytes := int64(maxMB) << 20
+
+	limited := io.LimitReader(c.Request.Body, maxBytes+1)
+	body, err := io.ReadAll(limited)
 	if err != nil {
+		_ = c.Request.Body.Close()
+		if IsRequestBodyTooLargeError(err) {
+			return nil, errors.Wrap(ErrRequestBodyTooLarge, fmt.Sprintf("request body exceeds %d MB", maxMB))
+		}
 		return nil, err
 	}
 	_ = c.Request.Body.Close()
-	c.Set(KeyRequestBody, requestBody)
-	return requestBody.([]byte), nil
+	if int64(len(body)) > maxBytes {
+		return nil, errors.Wrap(ErrRequestBodyTooLarge, fmt.Sprintf("request body exceeds %d MB", maxMB))
+	}
+	c.Set(KeyRequestBody, body)
+	return body, nil
 }

 func UnmarshalBodyReusable(c *gin.Context, v any) error {
@@ -128,13 +167,13 @@ func ParseMultipartFormReusable(c *gin.Context) (*multipart.Form, error) {
 	}

 	contentType := c.Request.Header.Get("Content-Type")
-	boundary := ""
-	if idx := strings.Index(contentType, "boundary="); idx != -1 {
-		boundary = contentType[idx+9:]
+	boundary, err := parseBoundary(contentType)
+	if err != nil {
+		return nil, err
 	}

 	reader := multipart.NewReader(bytes.NewReader(requestBody), boundary)
-	form, err := reader.ReadForm(32 << 20) // 32 MB max memory
+	form, err := reader.ReadForm(multipartMemoryLimit())
 	if err != nil {
 		return nil, err
 	}
@@ -177,17 +216,16 @@ func parseFormData(data []byte, v any) error {

 func parseMultipartFormData(c *gin.Context, data []byte, v any) error {
 	contentType := c.Request.Header.Get("Content-Type")
-	boundary := ""
-	if idx := strings.Index(contentType, "boundary="); idx != -1 {
-		boundary = contentType[idx+9:]
-	}
-
-	if boundary == "" {
-		return Unmarshal(data, v) // Fallback to JSON
+	boundary, err := parseBoundary(contentType)
+	if err != nil {
+		if errors.Is(err, errBoundaryNotFound) {
+			return Unmarshal(data, v) // Fallback to JSON
+		}
+		return err
 	}

 	reader := multipart.NewReader(bytes.NewReader(data), boundary)
-	form, err := reader.ReadForm(32 << 20) // 32 MB max memory
+	form, err := reader.ReadForm(multipartMemoryLimit())
 	if err != nil {
 		return err
 	}
@@ -203,3 +241,31 @@ func parseMultipartFormData(c *gin.Context, data []byte, v any) error {

 	return processFormMap(formMap, v)
 }
+
+var errBoundaryNotFound = errors.New("multipart boundary not found")
+
+// parseBoundary extracts the multipart boundary from the Content-Type header using mime.ParseMediaType
+func parseBoundary(contentType string) (string, error) {
+	if contentType == "" {
+		return "", errBoundaryNotFound
+	}
+	// Boundary-UUID / boundary-------xxxxxx
+	_, params, err := mime.ParseMediaType(contentType)
+	if err != nil {
+		return "", err
+	}
+	boundary, ok := params["boundary"]
+	if !ok || boundary == "" {
+		return "", errBoundaryNotFound
+	}
+	return boundary, nil
+}
+
+// multipartMemoryLimit returns the configured multipart memory limit in bytes
+func multipartMemoryLimit() int64 {
+	limitMB := constant.MaxFileDownloadMB
+	if limitMB <= 0 {
+		limitMB = 32
+	}
+	return int64(limitMB) << 20
+}
--- a/common/init.go
+++ b/common/init.go
@@ -30,6 +30,11 @@ func printHelp() {
 func InitEnv() {
 	flag.Parse()

+	envVersion := os.Getenv("VERSION")
+	if envVersion != "" {
+		Version = envVersion
+	}
+
 	if *PrintVersion {
 		fmt.Println(Version)
 		os.Exit(0)
@@ -85,6 +90,8 @@ func InitEnv() {
 	SyncFrequency = GetEnvOrDefault("SYNC_FREQUENCY", 60)
 	BatchUpdateInterval = GetEnvOrDefault("BATCH_UPDATE_INTERVAL", 5)
 	RelayTimeout = GetEnvOrDefault("RELAY_TIMEOUT", 0)
+	RelayMaxIdleConns = GetEnvOrDefault("RELAY_MAX_IDLE_CONNS", 500)
+	RelayMaxIdleConnsPerHost = GetEnvOrDefault("RELAY_MAX_IDLE_CONNS_PER_HOST", 100)

 	// Initialize string variables with GetEnvOrDefaultString
 	GeminiSafetySetting = GetEnvOrDefaultString("GEMINI_SAFETY_SETTING", "BLOCK_NONE")
@@ -99,6 +106,9 @@ func InitEnv() {
 	GlobalWebRateLimitNum = GetEnvOrDefault("GLOBAL_WEB_RATE_LIMIT", 60)
 	GlobalWebRateLimitDuration = int64(GetEnvOrDefault("GLOBAL_WEB_RATE_LIMIT_DURATION", 180))

+	CriticalRateLimitEnable = GetEnvOrDefaultBool("CRITICAL_RATE_LIMIT_ENABLE", true)
+	CriticalRateLimitNum = GetEnvOrDefault("CRITICAL_RATE_LIMIT", 20)
+	CriticalRateLimitDuration = int64(GetEnvOrDefault("CRITICAL_RATE_LIMIT_DURATION", 20*60))
 	initConstantEnv()
 }

@@ -106,10 +116,14 @@ func initConstantEnv() {
 	constant.StreamingTimeout = GetEnvOrDefault("STREAMING_TIMEOUT", 300)
 	constant.DifyDebug = GetEnvOrDefaultBool("DIFY_DEBUG", true)
 	constant.MaxFileDownloadMB = GetEnvOrDefault("MAX_FILE_DOWNLOAD_MB", 20)
+	constant.StreamScannerMaxBufferMB = GetEnvOrDefault("STREAM_SCANNER_MAX_BUFFER_MB", 64)
+	// MaxRequestBodyMB 请求体最大大小（解压后），用于防止超大请求/zip bomb导致内存暴涨
+	constant.MaxRequestBodyMB = GetEnvOrDefault("MAX_REQUEST_BODY_MB", 64)
 	// ForceStreamOption 覆盖请求参数，强制返回usage信息
 	constant.ForceStreamOption = GetEnvOrDefaultBool("FORCE_STREAM_OPTION", true)
+	constant.CountToken = GetEnvOrDefaultBool("CountToken", true)
 	constant.GetMediaToken = GetEnvOrDefaultBool("GET_MEDIA_TOKEN", true)
-	constant.GetMediaTokenNotStream = GetEnvOrDefaultBool("GET_MEDIA_TOKEN_NOT_STREAM", true)
+	constant.GetMediaTokenNotStream = GetEnvOrDefaultBool("GET_MEDIA_TOKEN_NOT_STREAM", false)
 	constant.UpdateTask = GetEnvOrDefaultBool("UPDATE_TASK", true)
 	constant.AzureDefaultAPIVersion = GetEnvOrDefaultString("AZURE_DEFAULT_API_VERSION", "2025-04-01-preview")
 	constant.GeminiVisionMaxImageNum = GetEnvOrDefault("GEMINI_VISION_MAX_IMAGE_NUM", 16)
@@ -119,6 +133,8 @@ func initConstantEnv() {
 	constant.GenerateDefaultToken = GetEnvOrDefaultBool("GENERATE_DEFAULT_TOKEN", false)
 	// 是否启用错误日志
 	constant.ErrorLogEnabled = GetEnvOrDefaultBool("ERROR_LOG_ENABLED", false)
+	// 任务轮询时查询的最大数量
+	constant.TaskQueryLimit = GetEnvOrDefault("TASK_QUERY_LIMIT", 1000)

 	soraPatchStr := GetEnvOrDefaultString("TASK_PRICE_PATCH", "")
 	if soraPatchStr != "" {
--- a/common/ip.go
+++ b/common/ip.go
@@ -2,6 +2,15 @@ package common

 import "net"

+func IsIP(s string) bool {
+	ip := net.ParseIP(s)
+	return ip != nil
+}
+
+func ParseIP(s string) net.IP {
+	return net.ParseIP(s)
+}
+
 func IsPrivateIP(ip net.IP) bool {
 	if ip.IsLoopback() || ip.IsLinkLocalUnicast() || ip.IsLinkLocalMulticast() {
 		return true
@@ -20,3 +29,23 @@ func IsPrivateIP(ip net.IP) bool {
 	}
 	return false
 }
+
+func IsIpInCIDRList(ip net.IP, cidrList []string) bool {
+	for _, cidr := range cidrList {
+		_, network, err := net.ParseCIDR(cidr)
+		if err != nil {
+			// 尝试作为单个IP处理
+			if whitelistIP := net.ParseIP(cidr); whitelistIP != nil {
+				if ip.Equal(whitelistIP) {
+					return true
+				}
+			}
+			continue
+		}
+
+		if network.Contains(ip) {
+			return true
+		}
+	}
+	return false
+}
--- a/common/json.go
+++ b/common/json.go
@@ -23,11 +23,11 @@ func Marshal(v any) ([]byte, error) {
 }

 func GetJsonType(data json.RawMessage) string {
-	data = bytes.TrimSpace(data)
-	if len(data) == 0 {
+	trimmed := bytes.TrimSpace(data)
+	if len(trimmed) == 0 {
 		return "unknown"
 	}
-	firstChar := bytes.TrimSpace(data)[0]
+	firstChar := trimmed[0]
 	switch firstChar {
 	case '{':
 		return "object"
--- a/common/model.go
+++ b/common/model.go
@@ -17,6 +17,13 @@ var (
 		"flux-",
 		"flux.1-",
 	}
+	OpenAITextModels = []string{
+		"gpt-",
+		"o1",
+		"o3",
+		"o4",
+		"chatgpt",
+	}
 )

 func IsOpenAIResponseOnlyModel(modelName string) bool {
@@ -40,3 +47,13 @@ func IsImageGenerationModel(modelName string) bool {
 	}
 	return false
 }
+
+func IsOpenAITextModel(modelName string) bool {
+	modelName = strings.ToLower(modelName)
+	for _, m := range OpenAITextModels {
+		if strings.Contains(modelName, m) {
+			return true
+		}
+	}
+	return false
+}
--- a/common/pyro.go
+++ b/common/pyro.go
@@ -0,0 +1,56 @@
+package common
+
+import (
+	"runtime"
+
+	"github.com/grafana/pyroscope-go"
+)
+
+func StartPyroScope() error {
+
+	pyroscopeUrl := GetEnvOrDefaultString("PYROSCOPE_URL", "")
+	if pyroscopeUrl == "" {
+		return nil
+	}
+
+	pyroscopeAppName := GetEnvOrDefaultString("PYROSCOPE_APP_NAME", "new-api")
+	pyroscopeBasicAuthUser := GetEnvOrDefaultString("PYROSCOPE_BASIC_AUTH_USER", "")
+	pyroscopeBasicAuthPassword := GetEnvOrDefaultString("PYROSCOPE_BASIC_AUTH_PASSWORD", "")
+	pyroscopeHostname := GetEnvOrDefaultString("HOSTNAME", "new-api")
+
+	mutexRate := GetEnvOrDefault("PYROSCOPE_MUTEX_RATE", 5)
+	blockRate := GetEnvOrDefault("PYROSCOPE_BLOCK_RATE", 5)
+
+	runtime.SetMutexProfileFraction(mutexRate)
+	runtime.SetBlockProfileRate(blockRate)
+
+	_, err := pyroscope.Start(pyroscope.Config{
+		ApplicationName: pyroscopeAppName,
+
+		ServerAddress:     pyroscopeUrl,
+		BasicAuthUser:     pyroscopeBasicAuthUser,
+		BasicAuthPassword: pyroscopeBasicAuthPassword,
+
+		Logger: nil,
+
+		Tags: map[string]string{"hostname": pyroscopeHostname},
+
+		ProfileTypes: []pyroscope.ProfileType{
+			pyroscope.ProfileCPU,
+			pyroscope.ProfileAllocObjects,
+			pyroscope.ProfileAllocSpace,
+			pyroscope.ProfileInuseObjects,
+			pyroscope.ProfileInuseSpace,
+
+			pyroscope.ProfileGoroutines,
+			pyroscope.ProfileMutexCount,
+			pyroscope.ProfileMutexDuration,
+			pyroscope.ProfileBlockCount,
+			pyroscope.ProfileBlockDuration,
+		},
+	})
+	if err != nil {
+		return err
+	}
+	return nil
+}
--- a/common/ssrf_protection.go
+++ b/common/ssrf_protection.go
@@ -186,23 +186,7 @@ func isIPListed(ip net.IP, list []string) bool {
 		return false
 	}

-	for _, whitelistCIDR := range list {
-		_, network, err := net.ParseCIDR(whitelistCIDR)
-		if err != nil {
-			// 尝试作为单个IP处理
-			if whitelistIP := net.ParseIP(whitelistCIDR); whitelistIP != nil {
-				if ip.Equal(whitelistIP) {
-					return true
-				}
-			}
-			continue
-		}
-
-		if network.Contains(ip) {
-			return true
-		}
-	}
-	return false
+	return IsIpInCIDRList(ip, list)
 }

 // IsIPAccessAllowed 检查IP是否允许访问
--- a/common/str.go
+++ b/common/str.go
@@ -3,12 +3,19 @@ package common
 import (
 	"encoding/base64"
 	"encoding/json"
-	"math/rand"
 	"net/url"
 	"regexp"
 	"strconv"
 	"strings"
 	"unsafe"
+
+	"github.com/samber/lo"
+)
+
+var (
+	maskURLPattern    = regexp.MustCompile(`(http|https)://[^\s/$.?#].[^\s]*`)
+	maskDomainPattern = regexp.MustCompile(`\b(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,}\b`)
+	maskIPPattern     = regexp.MustCompile(`\b(?:\d{1,3}\.){3}\d{1,3}\b`)
 )

 func GetStringIfEmpty(str string, defaultValue string) string {
@@ -19,12 +26,10 @@ func GetStringIfEmpty(str string, defaultValue string) string {
 }

 func GetRandomString(length int) string {
-	//rand.Seed(time.Now().UnixNano())
-	key := make([]byte, length)
-	for i := 0; i < length; i++ {
-		key[i] = keyChars[rand.Intn(len(keyChars))]
+	if length <= 0 {
+		return ""
 	}
-	return string(key)
+	return lo.RandomString(length, lo.AlphanumericCharset)
 }

 func MapToJsonStr(m map[string]interface{}) string {
@@ -170,8 +175,7 @@ func maskHostForPlainDomain(domain string) string {
 // api.openai.com -> ***.***.com
 func MaskSensitiveInfo(str string) string {
 	// Mask URLs
-	urlPattern := regexp.MustCompile(`(http|https)://[^\s/$.?#].[^\s]*`)
-	str = urlPattern.ReplaceAllStringFunc(str, func(urlStr string) string {
+	str = maskURLPattern.ReplaceAllStringFunc(str, func(urlStr string) string {
 		u, err := url.Parse(urlStr)
 		if err != nil {
 			return urlStr
@@ -224,14 +228,12 @@ func MaskSensitiveInfo(str string) string {
 	})

 	// Mask domain names without protocol (like openai.com, www.openai.com)
-	domainPattern := regexp.MustCompile(`\b(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,}\b`)
-	str = domainPattern.ReplaceAllStringFunc(str, func(domain string) string {
+	str = maskDomainPattern.ReplaceAllStringFunc(str, func(domain string) string {
 		return maskHostForPlainDomain(domain)
 	})

 	// Mask IP addresses
-	ipPattern := regexp.MustCompile(`\b(?:\d{1,3}\.){3}\d{1,3}\b`)
-	str = ipPattern.ReplaceAllString(str, "***.***.***.***")
+	str = maskIPPattern.ReplaceAllString(str, "***.***.***.***")

 	return str
 }
--- a/common/utils.go
+++ b/common/utils.go
@@ -217,11 +217,6 @@ func IntMax(a int, b int) int {
 	}
 }

-func IsIP(s string) bool {
-	ip := net.ParseIP(s)
-	return ip != nil
-}
-
 func GetUUID() string {
 	code := uuid.New().String()
 	code = strings.Replace(code, "-", "", -1)
--- a/constant/api_type.go
+++ b/constant/api_type.go
@@ -34,5 +34,6 @@ const (
 	APITypeMoonshot
 	APITypeSubmodel
 	APITypeMiniMax
+	APITypeReplicate
 	APITypeDummy // this one is only for count, do not add any channel after this
 )
--- a/constant/channel.go
+++ b/constant/channel.go
@@ -53,6 +53,7 @@ const (
 	ChannelTypeSubmodel       = 53
 	ChannelTypeDoubaoVideo    = 54
 	ChannelTypeSora           = 55
+	ChannelTypeReplicate      = 56
 	ChannelTypeDummy          // this one is only for count, do not add any channel after this

 )
@@ -114,6 +115,7 @@ var ChannelBaseURLs = []string{
 	"https://llm.submodel.ai",                   //53
 	"https://ark.cn-beijing.volces.com",         //54
 	"https://api.openai.com",                    //55
+	"https://api.replicate.com",                 //56
 }

 var ChannelTypeNames = map[int]string{
@@ -169,6 +171,7 @@ var ChannelTypeNames = map[int]string{
 	ChannelTypeSubmodel:       "Submodel",
 	ChannelTypeDoubaoVideo:    "DoubaoVideo",
 	ChannelTypeSora:           "Sora",
+	ChannelTypeReplicate:      "Replicate",
 }

 func GetChannelTypeName(channelType int) string {
@@ -177,3 +180,27 @@ func GetChannelTypeName(channelType int) string {
 	}
 	return "Unknown"
 }
+
+type ChannelSpecialBase struct {
+	ClaudeBaseURL string
+	OpenAIBaseURL string
+}
+
+var ChannelSpecialBases = map[string]ChannelSpecialBase{
+	"glm-coding-plan": {
+		ClaudeBaseURL: "https://open.bigmodel.cn/api/anthropic",
+		OpenAIBaseURL: "https://open.bigmodel.cn/api/coding/paas/v4",
+	},
+	"glm-coding-plan-international": {
+		ClaudeBaseURL: "https://api.z.ai/api/anthropic",
+		OpenAIBaseURL: "https://api.z.ai/api/coding/paas/v4",
+	},
+	"kimi-coding-plan": {
+		ClaudeBaseURL: "https://api.kimi.com/coding",
+		OpenAIBaseURL: "https://api.kimi.com/coding/v1",
+	},
+	"doubao-coding-plan": {
+		ClaudeBaseURL: "https://ark.cn-beijing.volces.com/api/coding",
+		OpenAIBaseURL: "https://ark.cn-beijing.volces.com/api/coding/v3",
+	},
+}
--- a/constant/context_key.go
+++ b/constant/context_key.go
@@ -3,8 +3,9 @@ package constant
 type ContextKey string

 const (
-	ContextKeyTokenCountMeta ContextKey = "token_count_meta"
-	ContextKeyPromptTokens   ContextKey = "prompt_tokens"
+	ContextKeyTokenCountMeta  ContextKey = "token_count_meta"
+	ContextKeyPromptTokens    ContextKey = "prompt_tokens"
+	ContextKeyEstimatedTokens ContextKey = "estimated_tokens"

 	ContextKeyOriginalModel    ContextKey = "original_model"
 	ContextKeyRequestStartTime ContextKey = "request_start_time"
@@ -17,6 +18,7 @@ const (
 	ContextKeyTokenSpecificChannelId ContextKey = "specific_channel_id"
 	ContextKeyTokenModelLimitEnabled ContextKey = "token_model_limit_enabled"
 	ContextKeyTokenModelLimit        ContextKey = "token_model_limit"
+	ContextKeyTokenCrossGroupRetry   ContextKey = "token_cross_group_retry"

 	/* channel related keys */
 	ContextKeyChannelId                ContextKey = "channel_id"
@@ -36,6 +38,10 @@ const (
 	ContextKeyChannelMultiKeyIndex     ContextKey = "channel_multi_key_index"
 	ContextKeyChannelKey               ContextKey = "channel_key"

+	ContextKeyAutoGroup           ContextKey = "auto_group"
+	ContextKeyAutoGroupIndex      ContextKey = "auto_group_index"
+	ContextKeyAutoGroupRetryIndex ContextKey = "auto_group_retry_index"
+
 	/* user related keys */
 	ContextKeyUserId      ContextKey = "id"
 	ContextKeyUserSetting ContextKey = "user_setting"
@@ -46,5 +52,7 @@ const (
 	ContextKeyUsingGroup  ContextKey = "group"
 	ContextKeyUserName    ContextKey = "username"

+	ContextKeyLocalCountTokens ContextKey = "local_count_tokens"
+
 	ContextKeySystemPromptOverride ContextKey = "system_prompt_override"
 )
--- a/constant/env.go
+++ b/constant/env.go
@@ -3,16 +3,20 @@ package constant
 var StreamingTimeout int
 var DifyDebug bool
 var MaxFileDownloadMB int
+var StreamScannerMaxBufferMB int
 var ForceStreamOption bool
+var CountToken bool
 var GetMediaToken bool
 var GetMediaTokenNotStream bool
 var UpdateTask bool
+var MaxRequestBodyMB int
 var AzureDefaultAPIVersion string
 var GeminiVisionMaxImageNum int
 var NotifyLimitCount int
 var NotificationLimitDurationMinute int
 var GenerateDefaultToken bool
 var ErrorLogEnabled bool
+var TaskQueryLimit int

 // temporary variable for sora patch, will be removed in future
 var TaskPricePatches []string
--- a/constant/task.go
+++ b/constant/task.go
@@ -15,6 +15,7 @@ const (
 	TaskActionTextGenerate      = "textGenerate"
 	TaskActionFirstTailGenerate = "firstTailGenerate"
 	TaskActionReferenceGenerate = "referenceGenerate"
+	TaskActionRemix             = "remixGenerate"
 )

 var SunoModel2Action = map[string]string{
--- a/controller/billing.go
+++ b/controller/billing.go
@@ -2,9 +2,9 @@ package controller

 import (
 	"github.com/QuantumNous/new-api/common"
-	"github.com/QuantumNous/new-api/dto"
 	"github.com/QuantumNous/new-api/model"
 	"github.com/QuantumNous/new-api/setting/operation_setting"
+	"github.com/QuantumNous/new-api/types"
 	"github.com/gin-gonic/gin"
 )

@@ -29,7 +29,7 @@ func GetSubscription(c *gin.Context) {
 		expiredTime = 0
 	}
 	if err != nil {
-		openAIError := dto.OpenAIError{
+		openAIError := types.OpenAIError{
 			Message: err.Error(),
 			Type:    "upstream_error",
 		}
@@ -81,7 +81,7 @@ func GetUsage(c *gin.Context) {
 		quota, err = model.GetUserUsedQuota(userId)
 	}
 	if err != nil {
-		openAIError := dto.OpenAIError{
+		openAIError := types.OpenAIError{
 			Message: err.Error(),
 			Type:    "new_api_error",
 		}
--- a/controller/channel-test.go
+++ b/controller/channel-test.go
@@ -97,6 +97,11 @@ func testChannel(channel *model.Channel, testModel string, endpointType string)
 		if channel.Type == constant.ChannelTypeVolcEngine && strings.Contains(testModel, "seedream") {
 			requestPath = "/v1/images/generations"
 		}
+
+		// responses-only models
+		if strings.Contains(strings.ToLower(testModel), "codex") {
+			requestPath = "/v1/responses"
+		}
 	}

 	c.Request = &http.Request{
@@ -176,7 +181,7 @@ func testChannel(channel *model.Channel, testModel string, endpointType string)
 		}
 	}

-	request := buildTestRequest(testModel, endpointType)
+	request := buildTestRequest(testModel, endpointType, channel)

 	info, err := relaycommon.GenRelayInfo(c, relayFormat, request, nil)

@@ -319,6 +324,16 @@ func testChannel(channel *model.Channel, testModel string, endpointType string)
 		httpResp = resp.(*http.Response)
 		if httpResp.StatusCode != http.StatusOK {
 			err := service.RelayErrorHandler(c.Request.Context(), httpResp, true)
+			common.SysError(fmt.Sprintf(
+				"channel test bad response: channel_id=%d name=%s type=%d model=%s endpoint_type=%s status=%d err=%v",
+				channel.Id,
+				channel.Name,
+				channel.Type,
+				testModel,
+				endpointType,
+				httpResp.StatusCode,
+				err,
+			))
 			return testResult{
 				context:     c,
 				localErr:    err,
@@ -351,7 +366,7 @@ func testChannel(channel *model.Channel, testModel string, endpointType string)
 			newAPIError: types.NewOpenAIError(err, types.ErrorCodeReadResponseBodyFailed, http.StatusInternalServerError),
 		}
 	}
-	info.PromptTokens = usage.PromptTokens
+	info.SetEstimatePromptTokens(usage.PromptTokens)

 	quota := 0
 	if !priceData.UsePrice {
@@ -389,7 +404,7 @@ func testChannel(channel *model.Channel, testModel string, endpointType string)
 	}
 }

-func buildTestRequest(model string, endpointType string) dto.Request {
+func buildTestRequest(model string, endpointType string, channel *model.Channel) dto.Request {
 	// 根据端点类型构建不同的测试请求
 	if endpointType != "" {
 		switch constant.EndpointType(endpointType) {
@@ -423,7 +438,7 @@ func buildTestRequest(model string, endpointType string) dto.Request {
 			}
 		case constant.EndpointTypeAnthropic, constant.EndpointTypeGemini, constant.EndpointTypeOpenAI:
 			// 返回 GeneralOpenAIRequest
-			maxTokens := uint(10)
+			maxTokens := uint(16)
 			if constant.EndpointType(endpointType) == constant.EndpointTypeGemini {
 				maxTokens = 3000
 			}
@@ -453,6 +468,14 @@ func buildTestRequest(model string, endpointType string) dto.Request {
 		}
 	}

+	// Responses-only models (e.g. codex series)
+	if strings.Contains(strings.ToLower(model), "codex") {
+		return &dto.OpenAIResponsesRequest{
+			Model: model,
+			Input: json.RawMessage("\"hi\""),
+		}
+	}
+
 	// Chat/Completion 请求 - 返回 GeneralOpenAIRequest
 	testRequest := &dto.GeneralOpenAIRequest{
 		Model:  model,
@@ -466,7 +489,7 @@ func buildTestRequest(model string, endpointType string) dto.Request {
 	}

 	if strings.HasPrefix(model, "o") {
-		testRequest.MaxCompletionTokens = 10
+		testRequest.MaxCompletionTokens = 16
 	} else if strings.Contains(model, "thinking") {
 		if !strings.Contains(model, "claude") {
 			testRequest.MaxTokens = 50
@@ -474,7 +497,7 @@ func buildTestRequest(model string, endpointType string) dto.Request {
 	} else if strings.Contains(model, "gemini") {
 		testRequest.MaxTokens = 3000
 	} else {
-		testRequest.MaxTokens = 10
+		testRequest.MaxTokens = 16
 	}

 	return testRequest
@@ -617,6 +640,10 @@ func TestAllChannels(c *gin.Context) {
 var autoTestChannelsOnce sync.Once

 func AutomaticallyTestChannels() {
+	// 只在Master节点定时测试渠道
+	if !common.IsMasterNode {
+		return
+	}
 	autoTestChannelsOnce.Do(func() {
 		for {
 			if !operation_setting.GetMonitorSetting().AutoTestChannelEnabled {
--- a/controller/channel.go
+++ b/controller/channel.go
@@ -91,7 +91,7 @@ func GetAllChannels(c *gin.Context) {
 			if tag == nil || *tag == "" {
 				continue
 			}
-			tagChannels, err := model.GetChannelsByTag(*tag, idSort)
+			tagChannels, err := model.GetChannelsByTag(*tag, idSort, false)
 			if err != nil {
 				continue
 			}
@@ -165,6 +165,30 @@ func GetAllChannels(c *gin.Context) {
 	return
 }

+func buildFetchModelsHeaders(channel *model.Channel, key string) (http.Header, error) {
+	var headers http.Header
+	switch channel.Type {
+	case constant.ChannelTypeAnthropic:
+		headers = GetClaudeAuthHeader(key)
+	default:
+		headers = GetAuthHeader(key)
+	}
+
+	headerOverride := channel.GetHeaderOverride()
+	for k, v := range headerOverride {
+		str, ok := v.(string)
+		if !ok {
+			return nil, fmt.Errorf("invalid header override for key %s", k)
+		}
+		if strings.Contains(str, "{api_key}") {
+			str = strings.ReplaceAll(str, "{api_key}", key)
+		}
+		headers.Set(k, str)
+	}
+
+	return headers, nil
+}
+
 func FetchUpstreamModels(c *gin.Context) {
 	id, err := strconv.Atoi(c.Param("id"))
 	if err != nil {
@@ -191,20 +215,45 @@ func FetchUpstreamModels(c *gin.Context) {
 	case constant.ChannelTypeAli:
 		url = fmt.Sprintf("%s/compatible-mode/v1/models", baseURL)
 	case constant.ChannelTypeZhipu_v4:
-		url = fmt.Sprintf("%s/api/paas/v4/models", baseURL)
+		if plan, ok := constant.ChannelSpecialBases[baseURL]; ok && plan.OpenAIBaseURL != "" {
+			url = fmt.Sprintf("%s/models", plan.OpenAIBaseURL)
+		} else {
+			url = fmt.Sprintf("%s/api/paas/v4/models", baseURL)
+		}
+	case constant.ChannelTypeVolcEngine:
+		if plan, ok := constant.ChannelSpecialBases[baseURL]; ok && plan.OpenAIBaseURL != "" {
+			url = fmt.Sprintf("%s/v1/models", plan.OpenAIBaseURL)
+		} else {
+			url = fmt.Sprintf("%s/v1/models", baseURL)
+		}
+	case constant.ChannelTypeMoonshot:
+		if plan, ok := constant.ChannelSpecialBases[baseURL]; ok && plan.OpenAIBaseURL != "" {
+			url = fmt.Sprintf("%s/models", plan.OpenAIBaseURL)
+		} else {
+			url = fmt.Sprintf("%s/v1/models", baseURL)
+		}
 	default:
 		url = fmt.Sprintf("%s/v1/models", baseURL)
 	}

-	// 获取响应体 - 根据渠道类型决定是否添加 AuthHeader
-	var body []byte
-	key := strings.Split(channel.Key, "\n")[0]
-	switch channel.Type {
-	case constant.ChannelTypeAnthropic:
-		body, err = GetResponseBody("GET", url, channel, GetClaudeAuthHeader(key))
-	default:
-		body, err = GetResponseBody("GET", url, channel, GetAuthHeader(key))
+	// 获取用于请求的可用密钥（多密钥渠道优先使用启用状态的密钥）
+	key, _, apiErr := channel.GetNextEnabledKey()
+	if apiErr != nil {
+		c.JSON(http.StatusOK, gin.H{
+			"success": false,
+			"message": fmt.Sprintf("获取渠道密钥失败: %s", apiErr.Error()),
+		})
+		return
 	}
+	key = strings.TrimSpace(key)
+
+	headers, err := buildFetchModelsHeaders(channel, key)
+	if err != nil {
+		common.ApiError(c, err)
+		return
+	}
+
+	body, err := GetResponseBody("GET", url, channel, headers)
 	if err != nil {
 		common.ApiError(c, err)
 		return
@@ -271,7 +320,7 @@ func SearchChannels(c *gin.Context) {
 		}
 		for _, tag := range tags {
 			if tag != nil && *tag != "" {
-				tagChannel, err := model.GetChannelsByTag(*tag, idSort)
+				tagChannel, err := model.GetChannelsByTag(*tag, idSort, false)
 				if err == nil {
 					channelData = append(channelData, tagChannel...)
 				}
@@ -649,13 +698,15 @@ func DeleteDisabledChannel(c *gin.Context) {
 }

 type ChannelTag struct {
-	Tag          string  `json:"tag"`
-	NewTag       *string `json:"new_tag"`
-	Priority     *int64  `json:"priority"`
-	Weight       *uint   `json:"weight"`
-	ModelMapping *string `json:"model_mapping"`
-	Models       *string `json:"models"`
-	Groups       *string `json:"groups"`
+	Tag            string  `json:"tag"`
+	NewTag         *string `json:"new_tag"`
+	Priority       *int64  `json:"priority"`
+	Weight         *uint   `json:"weight"`
+	ModelMapping   *string `json:"model_mapping"`
+	Models         *string `json:"models"`
+	Groups         *string `json:"groups"`
+	ParamOverride  *string `json:"param_override"`
+	HeaderOverride *string `json:"header_override"`
 }

 func DisableTagChannels(c *gin.Context) {
@@ -721,7 +772,29 @@ func EditTagChannels(c *gin.Context) {
 		})
 		return
 	}
-	err = model.EditChannelByTag(channelTag.Tag, channelTag.NewTag, channelTag.ModelMapping, channelTag.Models, channelTag.Groups, channelTag.Priority, channelTag.Weight)
+	if channelTag.ParamOverride != nil {
+		trimmed := strings.TrimSpace(*channelTag.ParamOverride)
+		if trimmed != "" && !json.Valid([]byte(trimmed)) {
+			c.JSON(http.StatusOK, gin.H{
+				"success": false,
+				"message": "参数覆盖必须是合法的 JSON 格式",
+			})
+			return
+		}
+		channelTag.ParamOverride = common.GetPointer[string](trimmed)
+	}
+	if channelTag.HeaderOverride != nil {
+		trimmed := strings.TrimSpace(*channelTag.HeaderOverride)
+		if trimmed != "" && !json.Valid([]byte(trimmed)) {
+			c.JSON(http.StatusOK, gin.H{
+				"success": false,
+				"message": "请求头覆盖必须是合法的 JSON 格式",
+			})
+			return
+		}
+		channelTag.HeaderOverride = common.GetPointer[string](trimmed)
+	}
+	err = model.EditChannelByTag(channelTag.Tag, channelTag.NewTag, channelTag.ModelMapping, channelTag.Models, channelTag.Groups, channelTag.Priority, channelTag.Weight, channelTag.ParamOverride, channelTag.HeaderOverride)
 	if err != nil {
 		common.ApiError(c, err)
 		return
@@ -997,7 +1070,7 @@ func GetTagModels(c *gin.Context) {
 		return
 	}

-	channels, err := model.GetChannelsByTag(tag, false) // Assuming false for idSort is fine here
+	channels, err := model.GetChannelsByTag(tag, false, false) // idSort=false, selectAll=false
 	if err != nil {
 		c.JSON(http.StatusInternalServerError, gin.H{
 			"success": false,
--- a/controller/discord.go
+++ b/controller/discord.go
@@ -0,0 +1,223 @@
+package controller
+
+import (
+	"encoding/json"
+	"errors"
+	"fmt"
+	"net/http"
+	"net/url"
+	"strconv"
+	"strings"
+	"time"
+
+	"github.com/QuantumNous/new-api/common"
+	"github.com/QuantumNous/new-api/model"
+	"github.com/QuantumNous/new-api/setting/system_setting"
+
+	"github.com/gin-contrib/sessions"
+	"github.com/gin-gonic/gin"
+)
+
+type DiscordResponse struct {
+	AccessToken  string `json:"access_token"`
+	IDToken      string `json:"id_token"`
+	RefreshToken string `json:"refresh_token"`
+	TokenType    string `json:"token_type"`
+	ExpiresIn    int    `json:"expires_in"`
+	Scope        string `json:"scope"`
+}
+
+type DiscordUser struct {
+	UID  string `json:"id"`
+	ID   string `json:"username"`
+	Name string `json:"global_name"`
+}
+
+func getDiscordUserInfoByCode(code string) (*DiscordUser, error) {
+	if code == "" {
+		return nil, errors.New("无效的参数")
+	}
+
+	values := url.Values{}
+	values.Set("client_id", system_setting.GetDiscordSettings().ClientId)
+	values.Set("client_secret", system_setting.GetDiscordSettings().ClientSecret)
+	values.Set("code", code)
+	values.Set("grant_type", "authorization_code")
+	values.Set("redirect_uri", fmt.Sprintf("%s/oauth/discord", system_setting.ServerAddress))
+	formData := values.Encode()
+	req, err := http.NewRequest("POST", "https://discord.com/api/v10/oauth2/token", strings.NewReader(formData))
+	if err != nil {
+		return nil, err
+	}
+	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
+	req.Header.Set("Accept", "application/json")
+	client := http.Client{
+		Timeout: 5 * time.Second,
+	}
+	res, err := client.Do(req)
+	if err != nil {
+		common.SysLog(err.Error())
+		return nil, errors.New("无法连接至 Discord 服务器，请稍后重试！")
+	}
+	defer res.Body.Close()
+	var discordResponse DiscordResponse
+	err = json.NewDecoder(res.Body).Decode(&discordResponse)
+	if err != nil {
+		return nil, err
+	}
+
+	if discordResponse.AccessToken == "" {
+		common.SysError("Discord 获取 Token 失败，请检查设置！")
+		return nil, errors.New("Discord 获取 Token 失败，请检查设置！")
+	}
+
+	req, err = http.NewRequest("GET", "https://discord.com/api/v10/users/@me", nil)
+	if err != nil {
+		return nil, err
+	}
+	req.Header.Set("Authorization", "Bearer "+discordResponse.AccessToken)
+	res2, err := client.Do(req)
+	if err != nil {
+		common.SysLog(err.Error())
+		return nil, errors.New("无法连接至 Discord 服务器，请稍后重试！")
+	}
+	defer res2.Body.Close()
+	if res2.StatusCode != http.StatusOK {
+		common.SysError("Discord 获取用户信息失败！请检查设置！")
+		return nil, errors.New("Discord 获取用户信息失败！请检查设置！")
+	}
+
+	var discordUser DiscordUser
+	err = json.NewDecoder(res2.Body).Decode(&discordUser)
+	if err != nil {
+		return nil, err
+	}
+	if discordUser.UID == "" || discordUser.ID == "" {
+		common.SysError("Discord 获取用户信息为空！请检查设置！")
+		return nil, errors.New("Discord 获取用户信息为空！请检查设置！")
+	}
+	return &discordUser, nil
+}
+
+func DiscordOAuth(c *gin.Context) {
+	session := sessions.Default(c)
+	state := c.Query("state")
+	if state == "" || session.Get("oauth_state") == nil || state != session.Get("oauth_state").(string) {
+		c.JSON(http.StatusForbidden, gin.H{
+			"success": false,
+			"message": "state is empty or not same",
+		})
+		return
+	}
+	username := session.Get("username")
+	if username != nil {
+		DiscordBind(c)
+		return
+	}
+	if !system_setting.GetDiscordSettings().Enabled {
+		c.JSON(http.StatusOK, gin.H{
+			"success": false,
+			"message": "管理员未开启通过 Discord 登录以及注册",
+		})
+		return
+	}
+	code := c.Query("code")
+	discordUser, err := getDiscordUserInfoByCode(code)
+	if err != nil {
+		common.ApiError(c, err)
+		return
+	}
+	user := model.User{
+		DiscordId: discordUser.UID,
+	}
+	if model.IsDiscordIdAlreadyTaken(user.DiscordId) {
+		err := user.FillUserByDiscordId()
+		if err != nil {
+			c.JSON(http.StatusOK, gin.H{
+				"success": false,
+				"message": err.Error(),
+			})
+			return
+		}
+	} else {
+		if common.RegisterEnabled {
+			if discordUser.ID != "" {
+				user.Username = discordUser.ID
+			} else {
+				user.Username = "discord_" + strconv.Itoa(model.GetMaxUserId()+1)
+			}
+			if discordUser.Name != "" {
+				user.DisplayName = discordUser.Name
+			} else {
+				user.DisplayName = "Discord User"
+			}
+			err := user.Insert(0)
+			if err != nil {
+				c.JSON(http.StatusOK, gin.H{
+					"success": false,
+					"message": err.Error(),
+				})
+				return
+			}
+		} else {
+			c.JSON(http.StatusOK, gin.H{
+				"success": false,
+				"message": "管理员关闭了新用户注册",
+			})
+			return
+		}
+	}
+
+	if user.Status != common.UserStatusEnabled {
+		c.JSON(http.StatusOK, gin.H{
+			"message": "用户已被封禁",
+			"success": false,
+		})
+		return
+	}
+	setupLogin(&user, c)
+}
+
+func DiscordBind(c *gin.Context) {
+	if !system_setting.GetDiscordSettings().Enabled {
+		c.JSON(http.StatusOK, gin.H{
+			"success": false,
+			"message": "管理员未开启通过 Discord 登录以及注册",
+		})
+		return
+	}
+	code := c.Query("code")
+	discordUser, err := getDiscordUserInfoByCode(code)
+	if err != nil {
+		common.ApiError(c, err)
+		return
+	}
+	user := model.User{
+		DiscordId: discordUser.UID,
+	}
+	if model.IsDiscordIdAlreadyTaken(user.DiscordId) {
+		c.JSON(http.StatusOK, gin.H{
+			"success": false,
+			"message": "该 Discord 账户已被绑定",
+		})
+		return
+	}
+	session := sessions.Default(c)
+	id := session.Get("id")
+	user.Id = id.(int)
+	err = user.FillUserById()
+	if err != nil {
+		common.ApiError(c, err)
+		return
+	}
+	user.DiscordId = discordUser.UID
+	err = user.Update(false)
+	if err != nil {
+		common.ApiError(c, err)
+		return
+	}
+	c.JSON(http.StatusOK, gin.H{
+		"success": true,
+		"message": "bind",
+	})
+}
--- a/controller/github.go
+++ b/controller/github.go
@@ -44,7 +44,7 @@ func getGitHubUserInfoByCode(code string) (*GitHubUser, error) {
 	req.Header.Set("Content-Type", "application/json")
 	req.Header.Set("Accept", "application/json")
 	client := http.Client{
-		Timeout: 5 * time.Second,
+		Timeout: 20 * time.Second,
 	}
 	res, err := client.Do(req)
 	if err != nil {
--- a/controller/linuxdo.go
+++ b/controller/linuxdo.go
@@ -84,7 +84,7 @@ func getLinuxdoUserInfoByCode(code string, c *gin.Context) (*LinuxdoUser, error)
 	}

 	// Get access token using Basic auth
-	tokenEndpoint := "https://connect.linux.do/oauth2/token"
+	tokenEndpoint := common.GetEnvOrDefaultString("LINUX_DO_TOKEN_ENDPOINT", "https://connect.linux.do/oauth2/token")
 	credentials := common.LinuxDOClientId + ":" + common.LinuxDOClientSecret
 	basicAuth := "Basic " + base64.StdEncoding.EncodeToString([]byte(credentials))

@@ -129,7 +129,7 @@ func getLinuxdoUserInfoByCode(code string, c *gin.Context) (*LinuxdoUser, error)
 	}

 	// Get user info
-	userEndpoint := "https://connect.linux.do/api/user"
+	userEndpoint := common.GetEnvOrDefaultString("LINUX_DO_USER_ENDPOINT", "https://connect.linux.do/api/user")
 	req, err = http.NewRequest("GET", userEndpoint, nil)
 	if err != nil {
 		return nil, err
--- a/controller/misc.go
+++ b/controller/misc.go
@@ -52,6 +52,8 @@ func GetStatus(c *gin.Context) {
 		"email_verification":          common.EmailVerificationEnabled,
 		"github_oauth":                common.GitHubOAuthEnabled,
 		"github_client_id":            common.GitHubClientId,
+		"discord_oauth":               system_setting.GetDiscordSettings().Enabled,
+		"discord_client_id":           system_setting.GetDiscordSettings().ClientId,
 		"linuxdo_oauth":               common.LinuxDOOAuthEnabled,
 		"linuxdo_client_id":           common.LinuxDOClientId,
 		"linuxdo_minimum_trust_level": common.LinuxDOMinimumTrustLevel,
--- a/controller/model.go
+++ b/controller/model.go
@@ -16,6 +16,9 @@ import (
 	"github.com/QuantumNous/new-api/relay/channel/moonshot"
 	relaycommon "github.com/QuantumNous/new-api/relay/common"
 	"github.com/QuantumNous/new-api/service"
+	"github.com/QuantumNous/new-api/setting/operation_setting"
+	"github.com/QuantumNous/new-api/setting/ratio_setting"
+	"github.com/QuantumNous/new-api/types"
 	"github.com/gin-gonic/gin"
 	"github.com/samber/lo"
 )
@@ -109,6 +112,17 @@ func init() {
 func ListModels(c *gin.Context, modelType int) {
 	userOpenAiModels := make([]dto.OpenAIModels, 0)

+	acceptUnsetRatioModel := operation_setting.SelfUseModeEnabled
+	if !acceptUnsetRatioModel {
+		userId := c.GetInt("id")
+		if userId > 0 {
+			userSettings, _ := model.GetUserSetting(userId, false)
+			if userSettings.AcceptUnsetRatioModel {
+				acceptUnsetRatioModel = true
+			}
+		}
+	}
+
 	modelLimitEnable := common.GetContextKeyBool(c, constant.ContextKeyTokenModelLimitEnabled)
 	if modelLimitEnable {
 		s, ok := common.GetContextKey(c, constant.ContextKeyTokenModelLimit)
@@ -119,6 +133,12 @@ func ListModels(c *gin.Context, modelType int) {
 			tokenModelLimit = map[string]bool{}
 		}
 		for allowModel, _ := range tokenModelLimit {
+			if !acceptUnsetRatioModel {
+				_, _, exist := ratio_setting.GetModelRatioOrPrice(allowModel)
+				if !exist {
+					continue
+				}
+			}
 			if oaiModel, ok := openAIModelsMap[allowModel]; ok {
 				oaiModel.SupportedEndpointTypes = model.GetModelSupportEndpointTypes(allowModel)
 				userOpenAiModels = append(userOpenAiModels, oaiModel)
@@ -161,6 +181,12 @@ func ListModels(c *gin.Context, modelType int) {
 			models = model.GetGroupEnabledModels(group)
 		}
 		for _, modelName := range models {
+			if !acceptUnsetRatioModel {
+				_, _, exist := ratio_setting.GetModelRatioOrPrice(modelName)
+				if !exist {
+					continue
+				}
+			}
 			if oaiModel, ok := openAIModelsMap[modelName]; ok {
 				oaiModel.SupportedEndpointTypes = model.GetModelSupportEndpointTypes(modelName)
 				userOpenAiModels = append(userOpenAiModels, oaiModel)
@@ -175,6 +201,7 @@ func ListModels(c *gin.Context, modelType int) {
 			}
 		}
 	}
+
 	switch modelType {
 	case constant.ChannelTypeAnthropic:
 		useranthropicModels := make([]dto.AnthropicModel, len(userOpenAiModels))
@@ -249,7 +276,7 @@ func RetrieveModel(c *gin.Context, modelType int) {
 			c.JSON(200, aiModel)
 		}
 	} else {
-		openAIError := dto.OpenAIError{
+		openAIError := types.OpenAIError{
 			Message: fmt.Sprintf("The model '%s' does not exist", modelId),
 			Type:    "invalid_request_error",
 			Param:   "model",
--- a/controller/model_sync.go
+++ b/controller/model_sync.go
@@ -249,7 +249,9 @@ func ensureVendorID(vendorName string, vendorByName map[string]upstreamVendor, v
 	return 0
 }

-// SyncUpstreamModels 同步上游模型与供应商，仅对「未配置模型」生效
+// SyncUpstreamModels 同步上游模型与供应商：
+// - 默认仅创建「未配置模型」
+// - 可通过 overwrite 选择性覆盖更新本地已有模型的字段（前提：sync_official <> 0）
 func SyncUpstreamModels(c *gin.Context) {
 	var req syncRequest
 	// 允许空体
@@ -260,12 +262,26 @@ func SyncUpstreamModels(c *gin.Context) {
 		c.JSON(http.StatusOK, gin.H{"success": false, "message": err.Error()})
 		return
 	}
-	if len(missing) == 0 {
-		c.JSON(http.StatusOK, gin.H{"success": true, "data": gin.H{
-			"created_models":  0,
-			"created_vendors": 0,
-			"skipped_models":  []string{},
-		}})
+
+	// 若既无缺失模型需要创建，也未指定覆盖更新字段，则无需请求上游数据，直接返回
+	if len(missing) == 0 && len(req.Overwrite) == 0 {
+		modelsURL, vendorsURL := getUpstreamURLs(req.Locale)
+		c.JSON(http.StatusOK, gin.H{
+			"success": true,
+			"data": gin.H{
+				"created_models":  0,
+				"created_vendors": 0,
+				"updated_models":  0,
+				"skipped_models":  []string{},
+				"created_list":    []string{},
+				"updated_list":    []string{},
+				"source": gin.H{
+					"locale":      req.Locale,
+					"models_url":  modelsURL,
+					"vendors_url": vendorsURL,
+				},
+			},
+		})
 		return
 	}

@@ -315,9 +331,9 @@ func SyncUpstreamModels(c *gin.Context) {
 	createdModels := 0
 	createdVendors := 0
 	updatedModels := 0
-	var skipped []string
-	var createdList []string
-	var updatedList []string
+	skipped := make([]string, 0)
+	createdList := make([]string, 0)
+	updatedList := make([]string, 0)

 	// 本地缓存：vendorName -> id
 	vendorIDCache := make(map[string]int)
--- a/controller/option.go
+++ b/controller/option.go
@@ -71,6 +71,14 @@ func UpdateOption(c *gin.Context) {
 			})
 			return
 		}
+	case "discord.enabled":
+		if option.Value == "true" && system_setting.GetDiscordSettings().ClientId == "" {
+			c.JSON(http.StatusOK, gin.H{
+				"success": false,
+				"message": "无法启用 Discord OAuth，请先填入 Discord Client Id 以及 Discord Client Secret！",
+			})
+			return
+		}
 	case "oidc.enabled":
 		if option.Value == "true" && system_setting.GetOIDCSettings().ClientId == "" {
 			c.JSON(http.StatusOK, gin.H{
--- a/controller/playground.go
+++ b/controller/playground.go
@@ -3,12 +3,10 @@ package controller
 import (
 	"errors"
 	"fmt"
-	"time"

-	"github.com/QuantumNous/new-api/common"
-	"github.com/QuantumNous/new-api/constant"
 	"github.com/QuantumNous/new-api/middleware"
 	"github.com/QuantumNous/new-api/model"
+	relaycommon "github.com/QuantumNous/new-api/relay/common"
 	"github.com/QuantumNous/new-api/types"

 	"github.com/gin-gonic/gin"
@@ -31,8 +29,11 @@ func Playground(c *gin.Context) {
 		return
 	}

-	group := common.GetContextKeyString(c, constant.ContextKeyUsingGroup)
-	modelName := c.GetString("original_model")
+	relayInfo, err := relaycommon.GenRelayInfo(c, types.RelayFormatOpenAI, nil, nil)
+	if err != nil {
+		newAPIError = types.NewError(err, types.ErrorCodeInvalidRequest, types.ErrOptionWithSkipRetry())
+		return
+	}

 	userId := c.GetInt("id")

@@ -46,16 +47,10 @@ func Playground(c *gin.Context) {

 	tempToken := &model.Token{
 		UserId: userId,
-		Name:   fmt.Sprintf("playground-%s", group),
-		Group:  group,
+		Name:   fmt.Sprintf("playground-%s", relayInfo.UsingGroup),
+		Group:  relayInfo.UsingGroup,
 	}
 	_ = middleware.SetupContextForToken(c, tempToken)
-	_, newAPIError = getChannel(c, group, modelName, 0)
-	if newAPIError != nil {
-		return
-	}
-	//middleware.SetupContextForSelectedChannel(c, channel, playgroundRequest.Model)
-	common.SetContextKey(c, constant.ContextKeyRequestStartTime, time.Now())

 	Relay(c, types.RelayFormatOpenAI)
 }
--- a/controller/relay.go
+++ b/controller/relay.go
@@ -2,6 +2,7 @@ package controller

 import (
 	"bytes"
+	"errors"
 	"fmt"
 	"io"
 	"log"
@@ -64,8 +65,8 @@ func geminiRelayHandler(c *gin.Context, info *relaycommon.RelayInfo) *types.NewA
 func Relay(c *gin.Context, relayFormat types.RelayFormat) {

 	requestId := c.GetString(common.RequestIdKey)
-	group := common.GetContextKeyString(c, constant.ContextKeyUsingGroup)
-	originalModel := common.GetContextKeyString(c, constant.ContextKeyOriginalModel)
+	//group := common.GetContextKeyString(c, constant.ContextKeyUsingGroup)
+	//originalModel := common.GetContextKeyString(c, constant.ContextKeyOriginalModel)

 	var (
 		newAPIError *types.NewAPIError
@@ -104,7 +105,12 @@ func Relay(c *gin.Context, relayFormat types.RelayFormat) {

 	request, err := helper.GetAndValidateRequest(c, relayFormat)
 	if err != nil {
-		newAPIError = types.NewError(err, types.ErrorCodeInvalidRequest)
+		// Map "request body too large" to 413 so clients can handle it correctly
+		if common.IsRequestBodyTooLargeError(err) || errors.Is(err, common.ErrRequestBodyTooLarge) {
+			newAPIError = types.NewErrorWithStatusCode(err, types.ErrorCodeReadRequestBodyFailed, http.StatusRequestEntityTooLarge, types.ErrOptionWithSkipRetry())
+		} else {
+			newAPIError = types.NewError(err, types.ErrorCodeInvalidRequest)
+		}
 		return
 	}

@@ -114,9 +120,17 @@ func Relay(c *gin.Context, relayFormat types.RelayFormat) {
 		return
 	}

-	meta := request.GetTokenCountMeta()
+	needSensitiveCheck := setting.ShouldCheckPromptSensitive()
+	needCountToken := constant.CountToken
+	// Avoid building huge CombineText (strings.Join) when token counting and sensitive check are both disabled.
+	var meta *types.TokenCountMeta
+	if needSensitiveCheck || needCountToken {
+		meta = request.GetTokenCountMeta()
+	} else {
+		meta = fastTokenCountMetaForPricing(request)
+	}

-	if setting.ShouldCheckPromptSensitive() {
+	if needSensitiveCheck && meta != nil {
 		contains, words := service.CheckSensitiveText(meta.CombineText)
 		if contains {
 			logger.LogWarn(c, fmt.Sprintf("user sensitive words detected: %s", strings.Join(words, ", ")))
@@ -125,13 +139,13 @@ func Relay(c *gin.Context, relayFormat types.RelayFormat) {
 		}
 	}

-	tokens, err := service.CountRequestToken(c, meta, relayInfo)
+	tokens, err := service.EstimateRequestToken(c, meta, relayInfo)
 	if err != nil {
 		newAPIError = types.NewError(err, types.ErrorCodeCountTokenFailed)
 		return
 	}

-	relayInfo.SetPromptTokens(tokens)
+	relayInfo.SetEstimatePromptTokens(tokens)

 	priceData, err := helper.ModelPriceHelper(c, relayInfo, tokens, meta)
 	if err != nil {
@@ -157,16 +171,32 @@ func Relay(c *gin.Context, relayFormat types.RelayFormat) {
 		}
 	}()

-	for i := 0; i <= common.RetryTimes; i++ {
-		channel, err := getChannel(c, group, originalModel, i)
-		if err != nil {
-			logger.LogError(c, err.Error())
-			newAPIError = err
+	retryParam := &service.RetryParam{
+		Ctx:        c,
+		TokenGroup: relayInfo.TokenGroup,
+		ModelName:  relayInfo.OriginModelName,
+		Retry:      common.GetPointer(0),
+	}
+
+	for ; retryParam.GetRetry() <= common.RetryTimes; retryParam.IncreaseRetry() {
+		channel, channelErr := getChannel(c, relayInfo, retryParam)
+		if channelErr != nil {
+			logger.LogError(c, channelErr.Error())
+			newAPIError = channelErr
 			break
 		}

 		addUsedChannel(c, channel.Id)
-		requestBody, _ := common.GetRequestBody(c)
+		requestBody, bodyErr := common.GetRequestBody(c)
+		if bodyErr != nil {
+			// Ensure consistent 413 for oversized bodies even when error occurs later (e.g., retry path)
+			if common.IsRequestBodyTooLargeError(bodyErr) || errors.Is(bodyErr, common.ErrRequestBodyTooLarge) {
+				newAPIError = types.NewErrorWithStatusCode(bodyErr, types.ErrorCodeReadRequestBodyFailed, http.StatusRequestEntityTooLarge, types.ErrOptionWithSkipRetry())
+			} else {
+				newAPIError = types.NewErrorWithStatusCode(bodyErr, types.ErrorCodeReadRequestBodyFailed, http.StatusBadRequest, types.ErrOptionWithSkipRetry())
+			}
+			break
+		}
 		c.Request.Body = io.NopCloser(bytes.NewBuffer(requestBody))

 		switch relayFormat {
@@ -186,7 +216,7 @@ func Relay(c *gin.Context, relayFormat types.RelayFormat) {

 		processChannelError(c, *types.NewChannelError(channel.Id, channel.Type, channel.Name, channel.ChannelInfo.IsMultiKey, common.GetContextKeyString(c, constant.ContextKeyChannelKey), channel.GetAutoBan()), newAPIError)

-		if !shouldRetry(c, newAPIError, common.RetryTimes-i) {
+		if !shouldRetry(c, newAPIError, common.RetryTimes-retryParam.GetRetry()) {
 			break
 		}
 	}
@@ -211,8 +241,35 @@ func addUsedChannel(c *gin.Context, channelId int) {
 	c.Set("use_channel", useChannel)
 }

-func getChannel(c *gin.Context, group, originalModel string, retryCount int) (*model.Channel, *types.NewAPIError) {
-	if retryCount == 0 {
+func fastTokenCountMetaForPricing(request dto.Request) *types.TokenCountMeta {
+	if request == nil {
+		return &types.TokenCountMeta{}
+	}
+	meta := &types.TokenCountMeta{
+		TokenType: types.TokenTypeTokenizer,
+	}
+	switch r := request.(type) {
+	case *dto.GeneralOpenAIRequest:
+		if r.MaxCompletionTokens > r.MaxTokens {
+			meta.MaxTokens = int(r.MaxCompletionTokens)
+		} else {
+			meta.MaxTokens = int(r.MaxTokens)
+		}
+	case *dto.OpenAIResponsesRequest:
+		meta.MaxTokens = int(r.MaxOutputTokens)
+	case *dto.ClaudeRequest:
+		meta.MaxTokens = int(r.MaxTokens)
+	case *dto.ImageRequest:
+		// Pricing for image requests depends on ImagePriceRatio; safe to compute even when CountToken is disabled.
+		return r.GetTokenCountMeta()
+	default:
+		// Best-effort: leave CombineText empty to avoid large allocations.
+	}
+	return meta
+}
+
+func getChannel(c *gin.Context, info *relaycommon.RelayInfo, retryParam *service.RetryParam) (*model.Channel, *types.NewAPIError) {
+	if info.ChannelMeta == nil {
 		autoBan := c.GetBool("auto_ban")
 		autoBanInt := 1
 		if !autoBan {
@@ -225,14 +282,18 @@ func getChannel(c *gin.Context, group, originalModel string, retryCount int) (*m
 			AutoBan: &autoBanInt,
 		}, nil
 	}
-	channel, selectGroup, err := service.CacheGetRandomSatisfiedChannel(c, group, originalModel, retryCount)
+	channel, selectGroup, err := service.CacheGetRandomSatisfiedChannel(retryParam)
+
+	info.PriceData.GroupRatioInfo = helper.HandleGroupRatio(c, info)
+
 	if err != nil {
-		return nil, types.NewError(fmt.Errorf("获取分组 %s 下模型 %s 的可用渠道失败（retry）: %s", selectGroup, originalModel, err.Error()), types.ErrorCodeGetChannelFailed, types.ErrOptionWithSkipRetry())
+		return nil, types.NewError(fmt.Errorf("获取分组 %s 下模型 %s 的可用渠道失败（retry）: %s", selectGroup, info.OriginModelName, err.Error()), types.ErrorCodeGetChannelFailed, types.ErrOptionWithSkipRetry())
 	}
 	if channel == nil {
-		return nil, types.NewError(fmt.Errorf("分组 %s 下模型 %s 的可用渠道不存在（retry）", selectGroup, originalModel), types.ErrorCodeGetChannelFailed, types.ErrOptionWithSkipRetry())
+		return nil, types.NewError(fmt.Errorf("分组 %s 下模型 %s 的可用渠道不存在（retry）", selectGroup, info.OriginModelName), types.ErrorCodeGetChannelFailed, types.ErrOptionWithSkipRetry())
 	}
-	newAPIError := middleware.SetupContextForSelectedChannel(c, channel, originalModel)
+
+	newAPIError := middleware.SetupContextForSelectedChannel(c, channel, info.OriginModelName)
 	if newAPIError != nil {
 		return nil, newAPIError
 	}
@@ -285,7 +346,7 @@ func processChannelError(c *gin.Context, channelError types.ChannelError, err *t
 	logger.LogError(c, fmt.Sprintf("channel error (channel #%d, status code: %d): %s", channelError.ChannelId, err.StatusCode, err.Error()))
 	// 不要使用context获取渠道信息，异步处理时可能会出现渠道信息不一致的情况
 	// do not use context to get channel info, there may be inconsistent channel info when processing asynchronously
-	if service.ShouldDisableChannel(channelError.ChannelId, err) && channelError.AutoBan {
+	if service.ShouldDisableChannel(channelError.ChannelType, err) && channelError.AutoBan {
 		gopool.Go(func() {
 			service.DisableChannel(channelError, err.Error())
 		})
@@ -366,7 +427,7 @@ func RelayMidjourney(c *gin.Context) {
 }

 func RelayNotImplemented(c *gin.Context) {
-	err := dto.OpenAIError{
+	err := types.OpenAIError{
 		Message: "API not implemented",
 		Type:    "new_api_error",
 		Param:   "",
@@ -378,7 +439,7 @@ func RelayNotImplemented(c *gin.Context) {
 }

 func RelayNotFound(c *gin.Context) {
-	err := dto.OpenAIError{
+	err := types.OpenAIError{
 		Message: fmt.Sprintf("Invalid URL (%s %s)", c.Request.Method, c.Request.URL.Path),
 		Type:    "invalid_request_error",
 		Param:   "",
@@ -392,8 +453,6 @@ func RelayNotFound(c *gin.Context) {
 func RelayTask(c *gin.Context) {
 	retryTimes := common.RetryTimes
 	channelId := c.GetInt("channel_id")
-	group := c.GetString("group")
-	originalModel := c.GetString("original_model")
 	c.Set("use_channel", []string{fmt.Sprintf("%d", channelId)})
 	relayInfo, err := relaycommon.GenRelayInfo(c, types.RelayFormatTask, nil, nil)
 	if err != nil {
@@ -403,8 +462,14 @@ func RelayTask(c *gin.Context) {
 	if taskErr == nil {
 		retryTimes = 0
 	}
-	for i := 0; shouldRetryTaskRelay(c, channelId, taskErr, retryTimes) && i < retryTimes; i++ {
-		channel, newAPIError := getChannel(c, group, originalModel, i)
+	retryParam := &service.RetryParam{
+		Ctx:        c,
+		TokenGroup: relayInfo.TokenGroup,
+		ModelName:  relayInfo.OriginModelName,
+		Retry:      common.GetPointer(0),
+	}
+	for ; shouldRetryTaskRelay(c, channelId, taskErr, retryTimes) && retryParam.GetRetry() < retryTimes; retryParam.IncreaseRetry() {
+		channel, newAPIError := getChannel(c, relayInfo, retryParam)
 		if newAPIError != nil {
 			logger.LogError(c, fmt.Sprintf("CacheGetRandomSatisfiedChannel failed: %s", newAPIError.Error()))
 			taskErr = service.TaskErrorWrapperLocal(newAPIError.Err, "get_channel_failed", http.StatusInternalServerError)
@@ -414,10 +479,18 @@ func RelayTask(c *gin.Context) {
 		useChannel := c.GetStringSlice("use_channel")
 		useChannel = append(useChannel, fmt.Sprintf("%d", channelId))
 		c.Set("use_channel", useChannel)
-		logger.LogInfo(c, fmt.Sprintf("using channel #%d to retry (remain times %d)", channel.Id, i))
+		logger.LogInfo(c, fmt.Sprintf("using channel #%d to retry (remain times %d)", channel.Id, retryParam.GetRetry()))
 		//middleware.SetupContextForSelectedChannel(c, channel, originalModel)

-		requestBody, _ := common.GetRequestBody(c)
+		requestBody, err := common.GetRequestBody(c)
+		if err != nil {
+			if common.IsRequestBodyTooLargeError(err) || errors.Is(err, common.ErrRequestBodyTooLarge) {
+				taskErr = service.TaskErrorWrapperLocal(err, "read_request_body_failed", http.StatusRequestEntityTooLarge)
+			} else {
+				taskErr = service.TaskErrorWrapperLocal(err, "read_request_body_failed", http.StatusBadRequest)
+			}
+			break
+		}
 		c.Request.Body = io.NopCloser(bytes.NewBuffer(requestBody))
 		taskErr = taskRelayHandler(c, relayInfo)
 	}
--- a/controller/task.go
+++ b/controller/task.go
@@ -29,7 +29,7 @@ func UpdateTaskBulk() {
 		time.Sleep(time.Duration(15) * time.Second)
 		common.SysLog("任务进度轮询开始")
 		ctx := context.TODO()
-		allTasks := model.GetAllUnFinishSyncTasks(500)
+		allTasks := model.GetAllUnFinishSyncTasks(constant.TaskQueryLimit)
 		platformTask := make(map[constant.TaskPlatform][]*model.Task)
 		for _, t := range allTasks {
 			platformTask[t.Platform] = append(platformTask[t.Platform], t)
@@ -88,7 +88,7 @@ func UpdateSunoTaskAll(ctx context.Context, taskChannelM map[int][]string, taskM
 	for channelId, taskIds := range taskChannelM {
 		err := updateSunoTaskAll(ctx, channelId, taskIds, taskM)
 		if err != nil {
-			logger.LogError(ctx, fmt.Sprintf("渠道 #%d 更新异步任务失败: %d", channelId, err.Error()))
+			logger.LogError(ctx, fmt.Sprintf("渠道 #%d 更新异步任务失败: %s", channelId, err.Error()))
 		}
 	}
 	return nil
@@ -116,9 +116,10 @@ func updateSunoTaskAll(ctx context.Context, channelId int, taskIds []string, tas
 	if adaptor == nil {
 		return errors.New("adaptor not found")
 	}
+	proxy := channel.GetSetting().Proxy
 	resp, err := adaptor.FetchTask(*channel.BaseURL, channel.Key, map[string]any{
 		"ids": taskIds,
-	})
+	}, proxy)
 	if err != nil {
 		common.SysLog(fmt.Sprintf("Get Task Do req error: %v", err))
 		return err
@@ -140,7 +141,7 @@ func updateSunoTaskAll(ctx context.Context, channelId int, taskIds []string, tas
 		return err
 	}
 	if !responseItems.IsSuccess() {
-		common.SysLog(fmt.Sprintf("渠道 #%d 未完成的任务有: %d, 成功获取到任务数: %d", channelId, len(taskIds), string(responseBody)))
+		common.SysLog(fmt.Sprintf("渠道 #%d 未完成的任务有: %d, 成功获取到任务数: %s", channelId, len(taskIds), string(responseBody)))
 		return err
 	}

--- a/controller/task_video.go
+++ b/controller/task_video.go
@@ -52,6 +52,7 @@ func updateVideoTaskAll(ctx context.Context, platform constant.TaskPlatform, cha
 	info.ChannelMeta = &relaycommon.ChannelMeta{
 		ChannelBaseUrl: cacheGetChannel.GetBaseURL(),
 	}
+	info.ApiKey = cacheGetChannel.Key
 	adaptor.Init(info)
 	for _, taskId := range taskIds {
 		if err := updateVideoSingleTask(ctx, adaptor, cacheGetChannel, taskId, taskM); err != nil {
@@ -66,6 +67,7 @@ func updateVideoSingleTask(ctx context.Context, adaptor channel.TaskAdaptor, cha
 	if channel.GetBaseURL() != "" {
 		baseURL = channel.GetBaseURL()
 	}
+	proxy := channel.GetSetting().Proxy

 	task := taskM[taskId]
 	if task == nil {
@@ -75,7 +77,7 @@ func updateVideoSingleTask(ctx context.Context, adaptor channel.TaskAdaptor, cha
 	resp, err := adaptor.FetchTask(baseURL, channel.Key, map[string]any{
 		"task_id": taskId,
 		"action":  task.Action,
-	})
+	}, proxy)
 	if err != nil {
 		return fmt.Errorf("fetchTask failed for task %s: %w", taskId, err)
 	}
--- a/controller/token.go
+++ b/controller/token.go
@@ -142,7 +142,7 @@ func AddToken(c *gin.Context) {
 		common.ApiError(c, err)
 		return
 	}
-	if len(token.Name) > 30 {
+	if len(token.Name) > 50 {
 		c.JSON(http.StatusOK, gin.H{
 			"success": false,
 			"message": "令牌名称过长",
@@ -171,6 +171,7 @@ func AddToken(c *gin.Context) {
 		ModelLimits:        token.ModelLimits,
 		AllowIps:           token.AllowIps,
 		Group:              token.Group,
+		CrossGroupRetry:    token.CrossGroupRetry,
 	}
 	err = cleanToken.Insert()
 	if err != nil {
@@ -208,7 +209,7 @@ func UpdateToken(c *gin.Context) {
 		common.ApiError(c, err)
 		return
 	}
-	if len(token.Name) > 30 {
+	if len(token.Name) > 50 {
 		c.JSON(http.StatusOK, gin.H{
 			"success": false,
 			"message": "令牌名称过长",
@@ -248,6 +249,7 @@ func UpdateToken(c *gin.Context) {
 		cleanToken.ModelLimits = token.ModelLimits
 		cleanToken.AllowIps = token.AllowIps
 		cleanToken.Group = token.Group
+		cleanToken.CrossGroupRetry = token.CrossGroupRetry
 	}
 	err = cleanToken.Update()
 	if err != nil {
--- a/controller/topup_creem.go
+++ b/controller/topup_creem.go
@@ -7,12 +7,12 @@ import (
 	"encoding/hex"
 	"encoding/json"
 	"fmt"
-	"io"
-	"log"
-	"net/http"
 	"github.com/QuantumNous/new-api/common"
 	"github.com/QuantumNous/new-api/model"
 	"github.com/QuantumNous/new-api/setting"
+	"io"
+	"log"
+	"net/http"
 	"time"

 	"github.com/gin-gonic/gin"
--- a/controller/user.go
+++ b/controller/user.go
@@ -110,18 +110,17 @@ func setupLogin(user *model.User, c *gin.Context) {
 		})
 		return
 	}
-	cleanUser := model.User{
-		Id:          user.Id,
-		Username:    user.Username,
-		DisplayName: user.DisplayName,
-		Role:        user.Role,
-		Status:      user.Status,
-		Group:       user.Group,
-	}
 	c.JSON(http.StatusOK, gin.H{
 		"message": "",
 		"success": true,
-		"data":    cleanUser,
+		"data": map[string]any{
+			"id":           user.Id,
+			"username":     user.Username,
+			"display_name": user.DisplayName,
+			"role":         user.Role,
+			"status":       user.Status,
+			"group":        user.Group,
+		},
 	})
 }

@@ -288,11 +287,37 @@ func GetAllUsers(c *gin.Context) {
 	return
 }

+// SearchUsers handles a request to find users by keyword, group, or external identifier filters and returns paginated results.
+// It requires at least one search parameter (keyword, group, or any of the external ID/email filters) and responds with an error if none are provided.
+// On success it populates the page query with matching users and total count and returns that page info.
 func SearchUsers(c *gin.Context) {
 	keyword := c.Query("keyword")
 	group := c.Query("group")
+	filters := map[string]string{
+		"github_id":   c.Query("github_id"),
+		"discord_id":  c.Query("discord_id"),
+		"oidc_id":     c.Query("oidc_id"),
+		"wechat_id":   c.Query("wechat_id"),
+		"email":       c.Query("email"),
+		"telegram_id": c.Query("telegram_id"),
+		"linux_do_id": c.Query("linux_do_id"),
+	}
+
+	// 检查是否至少有一个搜索条件
+	hasFilter := keyword != "" || group != ""
+	for _, v := range filters {
+		if v != "" {
+			hasFilter = true
+			break
+		}
+	}
+	if !hasFilter {
+		common.ApiErrorMsg(c, "at least one search parameter is required")
+		return
+	}
+
 	pageInfo := common.GetPageQuery(c)
-	users, total, err := model.SearchUsers(keyword, group, pageInfo.GetStartIdx(), pageInfo.GetPageSize())
+	users, total, err := model.SearchUsers(keyword, group, filters, pageInfo.GetStartIdx(), pageInfo.GetPageSize())
 	if err != nil {
 		common.ApiError(c, err)
 		return
@@ -453,6 +478,7 @@ func GetSelf(c *gin.Context) {
 		"status":            user.Status,
 		"email":             user.Email,
 		"github_id":         user.GitHubId,
+		"discord_id":        user.DiscordId,
 		"oidc_id":           user.OidcId,
 		"wechat_id":         user.WeChatId,
 		"telegram_id":       user.TelegramId,
@@ -763,7 +789,10 @@ func checkUpdatePassword(originalPassword string, newPassword string, userId int
 	if err != nil {
 		return
 	}
-	if !common.ValidatePasswordAndHash(originalPassword, currentUser.Password) {
+
+	// 密码不为空,需要验证原密码
+	// 支持第一次账号绑定时原密码为空的情况
+	if !common.ValidatePasswordAndHash(originalPassword, currentUser.Password) && currentUser.Password != "" {
 		err = fmt.Errorf("原密码错误")
 		return
 	}
@@ -1290,4 +1319,4 @@ func UpdateUserSetting(c *gin.Context) {
 		"success": true,
 		"message": "设置已更新",
 	})
-}
+}
--- a/controller/video_proxy.go
+++ b/controller/video_proxy.go
@@ -1,6 +1,7 @@
 package controller

 import (
+	"context"
 	"fmt"
 	"io"
 	"net/http"
@@ -10,6 +11,7 @@ import (
 	"github.com/QuantumNous/new-api/constant"
 	"github.com/QuantumNous/new-api/logger"
 	"github.com/QuantumNous/new-api/model"
+	"github.com/QuantumNous/new-api/service"

 	"github.com/gin-gonic/gin"
 )
@@ -75,11 +77,22 @@ func VideoProxy(c *gin.Context) {
 	}

 	var videoURL string
-	client := &http.Client{
-		Timeout: 60 * time.Second,
+	proxy := channel.GetSetting().Proxy
+	client, err := service.GetHttpClientWithProxy(proxy)
+	if err != nil {
+		logger.LogError(c.Request.Context(), fmt.Sprintf("Failed to create proxy client for task %s: %s", taskID, err.Error()))
+		c.JSON(http.StatusInternalServerError, gin.H{
+			"error": gin.H{
+				"message": "Failed to create proxy client",
+				"type":    "server_error",
+			},
+		})
+		return
 	}

-	req, err := http.NewRequestWithContext(c.Request.Context(), http.MethodGet, "", nil)
+	ctx, cancel := context.WithTimeout(c.Request.Context(), 60*time.Second)
+	defer cancel()
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, "", nil)
 	if err != nil {
 		logger.LogError(c.Request.Context(), fmt.Sprintf("Failed to create request: %s", err.Error()))
 		c.JSON(http.StatusInternalServerError, gin.H{
@@ -117,13 +130,12 @@ func VideoProxy(c *gin.Context) {
 			return
 		}
 		req.Header.Set("x-goog-api-key", apiKey)
-	case constant.ChannelTypeAli:
-		// Video URL is directly in task.FailReason
-		videoURL = task.FailReason
-	default:
-		// Default (Sora, etc.): Use original logic
+	case constant.ChannelTypeOpenAI, constant.ChannelTypeSora:
 		videoURL = fmt.Sprintf("%s/v1/videos/%s/content", baseURL, task.TaskID)
 		req.Header.Set("Authorization", "Bearer "+channel.Key)
+	default:
+		// Video URL is directly in task.FailReason
+		videoURL = task.FailReason
 	}

 	req.URL, err = url.Parse(videoURL)
--- a/controller/video_proxy_gemini.go
+++ b/controller/video_proxy_gemini.go
@@ -35,10 +35,11 @@ func getGeminiVideoURL(channel *model.Channel, task *model.Task, apiKey string)
 		return "", fmt.Errorf("api key not available for task")
 	}

+	proxy := channel.GetSetting().Proxy
 	resp, err := adaptor.FetchTask(baseURL, apiKey, map[string]any{
 		"task_id": task.TaskID,
 		"action":  task.Action,
-	})
+	}, proxy)
 	if err != nil {
 		return "", fmt.Errorf("fetch task failed: %w", err)
 	}
--- a/docs/api/api_auth.md
+++ b/docs/api/api_auth.md
@@ -1,53 +0,0 @@
-# API 鉴权文档
-
-## 认证方式
-
-### Access Token
-
-对于需要鉴权的 API 接口，必须同时提供以下两个请求头来进行 Access Token 认证：
-
-1. **请求头中的 `Authorization` 字段**
-
-    将 Access Token 放置于 HTTP 请求头部的 `Authorization` 字段中，格式如下：
-
-    ```
-    Authorization: <your_access_token>
-    ```
-
-    其中 `<your_access_token>` 需要替换为实际的 Access Token 值。
-
-2. **请求头中的 `New-Api-User` 字段**
-
-    将用户 ID 放置于 HTTP 请求头部的 `New-Api-User` 字段中，格式如下：
-
-    ```
-    New-Api-User: <your_user_id>
-    ```
-
-    其中 `<your_user_id>` 需要替换为实际的用户 ID。
-
-**注意：**
-
-*   **必须同时提供 `Authorization` 和 `New-Api-User` 两个请求头才能通过鉴权。**
-*   如果只提供其中一个请求头，或者两个请求头都未提供，则会返回 `401 Unauthorized` 错误。
-*   如果 `Authorization` 中的 Access Token 无效，则会返回 `401 Unauthorized` 错误，并提示“无权进行此操作，access token 无效”。
-*   如果 `New-Api-User` 中的用户 ID 与 Access Token 不匹配，则会返回 `401 Unauthorized` 错误，并提示“无权进行此操作，与登录用户不匹配，请重新登录”。
-*   如果没有提供 `New-Api-User` 请求头，则会返回 `401 Unauthorized` 错误，并提示“无权进行此操作，未提供 New-Api-User”。
-*   如果 `New-Api-User` 请求头格式错误，则会返回 `401 Unauthorized` 错误，并提示“无权进行此操作，New-Api-User 格式错误”。
-*   如果用户已被禁用，则会返回 `403 Forbidden` 错误，并提示“用户已被封禁”。
-*   如果用户权限不足，则会返回 `403 Forbidden` 错误，并提示“无权进行此操作，权限不足”。
-*   如果用户信息无效，则会返回 `403 Forbidden` 错误，并提示“无权进行此操作，用户信息无效”。
-
-## Curl 示例
-
-假设您的 Access Token 为 `access_token`，用户 ID 为 `123`，要访问的 API 接口为 `/api/user/self`，则可以使用以下 curl 命令：
-
-```bash
-curl -X GET \
-  -H "Authorization: access_token" \
-  -H "New-Api-User: 123" \
-  https://your-domain.com/api/user/self
-```
-
-请将 `access_token`、`123` 和 `https://your-domain.com` 替换为实际的值。
-
--- a/docs/api/web_api.md
+++ b/docs/api/web_api.md
@@ -1,197 +0,0 @@
-# New API – Web 界面后端接口文档
-
-> 本文档汇总了 **New API** 后端提供给前端 Web 界面的全部 REST 接口（不含 *Relay* 相关接口）。
->
-> 接口前缀统一为 `https://<your-domain>`，以下仅列出 **路径**、**HTTP 方法**、**鉴权要求** 与 **功能简介**。
->
-> 鉴权级别说明：
-> * **公开** – 不需要登录即可调用
-> * **用户** – 需携带用户 Token（`middleware.UserAuth`）
-> * **管理员** – 需管理员 Token（`middleware.AdminAuth`）
-> * **Root** – 仅限最高权限 Root 用户（`middleware.RootAuth`）
-
---
-
-## 1. 初始化 / 系统状态
-| 方法 | 路径 | 鉴权 | 说明 |
-|------|------|------|------|
-| GET  | /api/setup | 公开 | 获取系统初始化状态 |
-| POST | /api/setup | 公开 | 完成首次安装向导 |
-| GET  | /api/status | 公开 | 获取运行状态摘要 |
-| GET  | /api/uptime/status | 公开 | Uptime-Kuma 兼容状态探针 |
-| GET  | /api/status/test | 管理员 | 测试后端与依赖组件是否正常 |
-
-## 2. 公共信息
-| 方法 | 路径 | 鉴权 | 说明 |
-|------|------|------|------|
-| GET | /api/models | 用户 | 获取前端可用模型列表 |
-| GET | /api/notice | 公开 | 获取公告栏内容 |
-| GET | /api/about | 公开 | 关于页面信息 |
-| GET | /api/home_page_content | 公开 | 首页自定义内容 |
-| GET | /api/pricing | 可匿名/用户 | 价格与套餐信息 |
-| GET | /api/ratio_config | 公开 | 模型倍率配置（仅公开字段） |
-
-## 3. 邮件 / 身份验证
-| 方法 | 路径 | 鉴权 | 说明 |
-|------|------|------|------|
-| GET | /api/verification | 公开 (限流) | 发送邮箱验证邮件 |
-| GET | /api/reset_password | 公开 (限流) | 发送重置密码邮件 |
-| POST | /api/user/reset | 公开 | 提交重置密码请求 |
-
-## 4. OAuth / 第三方登录
-| 方法 | 路径 | 鉴权 | 说明 |
-|------|------|------|------|
-| GET | /api/oauth/github | 公开 | GitHub OAuth 跳转 |
-| GET | /api/oauth/oidc | 公开 | OIDC 通用 OAuth 跳转 |
-| GET | /api/oauth/linuxdo | 公开 | LinuxDo OAuth 跳转 |
-| GET | /api/oauth/wechat | 公开 | 微信扫码登录跳转 |
-| GET | /api/oauth/wechat/bind | 公开 | 微信账户绑定 |
-| GET | /api/oauth/email/bind | 公开 | 邮箱绑定 |
-| GET | /api/oauth/telegram/login | 公开 | Telegram 登录 |
-| GET | /api/oauth/telegram/bind | 公开 | Telegram 账户绑定 |
-| GET | /api/oauth/state | 公开 | 获取随机 state（防 CSRF） |
-
-## 5. 用户模块
-### 5.1 账号注册/登录
-| 方法 | 路径 | 鉴权 | 说明 |
-|------|------|------|------|
-| POST | /api/user/register | 公开 | 注册新账号 |
-| POST | /api/user/login | 公开 | 用户登录 |
-| GET  | /api/user/logout | 用户 | 退出登录 |
-| GET  | /api/user/epay/notify | 公开 | Epay 支付回调 |
-| GET  | /api/user/groups | 公开 | 列出所有分组（无鉴权版） |
-
-### 5.2 用户自身操作 (需登录)
-| 方法 | 路径 | 鉴权 | 说明 |
-|------|------|------|------|
-| GET | /api/user/self/groups | 用户 | 获取自己所在分组 |
-| GET | /api/user/self | 用户 | 获取个人资料 |
-| GET | /api/user/models | 用户 | 获取模型可见性 |
-| PUT | /api/user/self | 用户 | 修改个人资料 |
-| DELETE | /api/user/self | 用户 | 注销账号 |
-| GET | /api/user/token | 用户 | 生成用户级别 Access Token |
-| GET | /api/user/aff | 用户 | 获取推广码信息 |
-| POST | /api/user/topup | 用户 | 余额直充 |
-| POST | /api/user/pay | 用户 | 提交支付订单 |
-| POST | /api/user/amount | 用户 | 余额支付 |
-| POST | /api/user/aff_transfer | 用户 | 推广额度转账 |
-| PUT | /api/user/setting | 用户 | 更新用户设置 |
-
-### 5.3 管理员用户管理
-| 方法 | 路径 | 鉴权 | 说明 |
-|------|------|------|------|
-| GET | /api/user/ | 管理员 | 获取全部用户列表 |
-| GET | /api/user/search | 管理员 | 搜索用户 |
-| GET | /api/user/:id | 管理员 | 获取单个用户信息 |
-| POST | /api/user/ | 管理员 | 创建用户 |
-| POST | /api/user/manage | 管理员 | 冻结/重置等管理操作 |
-| PUT | /api/user/ | 管理员 | 更新用户 |
-| DELETE | /api/user/:id | 管理员 | 删除用户 |
-
-## 6. 站点选项 (Root)
-| 方法 | 路径 | 鉴权 | 说明 |
-|------|------|------|------|
-| GET | /api/option/ | Root | 获取全局配置 |
-| PUT | /api/option/ | Root | 更新全局配置 |
-| POST | /api/option/rest_model_ratio | Root | 重置模型倍率 |
-| POST | /api/option/migrate_console_setting | Root | 迁移旧版控制台配置 |
-
-## 7. 模型倍率同步 (Root)
-| 方法 | 路径 | 鉴权 | 说明 |
-|------|------|------|------|
-| GET | /api/ratio_sync/channels | Root | 获取可同步渠道列表 |
-| POST | /api/ratio_sync/fetch | Root | 从上游拉取倍率 |
-
-## 8. 渠道管理 (管理员)
-| 方法 | 路径 | 说明 |
-|------|------|------|
-| GET | /api/channel/ | 获取渠道列表 |
-| GET | /api/channel/search | 搜索渠道 |
-| GET | /api/channel/models | 查询渠道模型能力 |
-| GET | /api/channel/models_enabled | 查询启用模型能力 |
-| GET | /api/channel/:id | 获取单个渠道 |
-| GET | /api/channel/test | 批量测试渠道连通性 |
-| GET | /api/channel/test/:id | 单个渠道测试 |
-| GET | /api/channel/update_balance | 批量刷新余额 |
-| GET | /api/channel/update_balance/:id | 单个刷新余额 |
-| POST | /api/channel/ | 新增渠道 |
-| PUT | /api/channel/ | 更新渠道 |
-| DELETE | /api/channel/disabled | 删除已禁用渠道 |
-| POST | /api/channel/tag/disabled | 批量禁用标签渠道 |
-| POST | /api/channel/tag/enabled | 批量启用标签渠道 |
-| PUT | /api/channel/tag | 编辑渠道标签 |
-| DELETE | /api/channel/:id | 删除渠道 |
-| POST | /api/channel/batch | 批量删除渠道 |
-| POST | /api/channel/fix | 修复渠道能力表 |
-| GET | /api/channel/fetch_models/:id | 拉取单渠道模型 |
-| POST | /api/channel/fetch_models | 拉取全部渠道模型 |
-| POST | /api/channel/batch/tag | 批量设置渠道标签 |
-| GET | /api/channel/tag/models | 根据标签获取模型 |
-| POST | /api/channel/copy/:id | 复制渠道 |
-
-## 9. Token 管理
-| 方法 | 路径 | 鉴权 | 说明 |
-|------|------|------|------|
-| GET | /api/token/ | 用户 | 获取全部 Token |
-| GET | /api/token/search | 用户 | 搜索 Token |
-| GET | /api/token/:id | 用户 | 获取单个 Token |
-| POST | /api/token/ | 用户 | 创建 Token |
-| PUT | /api/token/ | 用户 | 更新 Token |
-| DELETE | /api/token/:id | 用户 | 删除 Token |
-| POST | /api/token/batch | 用户 | 批量删除 Token |
-
-## 10. 兑换码管理 (管理员)
-| 方法 | 路径 | 说明 |
-|------|------|------|
-| GET | /api/redemption/ | 获取兑换码列表 |
-| GET | /api/redemption/search | 搜索兑换码 |
-| GET | /api/redemption/:id | 获取单个兑换码 |
-| POST | /api/redemption/ | 创建兑换码 |
-| PUT | /api/redemption/ | 更新兑换码 |
-| DELETE | /api/redemption/invalid | 删除无效兑换码 |
-| DELETE | /api/redemption/:id | 删除兑换码 |
-
-## 11. 日志
-| 方法 | 路径 | 鉴权 | 说明 |
-|------|------|------|------|
-| GET | /api/log/ | 管理员 | 获取全部日志 |
-| DELETE | /api/log/ | 管理员 | 删除历史日志 |
-| GET | /api/log/stat | 管理员 | 日志统计 |
-| GET | /api/log/self/stat | 用户 | 我的日志统计 |
-| GET | /api/log/search | 管理员 | 搜索全部日志 |
-| GET | /api/log/self | 用户 | 获取我的日志 |
-| GET | /api/log/self/search | 用户 | 搜索我的日志 |
-| GET | /api/log/token | 公开 | 根据 Token 查询日志（支持 CORS） |
-
-## 12. 数据统计
-| 方法 | 路径 | 鉴权 | 说明 |
-|------|------|------|------|
-| GET | /api/data/ | 管理员 | 全站用量按日期统计 |
-| GET | /api/data/self | 用户 | 我的用量按日期统计 |
-
-## 13. 分组
-| GET | /api/group/ | 管理员 | 获取全部分组列表 |
-
-## 14. Midjourney 任务
-| 方法 | 路径 | 鉴权 | 说明 |
-|------|------|------|------|
-| GET | /api/mj/self | 用户 | 获取自己的 MJ 任务 |
-| GET | /api/mj/ | 管理员 | 获取全部 MJ 任务 |
-
-## 15. 任务中心
-| 方法 | 路径 | 鉴权 | 说明 |
-|------|------|------|------|
-| GET | /api/task/self | 用户 | 获取我的任务 |
-| GET | /api/task/ | 管理员 | 获取全部任务 |
-
-## 16. 账户计费面板 (Dashboard)
-| 方法 | 路径 | 鉴权 | 说明 |
-|------|------|------|------|
-| GET | /dashboard/billing/subscription | 用户 Token | 获取订阅额度信息 |
-| GET | /v1/dashboard/billing/subscription | 同上 | 兼容 OpenAI SDK 路径 |
-| GET | /dashboard/billing/usage | 用户 Token | 获取使用量信息 |
-| GET | /v1/dashboard/billing/usage | 同上 | 兼容 OpenAI SDK 路径 |
-
---
-
-> **更新日期**：2025.07.17
--- a/docs/models/Midjourney.md
+++ b/docs/models/Midjourney.md
@@ -1,82 +0,0 @@
-# Midjourney Proxy API文档
-
-**简介**:Midjourney Proxy API文档
-
-## 接口列表
-支持的接口如下：
-+ [x] /mj/submit/imagine
-+ [x] /mj/submit/change
-+ [x] /mj/submit/blend
-+ [x] /mj/submit/describe
-+ [x] /mj/image/{id} （通过此接口获取图片，**请必须在系统设置中填写服务器地址！！**）
-+ [x] /mj/task/{id}/fetch （此接口返回的图片地址为经过One API转发的地址）
-+ [x] /task/list-by-condition
-+ [x] /mj/submit/action （仅midjourney-proxy-plus支持，下同）
-+ [x] /mj/submit/modal
-+ [x] /mj/submit/shorten
-+ [x] /mj/task/{id}/image-seed
-+ [x] /mj/insight-face/swap （InsightFace）
-
-## 模型列表
-
-### midjourney-proxy支持
-
- mj_imagine (绘图)
- mj_variation (变换)
- mj_reroll (重绘)
- mj_blend (混合)
- mj_upscale (放大)
- mj_describe (图生文)
-
-### 仅midjourney-proxy-plus支持
-
- mj_zoom (比例变焦)
- mj_shorten (提示词缩短)
- mj_modal (窗口提交，局部重绘和自定义比例变焦必须和mj_modal一同添加)
- mj_inpaint (局部重绘提交，必须和mj_modal一同添加)
- mj_custom_zoom (自定义比例变焦，必须和mj_modal一同添加)
- mj_high_variation (强变换)
- mj_low_variation (弱变换)
- mj_pan (平移)
- swap_face (换脸)
-
-## 模型价格设置（在设置-运营设置-模型固定价格设置中设置）
-```json
-{
-  "mj_imagine": 0.1,
-  "mj_variation": 0.1,
-  "mj_reroll": 0.1,
-  "mj_blend": 0.1,
-  "mj_modal": 0.1,
-  "mj_zoom": 0.1,
-  "mj_shorten": 0.1,
-  "mj_high_variation": 0.1,
-  "mj_low_variation": 0.1,
-  "mj_pan": 0.1,
-  "mj_inpaint": 0,
-  "mj_custom_zoom": 0,
-  "mj_describe": 0.05,
-  "mj_upscale": 0.05,
-  "swap_face": 0.05
-}
-```
-其中mj_inpaint和mj_custom_zoom的价格设置为0，是因为这两个模型需要搭配mj_modal使用，所以价格由mj_modal决定。
-
-## 渠道设置
-
-### 对接 midjourney-proxy(plus)
-
-1.
-
-部署Midjourney-Proxy，并配置好midjourney账号等（强烈建议设置密钥），[项目地址](https://github.com/novicezk/midjourney-proxy)
-
-2. 在渠道管理中添加渠道，渠道类型选择**Midjourney Proxy**，如果是plus版本选择**Midjourney Proxy Plus**
-   ，模型请参考上方模型列表
-3. **代理**填写midjourney-proxy部署的地址，例如：http://localhost:8080
-4. 密钥填写midjourney-proxy的密钥，如果没有设置密钥，可以随便填
-
-### 对接上游new api
-
-1. 在渠道管理中添加渠道，渠道类型选择**Midjourney Proxy Plus**，模型请参考上方模型列表
-2. **代理**填写上游new api的地址，例如：http://localhost:3000
-3. 密钥填写上游new api的密钥
--- a/docs/models/Rerank.md
+++ b/docs/models/Rerank.md
@@ -1,62 +0,0 @@
-# Rerank API文档
-
-**简介**:Rerank API文档
-
-## 接入Dify
-模型供应商选择Jina，按要求填写模型信息即可接入Dify。
-
-## 请求方式
-
-Post: /v1/rerank
-
-Request:
-
-```json
-{
-  "model": "jina-reranker-v2-base-multilingual",
-  "query": "What is the capital of the United States?",
-  "top_n": 3,
-  "documents": [
-    "Carson City is the capital city of the American state of Nevada.",
-    "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. Its capital is Saipan.",
-    "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district.",
-    "Capitalization or capitalisation in English grammar is the use of a capital letter at the start of a word. English usage varies from capitalization in other languages.",
-    "Capital punishment (the death penalty) has existed in the United States since beforethe United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states."
-  ]
-}
-```
-
-Response:
-
-```json
-{
-  "results": [
-    {
-      "document": {
-        "text": "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district."
-      },
-      "index": 2,
-      "relevance_score": 0.9999702
-    },
-    {
-      "document": {
-        "text": "Carson City is the capital city of the American state of Nevada."
-      },
-      "index": 0,
-      "relevance_score": 0.67800725
-    },
-    {
-      "document": {
-        "text": "Capitalization or capitalisation in English grammar is the use of a capital letter at the start of a word. English usage varies from capitalization in other languages."
-      },
-      "index": 3,
-      "relevance_score": 0.02800752
-    }
-  ],
-  "usage": {
-    "prompt_tokens": 158,
-    "completion_tokens": 0,
-    "total_tokens": 158
-  }
-}
-```
--- a/docs/models/Suno.md
+++ b/docs/models/Suno.md
@@ -1,44 +0,0 @@
-# Suno API文档
-
-**简介**:Suno API文档
-
-## 接口列表
-支持的接口如下：
-+ [x] /suno/submit/music
-+ [x] /suno/submit/lyrics
-+ [x] /suno/fetch
-+ [x] /suno/fetch/:id
-
-## 模型列表
-
-### Suno API支持
-
- suno_music (自定义模式、灵感模式、续写)
- suno_lyrics (生成歌词)
-
-
-## 模型价格设置（在设置-运营设置-模型固定价格设置中设置）
-```json
-{
-  "suno_music": 0.3,
-  "suno_lyrics": 0.01
-}
-```
-
-## 渠道设置
-
-### 对接 Suno API
-
-1.
-部署 Suno API，并配置好suno账号等（强烈建议设置密钥），[项目地址](https://github.com/Suno-API/Suno-API)
-
-2. 在渠道管理中添加渠道，渠道类型选择**Suno API**
-   ，模型请参考上方模型列表
-3. **代理**填写 Suno API 部署的地址，例如：http://localhost:8080
-4. 密钥填写 Suno API 的密钥，如果没有设置密钥，可以随便填
-
-### 对接上游new api
-
-1. 在渠道管理中添加渠道，渠道类型选择**Suno API**，或任意类型，只需模型包含上方模型列表的模型
-2. **代理**填写上游new api的地址，例如：http://localhost:3000
-3. 密钥填写上游new api的密钥
--- a/docs/openapi/api.json
+++ b/docs/openapi/api.json
--- a/docs/openapi/relay.json
+++ b/docs/openapi/relay.json
--- a/dto/audio.go
+++ b/dto/audio.go
@@ -2,6 +2,7 @@ package dto

 import (
 	"encoding/json"
+	"strings"

 	"github.com/QuantumNous/new-api/types"

@@ -24,11 +25,14 @@ func (r *AudioRequest) GetTokenCountMeta() *types.TokenCountMeta {
 		CombineText: r.Input,
 		TokenType:   types.TokenTypeTextNumber,
 	}
+	if strings.Contains(r.Model, "gpt") {
+		meta.TokenType = types.TokenTypeTokenizer
+	}
 	return meta
 }

 func (r *AudioRequest) IsStream(c *gin.Context) bool {
-	return false
+	return r.StreamFormat == "sse"
 }

 func (r *AudioRequest) SetModelName(modelName string) {
--- a/dto/claude.go
+++ b/dto/claude.go
@@ -203,6 +203,9 @@ type ClaudeRequest struct {
 	Stream            bool            `json:"stream,omitempty"`
 	Tools             any             `json:"tools,omitempty"`
 	ContextManagement json.RawMessage `json:"context_management,omitempty"`
+	OutputConfig      json.RawMessage `json:"output_config,omitempty"`
+	OutputFormat      json.RawMessage `json:"output_format,omitempty"`
+	Container         json.RawMessage `json:"container,omitempty"`
 	ToolChoice        any             `json:"tool_choice,omitempty"`
 	Thinking          *Thinking       `json:"thinking,omitempty"`
 	McpServers        json.RawMessage `json:"mcp_servers,omitempty"`
@@ -510,11 +513,44 @@ func (c *ClaudeResponse) GetClaudeError() *types.ClaudeError {
 }

 type ClaudeUsage struct {
-	InputTokens              int                  `json:"input_tokens"`
-	CacheCreationInputTokens int                  `json:"cache_creation_input_tokens"`
-	CacheReadInputTokens     int                  `json:"cache_read_input_tokens"`
-	OutputTokens             int                  `json:"output_tokens"`
-	ServerToolUse            *ClaudeServerToolUse `json:"server_tool_use,omitempty"`
+	InputTokens              int                       `json:"input_tokens"`
+	CacheCreationInputTokens int                       `json:"cache_creation_input_tokens"`
+	CacheReadInputTokens     int                       `json:"cache_read_input_tokens"`
+	OutputTokens             int                       `json:"output_tokens"`
+	CacheCreation            *ClaudeCacheCreationUsage `json:"cache_creation,omitempty"`
+	// claude cache 1h
+	ClaudeCacheCreation5mTokens int                  `json:"claude_cache_creation_5_m_tokens"`
+	ClaudeCacheCreation1hTokens int                  `json:"claude_cache_creation_1_h_tokens"`
+	ServerToolUse               *ClaudeServerToolUse `json:"server_tool_use,omitempty"`
+}
+
+type ClaudeCacheCreationUsage struct {
+	Ephemeral5mInputTokens int `json:"ephemeral_5m_input_tokens,omitempty"`
+	Ephemeral1hInputTokens int `json:"ephemeral_1h_input_tokens,omitempty"`
+}
+
+func (u *ClaudeUsage) GetCacheCreation5mTokens() int {
+	if u == nil || u.CacheCreation == nil {
+		return 0
+	}
+	return u.CacheCreation.Ephemeral5mInputTokens
+}
+
+func (u *ClaudeUsage) GetCacheCreation1hTokens() int {
+	if u == nil || u.CacheCreation == nil {
+		return 0
+	}
+	return u.CacheCreation.Ephemeral1hInputTokens
+}
+
+func (u *ClaudeUsage) GetCacheCreationTotalTokens() int {
+	if u == nil {
+		return 0
+	}
+	if u.CacheCreationInputTokens > 0 {
+		return u.CacheCreationInputTokens
+	}
+	return u.GetCacheCreation5mTokens() + u.GetCacheCreation1hTokens()
 }

 type ClaudeServerToolUse struct {
--- a/dto/error.go
+++ b/dto/error.go
@@ -1,26 +1,32 @@
 package dto

-import "github.com/QuantumNous/new-api/types"
+import (
+	"encoding/json"

-type OpenAIError struct {
-	Message string `json:"message"`
-	Type    string `json:"type"`
-	Param   string `json:"param"`
-	Code    any    `json:"code"`
-}
+	"github.com/QuantumNous/new-api/common"
+	"github.com/QuantumNous/new-api/types"
+)
+
+//type OpenAIError struct {
+//	Message string `json:"message"`
+//	Type    string `json:"type"`
+//	Param   string `json:"param"`
+//	Code    any    `json:"code"`
+//}

 type OpenAIErrorWithStatusCode struct {
-	Error      OpenAIError `json:"error"`
-	StatusCode int         `json:"status_code"`
+	Error      types.OpenAIError `json:"error"`
+	StatusCode int               `json:"status_code"`
 	LocalError bool
 }

 type GeneralErrorResponse struct {
-	Error    types.OpenAIError `json:"error"`
-	Message  string            `json:"message"`
-	Msg      string            `json:"msg"`
-	Err      string            `json:"err"`
-	ErrorMsg string            `json:"error_msg"`
+	Error    json.RawMessage `json:"error"`
+	Message  string          `json:"message"`
+	Msg      string          `json:"msg"`
+	Err      string          `json:"err"`
+	ErrorMsg string          `json:"error_msg"`
+	Metadata json.RawMessage   `json:"metadata,omitempty"`
 	Header   struct {
 		Message string `json:"message"`
 	} `json:"header"`
@@ -31,9 +37,35 @@ type GeneralErrorResponse struct {
 	} `json:"response"`
 }

+func (e GeneralErrorResponse) TryToOpenAIError() *types.OpenAIError {
+	var openAIError types.OpenAIError
+	if len(e.Error) > 0 {
+		err := common.Unmarshal(e.Error, &openAIError)
+		if err == nil && openAIError.Message != "" {
+			return &openAIError
+		}
+	}
+	return nil
+}
+
 func (e GeneralErrorResponse) ToMessage() string {
-	if e.Error.Message != "" {
-		return e.Error.Message
+	if len(e.Error) > 0 {
+		switch common.GetJsonType(e.Error) {
+		case "object":
+			var openAIError types.OpenAIError
+			err := common.Unmarshal(e.Error, &openAIError)
+			if err == nil && openAIError.Message != "" {
+				return openAIError.Message
+			}
+		case "string":
+			var msg string
+			err := common.Unmarshal(e.Error, &msg)
+			if err == nil && msg != "" {
+				return msg
+			}
+		default:
+			return string(e.Error)
+		}
 	}
 	if e.Message != "" {
 		return e.Message
--- a/dto/gemini.go
+++ b/dto/gemini.go
@@ -22,6 +22,27 @@ type GeminiChatRequest struct {
 	CachedContent      string                     `json:"cachedContent,omitempty"`
 }

+// UnmarshalJSON allows GeminiChatRequest to accept both snake_case and camelCase fields.
+func (r *GeminiChatRequest) UnmarshalJSON(data []byte) error {
+	type Alias GeminiChatRequest
+	var aux struct {
+		Alias
+		SystemInstructionSnake *GeminiChatContent `json:"system_instruction,omitempty"`
+	}
+
+	if err := common.Unmarshal(data, &aux); err != nil {
+		return err
+	}
+
+	*r = GeminiChatRequest(aux.Alias)
+
+	if aux.SystemInstructionSnake != nil {
+		r.SystemInstructions = aux.SystemInstructionSnake
+	}
+
+	return nil
+}
+
 type ToolConfig struct {
 	FunctionCallingConfig *FunctionCallingConfig `json:"functionCallingConfig,omitempty"`
 	RetrievalConfig       *RetrievalConfig       `json:"retrievalConfig,omitempty"`
@@ -141,6 +162,39 @@ func (r *GeminiChatRequest) SetTools(tools []GeminiChatTool) {
 type GeminiThinkingConfig struct {
 	IncludeThoughts bool `json:"includeThoughts,omitempty"`
 	ThinkingBudget  *int `json:"thinkingBudget,omitempty"`
+	// TODO Conflict with thinkingbudget.
+	ThinkingLevel string `json:"thinkingLevel,omitempty"`
+}
+
+// UnmarshalJSON allows GeminiThinkingConfig to accept both snake_case and camelCase fields.
+func (c *GeminiThinkingConfig) UnmarshalJSON(data []byte) error {
+	type Alias GeminiThinkingConfig
+	var aux struct {
+		Alias
+		IncludeThoughtsSnake *bool  `json:"include_thoughts,omitempty"`
+		ThinkingBudgetSnake  *int   `json:"thinking_budget,omitempty"`
+		ThinkingLevelSnake   string `json:"thinking_level,omitempty"`
+	}
+
+	if err := common.Unmarshal(data, &aux); err != nil {
+		return err
+	}
+
+	*c = GeminiThinkingConfig(aux.Alias)
+
+	if aux.IncludeThoughtsSnake != nil {
+		c.IncludeThoughts = *aux.IncludeThoughtsSnake
+	}
+
+	if aux.ThinkingBudgetSnake != nil {
+		c.ThinkingBudget = aux.ThinkingBudgetSnake
+	}
+
+	if aux.ThinkingLevelSnake != "" {
+		c.ThinkingLevel = aux.ThinkingLevelSnake
+	}
+
+	return nil
 }

 func (c *GeminiThinkingConfig) SetThinkingBudget(budget int) {
@@ -182,8 +236,12 @@ type FunctionCall struct {
 }

 type GeminiFunctionResponse struct {
-	Name     string                 `json:"name"`
-	Response map[string]interface{} `json:"response"`
+	Name         string                 `json:"name"`
+	Response     map[string]interface{} `json:"response"`
+	WillContinue json.RawMessage        `json:"willContinue,omitempty"`
+	Scheduling   json.RawMessage        `json:"scheduling,omitempty"`
+	Parts        json.RawMessage        `json:"parts,omitempty"`
+	ID           json.RawMessage        `json:"id,omitempty"`
 }

 type GeminiPartExecutableCode struct {
@@ -202,11 +260,15 @@ type GeminiFileData struct {
 }

 type GeminiPart struct {
-	Text                string                         `json:"text,omitempty"`
-	Thought             bool                           `json:"thought,omitempty"`
-	InlineData          *GeminiInlineData              `json:"inlineData,omitempty"`
-	FunctionCall        *FunctionCall                  `json:"functionCall,omitempty"`
-	FunctionResponse    *GeminiFunctionResponse        `json:"functionResponse,omitempty"`
+	Text             string                  `json:"text,omitempty"`
+	Thought          bool                    `json:"thought,omitempty"`
+	InlineData       *GeminiInlineData       `json:"inlineData,omitempty"`
+	FunctionCall     *FunctionCall           `json:"functionCall,omitempty"`
+	ThoughtSignature json.RawMessage         `json:"thoughtSignature,omitempty"`
+	FunctionResponse *GeminiFunctionResponse `json:"functionResponse,omitempty"`
+	// Optional. Media resolution for the input media.
+	MediaResolution     json.RawMessage                `json:"mediaResolution,omitempty"`
+	VideoMetadata       json.RawMessage                `json:"videoMetadata,omitempty"`
 	FileData            *GeminiFileData                `json:"fileData,omitempty"`
 	ExecutableCode      *GeminiPartExecutableCode      `json:"executableCode,omitempty"`
 	CodeExecutionResult *GeminiPartCodeExecutionResult `json:"codeExecutionResult,omitempty"`
--- a/dto/openai_image.go
+++ b/dto/openai_image.go
@@ -27,8 +27,11 @@ type ImageRequest struct {
 	OutputCompression json.RawMessage `json:"output_compression,omitempty"`
 	PartialImages     json.RawMessage `json:"partial_images,omitempty"`
 	// Stream            bool            `json:"stream,omitempty"`
-	Watermark *bool           `json:"watermark,omitempty"`
-	Image     json.RawMessage `json:"image,omitempty"`
+	Watermark *bool `json:"watermark,omitempty"`
+	// zhipu 4v
+	WatermarkEnabled json.RawMessage `json:"watermark_enabled,omitempty"`
+	UserId           json.RawMessage `json:"user_id,omitempty"`
+	Image            json.RawMessage `json:"image,omitempty"`
 	// 用匿名参数接收额外参数
 	Extra map[string]json.RawMessage `json:"-"`
 }
--- a/dto/openai_request.go
+++ b/dto/openai_request.go
@@ -66,10 +66,11 @@ type GeneralOpenAIRequest struct {
 	// 注意：默认过滤此字段以保护用户隐私，但过滤后可能导致 Codex 无法正常使用
 	Store json.RawMessage `json:"store,omitempty"`
 	// Used by OpenAI to cache responses for similar requests to optimize your cache hit rates. Replaces the user field
-	PromptCacheKey string          `json:"prompt_cache_key,omitempty"`
-	LogitBias      json.RawMessage `json:"logit_bias,omitempty"`
-	Metadata       json.RawMessage `json:"metadata,omitempty"`
-	Prediction     json.RawMessage `json:"prediction,omitempty"`
+	PromptCacheKey       string          `json:"prompt_cache_key,omitempty"`
+	PromptCacheRetention json.RawMessage `json:"prompt_cache_retention,omitempty"`
+	LogitBias            json.RawMessage `json:"logit_bias,omitempty"`
+	Metadata             json.RawMessage `json:"metadata,omitempty"`
+	Prediction           json.RawMessage `json:"prediction,omitempty"`
 	// gemini
 	ExtraBody json.RawMessage `json:"extra_body,omitempty"`
 	//xai
@@ -82,6 +83,7 @@ type GeneralOpenAIRequest struct {
 	// Ali Qwen Params
 	VlHighResolutionImages json.RawMessage `json:"vl_high_resolution_images,omitempty"`
 	EnableThinking         any             `json:"enable_thinking,omitempty"`
+	ChatTemplateKwargs     json.RawMessage `json:"chat_template_kwargs,omitempty"`
 	// ollama Params
 	Think json.RawMessage `json:"think,omitempty"`
 	// baidu v2
@@ -232,10 +234,13 @@ func (r *GeneralOpenAIRequest) GetSystemRoleName() string {
 	return "system"
 }

+const CustomType = "custom"
+
 type ToolCallRequest struct {
 	ID       string          `json:"id,omitempty"`
 	Type     string          `json:"type"`
-	Function FunctionRequest `json:"function"`
+	Function FunctionRequest `json:"function,omitempty"`
+	Custom   json.RawMessage `json:"custom,omitempty"`
 }

 type FunctionRequest struct {
@@ -795,19 +800,20 @@ type OpenAIResponsesRequest struct {
 	PreviousResponseID string          `json:"previous_response_id,omitempty"`
 	Reasoning          *Reasoning      `json:"reasoning,omitempty"`
 	// 服务层级字段，用于指定 API 服务等级。允许透传可能导致实际计费高于预期，默认应过滤
-	ServiceTier    string          `json:"service_tier,omitempty"`
-	Store          json.RawMessage `json:"store,omitempty"`
-	PromptCacheKey json.RawMessage `json:"prompt_cache_key,omitempty"`
-	Stream         bool            `json:"stream,omitempty"`
-	Temperature    float64         `json:"temperature,omitempty"`
-	Text           json.RawMessage `json:"text,omitempty"`
-	ToolChoice     json.RawMessage `json:"tool_choice,omitempty"`
-	Tools          json.RawMessage `json:"tools,omitempty"` // 需要处理的参数很少，MCP 参数太多不确定，所以用 map
-	TopP           float64         `json:"top_p,omitempty"`
-	Truncation     string          `json:"truncation,omitempty"`
-	User           string          `json:"user,omitempty"`
-	MaxToolCalls   uint            `json:"max_tool_calls,omitempty"`
-	Prompt         json.RawMessage `json:"prompt,omitempty"`
+	ServiceTier          string          `json:"service_tier,omitempty"`
+	Store                json.RawMessage `json:"store,omitempty"`
+	PromptCacheKey       json.RawMessage `json:"prompt_cache_key,omitempty"`
+	PromptCacheRetention json.RawMessage `json:"prompt_cache_retention,omitempty"`
+	Stream               bool            `json:"stream,omitempty"`
+	Temperature          float64         `json:"temperature,omitempty"`
+	Text                 json.RawMessage `json:"text,omitempty"`
+	ToolChoice           json.RawMessage `json:"tool_choice,omitempty"`
+	Tools                json.RawMessage `json:"tools,omitempty"` // 需要处理的参数很少，MCP 参数太多不确定，所以用 map
+	TopP                 float64         `json:"top_p,omitempty"`
+	Truncation           string          `json:"truncation,omitempty"`
+	User                 string          `json:"user,omitempty"`
+	MaxToolCalls         uint            `json:"max_tool_calls,omitempty"`
+	Prompt               json.RawMessage `json:"prompt,omitempty"`
 }

 func (r *OpenAIResponsesRequest) GetTokenCountMeta() *types.TokenCountMeta {
@@ -892,6 +898,12 @@ type Reasoning struct {
 	Summary string `json:"summary,omitempty"`
 }

+type Input struct {
+	Type    string          `json:"type,omitempty"`
+	Role    string          `json:"role,omitempty"`
+	Content json.RawMessage `json:"content,omitempty"`
+}
+
 type MediaInput struct {
 	Type     string `json:"type"`
 	Text     string `json:"text,omitempty"`
@@ -910,7 +922,7 @@ func (r *OpenAIResponsesRequest) ParseInput() []MediaInput {
 		return nil
 	}

-	var inputs []MediaInput
+	var mediaInputs []MediaInput

 	// Try string first
 	// if str, ok := common.GetJsonType(r.Input); ok {
@@ -920,60 +932,74 @@ func (r *OpenAIResponsesRequest) ParseInput() []MediaInput {
 	if common.GetJsonType(r.Input) == "string" {
 		var str string
 		_ = common.Unmarshal(r.Input, &str)
-		inputs = append(inputs, MediaInput{Type: "input_text", Text: str})
-		return inputs
+		mediaInputs = append(mediaInputs, MediaInput{Type: "input_text", Text: str})
+		return mediaInputs
 	}

 	// Try array of parts
 	if common.GetJsonType(r.Input) == "array" {
-		var array []any
-		_ = common.Unmarshal(r.Input, &array)
-		for _, itemAny := range array {
-			// Already parsed MediaInput
-			if media, ok := itemAny.(MediaInput); ok {
-				inputs = append(inputs, media)
-				continue
+		var inputs []Input
+		_ = common.Unmarshal(r.Input, &inputs)
+		for _, input := range inputs {
+			if common.GetJsonType(input.Content) == "string" {
+				var str string
+				_ = common.Unmarshal(input.Content, &str)
+				mediaInputs = append(mediaInputs, MediaInput{Type: "input_text", Text: str})
 			}
-			// Generic map
-			item, ok := itemAny.(map[string]any)
-			if !ok {
-				continue
-			}
-			typeVal, ok := item["type"].(string)
-			if !ok {
-				continue
-			}
-			switch typeVal {
-			case "input_text":
-				text, _ := item["text"].(string)
-				inputs = append(inputs, MediaInput{Type: "input_text", Text: text})
-			case "input_image":
-				// image_url may be string or object with url field
-				var imageUrl string
-				switch v := item["image_url"].(type) {
-				case string:
-					imageUrl = v
-				case map[string]any:
-					if url, ok := v["url"].(string); ok {
-						imageUrl = url
+
+			if common.GetJsonType(input.Content) == "array" {
+				var array []any
+				_ = common.Unmarshal(input.Content, &array)
+				for _, itemAny := range array {
+					// Already parsed MediaContent
+					if media, ok := itemAny.(MediaInput); ok {
+						mediaInputs = append(mediaInputs, media)
+						continue
+					}
+
+					// Generic map
+					item, ok := itemAny.(map[string]any)
+					if !ok {
+						continue
+					}
+
+					typeVal, ok := item["type"].(string)
+					if !ok {
+						continue
+					}
+					switch typeVal {
+					case "input_text":
+						text, _ := item["text"].(string)
+						mediaInputs = append(mediaInputs, MediaInput{Type: "input_text", Text: text})
+					case "input_image":
+						// image_url may be string or object with url field
+						var imageUrl string
+						switch v := item["image_url"].(type) {
+						case string:
+							imageUrl = v
+						case map[string]any:
+							if url, ok := v["url"].(string); ok {
+								imageUrl = url
+							}
+						}
+						mediaInputs = append(mediaInputs, MediaInput{Type: "input_image", ImageUrl: imageUrl})
+					case "input_file":
+						// file_url may be string or object with url field
+						var fileUrl string
+						switch v := item["file_url"].(type) {
+						case string:
+							fileUrl = v
+						case map[string]any:
+							if url, ok := v["url"].(string); ok {
+								fileUrl = url
+							}
+						}
+						mediaInputs = append(mediaInputs, MediaInput{Type: "input_file", FileUrl: fileUrl})
 					}
 				}
-				inputs = append(inputs, MediaInput{Type: "input_image", ImageUrl: imageUrl})
-			case "input_file":
-				// file_url may be string or object with url field
-				var fileUrl string
-				switch v := item["file_url"].(type) {
-				case string:
-					fileUrl = v
-				case map[string]any:
-					if url, ok := v["url"].(string); ok {
-						fileUrl = url
-					}
-				}
-				inputs = append(inputs, MediaInput{Type: "input_file", FileUrl: fileUrl})
 			}
 		}
 	}

-	return inputs
+	return mediaInputs
 }
--- a/dto/openai_response.go
+++ b/dto/openai_response.go
@@ -230,6 +230,11 @@ type Usage struct {
 	InputTokens            int                `json:"input_tokens"`
 	OutputTokens           int                `json:"output_tokens"`
 	InputTokensDetails     *InputTokenDetails `json:"input_tokens_details"`
+
+	// claude cache 1h
+	ClaudeCacheCreation5mTokens int `json:"claude_cache_creation_5_m_tokens"`
+	ClaudeCacheCreation1hTokens int `json:"claude_cache_creation_1_h_tokens"`
+
 	// OpenRouter Params
 	Cost any `json:"cost,omitempty"`
 }
--- a/electron/package-lock.json
+++ b/electron/package-lock.json
@@ -2784,9 +2784,9 @@
      }
    },
    "node_modules/js-yaml": {
-      "version": "4.1.0",
-      "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.0.tgz",
-      "integrity": "sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA==",
+      "version": "4.1.1",
+      "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.1.tgz",
+      "integrity": "sha512-qQKT4zQxXl8lLwBtHMWwaTcGfFOZviOJet3Oy/xmGk2gZH677CJM9EvtfdSkgWcATZhj/55JZ0rmy3myCT5lsA==",
      "dev": true,
      "license": "MIT",
      "dependencies": {
--- a/go.mod
+++ b/go.mod
@@ -27,13 +27,14 @@ require (
 	github.com/golang-jwt/jwt/v5 v5.3.0
 	github.com/google/uuid v1.6.0
 	github.com/gorilla/websocket v1.5.0
+	github.com/grafana/pyroscope-go v1.2.7
 	github.com/jfreymuth/oggvorbis v1.0.5
 	github.com/jinzhu/copier v0.4.0
 	github.com/joho/godotenv v1.5.1
 	github.com/mewkiz/flac v1.0.13
 	github.com/pkg/errors v0.9.1
 	github.com/pquerna/otp v1.5.0
-	github.com/samber/lo v1.39.0
+	github.com/samber/lo v1.52.0
 	github.com/shirou/gopsutil v3.21.11+incompatible
 	github.com/shopspring/decimal v1.4.0
 	github.com/stripe/stripe-go/v81 v81.4.0
@@ -43,10 +44,10 @@ require (
 	github.com/tidwall/sjson v1.2.5
 	github.com/tiktoken-go/tokenizer v0.6.2
 	github.com/yapingcat/gomedia v0.0.0-20240906162731-17feea57090c
-	golang.org/x/crypto v0.42.0
+	golang.org/x/crypto v0.45.0
 	golang.org/x/image v0.23.0
-	golang.org/x/net v0.43.0
-	golang.org/x/sync v0.17.0
+	golang.org/x/net v0.47.0
+	golang.org/x/sync v0.18.0
 	gorm.io/driver/mysql v1.4.3
 	gorm.io/driver/postgres v1.5.2
 	gorm.io/gorm v1.25.2
@@ -77,11 +78,11 @@ require (
 	github.com/go-sql-driver/mysql v1.7.0 // indirect
 	github.com/go-webauthn/x v0.1.25 // indirect
 	github.com/goccy/go-json v0.10.2 // indirect
-	github.com/google/go-cmp v0.6.0 // indirect
 	github.com/google/go-tpm v0.9.5 // indirect
 	github.com/gorilla/context v1.1.1 // indirect
 	github.com/gorilla/securecookie v1.1.1 // indirect
 	github.com/gorilla/sessions v1.2.1 // indirect
+	github.com/grafana/pyroscope-go/godeltaprof v0.1.9 // indirect
 	github.com/icza/bitio v1.1.0 // indirect
 	github.com/jackc/pgpassfile v1.0.0 // indirect
 	github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
@@ -91,6 +92,7 @@ require (
 	github.com/jinzhu/inflection v1.0.0 // indirect
 	github.com/jinzhu/now v1.1.5 // indirect
 	github.com/json-iterator/go v1.1.12 // indirect
+	github.com/klauspost/compress v1.17.8 // indirect
 	github.com/klauspost/cpuid/v2 v2.3.0 // indirect
 	github.com/leodido/go-urn v1.4.0 // indirect
 	github.com/mattn/go-isatty v0.0.20 // indirect
@@ -99,6 +101,7 @@ require (
 	github.com/mitchellh/mapstructure v1.5.0 // indirect
 	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
 	github.com/modern-go/reflect2 v1.0.2 // indirect
+	github.com/ncruces/go-strftime v0.1.9 // indirect
 	github.com/pelletier/go-toml/v2 v2.2.1 // indirect
 	github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
 	github.com/tidwall/match v1.1.1 // indirect
@@ -110,13 +113,13 @@ require (
 	github.com/x448/float16 v0.8.4 // indirect
 	github.com/yusufpapurcu/wmi v1.2.3 // indirect
 	golang.org/x/arch v0.21.0 // indirect
-	golang.org/x/exp v0.0.0-20240404231335-c0f41cb1a7a0 // indirect
-	golang.org/x/sys v0.36.0 // indirect
-	golang.org/x/text v0.29.0 // indirect
+	golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b // indirect
+	golang.org/x/sys v0.38.0 // indirect
+	golang.org/x/text v0.31.0 // indirect
 	google.golang.org/protobuf v1.34.2 // indirect
 	gopkg.in/yaml.v3 v3.0.1 // indirect
-	modernc.org/libc v1.22.5 // indirect
-	modernc.org/mathutil v1.5.0 // indirect
-	modernc.org/memory v1.5.0 // indirect
-	modernc.org/sqlite v1.23.1 // indirect
+	modernc.org/libc v1.66.10 // indirect
+	modernc.org/mathutil v1.7.1 // indirect
+	modernc.org/memory v1.11.0 // indirect
+	modernc.org/sqlite v1.40.1 // indirect
 )
--- a/go.sum
+++ b/go.sum
@@ -118,8 +118,8 @@ github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeN
 github.com/google/go-tpm v0.9.5 h1:ocUmnDebX54dnW+MQWGQRbdaAcJELsa6PqZhJ48KwVU=
 github.com/google/go-tpm v0.9.5/go.mod h1:h9jEsEECg7gtLis0upRBQU+GhYVH6jMjrFxI8u6bVUY=
 github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
-github.com/google/pprof v0.0.0-20221118152302-e6195bd50e26 h1:Xim43kblpZXfIBQsbuBVKCudVG457BR2GZFIz3uw3hQ=
-github.com/google/pprof v0.0.0-20221118152302-e6195bd50e26/go.mod h1:dDKJzRmX4S37WGHujM7tX//fmj1uioxKzKxz3lo4HJo=
+github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e h1:ijClszYn+mADRFY17kjQEVQ1XRhq2/JR1M3sGqeJoxs=
+github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA=
 github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
 github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
 github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
@@ -131,6 +131,10 @@ github.com/gorilla/sessions v1.2.1 h1:DHd3rPN5lE3Ts3D8rKkQ8x/0kqfeNmBAaiSi+o7Fsg
 github.com/gorilla/sessions v1.2.1/go.mod h1:dk2InVEVJ0sfLlnXv9EAgkf6ecYs/i80K/zI+bUmuGM=
 github.com/gorilla/websocket v1.5.0 h1:PPwGk2jz7EePpoHN/+ClbZu8SPxiqlu12wZP/3sWmnc=
 github.com/gorilla/websocket v1.5.0/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
+github.com/grafana/pyroscope-go v1.2.7 h1:VWBBlqxjyR0Cwk2W6UrE8CdcdD80GOFNutj0Kb1T8ac=
+github.com/grafana/pyroscope-go v1.2.7/go.mod h1:o/bpSLiJYYP6HQtvcoVKiE9s5RiNgjYTj1DhiddP2Pc=
+github.com/grafana/pyroscope-go/godeltaprof v0.1.9 h1:c1Us8i6eSmkW+Ez05d3co8kasnuOY813tbMN8i/a3Og=
+github.com/grafana/pyroscope-go/godeltaprof v0.1.9/go.mod h1:2+l7K7twW49Ct4wFluZD3tZ6e0SjanjcUUBPVD/UuGU=
 github.com/icza/bitio v1.1.0 h1:ysX4vtldjdi3Ygai5m1cWy4oLkhWTAi+SyO6HC8L9T0=
 github.com/icza/bitio v1.1.0/go.mod h1:0jGnlLAx8MKMr9VGnn/4YrvZiprkvBelsVIbA9Jjr9A=
 github.com/icza/mighty v0.0.0-20180919140131-cfd07d671de6 h1:8UsGZ2rr2ksmEru6lToqnXgA8Mz1DP11X4zSJ159C3k=
@@ -159,12 +163,15 @@ github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwA
 github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
 github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
 github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
+github.com/klauspost/compress v1.17.8 h1:YcnTYrq7MikUT7k0Yb5eceMmALQPYBW/Xltxn0NAMnU=
+github.com/klauspost/compress v1.17.8/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw=
 github.com/klauspost/cpuid/v2 v2.3.0 h1:S4CRMLnYUhGeDFDqkGriYKdfoFlDnMtqTiI/sFzhA9Y=
 github.com/klauspost/cpuid/v2 v2.3.0/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0=
 github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
 github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
-github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0=
 github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk=
+github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
+github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
 github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
 github.com/kr/pty v1.1.8/go.mod h1:O1sed60cT9XZ5uDucP5qwvh+TE3NnUj51EiZO/lmSfw=
 github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
@@ -193,6 +200,8 @@ github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJ
 github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
 github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
 github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
+github.com/ncruces/go-strftime v0.1.9 h1:bY0MQC28UADQmHmaF5dgpLmImcShSi2kHU9XLdhx/f4=
+github.com/ncruces/go-strftime v0.1.9/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls=
 github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE=
 github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU=
 github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE=
@@ -211,14 +220,13 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 github.com/pquerna/otp v1.5.0 h1:NMMR+WrmaqXU4EzdGJEE1aUUI0AMRzsp96fFFWNPwxs=
 github.com/pquerna/otp v1.5.0/go.mod h1:dkJfzwRKNiegxyNb54X/3fLwhCynbMspSyWKnvi1AEg=
-github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
 github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=
 github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
 github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc=
 github.com/rogpeppe/go-internal v1.8.0 h1:FCbCCtXNOY3UtUuHUYaghJg4y7Fd14rXifAYUAtL9R8=
 github.com/rogpeppe/go-internal v1.8.0/go.mod h1:WmiCO8CzOY8rg0OYDC4/i/2WRWAB6poM+XZ2dLUbcbE=
-github.com/samber/lo v1.39.0 h1:4gTz1wUhNYLhFSKl6O+8peW0v2F4BCY034GRpU9WnuA=
-github.com/samber/lo v1.39.0/go.mod h1:+m/ZKRl6ClXCE2Lgf3MsQlWfh4bn1bz6CXEOxnEXnEA=
+github.com/samber/lo v1.52.0 h1:Rvi+3BFHES3A8meP33VPAxiBZX/Aws5RxrschYGjomw=
+github.com/samber/lo v1.52.0/go.mod h1:4+MXEGsJzbKGaUEQFKBq2xtfuznW9oz/WrgyzMzRoM0=
 github.com/shirou/gopsutil v3.21.11+incompatible h1:+1+c1VGhc88SSonWP6foOcLhvnKlUeu/erjjvaPEYiI=
 github.com/shirou/gopsutil v3.21.11+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA=
 github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k=
@@ -226,6 +234,7 @@ github.com/shopspring/decimal v1.4.0/go.mod h1:gawqmDU56v4yIKSwfBSFip1HdCCXN8/+D
 github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
 github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
 github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
+github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY=
 github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
 github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
 github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
@@ -281,18 +290,20 @@ go.uber.org/mock v0.6.0/go.mod h1:KiVJ4BqZJaMj4svdfmHM0AUx4NJYO8ZNpPnZn1Z+BBU=
 golang.org/x/arch v0.21.0 h1:iTC9o7+wP6cPWpDWkivCvQFGAHDQ59SrSxsLPcnkArw=
 golang.org/x/arch v0.21.0/go.mod h1:dNHoOeKiyja7GTvF9NJS1l3Z2yntpQNzgrjh1cU103A=
 golang.org/x/crypto v0.0.0-20210711020723-a769d52b0f97/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
-golang.org/x/crypto v0.42.0 h1:chiH31gIWm57EkTXpwnqf8qeuMUi0yekh6mT2AvFlqI=
-golang.org/x/crypto v0.42.0/go.mod h1:4+rDnOTJhQCx2q7/j6rAN5XDw8kPjeaXEUR2eL94ix8=
-golang.org/x/exp v0.0.0-20240404231335-c0f41cb1a7a0 h1:985EYyeCOxTpcgOTJpflJUwOeEz0CQOdPt73OzpE9F8=
-golang.org/x/exp v0.0.0-20240404231335-c0f41cb1a7a0/go.mod h1:/lliqkxwWAhPjf5oSOIJup2XcqJaw8RGS6k3TGEc7GI=
+golang.org/x/crypto v0.45.0 h1:jMBrvKuj23MTlT0bQEOBcAE0mjg8mK9RXFhRH6nyF3Q=
+golang.org/x/crypto v0.45.0/go.mod h1:XTGrrkGJve7CYK7J8PEww4aY7gM3qMCElcJQ8n8JdX4=
+golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b h1:M2rDM6z3Fhozi9O7NWsxAkg/yqS/lQJ6PmkyIV3YP+o=
+golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b/go.mod h1:3//PLf8L/X+8b4vuAfHzxeRUl04Adcb341+IGKfnqS8=
 golang.org/x/image v0.23.0 h1:HseQ7c2OpPKTPVzNjG5fwJsOTCiiwS4QdsYi5XU6H68=
 golang.org/x/image v0.23.0/go.mod h1:wJJBTdLfCCf3tiHa1fNxpZmUI4mmoZvwMCPP0ddoNKY=
+golang.org/x/mod v0.29.0 h1:HV8lRxZC4l2cr3Zq1LvtOsi/ThTgWnUk/y64QSs8GwA=
+golang.org/x/mod v0.29.0/go.mod h1:NyhrlYXJ2H4eJiRy/WDBO6HMqZQ6q9nk4JzS3NuCK+w=
 golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
 golang.org/x/net v0.0.0-20210520170846-37e1c6afe023/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
-golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE=
-golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg=
-golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug=
-golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
+golang.org/x/net v0.47.0 h1:Mx+4dIFzqraBXUugkia1OOvlD6LemFo1ALMHjrXDOhY=
+golang.org/x/net v0.47.0/go.mod h1:/jNxtkgq5yWUGYkaZGqo27cfGZ1c5Nen03aYrrKpVRU=
+golang.org/x/sync v0.18.0 h1:kr88TuHDroi+UVf+0hZnirlk8o8T+4MrK6mr60WkH/I=
+golang.org/x/sync v0.18.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
 golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
@@ -304,16 +315,18 @@ golang.org/x/sys v0.0.0-20210806184541-e5e7981a1069/go.mod h1:oPkhp1MJrh7nUepCBc
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.36.0 h1:KVRy2GtZBrk1cBYA7MKu5bEZFxQk4NIDV6RLVcC8o0k=
-golang.org/x/sys v0.36.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
+golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc=
+golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
 golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
 golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
 golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
 golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
 golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
-golang.org/x/text v0.29.0 h1:1neNs90w9YzJ9BocxfsQNHKuAT4pkghyXc4nhZ6sJvk=
-golang.org/x/text v0.29.0/go.mod h1:7MhJOA9CD2qZyOKYazxdYMF85OwPdEr9jTtBpO7ydH4=
+golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM=
+golang.org/x/text v0.31.0/go.mod h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM=
 golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.38.0 h1:Hx2Xv8hISq8Lm16jvBZ2VQf+RLmbd7wVUsALibYI/IQ=
+golang.org/x/tools v0.38.0/go.mod h1:yEsQ/d/YK8cjh0L6rZlY8tgtlKiBNTL14pGDJPJpYQs=
 golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
 google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
@@ -343,11 +356,29 @@ gorm.io/driver/postgres v1.5.2/go.mod h1:fmpX0m2I1PKuR7mKZiEluwrP3hbs+ps7JIGMUBp
 gorm.io/gorm v1.23.8/go.mod h1:l2lP/RyAtc1ynaTjFksBde/O8v9oOGIApu2/xRitmZk=
 gorm.io/gorm v1.25.2 h1:gs1o6Vsa+oVKG/a9ElL3XgyGfghFfkKA2SInQaCyMho=
 gorm.io/gorm v1.25.2/go.mod h1:L4uxeKpfBml98NYqVqwAdmV1a2nBtAec/cf3fpucW/k=
-modernc.org/libc v1.22.5 h1:91BNch/e5B0uPbJFgqbxXuOnxBQjlS//icfQEGmvyjE=
-modernc.org/libc v1.22.5/go.mod h1:jj+Z7dTNX8fBScMVNRAYZ/jF91K8fdT2hYMThc3YjBY=
-modernc.org/mathutil v1.5.0 h1:rV0Ko/6SfM+8G+yKiyI830l3Wuz1zRutdslNoQ0kfiQ=
-modernc.org/mathutil v1.5.0/go.mod h1:mZW8CKdRPY1v87qxC/wUdX5O1qDzXMP5TH3wjfpga6E=
-modernc.org/memory v1.5.0 h1:N+/8c5rE6EqugZwHii4IFsaJ7MUhoWX07J5tC/iI5Ds=
-modernc.org/memory v1.5.0/go.mod h1:PkUhL0Mugw21sHPeskwZW4D6VscE/GQJOnIpCnW6pSU=
-modernc.org/sqlite v1.23.1 h1:nrSBg4aRQQwq59JpvGEQ15tNxoO5pX/kUjcRNwSAGQM=
-modernc.org/sqlite v1.23.1/go.mod h1:OrDj17Mggn6MhE+iPbBNf7RGKODDE9NFT0f3EwDzJqk=
+modernc.org/cc/v4 v4.26.5 h1:xM3bX7Mve6G8K8b+T11ReenJOT+BmVqQj0FY5T4+5Y4=
+modernc.org/cc/v4 v4.26.5/go.mod h1:uVtb5OGqUKpoLWhqwNQo/8LwvoiEBLvZXIQ/SmO6mL0=
+modernc.org/ccgo/v4 v4.28.1 h1:wPKYn5EC/mYTqBO373jKjvX2n+3+aK7+sICCv4Fjy1A=
+modernc.org/ccgo/v4 v4.28.1/go.mod h1:uD+4RnfrVgE6ec9NGguUNdhqzNIeeomeXf6CL0GTE5Q=
+modernc.org/fileutil v1.3.40 h1:ZGMswMNc9JOCrcrakF1HrvmergNLAmxOPjizirpfqBA=
+modernc.org/fileutil v1.3.40/go.mod h1:HxmghZSZVAz/LXcMNwZPA/DRrQZEVP9VX0V4LQGQFOc=
+modernc.org/gc/v2 v2.6.5 h1:nyqdV8q46KvTpZlsw66kWqwXRHdjIlJOhG6kxiV/9xI=
+modernc.org/gc/v2 v2.6.5/go.mod h1:YgIahr1ypgfe7chRuJi2gD7DBQiKSLMPgBQe9oIiito=
+modernc.org/goabi0 v0.2.0 h1:HvEowk7LxcPd0eq6mVOAEMai46V+i7Jrj13t4AzuNks=
+modernc.org/goabi0 v0.2.0/go.mod h1:CEFRnnJhKvWT1c1JTI3Avm+tgOWbkOu5oPA8eH8LnMI=
+modernc.org/libc v1.66.10 h1:yZkb3YeLx4oynyR+iUsXsybsX4Ubx7MQlSYEw4yj59A=
+modernc.org/libc v1.66.10/go.mod h1:8vGSEwvoUoltr4dlywvHqjtAqHBaw0j1jI7iFBTAr2I=
+modernc.org/mathutil v1.7.1 h1:GCZVGXdaN8gTqB1Mf/usp1Y/hSqgI2vAGGP4jZMCxOU=
+modernc.org/mathutil v1.7.1/go.mod h1:4p5IwJITfppl0G4sUEDtCr4DthTaT47/N3aT6MhfgJg=
+modernc.org/memory v1.11.0 h1:o4QC8aMQzmcwCK3t3Ux/ZHmwFPzE6hf2Y5LbkRs+hbI=
+modernc.org/memory v1.11.0/go.mod h1:/JP4VbVC+K5sU2wZi9bHoq2MAkCnrt2r98UGeSK7Mjw=
+modernc.org/opt v0.1.4 h1:2kNGMRiUjrp4LcaPuLY2PzUfqM/w9N23quVwhKt5Qm8=
+modernc.org/opt v0.1.4/go.mod h1:03fq9lsNfvkYSfxrfUhZCWPk1lm4cq4N+Bh//bEtgns=
+modernc.org/sortutil v1.2.1 h1:+xyoGf15mM3NMlPDnFqrteY07klSFxLElE2PVuWIJ7w=
+modernc.org/sortutil v1.2.1/go.mod h1:7ZI3a3REbai7gzCLcotuw9AC4VZVpYMjDzETGsSMqJE=
+modernc.org/sqlite v1.40.1 h1:VfuXcxcUWWKRBuP8+BR9L7VnmusMgBNNnBYGEe9w/iY=
+modernc.org/sqlite v1.40.1/go.mod h1:9fjQZ0mB1LLP0GYrp39oOJXx/I2sxEnZtzCmEQIKvGE=
+modernc.org/strutil v1.2.1 h1:UneZBkQA+DX2Rp35KcM69cSsNES9ly8mQWD71HKlOA0=
+modernc.org/strutil v1.2.1/go.mod h1:EHkiggD70koQxjVdSBM3JKM7k6L0FbGE5eymy9i3B9A=
+modernc.org/token v1.1.0 h1:Xl7Ap9dKaEs5kLoOQeQmPWevfnk/DM5qcLcYlA8ys6Y=
+modernc.org/token v1.1.0/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM=
--- a/logger/logger.go
+++ b/logger/logger.go
@@ -67,8 +67,10 @@ func LogError(ctx context.Context, msg string) {
 }

 func LogDebug(ctx context.Context, msg string, args ...any) {
-	msg = fmt.Sprintf(msg, args...)
 	if common.DebugEnabled {
+		if len(args) > 0 {
+			msg = fmt.Sprintf(msg, args...)
+		}
 		logHelper(ctx, loggerDebug, msg)
 	}
 }
--- a/main.go
+++ b/main.go
@@ -124,6 +124,11 @@ func main() {
 		common.SysLog("pprof enabled")
 	}

+	err = common.StartPyroScope()
+	if err != nil {
+		common.SysError(fmt.Sprintf("start pyroscope error : %v", err))
+	}
+
 	// Initialize HTTP server
 	server := gin.New()
 	server.Use(gin.CustomRecovery(func(c *gin.Context, err any) {
--- a/middleware/auth.go
+++ b/middleware/auth.go
@@ -2,12 +2,14 @@ package middleware

 import (
 	"fmt"
+	"net"
 	"net/http"
 	"strconv"
 	"strings"

 	"github.com/QuantumNous/new-api/common"
 	"github.com/QuantumNous/new-api/constant"
+	"github.com/QuantumNous/new-api/logger"
 	"github.com/QuantumNous/new-api/model"
 	"github.com/QuantumNous/new-api/service"
 	"github.com/QuantumNous/new-api/setting/ratio_setting"
@@ -216,10 +218,14 @@ func TokenAuth() func(c *gin.Context) {
 		}
 		key := c.Request.Header.Get("Authorization")
 		parts := make([]string, 0)
-		key = strings.TrimPrefix(key, "Bearer ")
+		if strings.HasPrefix(key, "Bearer ") || strings.HasPrefix(key, "bearer ") {
+			key = strings.TrimSpace(key[7:])
+		}
 		if key == "" || key == "midjourney-proxy" {
 			key = c.Request.Header.Get("mj-api-secret")
-			key = strings.TrimPrefix(key, "Bearer ")
+			if strings.HasPrefix(key, "Bearer ") || strings.HasPrefix(key, "bearer ") {
+				key = strings.TrimSpace(key[7:])
+			}
 			key = strings.TrimPrefix(key, "sk-")
 			parts = strings.Split(key, "-")
 			key = parts[0]
@@ -240,13 +246,20 @@ func TokenAuth() func(c *gin.Context) {
 			return
 		}

-		allowIpsMap := token.GetIpLimitsMap()
-		if len(allowIpsMap) != 0 {
+		allowIps := token.GetIpLimits()
+		if len(allowIps) > 0 {
 			clientIp := c.ClientIP()
-			if _, ok := allowIpsMap[clientIp]; !ok {
+			logger.LogDebug(c, "Token has IP restrictions, checking client IP %s", clientIp)
+			ip := net.ParseIP(clientIp)
+			if ip == nil {
+				abortWithOpenAiMessage(c, http.StatusForbidden, "无法解析客户端 IP 地址")
+				return
+			}
+			if common.IsIpInCIDRList(ip, allowIps) == false {
 				abortWithOpenAiMessage(c, http.StatusForbidden, "您的 IP 不在令牌允许访问的列表中")
 				return
 			}
+			logger.LogDebug(c, "Client IP %s passed the token IP restrictions check", clientIp)
 		}

 		userCache, err := model.GetUserCache(token.UserId)
@@ -307,7 +320,8 @@ func SetupContextForToken(c *gin.Context, token *model.Token, parts ...string) e
 	} else {
 		c.Set("token_model_limit_enabled", false)
 	}
-	c.Set("token_group", token.Group)
+	common.SetContextKey(c, constant.ContextKeyTokenGroup, token.Group)
+	common.SetContextKey(c, constant.ContextKeyTokenCrossGroupRetry, token.CrossGroupRetry)
 	if len(parts) > 1 {
 		if model.IsAdmin(token.UserId) {
 			c.Set("specific_channel_id", parts[1])
--- a/middleware/distributor.go
+++ b/middleware/distributor.go
@@ -97,7 +97,12 @@ func Distribute() func(c *gin.Context) {
 						common.SetContextKey(c, constant.ContextKeyUsingGroup, usingGroup)
 					}
 				}
-				channel, selectGroup, err = service.CacheGetRandomSatisfiedChannel(c, usingGroup, modelRequest.Model, 0)
+				channel, selectGroup, err = service.CacheGetRandomSatisfiedChannel(&service.RetryParam{
+					Ctx:        c,
+					ModelName:  modelRequest.Model,
+					TokenGroup: usingGroup,
+					Retry:      common.GetPointer(0),
+				})
 				if err != nil {
 					showGroup := usingGroup
 					if usingGroup == "auto" {
@@ -157,7 +162,7 @@ func getModelRequest(c *gin.Context) (*ModelRequest, bool, error) {
 			}
 			midjourneyModel, mjErr, success := service.GetMjRequestModel(relayMode, &midjourneyRequest)
 			if mjErr != nil {
-				return nil, false, fmt.Errorf(mjErr.Description)
+				return nil, false, fmt.Errorf("%s", mjErr.Description)
 			}
 			if midjourneyModel == "" {
 				if !success {
@@ -181,6 +186,10 @@ func getModelRequest(c *gin.Context) (*ModelRequest, bool, error) {
 		}
 		c.Set("platform", string(constant.TaskPlatformSuno))
 		c.Set("relay_mode", relayMode)
+	} else if strings.Contains(c.Request.URL.Path, "/v1/videos/") && strings.HasSuffix(c.Request.URL.Path, "/remix") {
+		relayMode := relayconstant.RelayModeVideoSubmit
+		c.Set("relay_mode", relayMode)
+		shouldSelectChannel = false
 	} else if strings.Contains(c.Request.URL.Path, "/v1/videos") {
 		//curl https://api.openai.com/v1/videos \
 		//  -H "Authorization: Bearer $OPENAI_API_KEY" \
--- a/middleware/gzip.go
+++ b/middleware/gzip.go
@@ -5,32 +5,69 @@ import (
 	"io"
 	"net/http"

+	"github.com/QuantumNous/new-api/constant"
 	"github.com/andybalholm/brotli"
 	"github.com/gin-gonic/gin"
 )

+type readCloser struct {
+	io.Reader
+	closeFn func() error
+}
+
+func (rc *readCloser) Close() error {
+	if rc.closeFn != nil {
+		return rc.closeFn()
+	}
+	return nil
+}
+
 func DecompressRequestMiddleware() gin.HandlerFunc {
 	return func(c *gin.Context) {
 		if c.Request.Body == nil || c.Request.Method == http.MethodGet {
 			c.Next()
 			return
 		}
+		maxMB := constant.MaxRequestBodyMB
+		if maxMB <= 0 {
+			maxMB = 32
+		}
+		maxBytes := int64(maxMB) << 20
+
+		origBody := c.Request.Body
+		wrapMaxBytes := func(body io.ReadCloser) io.ReadCloser {
+			return http.MaxBytesReader(c.Writer, body, maxBytes)
+		}
+
 		switch c.GetHeader("Content-Encoding") {
 		case "gzip":
-			gzipReader, err := gzip.NewReader(c.Request.Body)
+			gzipReader, err := gzip.NewReader(origBody)
 			if err != nil {
+				_ = origBody.Close()
 				c.AbortWithStatus(http.StatusBadRequest)
 				return
 			}
-			defer gzipReader.Close()
-
-			// Replace the request body with the decompressed data
-			c.Request.Body = io.NopCloser(gzipReader)
+			// Replace the request body with the decompressed data, and enforce a max size (post-decompression).
+			c.Request.Body = wrapMaxBytes(&readCloser{
+				Reader: gzipReader,
+				closeFn: func() error {
+					_ = gzipReader.Close()
+					return origBody.Close()
+				},
+			})
 			c.Request.Header.Del("Content-Encoding")
 		case "br":
-			reader := brotli.NewReader(c.Request.Body)
-			c.Request.Body = io.NopCloser(reader)
+			reader := brotli.NewReader(origBody)
+			c.Request.Body = wrapMaxBytes(&readCloser{
+				Reader: reader,
+				closeFn: func() error {
+					return origBody.Close()
+				},
+			})
 			c.Request.Header.Del("Content-Encoding")
+		default:
+			// Even for uncompressed bodies, enforce a max size to avoid huge request allocations.
+			c.Request.Body = wrapMaxBytes(origBody)
 		}

 		// Continue processing the request
--- a/middleware/rate-limit.go
+++ b/middleware/rate-limit.go
@@ -102,7 +102,10 @@ func GlobalAPIRateLimit() func(c *gin.Context) {
 }

 func CriticalRateLimit() func(c *gin.Context) {
-	return rateLimitFactory(common.CriticalRateLimitNum, common.CriticalRateLimitDuration, "CT")
+	if common.CriticalRateLimitEnable {
+		return rateLimitFactory(common.CriticalRateLimitNum, common.CriticalRateLimitDuration, "CT")
+	}
+	return defNext
 }

 func DownloadRateLimit() func(c *gin.Context) {
--- a/model/channel.go
+++ b/model/channel.go
@@ -138,9 +138,11 @@ func (channel *Channel) GetNextEnabledKey() (string, int, *types.NewAPIError) {
 			enabledIdx = append(enabledIdx, i)
 		}
 	}
-	// If no specific status list or none enabled, fall back to first key
+	// If no specific status list or none enabled, return an explicit error so caller can
+	// properly handle a channel with no available keys (e.g. mark channel disabled).
+	// Returning the first key here caused requests to keep using an already-disabled key.
 	if len(enabledIdx) == 0 {
-		return keys[0], 0, nil
+		return "", 0, types.NewError(errors.New("no enabled keys"), types.ErrorCodeChannelNoAvailableKey)
 	}

 	switch channel.ChannelInfo.MultiKeyMode {
@@ -252,6 +254,9 @@ func (channel *Channel) Save() error {
 }

 func (channel *Channel) SaveWithoutKey() error {
+	if channel.Id == 0 {
+		return errors.New("channel ID is 0")
+	}
 	return DB.Omit("key").Save(channel).Error
 }

@@ -270,13 +275,17 @@ func GetAllChannels(startIdx int, num int, selectAll bool, idSort bool) ([]*Chan
 	return channels, err
 }

-func GetChannelsByTag(tag string, idSort bool) ([]*Channel, error) {
+func GetChannelsByTag(tag string, idSort bool, selectAll bool) ([]*Channel, error) {
 	var channels []*Channel
 	order := "priority desc"
 	if idSort {
 		order = "id desc"
 	}
-	err := DB.Where("tag = ?", tag).Order(order).Find(&channels).Error
+	query := DB.Where("tag = ?", tag).Order(order)
+	if !selectAll {
+		query = query.Omit("key")
+	}
+	err := query.Find(&channels).Error
 	return channels, err
 }

@@ -688,7 +697,7 @@ func DisableChannelByTag(tag string) error {
 	return err
 }

-func EditChannelByTag(tag string, newTag *string, modelMapping *string, models *string, group *string, priority *int64, weight *uint) error {
+func EditChannelByTag(tag string, newTag *string, modelMapping *string, models *string, group *string, priority *int64, weight *uint, paramOverride *string, headerOverride *string) error {
 	updateData := Channel{}
 	shouldReCreateAbilities := false
 	updatedTag := tag
@@ -714,13 +723,19 @@ func EditChannelByTag(tag string, newTag *string, modelMapping *string, models *
 	if weight != nil {
 		updateData.Weight = weight
 	}
+	if paramOverride != nil {
+		updateData.ParamOverride = paramOverride
+	}
+	if headerOverride != nil {
+		updateData.HeaderOverride = headerOverride
+	}

 	err := DB.Model(&Channel{}).Where("tag = ?", tag).Updates(updateData).Error
 	if err != nil {
 		return err
 	}
 	if shouldReCreateAbilities {
-		channels, err := GetChannelsByTag(updatedTag, false)
+		channels, err := GetChannelsByTag(updatedTag, false, false)
 		if err == nil {
 			for _, channel := range channels {
 				err = channel.UpdateAbilities(nil)
--- a/model/task.go
+++ b/model/task.go
@@ -429,3 +429,14 @@ func TaskCountAllUserTask(userId int, queryParams SyncTaskQueryParams) int64 {
 	_ = query.Count(&total).Error
 	return total
 }
+func (t *Task) ToOpenAIVideo() *dto.OpenAIVideo {
+	openAIVideo := dto.NewOpenAIVideo()
+	openAIVideo.ID = t.TaskID
+	openAIVideo.Status = t.Status.ToVideoStatus()
+	openAIVideo.Model = t.Properties.OriginModelName
+	openAIVideo.SetProgressStr(t.Progress)
+	openAIVideo.CreatedAt = t.CreatedAt
+	openAIVideo.CompletedAt = t.UpdatedAt
+	openAIVideo.SetMetadata("url", t.FailReason)
+	return openAIVideo
+}
--- a/model/token.go
+++ b/model/token.go
@@ -6,7 +6,6 @@ import (
 	"strings"

 	"github.com/QuantumNous/new-api/common"
-
 	"github.com/bytedance/gopkg/util/gopool"
 	"gorm.io/gorm"
 )
@@ -27,6 +26,7 @@ type Token struct {
 	AllowIps           *string        `json:"allow_ips" gorm:"default:''"`
 	UsedQuota          int            `json:"used_quota" gorm:"default:0"` // used quota
 	Group              string         `json:"group" gorm:"default:''"`
+	CrossGroupRetry    bool           `json:"cross_group_retry" gorm:"default:false"` // 跨分组重试，仅auto分组有效
 	DeletedAt          gorm.DeletedAt `gorm:"index"`
 }

@@ -34,26 +34,26 @@ func (token *Token) Clean() {
 	token.Key = ""
 }

-func (token *Token) GetIpLimitsMap() map[string]any {
+func (token *Token) GetIpLimits() []string {
 	// delete empty spaces
 	//split with \n
-	ipLimitsMap := make(map[string]any)
+	ipLimits := make([]string, 0)
 	if token.AllowIps == nil {
-		return ipLimitsMap
+		return ipLimits
 	}
 	cleanIps := strings.ReplaceAll(*token.AllowIps, " ", "")
 	if cleanIps == "" {
-		return ipLimitsMap
+		return ipLimits
 	}
 	ips := strings.Split(cleanIps, "\n")
 	for _, ip := range ips {
 		ip = strings.TrimSpace(ip)
 		ip = strings.ReplaceAll(ip, ",", "")
-		if common.IsIP(ip) {
-			ipLimitsMap[ip] = true
+		if ip != "" {
+			ipLimits = append(ipLimits, ip)
 		}
 	}
-	return ipLimitsMap
+	return ipLimits
 }

 func GetAllUserTokens(userId int, startIdx int, num int) ([]*Token, error) {
@@ -112,7 +112,12 @@ func ValidateUserToken(key string) (token *Token, err error) {
 		}
 		return token, nil
 	}
-	return nil, errors.New("无效的令牌")
+	common.SysLog("ValidateUserToken: failed to get token: " + err.Error())
+	if errors.Is(err, gorm.ErrRecordNotFound) {
+		return nil, errors.New("无效的令牌")
+	} else {
+		return nil, errors.New("无效的令牌，数据库查询出错，请联系管理员")
+	}
 }

 func GetTokenByIds(id int, userId int) (*Token, error) {
@@ -185,7 +190,7 @@ func (token *Token) Update() (err error) {
 		}
 	}()
 	err = DB.Model(token).Select("name", "status", "expired_time", "remain_quota", "unlimited_quota",
-		"model_limits_enabled", "model_limits", "allow_ips", "group").Updates(token).Error
+		"model_limits_enabled", "model_limits", "allow_ips", "group", "cross_group_retry").Updates(token).Error
 	return err
 }

--- a/model/user.go
+++ b/model/user.go
@@ -27,6 +27,7 @@ type User struct {
 	Status           int            `json:"status" gorm:"type:int;default:1"` // enabled, disabled
 	Email            string         `json:"email" gorm:"index" validate:"max=50"`
 	GitHubId         string         `json:"github_id" gorm:"column:github_id;index"`
+	DiscordId        string         `json:"discord_id" gorm:"column:discord_id;index"`
 	OidcId           string         `json:"oidc_id" gorm:"column:oidc_id;index"`
 	WeChatId         string         `json:"wechat_id" gorm:"column:wechat_id;index"`
 	TelegramId       string         `json:"telegram_id" gorm:"column:telegram_id;index"`
@@ -184,6 +185,11 @@ func GetMaxUserId() int {
 	return user.Id
 }

+// GetAllUsers retrieves a paginated list of users and the total number of users.
+// It returns results that include soft-deleted records, ordered by id descending,
+// and omits the password field from the returned user objects.
+// The pageInfo parameter provides the page size and start index for pagination.
+// Returns the slice of users, the total user count, and any error encountered.
 func GetAllUsers(pageInfo *common.PageInfo) (users []*User, total int64, err error) {
 	// Start transaction
 	tx := DB.Begin()
@@ -218,7 +224,10 @@ func GetAllUsers(pageInfo *common.PageInfo) (users []*User, total int64, err err
 	return users, total, nil
 }

-func SearchUsers(keyword string, group string, startIdx int, num int) ([]*User, int64, error) {
+// SearchUsers searches for users matching the provided keyword, group, and exact-match filters.
+// The function accepts a keyword (performs LIKE match on username, email, and display_name; if the keyword is numeric it also matches id), a group to scope results, a map of exact-match filters (allowed keys: "github_id", "discord_id", "oidc_id", "wechat_id", "email", "telegram_id", "linux_do_id"), and pagination parameters startIdx and num.
+// It returns the matched users, the total number of records matching the query (ignoring pagination), and an error if the operation fails.
+func SearchUsers(keyword string, group string, filters map[string]string, startIdx int, num int) ([]*User, int64, error) {
 	var users []*User
 	var total int64
 	var err error
@@ -237,32 +246,46 @@ func SearchUsers(keyword string, group string, startIdx int, num int) ([]*User,
 	// 构建基础查询
 	query := tx.Unscoped().Model(&User{})

-	// 构建搜索条件
-	likeCondition := "username LIKE ? OR email LIKE ? OR display_name LIKE ?"
+	// 允许的过滤字段白名单
+	allowedFields := map[string]bool{
+		"github_id":   true,
+		"discord_id":  true,
+		"oidc_id":     true,
+		"wechat_id":   true,
+		"email":       true,
+		"telegram_id": true,
+		"linux_do_id": true,
+	}

-	// 尝试将关键字转换为整数ID
-	keywordInt, err := strconv.Atoi(keyword)
-	if err == nil {
-		// 如果是数字，同时搜索ID和其他字段
-		likeCondition = "id = ? OR " + likeCondition
-		if group != "" {
-			query = query.Where("("+likeCondition+") AND "+commonGroupCol+" = ?",
-				keywordInt, "%"+keyword+"%", "%"+keyword+"%", "%"+keyword+"%", group)
-		} else {
+	// 应用精确匹配过滤器
+	for field, value := range filters {
+		if value != "" && allowedFields[field] {
+			query = query.Where(field+" = ?", value)
+		}
+	}
+
+	// 构建搜索条件
+	if keyword != "" {
+		likeCondition := "username LIKE ? OR email LIKE ? OR display_name LIKE ?"
+
+		// 尝试将关键字转换为整数ID
+		keywordInt, err := strconv.Atoi(keyword)
+		if err == nil {
+			// 如果是数字，同时搜索ID和其他字段
+			likeCondition = "id = ? OR " + likeCondition
 			query = query.Where(likeCondition,
 				keywordInt, "%"+keyword+"%", "%"+keyword+"%", "%"+keyword+"%")
-		}
-	} else {
-		// 非数字关键字，只搜索字符串字段
-		if group != "" {
-			query = query.Where("("+likeCondition+") AND "+commonGroupCol+" = ?",
-				"%"+keyword+"%", "%"+keyword+"%", "%"+keyword+"%", group)
 		} else {
+			// 非数字关键字，只搜索字符串字段
 			query = query.Where(likeCondition,
 				"%"+keyword+"%", "%"+keyword+"%", "%"+keyword+"%")
 		}
 	}

+	if group != "" {
+		query = query.Where(commonGroupCol+" = ?", group)
+	}
+
 	// 获取总数
 	err = query.Count(&total).Error
 	if err != nil {
@@ -539,6 +562,14 @@ func (user *User) FillUserByGitHubId() error {
 	return nil
 }

+func (user *User) FillUserByDiscordId() error {
+	if user.DiscordId == "" {
+		return errors.New("discord id 为空！")
+	}
+	DB.Where(User{DiscordId: user.DiscordId}).First(user)
+	return nil
+}
+
 func (user *User) FillUserByOidcId() error {
 	if user.OidcId == "" {
 		return errors.New("oidc id 为空！")
@@ -578,6 +609,10 @@ func IsGitHubIdAlreadyTaken(githubId string) bool {
 	return DB.Unscoped().Where("github_id = ?", githubId).Find(&User{}).RowsAffected == 1
 }

+func IsDiscordIdAlreadyTaken(discordId string) bool {
+	return DB.Unscoped().Where("discord_id = ?", discordId).Find(&User{}).RowsAffected == 1
+}
+
 func IsOidcIdAlreadyTaken(oidcId string) bool {
 	return DB.Where("oidc_id = ?", oidcId).Find(&User{}).RowsAffected == 1
 }
@@ -915,4 +950,4 @@ func RootUserExists() bool {
 		return false
 	}
 	return true
-}
+}
--- a/relay/audio_handler.go
+++ b/relay/audio_handler.go
@@ -67,8 +67,11 @@ func AudioHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *type
 		service.ResetStatusCode(newAPIError, statusCodeMappingStr)
 		return newAPIError
 	}
-
-	postConsumeQuota(c, info, usage.(*dto.Usage), "")
+	if usage.(*dto.Usage).CompletionTokenDetails.AudioTokens > 0 || usage.(*dto.Usage).PromptTokensDetails.AudioTokens > 0 {
+		service.PostAudioConsumeQuota(c, info, usage.(*dto.Usage), "")
+	} else {
+		postConsumeQuota(c, info, usage.(*dto.Usage), "")
+	}

 	return nil
 }
--- a/relay/channel/adapter.go
+++ b/relay/channel/adapter.go
@@ -47,7 +47,7 @@ type TaskAdaptor interface {
 	GetChannelName() string

 	// FetchTask
-	FetchTask(baseUrl, key string, body map[string]any) (*http.Response, error)
+	FetchTask(baseUrl, key string, body map[string]any, proxy string) (*http.Response, error)

 	ParseTaskResult(respBody []byte) (*relaycommon.TaskInfo, error)
 }
--- a/relay/channel/ali/adaptor.go
+++ b/relay/channel/ali/adaptor.go
@@ -47,7 +47,11 @@ func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
 		case constant.RelayModeImagesGenerations:
 			fullRequestURL = fmt.Sprintf("%s/api/v1/services/aigc/text2image/image-synthesis", info.ChannelBaseUrl)
 		case constant.RelayModeImagesEdits:
-			fullRequestURL = fmt.Sprintf("%s/api/v1/services/aigc/multimodal-generation/generation", info.ChannelBaseUrl)
+			if isWanModel(info.OriginModelName) {
+				fullRequestURL = fmt.Sprintf("%s/api/v1/services/aigc/image2image/image-synthesis", info.ChannelBaseUrl)
+			} else {
+				fullRequestURL = fmt.Sprintf("%s/api/v1/services/aigc/multimodal-generation/generation", info.ChannelBaseUrl)
+			}
 		case constant.RelayModeCompletions:
 			fullRequestURL = fmt.Sprintf("%s/compatible-mode/v1/completions", info.ChannelBaseUrl)
 		default:
@@ -71,6 +75,9 @@ func (a *Adaptor) SetupRequestHeader(c *gin.Context, req *http.Header, info *rel
 		req.Set("X-DashScope-Async", "enable")
 	}
 	if info.RelayMode == constant.RelayModeImagesEdits {
+		if isWanModel(info.OriginModelName) {
+			req.Set("X-DashScope-Async", "enable")
+		}
 		req.Set("Content-Type", "application/json")
 	}
 	return nil
@@ -82,15 +89,15 @@ func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayIn
 	}
 	// docs: https://bailian.console.aliyun.com/?tab=api#/api/?type=model&url=2712216
 	// fix: InternalError.Algo.InvalidParameter: The value of the enable_thinking parameter is restricted to True.
-	if strings.Contains(request.Model, "thinking") {
-		request.EnableThinking = true
-		request.Stream = true
-		info.IsStream = true
-	}
-	// fix: ali parameter.enable_thinking must be set to false for non-streaming calls
-	if !info.IsStream {
-		request.EnableThinking = false
-	}
+	//if strings.Contains(request.Model, "thinking") {
+	//	request.EnableThinking = true
+	//	request.Stream = true
+	//	info.IsStream = true
+	//}
+	//// fix: ali parameter.enable_thinking must be set to false for non-streaming calls
+	//if !info.IsStream {
+	//	request.EnableThinking = false
+	//}

 	switch info.RelayMode {
 	default:
@@ -107,6 +114,9 @@ func (a *Adaptor) ConvertImageRequest(c *gin.Context, info *relaycommon.RelayInf
 		}
 		return aliRequest, nil
 	} else if info.RelayMode == constant.RelayModeImagesEdits {
+		if isWanModel(info.OriginModelName) {
+			return oaiFormEdit2WanxImageEdit(c, info, request)
+		}
 		// ali image edit https://bailian.console.aliyun.com/?tab=api#/api/?type=model&url=2976416
 		// 如果用户使用表单，则需要解析表单数据
 		if strings.Contains(c.Request.Header.Get("Content-Type"), "multipart/form-data") {
@@ -161,7 +171,11 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycom
 		case constant.RelayModeImagesGenerations:
 			err, usage = aliImageHandler(c, resp, info)
 		case constant.RelayModeImagesEdits:
-			err, usage = aliImageEditHandler(c, resp, info)
+			if isWanModel(info.OriginModelName) {
+				err, usage = aliImageHandler(c, resp, info)
+			} else {
+				err, usage = aliImageEditHandler(c, resp, info)
+			}
 		case constant.RelayModeRerank:
 			err, usage = RerankHandler(c, resp, info)
 		default:
--- a/relay/channel/ali/dto.go
+++ b/relay/channel/ali/dto.go
@@ -112,6 +112,19 @@ type AliImageInput struct {
 	Messages       []AliMessage `json:"messages,omitempty"`
 }

+type WanImageInput struct {
+	Prompt         string   `json:"prompt"`                    // 必需：文本提示词，描述生成图像中期望包含的元素和视觉特点
+	Images         []string `json:"images"`                    // 必需：图像URL数组，长度不超过2，支持HTTP/HTTPS URL或Base64编码
+	NegativePrompt string   `json:"negative_prompt,omitempty"` // 可选：反向提示词，描述不希望在画面中看到的内容
+}
+
+type WanImageParameters struct {
+	N         int     `json:"n,omitempty"`         // 生成图片数量，取值范围1-4，默认4
+	Watermark *bool   `json:"watermark,omitempty"` // 是否添加水印标识，默认false
+	Seed      int     `json:"seed,omitempty"`      // 随机数种子，取值范围[0, 2147483647]
+	Strength  float64 `json:"strength,omitempty"`  // 修改幅度 0.0-1.0，默认0.5（部分模型支持）
+}
+
 type AliRerankParameters struct {
 	TopN            *int  `json:"top_n,omitempty"`
 	ReturnDocuments *bool `json:"return_documents,omitempty"`
--- a/relay/channel/ali/image.go
+++ b/relay/channel/ali/image.go
@@ -58,11 +58,7 @@ func oaiImage2Ali(request dto.ImageRequest) (*AliImageRequest, error) {
 	return &imageRequest, nil
 }

-func oaiFormEdit2AliImageEdit(c *gin.Context, info *relaycommon.RelayInfo, request dto.ImageRequest) (*AliImageRequest, error) {
-	var imageRequest AliImageRequest
-	imageRequest.Model = request.Model
-	imageRequest.ResponseFormat = request.ResponseFormat
-
+func getImageBase64sFromForm(c *gin.Context, fieldName string) ([]string, error) {
 	mf := c.Request.MultipartForm
 	if mf == nil {
 		if _, err := c.MultipartForm(); err != nil {
@@ -98,9 +94,9 @@ func oaiFormEdit2AliImageEdit(c *gin.Context, info *relaycommon.RelayInfo, reque
 		return nil, errors.New("image is required")
 	}

-	if len(imageFiles) > 1 {
-		return nil, errors.New("only one image is supported for qwen edit")
-	}
+	//if len(imageFiles) > 1 {
+	//	return nil, errors.New("only one image is supported for qwen edit")
+	//}

 	// 获取base64编码的图片
 	var imageBase64s []string
@@ -127,7 +123,18 @@ func oaiFormEdit2AliImageEdit(c *gin.Context, info *relaycommon.RelayInfo, reque
 		imageBase64s = append(imageBase64s, dataURL)
 		image.Close()
 	}
+	return imageBase64s, nil
+}

+func oaiFormEdit2AliImageEdit(c *gin.Context, info *relaycommon.RelayInfo, request dto.ImageRequest) (*AliImageRequest, error) {
+	var imageRequest AliImageRequest
+	imageRequest.Model = request.Model
+	imageRequest.ResponseFormat = request.ResponseFormat
+
+	imageBase64s, err := getImageBase64sFromForm(c, "image")
+	if err != nil {
+		return nil, fmt.Errorf("get image base64s from form failed: %w", err)
+	}
 	//dto.MediaContent{}
 	mediaContents := make([]AliMediaContent, len(imageBase64s))
 	for i, b64 := range imageBase64s {
--- a/relay/channel/ali/image_wan.go
+++ b/relay/channel/ali/image_wan.go
@@ -0,0 +1,39 @@
+package ali
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/QuantumNous/new-api/common"
+	"github.com/QuantumNous/new-api/dto"
+	relaycommon "github.com/QuantumNous/new-api/relay/common"
+
+	"github.com/gin-gonic/gin"
+)
+
+func oaiFormEdit2WanxImageEdit(c *gin.Context, info *relaycommon.RelayInfo, request dto.ImageRequest) (*AliImageRequest, error) {
+	var err error
+	var imageRequest AliImageRequest
+	imageRequest.Model = request.Model
+	imageRequest.ResponseFormat = request.ResponseFormat
+	wanInput := WanImageInput{
+		Prompt: request.Prompt,
+	}
+
+	if err := common.UnmarshalBodyReusable(c, &wanInput); err != nil {
+		return nil, err
+	}
+	if wanInput.Images, err = getImageBase64sFromForm(c, "image"); err != nil {
+		return nil, fmt.Errorf("get image base64s from form failed: %w", err)
+	}
+	wanParams := WanImageParameters{
+		N: int(request.N),
+	}
+	imageRequest.Input = wanInput
+	imageRequest.Parameters = wanParams
+	return &imageRequest, nil
+}
+
+func isWanModel(modelName string) bool {
+	return strings.Contains(modelName, "wan")
+}
--- a/relay/channel/aws/constants.go
+++ b/relay/channel/aws/constants.go
@@ -18,6 +18,7 @@ var awsModelIDMap = map[string]string{
 	"claude-opus-4-1-20250805":   "anthropic.claude-opus-4-1-20250805-v1:0",
 	"claude-sonnet-4-5-20250929": "anthropic.claude-sonnet-4-5-20250929-v1:0",
 	"claude-haiku-4-5-20251001":  "anthropic.claude-haiku-4-5-20251001-v1:0",
+	"claude-opus-4-5-20251101":   "anthropic.claude-opus-4-5-20251101-v1:0",
 	// Nova models
 	"nova-micro-v1:0":   "amazon.nova-micro-v1:0",
 	"nova-lite-v1:0":    "amazon.nova-lite-v1:0",
@@ -76,6 +77,11 @@ var awsModelCanCrossRegionMap = map[string]map[string]bool{
 		"ap": true,
 		"eu": true,
 	},
+	"anthropic.claude-opus-4-5-20251101-v1:0": {
+		"us": true,
+		"ap": true,
+		"eu": true,
+	},
 	"anthropic.claude-haiku-4-5-20251001-v1:0": {
 		"us": true,
 		"ap": true,
--- a/relay/channel/aws/dto.go
+++ b/relay/channel/aws/dto.go
@@ -1,15 +1,21 @@
 package aws

 import (
+	"context"
+	"encoding/json"
 	"io"
+	"net/http"
+	"strings"

 	"github.com/QuantumNous/new-api/common"
 	"github.com/QuantumNous/new-api/dto"
+	"github.com/QuantumNous/new-api/logger"
 )

 type AwsClaudeRequest struct {
 	// AnthropicVersion should be "bedrock-2023-05-31"
 	AnthropicVersion string              `json:"anthropic_version"`
+	AnthropicBeta    json.RawMessage     `json:"anthropic_beta,omitempty"`
 	System           any                 `json:"system,omitempty"`
 	Messages         []dto.ClaudeMessage `json:"messages"`
 	MaxTokens        uint                `json:"max_tokens,omitempty"`
@@ -22,29 +28,28 @@ type AwsClaudeRequest struct {
 	Thinking         *dto.Thinking       `json:"thinking,omitempty"`
 }

-func copyRequest(req *dto.ClaudeRequest) *AwsClaudeRequest {
-	return &AwsClaudeRequest{
-		AnthropicVersion: "bedrock-2023-05-31",
-		System:           req.System,
-		Messages:         req.Messages,
-		MaxTokens:        req.MaxTokens,
-		Temperature:      req.Temperature,
-		TopP:             req.TopP,
-		TopK:             req.TopK,
-		StopSequences:    req.StopSequences,
-		Tools:            req.Tools,
-		ToolChoice:       req.ToolChoice,
-		Thinking:         req.Thinking,
-	}
-}
-
-func formatRequest(requestBody io.Reader) (*AwsClaudeRequest, error) {
+func formatRequest(requestBody io.Reader, requestHeader http.Header) (*AwsClaudeRequest, error) {
 	var awsClaudeRequest AwsClaudeRequest
 	err := common.DecodeJson(requestBody, &awsClaudeRequest)
 	if err != nil {
 		return nil, err
 	}
 	awsClaudeRequest.AnthropicVersion = "bedrock-2023-05-31"
+
+	// check header anthropic-beta
+	anthropicBetaValues := requestHeader.Get("anthropic-beta")
+	if len(anthropicBetaValues) > 0 {
+		var tempArray []string
+		tempArray = strings.Split(anthropicBetaValues, ",")
+		if len(tempArray) > 0 {
+			betaJson, err := json.Marshal(tempArray)
+			if err != nil {
+				return nil, err
+			}
+			awsClaudeRequest.AnthropicBeta = betaJson
+		}
+	}
+	logger.LogJson(context.Background(), "json", awsClaudeRequest)
 	return &awsClaudeRequest, nil
 }

--- a/relay/channel/aws/relay-aws.go
+++ b/relay/channel/aws/relay-aws.go
@@ -18,6 +18,7 @@ import (
 	"github.com/gin-gonic/gin"
 	"github.com/pkg/errors"

+	"github.com/QuantumNous/new-api/setting/model_setting"
 	"github.com/aws/aws-sdk-go-v2/aws"
 	"github.com/aws/aws-sdk-go-v2/credentials"
 	"github.com/aws/aws-sdk-go-v2/service/bedrockruntime"
@@ -25,6 +26,17 @@ import (
 	"github.com/aws/smithy-go/auth/bearer"
 )

+// getAwsErrorStatusCode extracts HTTP status code from AWS SDK error
+func getAwsErrorStatusCode(err error) int {
+	// Check for HTTP response error which contains status code
+	var httpErr interface{ HTTPStatusCode() int }
+	if errors.As(err, &httpErr) {
+		return httpErr.HTTPStatusCode()
+	}
+	// Default to 500 if we can't determine the status code
+	return http.StatusInternalServerError
+}
+
 func newAwsClient(c *gin.Context, info *relaycommon.RelayInfo) (*bedrockruntime.Client, error) {
 	var (
 		httpClient *http.Client
@@ -73,7 +85,6 @@ func doAwsClientRequest(c *gin.Context, info *relaycommon.RelayInfo, a *Adaptor,
 	}
 	a.AwsClient = awsCli

-	println(info.UpstreamModelName)
 	// 获取对应的AWS模型ID
 	awsModelId := getAwsModelID(info.UpstreamModelName)

@@ -83,6 +94,10 @@ func doAwsClientRequest(c *gin.Context, info *relaycommon.RelayInfo, a *Adaptor,
 		awsModelId = awsModelCrossRegion(awsModelId, awsRegionPrefix)
 	}

+	// init empty request.header
+	requestHeader := http.Header{}
+	a.SetupRequestHeader(c, &requestHeader, info)
+
 	if isNovaModel(awsModelId) {
 		var novaReq *NovaRequest
 		err = common.DecodeJson(requestBody, &novaReq)
@@ -104,7 +119,7 @@ func doAwsClientRequest(c *gin.Context, info *relaycommon.RelayInfo, a *Adaptor,
 		awsReq.Body = reqBody
 		return nil, nil
 	} else {
-		awsClaudeReq, err := formatRequest(requestBody)
+		awsClaudeReq, err := formatRequest(requestBody, requestHeader)
 		if err != nil {
 			return nil, types.NewError(errors.Wrap(err, "format aws request fail"), types.ErrorCodeBadRequestBody)
 		}
@@ -115,7 +130,7 @@ func doAwsClientRequest(c *gin.Context, info *relaycommon.RelayInfo, a *Adaptor,
 				Accept:      aws.String("application/json"),
 				ContentType: aws.String("application/json"),
 			}
-			awsReq.Body, err = common.Marshal(awsClaudeReq)
+			awsReq.Body, err = buildAwsRequestBody(c, info, awsClaudeReq)
 			if err != nil {
 				return nil, types.NewError(errors.Wrap(err, "marshal aws request fail"), types.ErrorCodeBadRequestBody)
 			}
@@ -127,7 +142,7 @@ func doAwsClientRequest(c *gin.Context, info *relaycommon.RelayInfo, a *Adaptor,
 				Accept:      aws.String("application/json"),
 				ContentType: aws.String("application/json"),
 			}
-			awsReq.Body, err = common.Marshal(awsClaudeReq)
+			awsReq.Body, err = buildAwsRequestBody(c, info, awsClaudeReq)
 			if err != nil {
 				return nil, types.NewError(errors.Wrap(err, "marshal aws request fail"), types.ErrorCodeBadRequestBody)
 			}
@@ -137,6 +152,24 @@ func doAwsClientRequest(c *gin.Context, info *relaycommon.RelayInfo, a *Adaptor,
 	}
 }

+// buildAwsRequestBody prepares the payload for AWS requests, applying passthrough rules when enabled.
+func buildAwsRequestBody(c *gin.Context, info *relaycommon.RelayInfo, awsClaudeReq any) ([]byte, error) {
+	if model_setting.GetGlobalSettings().PassThroughRequestEnabled || info.ChannelSetting.PassThroughBodyEnabled {
+		body, err := common.GetRequestBody(c)
+		if err != nil {
+			return nil, errors.Wrap(err, "get request body for pass-through fail")
+		}
+		var data map[string]interface{}
+		if err := common.Unmarshal(body, &data); err != nil {
+			return nil, errors.Wrap(err, "pass-through unmarshal request body fail")
+		}
+		delete(data, "model")
+		delete(data, "stream")
+		return common.Marshal(data)
+	}
+	return common.Marshal(awsClaudeReq)
+}
+
 func getAwsRegionPrefix(awsRegionId string) string {
 	parts := strings.Split(awsRegionId, "-")
 	regionPrefix := ""
@@ -170,7 +203,8 @@ func awsHandler(c *gin.Context, info *relaycommon.RelayInfo, a *Adaptor) (*types

 	awsResp, err := a.AwsClient.InvokeModel(c.Request.Context(), a.AwsReq.(*bedrockruntime.InvokeModelInput))
 	if err != nil {
-		return types.NewOpenAIError(errors.Wrap(err, "InvokeModel"), types.ErrorCodeAwsInvokeError, http.StatusInternalServerError), nil
+		statusCode := getAwsErrorStatusCode(err)
+		return types.NewOpenAIError(errors.Wrap(err, "InvokeModel"), types.ErrorCodeAwsInvokeError, statusCode), nil
 	}

 	claudeInfo := &claude.ClaudeResponseInfo{
@@ -196,7 +230,8 @@ func awsHandler(c *gin.Context, info *relaycommon.RelayInfo, a *Adaptor) (*types
 func awsStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, a *Adaptor) (*types.NewAPIError, *dto.Usage) {
 	awsResp, err := a.AwsClient.InvokeModelWithResponseStream(c.Request.Context(), a.AwsReq.(*bedrockruntime.InvokeModelWithResponseStreamInput))
 	if err != nil {
-		return types.NewOpenAIError(errors.Wrap(err, "InvokeModelWithResponseStream"), types.ErrorCodeAwsInvokeError, http.StatusInternalServerError), nil
+		statusCode := getAwsErrorStatusCode(err)
+		return types.NewOpenAIError(errors.Wrap(err, "InvokeModelWithResponseStream"), types.ErrorCodeAwsInvokeError, statusCode), nil
 	}
 	stream := awsResp.GetStream()
 	defer stream.Close()
@@ -235,7 +270,8 @@ func handleNovaRequest(c *gin.Context, info *relaycommon.RelayInfo, a *Adaptor)

 	awsResp, err := a.AwsClient.InvokeModel(c.Request.Context(), a.AwsReq.(*bedrockruntime.InvokeModelInput))
 	if err != nil {
-		return types.NewError(errors.Wrap(err, "InvokeModel"), types.ErrorCodeChannelAwsClientError), nil
+		statusCode := getAwsErrorStatusCode(err)
+		return types.NewOpenAIError(errors.Wrap(err, "InvokeModel"), types.ErrorCodeAwsInvokeError, statusCode), nil
 	}

 	// 解析Nova响应
--- a/relay/channel/baidu/relay-baidu.go
+++ b/relay/channel/baidu/relay-baidu.go
@@ -150,7 +150,7 @@ func baiduHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Respon
 		return types.NewError(err, types.ErrorCodeBadResponseBody), nil
 	}
 	if baiduResponse.ErrorMsg != "" {
-		return types.NewError(fmt.Errorf(baiduResponse.ErrorMsg), types.ErrorCodeBadResponseBody), nil
+		return types.NewError(fmt.Errorf("%s", baiduResponse.ErrorMsg), types.ErrorCodeBadResponseBody), nil
 	}
 	fullTextResponse := responseBaidu2OpenAI(&baiduResponse)
 	jsonResponse, err := json.Marshal(fullTextResponse)
@@ -175,7 +175,7 @@ func baiduEmbeddingHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *ht
 		return types.NewError(err, types.ErrorCodeBadResponseBody), nil
 	}
 	if baiduResponse.ErrorMsg != "" {
-		return types.NewError(fmt.Errorf(baiduResponse.ErrorMsg), types.ErrorCodeBadResponseBody), nil
+		return types.NewError(fmt.Errorf("%s", baiduResponse.ErrorMsg), types.ErrorCodeBadResponseBody), nil
 	}
 	fullTextResponse := embeddingResponseBaidu2OpenAI(&baiduResponse)
 	jsonResponse, err := json.Marshal(fullTextResponse)
--- a/relay/channel/claude/constants.go
+++ b/relay/channel/claude/constants.go
@@ -9,6 +9,7 @@ var ModelList = []string{
 	"claude-3-opus-20240229",
 	"claude-3-haiku-20240307",
 	"claude-3-5-haiku-20241022",
+	"claude-haiku-4-5-20251001",
 	"claude-3-5-sonnet-20240620",
 	"claude-3-5-sonnet-20241022",
 	"claude-3-7-sonnet-20250219",
@@ -21,6 +22,8 @@ var ModelList = []string{
 	"claude-opus-4-1-20250805-thinking",
 	"claude-sonnet-4-5-20250929",
 	"claude-sonnet-4-5-20250929-thinking",
+	"claude-opus-4-5-20251101",
+	"claude-opus-4-5-20251101-thinking",
 }

 var ChannelName = "claude"
--- a/relay/channel/claude/relay-claude.go
+++ b/relay/channel/claude/relay-claude.go
@@ -189,7 +189,9 @@ func RequestOpenAI2ClaudeMessage(c *gin.Context, textRequest dto.GeneralOpenAIRe
 		// https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#important-considerations-when-using-extended-thinking
 		claudeRequest.TopP = 0
 		claudeRequest.Temperature = common.GetPointer[float64](1.0)
-		claudeRequest.Model = strings.TrimSuffix(textRequest.Model, "-thinking")
+		if !model_setting.ShouldPreserveThinkingSuffix(textRequest.Model) {
+			claudeRequest.Model = strings.TrimSuffix(textRequest.Model, "-thinking")
+		}
 	}

 	if textRequest.ReasoningEffort != "" {
@@ -481,9 +483,11 @@ func StreamResponseClaude2OpenAI(reqMode int, claudeResponse *dto.ClaudeResponse
 				}
 			}
 		} else if claudeResponse.Type == "message_delta" {
-			finishReason := stopReasonClaude2OpenAI(*claudeResponse.Delta.StopReason)
-			if finishReason != "null" {
-				choice.FinishReason = &finishReason
+			if claudeResponse.Delta != nil && claudeResponse.Delta.StopReason != nil {
+				finishReason := stopReasonClaude2OpenAI(*claudeResponse.Delta.StopReason)
+				if finishReason != "null" {
+					choice.FinishReason = &finishReason
+				}
 			}
 			//claudeUsage = &claudeResponse.Usage
 		} else if claudeResponse.Type == "message_stop" {
@@ -596,6 +600,8 @@ func FormatClaudeResponseInfo(requestMode int, claudeResponse *dto.ClaudeRespons
 			claudeInfo.Usage.PromptTokens = claudeResponse.Message.Usage.InputTokens
 			claudeInfo.Usage.PromptTokensDetails.CachedTokens = claudeResponse.Message.Usage.CacheReadInputTokens
 			claudeInfo.Usage.PromptTokensDetails.CachedCreationTokens = claudeResponse.Message.Usage.CacheCreationInputTokens
+			claudeInfo.Usage.ClaudeCacheCreation5mTokens = claudeResponse.Message.Usage.GetCacheCreation5mTokens()
+			claudeInfo.Usage.ClaudeCacheCreation1hTokens = claudeResponse.Message.Usage.GetCacheCreation1hTokens()
 			claudeInfo.Usage.CompletionTokens = claudeResponse.Message.Usage.OutputTokens
 		} else if claudeResponse.Type == "content_block_delta" {
 			if claudeResponse.Delta.Text != nil {
@@ -669,7 +675,7 @@ func HandleStreamResponseData(c *gin.Context, info *relaycommon.RelayInfo, claud
 func HandleStreamFinalResponse(c *gin.Context, info *relaycommon.RelayInfo, claudeInfo *ClaudeResponseInfo, requestMode int) {

 	if requestMode == RequestModeCompletion {
-		claudeInfo.Usage = service.ResponseText2Usage(claudeInfo.ResponseText.String(), info.UpstreamModelName, info.PromptTokens)
+		claudeInfo.Usage = service.ResponseText2Usage(c, claudeInfo.ResponseText.String(), info.UpstreamModelName, info.GetEstimatePromptTokens())
 	} else {
 		if claudeInfo.Usage.PromptTokens == 0 {
 			//上游出错
@@ -678,7 +684,7 @@ func HandleStreamFinalResponse(c *gin.Context, info *relaycommon.RelayInfo, clau
 			if common.DebugEnabled {
 				common.SysLog("claude response usage is not complete, maybe upstream error")
 			}
-			claudeInfo.Usage = service.ResponseText2Usage(claudeInfo.ResponseText.String(), info.UpstreamModelName, claudeInfo.Usage.PromptTokens)
+			claudeInfo.Usage = service.ResponseText2Usage(c, claudeInfo.ResponseText.String(), info.UpstreamModelName, claudeInfo.Usage.PromptTokens)
 		}
 	}

@@ -730,16 +736,15 @@ func HandleClaudeResponseData(c *gin.Context, info *relaycommon.RelayInfo, claud
 		return types.WithClaudeError(*claudeError, http.StatusInternalServerError)
 	}
 	if requestMode == RequestModeCompletion {
-		completionTokens := service.CountTextToken(claudeResponse.Completion, info.OriginModelName)
-		claudeInfo.Usage.PromptTokens = info.PromptTokens
-		claudeInfo.Usage.CompletionTokens = completionTokens
-		claudeInfo.Usage.TotalTokens = info.PromptTokens + completionTokens
+		claudeInfo.Usage = service.ResponseText2Usage(c, claudeResponse.Completion, info.UpstreamModelName, info.GetEstimatePromptTokens())
 	} else {
 		claudeInfo.Usage.PromptTokens = claudeResponse.Usage.InputTokens
 		claudeInfo.Usage.CompletionTokens = claudeResponse.Usage.OutputTokens
 		claudeInfo.Usage.TotalTokens = claudeResponse.Usage.InputTokens + claudeResponse.Usage.OutputTokens
 		claudeInfo.Usage.PromptTokensDetails.CachedTokens = claudeResponse.Usage.CacheReadInputTokens
 		claudeInfo.Usage.PromptTokensDetails.CachedCreationTokens = claudeResponse.Usage.CacheCreationInputTokens
+		claudeInfo.Usage.ClaudeCacheCreation5mTokens = claudeResponse.Usage.GetCacheCreation5mTokens()
+		claudeInfo.Usage.ClaudeCacheCreation1hTokens = claudeResponse.Usage.GetCacheCreation1hTokens()
 	}
 	var responseData []byte
 	switch info.RelayFormat {
--- a/relay/channel/cloudflare/relay_cloudflare.go
+++ b/relay/channel/cloudflare/relay_cloudflare.go
@@ -74,7 +74,7 @@ func cfStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Res
 	if err := scanner.Err(); err != nil {
 		logger.LogError(c, "error_scanning_stream_response: "+err.Error())
 	}
-	usage := service.ResponseText2Usage(responseText, info.UpstreamModelName, info.PromptTokens)
+	usage := service.ResponseText2Usage(c, responseText, info.UpstreamModelName, info.GetEstimatePromptTokens())
 	if info.ShouldIncludeUsage {
 		response := helper.GenerateFinalUsageResponse(id, info.StartTime.Unix(), info.UpstreamModelName, *usage)
 		err := helper.ObjectData(c, response)
@@ -105,7 +105,7 @@ func cfHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Response)
 	for _, choice := range response.Choices {
 		responseText += choice.Message.StringContent()
 	}
-	usage := service.ResponseText2Usage(responseText, info.UpstreamModelName, info.PromptTokens)
+	usage := service.ResponseText2Usage(c, responseText, info.UpstreamModelName, info.GetEstimatePromptTokens())
 	response.Usage = *usage
 	response.Id = helper.GetResponseID(c)
 	jsonResponse, err := json.Marshal(response)
@@ -142,10 +142,6 @@ func cfSTTHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Respon
 	c.Writer.WriteHeader(resp.StatusCode)
 	_, _ = c.Writer.Write(jsonResponse)

-	usage := &dto.Usage{}
-	usage.PromptTokens = info.PromptTokens
-	usage.CompletionTokens = service.CountTextToken(cfResp.Result.Text, info.UpstreamModelName)
-	usage.TotalTokens = usage.PromptTokens + usage.CompletionTokens
-
+	usage := service.ResponseText2Usage(c, cfResp.Result.Text, info.UpstreamModelName, info.GetEstimatePromptTokens())
 	return nil, usage
 }
--- a/relay/channel/cohere/relay-cohere.go
+++ b/relay/channel/cohere/relay-cohere.go
@@ -165,7 +165,7 @@ func cohereStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http
 		}
 	})
 	if usage.PromptTokens == 0 {
-		usage = service.ResponseText2Usage(responseText, info.UpstreamModelName, info.PromptTokens)
+		usage = service.ResponseText2Usage(c, responseText, info.UpstreamModelName, info.GetEstimatePromptTokens())
 	}
 	return usage, nil
 }
@@ -225,9 +225,9 @@ func cohereRerankHandler(c *gin.Context, resp *http.Response, info *relaycommon.
 	}
 	usage := dto.Usage{}
 	if cohereResp.Meta.BilledUnits.InputTokens == 0 {
-		usage.PromptTokens = info.PromptTokens
+		usage.PromptTokens = info.GetEstimatePromptTokens()
 		usage.CompletionTokens = 0
-		usage.TotalTokens = info.PromptTokens
+		usage.TotalTokens = info.GetEstimatePromptTokens()
 	} else {
 		usage.PromptTokens = cohereResp.Meta.BilledUnits.InputTokens
 		usage.CompletionTokens = cohereResp.Meta.BilledUnits.OutputTokens
--- a/relay/channel/coze/relay-coze.go
+++ b/relay/channel/coze/relay-coze.go
@@ -142,7 +142,7 @@ func cozeChatStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *ht
 	helper.Done(c)

 	if usage.TotalTokens == 0 {
-		usage = service.ResponseText2Usage(responseText, info.UpstreamModelName, c.GetInt("coze_input_count"))
+		usage = service.ResponseText2Usage(c, responseText, info.UpstreamModelName, c.GetInt("coze_input_count"))
 	}

 	return usage, nil
@@ -208,7 +208,7 @@ func handleCozeEvent(c *gin.Context, event string, data string, responseText *st
 			return
 		}

-		common.SysLog(fmt.Sprintf("stream event error: ", errorData.Code, errorData.Message))
+		common.SysLog(fmt.Sprintf("stream event error: %v %v", errorData.Code, errorData.Message))
 	}
 }

--- a/relay/channel/dify/relay-dify.go
+++ b/relay/channel/dify/relay-dify.go
@@ -246,7 +246,7 @@ func difyStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.R
 	})
 	helper.Done(c)
 	if usage.TotalTokens == 0 {
-		usage = service.ResponseText2Usage(responseText, info.UpstreamModelName, info.PromptTokens)
+		usage = service.ResponseText2Usage(c, responseText, info.UpstreamModelName, info.GetEstimatePromptTokens())
 	}
 	usage.CompletionTokens += nodeToken
 	return usage, nil
--- a/relay/channel/gemini/adaptor.go
+++ b/relay/channel/gemini/adaptor.go
@@ -13,6 +13,7 @@ import (
 	relaycommon "github.com/QuantumNous/new-api/relay/common"
 	"github.com/QuantumNous/new-api/relay/constant"
 	"github.com/QuantumNous/new-api/setting/model_setting"
+	"github.com/QuantumNous/new-api/setting/reasoning"
 	"github.com/QuantumNous/new-api/types"

 	"github.com/gin-gonic/gin"
@@ -127,7 +128,8 @@ func (a *Adaptor) Init(info *relaycommon.RelayInfo) {

 func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {

-	if model_setting.GetGeminiSettings().ThinkingAdapterEnabled {
+	if model_setting.GetGeminiSettings().ThinkingAdapterEnabled &&
+		!model_setting.ShouldPreserveThinkingSuffix(info.OriginModelName) {
 		// 新增逻辑：处理 -thinking-<budget> 格式
 		if strings.Contains(info.UpstreamModelName, "-thinking-") {
 			parts := strings.Split(info.UpstreamModelName, "-thinking-")
@@ -136,6 +138,8 @@ func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
 			info.UpstreamModelName = strings.TrimSuffix(info.UpstreamModelName, "-thinking")
 		} else if strings.HasSuffix(info.UpstreamModelName, "-nothinking") {
 			info.UpstreamModelName = strings.TrimSuffix(info.UpstreamModelName, "-nothinking")
+		} else if baseModel, level, ok := reasoning.TrimEffortSuffix(info.UpstreamModelName); ok && level != "" {
+			info.UpstreamModelName = baseModel
 		}
 	}

@@ -176,7 +180,7 @@ func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayIn
 		return nil, errors.New("request is nil")
 	}

-	geminiRequest, err := CovertGemini2OpenAI(c, *request, info)
+	geminiRequest, err := CovertOpenAI2Gemini(c, *request, info)
 	if err != nil {
 		return nil, err
 	}
--- a/relay/channel/gemini/constant.go
+++ b/relay/channel/gemini/constant.go
@@ -8,6 +8,7 @@ var ModelList = []string{
 	"gemini-1.5-pro-latest", "gemini-1.5-flash-latest",
 	// preview version
 	"gemini-2.0-flash-lite-preview",
+	"gemini-3-pro-preview",
 	// gemini exp
 	"gemini-exp-1206",
 	// flash exp
@@ -31,7 +32,7 @@ var SafetySettingList = []string{
 	"HARM_CATEGORY_HATE_SPEECH",
 	"HARM_CATEGORY_SEXUALLY_EXPLICIT",
 	"HARM_CATEGORY_DANGEROUS_CONTENT",
-	"HARM_CATEGORY_CIVIC_INTEGRITY",
+	//"HARM_CATEGORY_CIVIC_INTEGRITY", This item is deprecated!
 }

 var ChannelName = "google gemini"
--- a/relay/channel/gemini/relay-gemini-native.go
+++ b/relay/channel/gemini/relay-gemini-native.go
@@ -3,7 +3,6 @@ package gemini
 import (
 	"io"
 	"net/http"
-	"strings"

 	"github.com/QuantumNous/new-api/common"
 	"github.com/QuantumNous/new-api/dto"
@@ -13,8 +12,6 @@ import (
 	"github.com/QuantumNous/new-api/service"
 	"github.com/QuantumNous/new-api/types"

-	"github.com/pkg/errors"
-
 	"github.com/gin-gonic/gin"
 )

@@ -72,10 +69,7 @@ func NativeGeminiEmbeddingHandler(c *gin.Context, resp *http.Response, info *rel
 		println(string(responseBody))
 	}

-	usage := &dto.Usage{
-		PromptTokens: info.PromptTokens,
-		TotalTokens:  info.PromptTokens,
-	}
+	usage := service.ResponseText2Usage(c, "", info.UpstreamModelName, info.GetEstimatePromptTokens())

 	if info.IsGeminiBatchEmbedding {
 		var geminiResponse dto.GeminiBatchEmbeddingResponse
@@ -97,80 +91,15 @@ func NativeGeminiEmbeddingHandler(c *gin.Context, resp *http.Response, info *rel
 }

 func GeminiTextGenerationStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Response) (*dto.Usage, *types.NewAPIError) {
-	var usage = &dto.Usage{}
-	var imageCount int
-
 	helper.SetEventStreamHeaders(c)

-	responseText := strings.Builder{}
-
-	helper.StreamScannerHandler(c, resp, info, func(data string) bool {
-		var geminiResponse dto.GeminiChatResponse
-		err := common.UnmarshalJsonStr(data, &geminiResponse)
+	return geminiStreamHandler(c, info, resp, func(data string, geminiResponse *dto.GeminiChatResponse) bool {
+		err := helper.StringData(c, data)
 		if err != nil {
-			logger.LogError(c, "error unmarshalling stream response: "+err.Error())
+			logger.LogError(c, "failed to write stream data: "+err.Error())
 			return false
 		}
-
-		// 统计图片数量
-		for _, candidate := range geminiResponse.Candidates {
-			for _, part := range candidate.Content.Parts {
-				if part.InlineData != nil && part.InlineData.MimeType != "" {
-					imageCount++
-				}
-				if part.Text != "" {
-					responseText.WriteString(part.Text)
-				}
-			}
-		}
-
-		// 更新使用量统计
-		if geminiResponse.UsageMetadata.TotalTokenCount != 0 {
-			usage.PromptTokens = geminiResponse.UsageMetadata.PromptTokenCount
-			usage.CompletionTokens = geminiResponse.UsageMetadata.CandidatesTokenCount + geminiResponse.UsageMetadata.ThoughtsTokenCount
-			usage.TotalTokens = geminiResponse.UsageMetadata.TotalTokenCount
-			usage.CompletionTokenDetails.ReasoningTokens = geminiResponse.UsageMetadata.ThoughtsTokenCount
-			for _, detail := range geminiResponse.UsageMetadata.PromptTokensDetails {
-				if detail.Modality == "AUDIO" {
-					usage.PromptTokensDetails.AudioTokens = detail.TokenCount
-				} else if detail.Modality == "TEXT" {
-					usage.PromptTokensDetails.TextTokens = detail.TokenCount
-				}
-			}
-		}
-
-		// 直接发送 GeminiChatResponse 响应
-		err = helper.StringData(c, data)
-		if err != nil {
-			logger.LogError(c, err.Error())
-		}
 		info.SendResponseCount++
 		return true
 	})
-
-	if info.SendResponseCount == 0 {
-		return nil, types.NewOpenAIError(errors.New("no response received from Gemini API"), types.ErrorCodeEmptyResponse, http.StatusInternalServerError)
-	}
-
-	if imageCount != 0 {
-		if usage.CompletionTokens == 0 {
-			usage.CompletionTokens = imageCount * 258
-		}
-	}
-
-	// 如果usage.CompletionTokens为0，则使用本地统计的completion tokens
-	if usage.CompletionTokens == 0 {
-		str := responseText.String()
-		if len(str) > 0 {
-			usage = service.ResponseText2Usage(responseText.String(), info.UpstreamModelName, info.PromptTokens)
-		} else {
-			// 空补全，不需要使用量
-			usage = &dto.Usage{}
-		}
-	}
-
-	// 移除流式响应结尾的[Done]，因为Gemini API没有发送Done的行为
-	//helper.Done(c)
-
-	return usage, nil
 }
--- a/relay/channel/gemini/relay-gemini.go
+++ b/relay/channel/gemini/relay-gemini.go
@@ -19,8 +19,8 @@ import (
 	"github.com/QuantumNous/new-api/relay/helper"
 	"github.com/QuantumNous/new-api/service"
 	"github.com/QuantumNous/new-api/setting/model_setting"
+	"github.com/QuantumNous/new-api/setting/reasoning"
 	"github.com/QuantumNous/new-api/types"
-
 	"github.com/gin-gonic/gin"
 )

@@ -44,6 +44,8 @@ var geminiSupportedMimeTypes = map[string]bool{
 	"video/flv":       true,
 }

+const thoughtSignatureBypassValue = "context_engineering_is_the_way_to_go"
+
 // Gemini 允许的思考预算范围
 const (
 	pro25MinBudget       = 128
@@ -96,6 +98,7 @@ func clampThinkingBudget(modelName string, budget int) int {
 // "effort": "high" - Allocates a large portion of tokens for reasoning (approximately 80% of max_tokens)
 // "effort": "medium" - Allocates a moderate portion of tokens (approximately 50% of max_tokens)
 // "effort": "low" - Allocates a smaller portion of tokens (approximately 20% of max_tokens)
+// "effort": "minimal" - Allocates a minimal portion of tokens (approximately 5% of max_tokens)
 func clampThinkingBudgetByEffort(modelName string, effort string) int {
 	isNew25Pro := isNew25ProModel(modelName)
 	is25FlashLite := is25FlashLiteModel(modelName)
@@ -116,6 +119,8 @@ func clampThinkingBudgetByEffort(modelName string, effort string) int {
 		maxBudget = maxBudget * 50 / 100
 	case "low":
 		maxBudget = maxBudget * 20 / 100
+	case "minimal":
+		maxBudget = maxBudget * 5 / 100
 	}
 	return clampThinkingBudget(modelName, maxBudget)
 }
@@ -176,12 +181,18 @@ func ThinkingAdaptor(geminiRequest *dto.GeminiChatRequest, info *relaycommon.Rel
 					ThinkingBudget: common.GetPointer(0),
 				}
 			}
+		} else if _, level, ok := reasoning.TrimEffortSuffix(info.UpstreamModelName); ok && level != "" {
+			geminiRequest.GenerationConfig.ThinkingConfig = &dto.GeminiThinkingConfig{
+				IncludeThoughts: true,
+				ThinkingLevel:   level,
+			}
+			info.ReasoningEffort = level
 		}
 	}
 }

 // Setting safety to the lowest possible values since Gemini is already powerless enough
-func CovertGemini2OpenAI(c *gin.Context, textRequest dto.GeneralOpenAIRequest, info *relaycommon.RelayInfo) (*dto.GeminiChatRequest, error) {
+func CovertOpenAI2Gemini(c *gin.Context, textRequest dto.GeneralOpenAIRequest, info *relaycommon.RelayInfo) (*dto.GeminiChatRequest, error) {

 	geminiRequest := dto.GeminiChatRequest{
 		Contents: make([]dto.GeminiChatContent, 0, len(textRequest.Messages)),
@@ -193,6 +204,10 @@ func CovertGemini2OpenAI(c *gin.Context, textRequest dto.GeneralOpenAIRequest, i
 		},
 	}

+	attachThoughtSignature := (info.ChannelType == constant.ChannelTypeGemini ||
+		info.ChannelType == constant.ChannelTypeVertexAi) &&
+		model_setting.GetGeminiSettings().FunctionCallThoughtSignatureEnabled
+
 	if model_setting.IsGeminiModelSupportImagine(info.UpstreamModelName) {
 		geminiRequest.GenerationConfig.ResponseModalities = []string{
 			"TEXT",
@@ -202,6 +217,7 @@ func CovertGemini2OpenAI(c *gin.Context, textRequest dto.GeneralOpenAIRequest, i

 	adaptorWithExtraBody := false

+	// patch extra_body
 	if len(textRequest.ExtraBody) > 0 {
 		if !strings.HasSuffix(info.UpstreamModelName, "-nothinking") {
 			var extraBody map[string]interface{}
@@ -233,6 +249,39 @@ func CovertGemini2OpenAI(c *gin.Context, textRequest dto.GeneralOpenAIRequest, i
 						}
 					}
 				}
+
+				// check error param name like imageConfig, should be image_config
+				if _, hasErrorParam := googleBody["imageConfig"]; hasErrorParam {
+					return nil, errors.New("extra_body.google.imageConfig is not supported, use extra_body.google.image_config instead")
+				}
+
+				if imageConfig, ok := googleBody["image_config"].(map[string]interface{}); ok {
+					// check error param name like aspectRatio, should be aspect_ratio
+					if _, hasErrorParam := imageConfig["aspectRatio"]; hasErrorParam {
+						return nil, errors.New("extra_body.google.image_config.aspectRatio is not supported, use extra_body.google.image_config.aspect_ratio instead")
+					}
+					// check error param name like imageSize, should be image_size
+					if _, hasErrorParam := imageConfig["imageSize"]; hasErrorParam {
+						return nil, errors.New("extra_body.google.image_config.imageSize is not supported, use extra_body.google.image_config.image_size instead")
+					}
+
+					// convert snake_case to camelCase for Gemini API
+					geminiImageConfig := make(map[string]interface{})
+					if aspectRatio, ok := imageConfig["aspect_ratio"]; ok {
+						geminiImageConfig["aspectRatio"] = aspectRatio
+					}
+					if imageSize, ok := imageConfig["image_size"]; ok {
+						geminiImageConfig["imageSize"] = imageSize
+					}
+
+					if len(geminiImageConfig) > 0 {
+						imageConfigBytes, err := common.Marshal(geminiImageConfig)
+						if err != nil {
+							return nil, fmt.Errorf("failed to marshal image_config: %w", err)
+						}
+						geminiRequest.GenerationConfig.ImageConfig = imageConfigBytes
+					}
+				}
 			}
 		}
 	}
@@ -371,6 +420,8 @@ func CovertGemini2OpenAI(c *gin.Context, textRequest dto.GeneralOpenAIRequest, i
 		content := dto.GeminiChatContent{
 			Role: message.Role,
 		}
+		shouldAttachThoughtSignature := attachThoughtSignature && (message.Role == "assistant" || message.Role == "model")
+		signatureAttached := false
 		// isToolCall := false
 		if message.ToolCalls != nil {
 			// message.Role = "model"
@@ -388,6 +439,10 @@ func CovertGemini2OpenAI(c *gin.Context, textRequest dto.GeneralOpenAIRequest, i
 						Arguments:    args,
 					},
 				}
+				if shouldAttachThoughtSignature && !signatureAttached && hasFunctionCallContent(toolCall.FunctionCall) && len(toolCall.ThoughtSignature) == 0 {
+					toolCall.ThoughtSignature = json.RawMessage(strconv.Quote(thoughtSignatureBypassValue))
+					signatureAttached = true
+				}
 				parts = append(parts, toolCall)
 				tool_call_ids[call.ID] = call.Function.Name
 			}
@@ -400,9 +455,68 @@ func CovertGemini2OpenAI(c *gin.Context, textRequest dto.GeneralOpenAIRequest, i
 				if part.Text == "" {
 					continue
 				}
-				parts = append(parts, dto.GeminiPart{
-					Text: part.Text,
-				})
+				// check markdown image ![image](data:image/jpeg;base64,xxxxxxxxxxxx)
+				// 使用字符串查找而非正则，避免大文本性能问题
+				text := part.Text
+				hasMarkdownImage := false
+				for {
+					// 快速检查是否包含 markdown 图片标记
+					startIdx := strings.Index(text, "![")
+					if startIdx == -1 {
+						break
+					}
+					// 找到 ](
+					bracketIdx := strings.Index(text[startIdx:], "](data:")
+					if bracketIdx == -1 {
+						break
+					}
+					bracketIdx += startIdx
+					// 找到闭合的 )
+					closeIdx := strings.Index(text[bracketIdx+2:], ")")
+					if closeIdx == -1 {
+						break
+					}
+					closeIdx += bracketIdx + 2
+
+					hasMarkdownImage = true
+					// 添加图片前的文本
+					if startIdx > 0 {
+						textBefore := text[:startIdx]
+						if textBefore != "" {
+							parts = append(parts, dto.GeminiPart{
+								Text: textBefore,
+							})
+						}
+					}
+					// 提取 data URL (从 "](" 后面开始，到 ")" 之前)
+					dataUrl := text[bracketIdx+2 : closeIdx]
+					imageNum += 1
+					if constant.GeminiVisionMaxImageNum != -1 && imageNum > constant.GeminiVisionMaxImageNum {
+						return nil, fmt.Errorf("too many images in the message, max allowed is %d", constant.GeminiVisionMaxImageNum)
+					}
+					format, base64String, err := service.DecodeBase64FileData(dataUrl)
+					if err != nil {
+						return nil, fmt.Errorf("decode markdown base64 image data failed: %s", err.Error())
+					}
+					imgPart := dto.GeminiPart{
+						InlineData: &dto.GeminiInlineData{
+							MimeType: format,
+							Data:     base64String,
+						},
+					}
+					if shouldAttachThoughtSignature {
+						imgPart.ThoughtSignature = json.RawMessage(strconv.Quote(thoughtSignatureBypassValue))
+					}
+					parts = append(parts, imgPart)
+					// 继续处理剩余文本
+					text = text[closeIdx+1:]
+				}
+				// 添加剩余文本或原始文本（如果没有找到 markdown 图片）
+				if !hasMarkdownImage {
+					parts = append(parts, dto.GeminiPart{
+						Text: part.Text,
+					})
+				}
 			} else if part.Type == dto.ContentTypeImageURL {
 				imageNum += 1

@@ -472,6 +586,17 @@ func CovertGemini2OpenAI(c *gin.Context, textRequest dto.GeneralOpenAIRequest, i
 			}
 		}

+		// 如果需要附加签名但还没有附加（没有 tool_calls 或 tool_calls 为空），
+		// 则在第一个文本 part 上附加 thoughtSignature
+		if shouldAttachThoughtSignature && !signatureAttached && len(parts) > 0 {
+			for i := range parts {
+				if parts[i].Text != "" {
+					parts[i].ThoughtSignature = json.RawMessage(strconv.Quote(thoughtSignatureBypassValue))
+					break
+				}
+			}
+		}
+
 		content.Parts = parts

 		// there's no assistant role in gemini and API shall vomit if Role is not user or model
@@ -496,6 +621,28 @@ func CovertGemini2OpenAI(c *gin.Context, textRequest dto.GeneralOpenAIRequest, i
 	return &geminiRequest, nil
 }

+func hasFunctionCallContent(call *dto.FunctionCall) bool {
+	if call == nil {
+		return false
+	}
+	if strings.TrimSpace(call.FunctionName) != "" {
+		return true
+	}
+
+	switch v := call.Arguments.(type) {
+	case nil:
+		return false
+	case string:
+		return strings.TrimSpace(v) != ""
+	case map[string]interface{}:
+		return len(v) > 0
+	case []interface{}:
+		return len(v) > 0
+	default:
+		return true
+	}
+}
+
 // Helper function to get a list of supported MIME types for error messages
 func getSupportedMimeTypesList() []string {
 	keys := make([]string, 0, len(geminiSupportedMimeTypes))
@@ -920,14 +1067,10 @@ func handleFinalStream(c *gin.Context, info *relaycommon.RelayInfo, resp *dto.Ch
 	return nil
 }

-func GeminiChatStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Response) (*dto.Usage, *types.NewAPIError) {
-	// responseText := ""
-	id := helper.GetResponseID(c)
-	createAt := common.GetTimestamp()
-	responseText := strings.Builder{}
+func geminiStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Response, callback func(data string, geminiResponse *dto.GeminiChatResponse) bool) (*dto.Usage, *types.NewAPIError) {
 	var usage = &dto.Usage{}
 	var imageCount int
-	finishReason := constant.FinishReasonStop
+	responseText := strings.Builder{}

 	helper.StreamScannerHandler(c, resp, info, func(data string) bool {
 		var geminiResponse dto.GeminiChatResponse
@@ -937,6 +1080,7 @@ func GeminiChatStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *
 			return false
 		}

+		// 统计图片数量
 		for _, candidate := range geminiResponse.Candidates {
 			for _, part := range candidate.Content.Parts {
 				if part.InlineData != nil && part.InlineData.MimeType != "" {
@@ -948,14 +1092,10 @@ func GeminiChatStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *
 			}
 		}

-		response, isStop := streamResponseGeminiChat2OpenAI(&geminiResponse)
-
-		response.Id = id
-		response.Created = createAt
-		response.Model = info.UpstreamModelName
+		// 更新使用量统计
 		if geminiResponse.UsageMetadata.TotalTokenCount != 0 {
 			usage.PromptTokens = geminiResponse.UsageMetadata.PromptTokenCount
-			usage.CompletionTokens = geminiResponse.UsageMetadata.CandidatesTokenCount
+			usage.CompletionTokens = geminiResponse.UsageMetadata.CandidatesTokenCount + geminiResponse.UsageMetadata.ThoughtsTokenCount
 			usage.CompletionTokenDetails.ReasoningTokens = geminiResponse.UsageMetadata.ThoughtsTokenCount
 			usage.TotalTokens = geminiResponse.UsageMetadata.TotalTokenCount
 			for _, detail := range geminiResponse.UsageMetadata.PromptTokensDetails {
@@ -966,6 +1106,45 @@ func GeminiChatStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *
 				}
 			}
 		}
+
+		return callback(data, &geminiResponse)
+	})
+
+	if imageCount != 0 {
+		if usage.CompletionTokens == 0 {
+			usage.CompletionTokens = imageCount * 1400
+		}
+	}
+
+	usage.PromptTokensDetails.TextTokens = usage.PromptTokens
+	if usage.TotalTokens > 0 {
+		usage.CompletionTokens = usage.TotalTokens - usage.PromptTokens
+	}
+
+	if usage.CompletionTokens <= 0 {
+		str := responseText.String()
+		if len(str) > 0 {
+			usage = service.ResponseText2Usage(c, responseText.String(), info.UpstreamModelName, info.GetEstimatePromptTokens())
+		} else {
+			usage = &dto.Usage{}
+		}
+	}
+
+	return usage, nil
+}
+
+func GeminiChatStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Response) (*dto.Usage, *types.NewAPIError) {
+	id := helper.GetResponseID(c)
+	createAt := common.GetTimestamp()
+	finishReason := constant.FinishReasonStop
+
+	usage, err := geminiStreamHandler(c, info, resp, func(data string, geminiResponse *dto.GeminiChatResponse) bool {
+		response, isStop := streamResponseGeminiChat2OpenAI(geminiResponse)
+
+		response.Id = id
+		response.Created = createAt
+		response.Model = info.UpstreamModelName
+
 		logger.LogDebug(c, fmt.Sprintf("info.SendResponseCount = %d", info.SendResponseCount))
 		if info.SendResponseCount == 0 {
 			// send first response
@@ -981,7 +1160,7 @@ func GeminiChatStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *
 					emptyResponse.Choices[0].Delta.ToolCalls = copiedToolCalls
 				}
 				finishReason = constant.FinishReasonToolCalls
-				err = handleStream(c, info, emptyResponse)
+				err := handleStream(c, info, emptyResponse)
 				if err != nil {
 					logger.LogError(c, err.Error())
 				}
@@ -991,14 +1170,14 @@ func GeminiChatStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *
 					response.Choices[0].FinishReason = nil
 				}
 			} else {
-				err = handleStream(c, info, emptyResponse)
+				err := handleStream(c, info, emptyResponse)
 				if err != nil {
 					logger.LogError(c, err.Error())
 				}
 			}
 		}

-		err = handleStream(c, info, response)
+		err := handleStream(c, info, response)
 		if err != nil {
 			logger.LogError(c, err.Error())
 		}
@@ -1008,40 +1187,15 @@ func GeminiChatStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *
 		return true
 	})

-	if info.SendResponseCount == 0 {
-		// 空补全，报错不计费
-		// empty response, throw an error
-		return nil, types.NewOpenAIError(errors.New("no response received from Gemini API"), types.ErrorCodeEmptyResponse, http.StatusInternalServerError)
-	}
-
-	if imageCount != 0 {
-		if usage.CompletionTokens == 0 {
-			usage.CompletionTokens = imageCount * 258
-		}
-	}
-
-	usage.PromptTokensDetails.TextTokens = usage.PromptTokens
-	usage.CompletionTokens = usage.TotalTokens - usage.PromptTokens
-
-	if usage.CompletionTokens == 0 {
-		str := responseText.String()
-		if len(str) > 0 {
-			usage = service.ResponseText2Usage(responseText.String(), info.UpstreamModelName, info.PromptTokens)
-		} else {
-			// 空补全，不需要使用量
-			usage = &dto.Usage{}
-		}
+	if err != nil {
+		return usage, err
 	}

 	response := helper.GenerateFinalUsageResponse(id, createAt, info.UpstreamModelName, *usage)
-	err := handleFinalStream(c, info, response)
-	if err != nil {
-		common.SysLog("send final response failed: " + err.Error())
+	handleErr := handleFinalStream(c, info, response)
+	if handleErr != nil {
+		common.SysLog("send final response failed: " + handleErr.Error())
 	}
-	//if info.RelayFormat == relaycommon.RelayFormatOpenAI {
-	//	helper.Done(c)
-	//}
-	//resp.Body.Close()
 	return usage, nil
 }

@@ -1143,11 +1297,7 @@ func GeminiEmbeddingHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *h
 	// Google has not yet clarified how embedding models will be billed
 	// refer to openai billing method to use input tokens billing
 	// https://platform.openai.com/docs/guides/embeddings#what-are-embeddings
-	usage := &dto.Usage{
-		PromptTokens:     info.PromptTokens,
-		CompletionTokens: 0,
-		TotalTokens:      info.PromptTokens,
-	}
+	usage := service.ResponseText2Usage(c, "", info.UpstreamModelName, info.GetEstimatePromptTokens())
 	openAIResponse.Usage = *usage

 	jsonResponse, jsonErr := common.Marshal(openAIResponse)
--- a/relay/channel/minimax/tts.go
+++ b/relay/channel/minimax/tts.go
@@ -163,7 +163,7 @@ func handleTTSResponse(c *gin.Context, resp *http.Response, info *relaycommon.Re
 	}

 	usage = &dto.Usage{
-		PromptTokens:     info.PromptTokens,
+		PromptTokens:     info.GetEstimatePromptTokens(),
 		CompletionTokens: 0,
 		TotalTokens:      int(minimaxResp.ExtraInfo.UsageCharacters),
 	}
--- a/relay/channel/moonshot/adaptor.go
+++ b/relay/channel/moonshot/adaptor.go
@@ -6,6 +6,7 @@ import (
 	"io"
 	"net/http"

+	channelconstant "github.com/QuantumNous/new-api/constant"
 	"github.com/QuantumNous/new-api/dto"
 	"github.com/QuantumNous/new-api/relay/channel"
 	"github.com/QuantumNous/new-api/relay/channel/claude"
@@ -44,6 +45,16 @@ func (a *Adaptor) Init(info *relaycommon.RelayInfo) {
 }

 func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
+	baseURL := info.ChannelBaseUrl
+	if specialPlan, ok := channelconstant.ChannelSpecialBases[baseURL]; ok {
+		if info.RelayFormat == types.RelayFormatClaude {
+			return fmt.Sprintf("%s/v1/messages", specialPlan.ClaudeBaseURL), nil
+		}
+		if info.RelayFormat == types.RelayFormatOpenAI {
+			return fmt.Sprintf("%s/chat/completions", specialPlan.OpenAIBaseURL), nil
+		}
+	}
+
 	switch info.RelayFormat {
 	case types.RelayFormatClaude:
 		return fmt.Sprintf("%s/anthropic/v1/messages", info.ChannelBaseUrl), nil
--- a/relay/channel/openai/adaptor.go
+++ b/relay/channel/openai/adaptor.go
@@ -27,6 +27,7 @@ import (
 	"github.com/QuantumNous/new-api/relay/common_handler"
 	relayconstant "github.com/QuantumNous/new-api/relay/constant"
 	"github.com/QuantumNous/new-api/service"
+	"github.com/QuantumNous/new-api/setting/model_setting"
 	"github.com/QuantumNous/new-api/types"

 	"github.com/gin-gonic/gin"
@@ -41,7 +42,7 @@ type Adaptor struct {
 // support OAI models: o1-mini/o3-mini/o4-mini/o1/o3 etc...
 // minimal effort only available in gpt-5
 func parseReasoningEffortFromModelSuffix(model string) (string, string) {
-	effortSuffixes := []string{"-high", "-minimal", "-low", "-medium"}
+	effortSuffixes := []string{"-high", "-minimal", "-low", "-medium", "-none", "-xhigh"}
 	for _, suffix := range effortSuffixes {
 		if strings.HasSuffix(model, suffix) {
 			effort := strings.TrimPrefix(suffix, "-")
@@ -224,7 +225,8 @@ func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayIn
 			request.Usage = json.RawMessage(`{"include":true}`)
 		}
 		// 适配 OpenRouter 的 thinking 后缀
-		if strings.HasSuffix(info.UpstreamModelName, "-thinking") {
+		if !model_setting.ShouldPreserveThinkingSuffix(info.OriginModelName) &&
+			strings.HasSuffix(info.UpstreamModelName, "-thinking") {
 			info.UpstreamModelName = strings.TrimSuffix(info.UpstreamModelName, "-thinking")
 			request.Model = info.UpstreamModelName
 			if len(request.Reasoning) == 0 {
@@ -304,10 +306,11 @@ func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayIn
 			request.Temperature = nil
 		}

+		// gpt-5系列模型适配 归零不再支持的参数
 		if strings.HasPrefix(info.UpstreamModelName, "gpt-5") {
-			if info.UpstreamModelName != "gpt-5-chat-latest" {
-				request.Temperature = nil
-			}
+			request.Temperature = nil
+			request.TopP = 0 // oai 的 top_p 默认值是 1.0，但是为了 omitempty 属性直接不传，这里显式设置为 0
+			request.LogProbs = false
 		}

 		// 转换模型推理力度后缀
--- a/relay/channel/openai/audio.go
+++ b/relay/channel/openai/audio.go
@@ -0,0 +1,145 @@
+package openai
+
+import (
+	"bytes"
+	"fmt"
+	"io"
+	"math"
+	"net/http"
+
+	"github.com/QuantumNous/new-api/common"
+	"github.com/QuantumNous/new-api/constant"
+	"github.com/QuantumNous/new-api/dto"
+	"github.com/QuantumNous/new-api/logger"
+	relaycommon "github.com/QuantumNous/new-api/relay/common"
+	"github.com/QuantumNous/new-api/relay/helper"
+	"github.com/QuantumNous/new-api/service"
+	"github.com/QuantumNous/new-api/types"
+	"github.com/gin-gonic/gin"
+)
+
+func OpenaiTTSHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) *dto.Usage {
+	// the status code has been judged before, if there is a body reading failure,
+	// it should be regarded as a non-recoverable error, so it should not return err for external retry.
+	// Analogous to nginx's load balancing, it will only retry if it can't be requested or
+	// if the upstream returns a specific status code, once the upstream has already written the header,
+	// the subsequent failure of the response body should be regarded as a non-recoverable error,
+	// and can be terminated directly.
+	defer service.CloseResponseBodyGracefully(resp)
+	usage := &dto.Usage{}
+	usage.PromptTokens = info.GetEstimatePromptTokens()
+	usage.TotalTokens = info.GetEstimatePromptTokens()
+	for k, v := range resp.Header {
+		c.Writer.Header().Set(k, v[0])
+	}
+	c.Writer.WriteHeader(resp.StatusCode)
+
+	if info.IsStream {
+		helper.StreamScannerHandler(c, resp, info, func(data string) bool {
+			if service.SundaySearch(data, "usage") {
+				var simpleResponse dto.SimpleResponse
+				err := common.Unmarshal([]byte(data), &simpleResponse)
+				if err != nil {
+					logger.LogError(c, err.Error())
+				}
+				if simpleResponse.Usage.TotalTokens != 0 {
+					usage.PromptTokens = simpleResponse.Usage.InputTokens
+					usage.CompletionTokens = simpleResponse.OutputTokens
+					usage.TotalTokens = simpleResponse.TotalTokens
+				}
+			}
+			_ = helper.StringData(c, data)
+			return true
+		})
+	} else {
+		common.SetContextKey(c, constant.ContextKeyLocalCountTokens, true)
+		// 读取响应体到缓冲区
+		bodyBytes, err := io.ReadAll(resp.Body)
+		if err != nil {
+			logger.LogError(c, fmt.Sprintf("failed to read TTS response body: %v", err))
+			c.Writer.WriteHeaderNow()
+			return usage
+		}
+
+		// 写入响应到客户端
+		c.Writer.WriteHeaderNow()
+		_, err = c.Writer.Write(bodyBytes)
+		if err != nil {
+			logger.LogError(c, fmt.Sprintf("failed to write TTS response: %v", err))
+		}
+
+		// 计算音频时长并更新 usage
+		audioFormat := "mp3" // 默认格式
+		if audioReq, ok := info.Request.(*dto.AudioRequest); ok && audioReq.ResponseFormat != "" {
+			audioFormat = audioReq.ResponseFormat
+		}
+
+		var duration float64
+		var durationErr error
+
+		if audioFormat == "pcm" {
+			// PCM 格式没有文件头，根据 OpenAI TTS 的 PCM 参数计算时长
+			// 采样率: 24000 Hz, 位深度: 16-bit (2 bytes), 声道数: 1
+			const sampleRate = 24000
+			const bytesPerSample = 2
+			const channels = 1
+			duration = float64(len(bodyBytes)) / float64(sampleRate*bytesPerSample*channels)
+		} else {
+			ext := "." + audioFormat
+			reader := bytes.NewReader(bodyBytes)
+			duration, durationErr = common.GetAudioDuration(c.Request.Context(), reader, ext)
+		}
+
+		usage.PromptTokensDetails.TextTokens = usage.PromptTokens
+
+		if durationErr != nil {
+			logger.LogWarn(c, fmt.Sprintf("failed to get audio duration: %v", durationErr))
+			// 如果无法获取时长，则设置保底的 CompletionTokens，根据body大小计算
+			sizeInKB := float64(len(bodyBytes)) / 1000.0
+			estimatedTokens := int(math.Ceil(sizeInKB)) // 粗略估算每KB约等于1 token
+			usage.CompletionTokens = estimatedTokens
+			usage.CompletionTokenDetails.AudioTokens = estimatedTokens
+		} else if duration > 0 {
+			// 计算 token: ceil(duration) / 60.0 * 1000，即每分钟 1000 tokens
+			completionTokens := int(math.Round(math.Ceil(duration) / 60.0 * 1000))
+			usage.CompletionTokens = completionTokens
+			usage.CompletionTokenDetails.AudioTokens = completionTokens
+		}
+		usage.TotalTokens = usage.PromptTokens + usage.CompletionTokens
+	}
+
+	return usage
+}
+
+func OpenaiSTTHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo, responseFormat string) (*types.NewAPIError, *dto.Usage) {
+	defer service.CloseResponseBodyGracefully(resp)
+
+	responseBody, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return types.NewOpenAIError(err, types.ErrorCodeReadResponseBodyFailed, http.StatusInternalServerError), nil
+	}
+	// 写入新的 response body
+	service.IOCopyBytesGracefully(c, resp, responseBody)
+
+	var responseData struct {
+		Usage *dto.Usage `json:"usage"`
+	}
+	if err := common.Unmarshal(responseBody, &responseData); err == nil && responseData.Usage != nil {
+		if responseData.Usage.TotalTokens > 0 {
+			usage := responseData.Usage
+			if usage.PromptTokens == 0 {
+				usage.PromptTokens = usage.InputTokens
+			}
+			if usage.CompletionTokens == 0 {
+				usage.CompletionTokens = usage.OutputTokens
+			}
+			return nil, usage
+		}
+	}
+
+	usage := &dto.Usage{}
+	usage.PromptTokens = info.GetEstimatePromptTokens()
+	usage.CompletionTokens = 0
+	usage.TotalTokens = usage.PromptTokens + usage.CompletionTokens
+	return nil, usage
+}
--- a/relay/channel/openai/helper.go
+++ b/relay/channel/openai/helper.go
@@ -172,7 +172,7 @@ func handleLastResponse(lastStreamData string, responseId *string, createAt *int
 	shouldSendLastResp *bool) error {

 	var lastStreamResponse dto.ChatCompletionsStreamResponse
-	if err := json.Unmarshal(common.StringToByteSlice(lastStreamData), &lastStreamResponse); err != nil {
+	if err := common.Unmarshal(common.StringToByteSlice(lastStreamData), &lastStreamResponse); err != nil {
 		return err
 	}

--- a/relay/channel/openai/relay-openai.go
+++ b/relay/channel/openai/relay-openai.go
@@ -1,7 +1,6 @@
 package openai

 import (
-	"encoding/json"
 	"fmt"
 	"io"
 	"net/http"
@@ -122,6 +121,10 @@ func OaiStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Re
 	var usage = &dto.Usage{}
 	var streamItems []string // store stream items
 	var lastStreamData string
+	var secondLastStreamData string // 存储倒数第二个stream data，用于音频模型
+
+	// 检查是否为音频模型
+	isAudioModel := strings.Contains(strings.ToLower(model), "audio")

 	helper.StreamScannerHandler(c, resp, info, func(data string) bool {
 		if lastStreamData != "" {
@@ -131,12 +134,35 @@ func OaiStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Re
 			}
 		}
 		if len(data) > 0 {
+			// 对音频模型，保存倒数第二个stream data
+			if isAudioModel && lastStreamData != "" {
+				secondLastStreamData = lastStreamData
+			}
+
 			lastStreamData = data
 			streamItems = append(streamItems, data)
 		}
 		return true
 	})

+	// 对音频模型，从倒数第二个stream data中提取usage信息
+	if isAudioModel && secondLastStreamData != "" {
+		var streamResp struct {
+			Usage *dto.Usage `json:"usage"`
+		}
+		err := common.Unmarshal([]byte(secondLastStreamData), &streamResp)
+		if err == nil && streamResp.Usage != nil && service.ValidUsage(streamResp.Usage) {
+			usage = streamResp.Usage
+			containStreamUsage = true
+
+			if common.DebugEnabled {
+				logger.LogDebug(c, fmt.Sprintf("Audio model usage extracted from second last SSE: PromptTokens=%d, CompletionTokens=%d, TotalTokens=%d, InputTokens=%d, OutputTokens=%d",
+					usage.PromptTokens, usage.CompletionTokens, usage.TotalTokens,
+					usage.InputTokens, usage.OutputTokens))
+			}
+		}
+	}
+
 	// 处理最后的响应
 	shouldSendLastResp := true
 	if err := handleLastResponse(lastStreamData, &responseId, &createAt, &systemFingerprint, &model, &usage,
@@ -156,7 +182,7 @@ func OaiStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Re
 	}

 	if !containStreamUsage {
-		usage = service.ResponseText2Usage(responseTextBuilder.String(), info.UpstreamModelName, info.PromptTokens)
+		usage = service.ResponseText2Usage(c, responseTextBuilder.String(), info.UpstreamModelName, info.GetEstimatePromptTokens())
 		usage.CompletionTokens += toolCount * 7
 	}

@@ -218,9 +244,9 @@ func OpenaiHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Respo
 			}
 		}
 		simpleResponse.Usage = dto.Usage{
-			PromptTokens:     info.PromptTokens,
+			PromptTokens:     info.GetEstimatePromptTokens(),
 			CompletionTokens: completionTokens,
-			TotalTokens:      info.PromptTokens + completionTokens,
+			TotalTokens:      info.GetEstimatePromptTokens() + completionTokens,
 		}
 		usageModified = true
 	}
@@ -300,68 +326,6 @@ func streamTTSResponse(c *gin.Context, resp *http.Response) {
 	}
 }

-func OpenaiTTSHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) *dto.Usage {
-	// the status code has been judged before, if there is a body reading failure,
-	// it should be regarded as a non-recoverable error, so it should not return err for external retry.
-	// Analogous to nginx's load balancing, it will only retry if it can't be requested or
-	// if the upstream returns a specific status code, once the upstream has already written the header,
-	// the subsequent failure of the response body should be regarded as a non-recoverable error,
-	// and can be terminated directly.
-	defer service.CloseResponseBodyGracefully(resp)
-	usage := &dto.Usage{}
-	usage.PromptTokens = info.PromptTokens
-	usage.TotalTokens = info.PromptTokens
-	for k, v := range resp.Header {
-		c.Writer.Header().Set(k, v[0])
-	}
-	c.Writer.WriteHeader(resp.StatusCode)
-
-	isStreaming := resp.ContentLength == -1 || resp.Header.Get("Content-Length") == ""
-	if isStreaming {
-		streamTTSResponse(c, resp)
-	} else {
-		c.Writer.WriteHeaderNow()
-		_, err := io.Copy(c.Writer, resp.Body)
-		if err != nil {
-			logger.LogError(c, err.Error())
-		}
-	}
-	return usage
-}
-
-func OpenaiSTTHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo, responseFormat string) (*types.NewAPIError, *dto.Usage) {
-	defer service.CloseResponseBodyGracefully(resp)
-
-	responseBody, err := io.ReadAll(resp.Body)
-	if err != nil {
-		return types.NewOpenAIError(err, types.ErrorCodeReadResponseBodyFailed, http.StatusInternalServerError), nil
-	}
-	// 写入新的 response body
-	service.IOCopyBytesGracefully(c, resp, responseBody)
-
-	var responseData struct {
-		Usage *dto.Usage `json:"usage"`
-	}
-	if err := json.Unmarshal(responseBody, &responseData); err == nil && responseData.Usage != nil {
-		if responseData.Usage.TotalTokens > 0 {
-			usage := responseData.Usage
-			if usage.PromptTokens == 0 {
-				usage.PromptTokens = usage.InputTokens
-			}
-			if usage.CompletionTokens == 0 {
-				usage.CompletionTokens = usage.OutputTokens
-			}
-			return nil, usage
-		}
-	}
-
-	usage := &dto.Usage{}
-	usage.PromptTokens = info.PromptTokens
-	usage.CompletionTokens = 0
-	usage.TotalTokens = usage.PromptTokens + usage.CompletionTokens
-	return nil, usage
-}
-
 func OpenaiRealtimeHandler(c *gin.Context, info *relaycommon.RelayInfo) (*types.NewAPIError, *dto.RealtimeUsage) {
 	if info == nil || info.ClientWs == nil || info.TargetWs == nil {
 		return types.NewError(fmt.Errorf("invalid websocket connection"), types.ErrorCodeBadResponse), nil
@@ -632,7 +596,7 @@ func applyUsagePostProcessing(info *relaycommon.RelayInfo, usage *dto.Usage, res
 		if usage.PromptTokensDetails.CachedTokens == 0 && usage.PromptCacheHitTokens != 0 {
 			usage.PromptTokensDetails.CachedTokens = usage.PromptCacheHitTokens
 		}
-	case constant.ChannelTypeZhipu_v4:
+	case constant.ChannelTypeZhipu_v4, constant.ChannelTypeMoonshot:
 		if usage.PromptTokensDetails.CachedTokens == 0 {
 			if usage.InputTokensDetails != nil && usage.InputTokensDetails.CachedTokens > 0 {
 				usage.PromptTokensDetails.CachedTokens = usage.InputTokensDetails.CachedTokens
@@ -660,7 +624,7 @@ func extractCachedTokensFromBody(body []byte) (int, bool) {
 		} `json:"usage"`
 	}

-	if err := json.Unmarshal(body, &payload); err != nil {
+	if err := common.Unmarshal(body, &payload); err != nil {
 		return 0, false
 	}

--- a/relay/channel/openai/relay_responses.go
+++ b/relay/channel/openai/relay_responses.go
@@ -141,7 +141,7 @@ func OaiResponsesStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp
 	}

 	if usage.PromptTokens == 0 && usage.CompletionTokens != 0 {
-		usage.PromptTokens = info.PromptTokens
+		usage.PromptTokens = info.GetEstimatePromptTokens()
 	}

 	usage.TotalTokens = usage.PromptTokens + usage.CompletionTokens
--- a/Show More
+++ b/Show More