feat: add AzureNoRemoveDotTime constant and update channel handling #1044

- Introduced a new constant `AzureNoRemoveDotTime` in `constant/azure.go` to manage model name formatting for channels created after May 10, 2025.
- Updated `distributor.go` to set `channel_create_time` in the context.
- Modified `adaptor.go` to conditionally remove dots from model names based on the channel creation time.
- Enhanced `relay_info.go` to include `ChannelCreateTime` in the `RelayInfo` struct.
- Updated English localization files to reflect changes in model name handling for new channels.
fix: update OpenAI request handling to include 'o1-preview' model support #1029
2026-03-30 19:23:00 +00:00 · 2025-05-08 22:39:55 +08:00 · 2025-05-08 21:34:31 +08:00 · 2025-05-08 01:21:34 +08:00 · 2025-05-07 22:06:51 +08:00 · 2025-05-07 20:56:36 +08:00
257 changed files with 22386 additions and 10144 deletions
--- a/.env.example
+++ b/.env.example
@@ -50,10 +50,6 @@
 # CHANNEL_TEST_FREQUENCY=10
 # 生成默认token
 # GENERATE_DEFAULT_TOKEN=false
-# Gemini 安全设置
-# GEMINI_SAFETY_SETTING=BLOCK_NONE
-# Gemini版本设置
-# GEMINI_MODEL_MAP=gemini-1.0-pro:v1
 # Cohere 安全设置
 # COHERE_SAFETY_SETTING=NONE
 # 是否统计图片token
--- a/.github/workflows/docker-image-amd64.yml
+++ b/.github/workflows/docker-image-amd64.yml
@@ -18,20 +18,20 @@ jobs:
      contents: read
    steps:
      - name: Check out the repo
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4

      - name: Save version info
        run: |
          git describe --tags > VERSION 

      - name: Log in to Docker Hub
-        uses: docker/login-action@v2
+        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Log in to the Container registry
-        uses: docker/login-action@v2
+        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
@@ -39,14 +39,14 @@ jobs:

      - name: Extract metadata (tags, labels) for Docker
        id: meta
-        uses: docker/metadata-action@v4
+        uses: docker/metadata-action@v5
        with:
          images: |
            calciumion/new-api
            ghcr.io/${{ github.repository }}

      - name: Build and push Docker images
-        uses: docker/build-push-action@v3
+        uses: docker/build-push-action@v5
        with:
          context: .
          push: true
--- a/.github/workflows/docker-image-arm64.yml
+++ b/.github/workflows/docker-image-arm64.yml
@@ -4,7 +4,6 @@ on:
  push:
    tags:
      - '*'
-      - '!*-alpha*'
  workflow_dispatch:
    inputs:
      name:
@@ -19,26 +18,26 @@ jobs:
      contents: read
    steps:
      - name: Check out the repo
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4

      - name: Save version info
        run: |
          git describe --tags > VERSION 

      - name: Set up QEMU
-        uses: docker/setup-qemu-action@v2
+        uses: docker/setup-qemu-action@v3

      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v2
+        uses: docker/setup-buildx-action@v3

      - name: Log in to Docker Hub
-        uses: docker/login-action@v2
+        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Log in to the Container registry
-        uses: docker/login-action@v2
+        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
@@ -46,14 +45,14 @@ jobs:

      - name: Extract metadata (tags, labels) for Docker
        id: meta
-        uses: docker/metadata-action@v4
+        uses: docker/metadata-action@v5
        with:
          images: |
            calciumion/new-api
            ghcr.io/${{ github.repository }}

      - name: Build and push Docker images
-        uses: docker/build-push-action@v3
+        uses: docker/build-push-action@v5
        with:
          context: .
          platforms: linux/amd64,linux/arm64
--- a/.gitignore
+++ b/.gitignore
@@ -9,4 +9,5 @@ logs
 web/dist
 .env
 one-api
-.DS_Store
+.DS_Store
+tiktoken_cache
--- a/README.en.md
+++ b/README.en.md
@@ -1,10 +1,13 @@
+<p align="right">
+   <a href="./README.md">中文</a> | <strong>English</strong>
+</p>
 <div align="center">

 ![new-api](/web/public/logo.png)

 # New API

-🍥 Next Generation LLM Gateway and AI Asset Management System
+🍥 Next-Generation Large Model Gateway and AI Asset Management System

 <a href="https://trendshift.io/repositories/8227" target="_blank"><img src="https://trendshift.io/api/badge/repositories/8227" alt="Calcium-Ion%2Fnew-api | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>

@@ -33,161 +36,155 @@
 > This is an open-source project developed based on [One API](https://github.com/songquanpeng/one-api)

 > [!IMPORTANT]  
-> - Users must comply with OpenAI's [Terms of Use](https://openai.com/policies/terms-of-use) and relevant laws and regulations. Not to be used for illegal purposes.
-> - This project is for personal learning only. Stability is not guaranteed, and no technical support is provided.
+> - This project is for personal learning purposes only, with no guarantee of stability or technical support.
+> - Users must comply with OpenAI's [Terms of Use](https://openai.com/policies/terms-of-use) and **applicable laws and regulations**, and must not use it for illegal purposes.
+> - According to the [《Interim Measures for the Management of Generative Artificial Intelligence Services》](http://www.cac.gov.cn/2023-07/13/c_1690898327029107.htm), please do not provide any unregistered generative AI services to the public in China.
+
+## 📚 Documentation
+
+For detailed documentation, please visit our official Wiki: [https://docs.newapi.pro/](https://docs.newapi.pro/)

 ## ✨ Key Features

-1. 🎨 New UI interface (some interfaces pending update)
-2. 🌍 Multi-language support (work in progress)
-3. 🎨 Added [Midjourney-Proxy(Plus)](https://github.com/novicezk/midjourney-proxy) interface support, [Integration Guide](Midjourney.md)
-4. 💰 Online recharge support, configurable in system settings:
-    - [x] EasyPay
-5. 🔍 Query usage quota by key:
-    - Works with [neko-api-key-tool](https://github.com/Calcium-Ion/neko-api-key-tool)
-6. 📑 Configurable items per page in pagination
-7. 🔄 Compatible with original One API database (one-api.db)
-8. 💵 Support per-request model pricing, configurable in System Settings - Operation Settings
-9. ⚖️ Support channel **weighted random** selection
-10. 📈 Data dashboard (console)
-11. 🔒 Configurable model access per token
-12. 🤖 Telegram authorization login support:
-    1. System Settings - Configure Login Registration - Allow Telegram Login
-    2. Send /setdomain command to [@Botfather](https://t.me/botfather)
-    3. Select your bot, then enter http(s)://your-website/login
-    4. Telegram Bot name is the bot username without @
-13. 🎵 Added [Suno API](https://github.com/Suno-API/Suno-API) interface support, [Integration Guide](Suno.md)
-14. 🔄 Support for Rerank models, compatible with Cohere and Jina, can integrate with Dify, [Integration Guide](Rerank.md)
-15. ⚡ **[OpenAI Realtime API](https://platform.openai.com/docs/guides/realtime/integration)** - Support for OpenAI's Realtime API, including Azure channels
-16. 🧠 Support for setting reasoning effort through model name suffix:
-    - Add suffix `-high` to set high reasoning effort (e.g., `o3-mini-high`)
-    - Add suffix `-medium` to set medium reasoning effort
-    - Add suffix `-low` to set low reasoning effort
+New API offers a wide range of features. Please refer to [Features Introduction](https://docs.newapi.pro/wiki/features-introduction) for details:
+
+1. 🎨 Brand new UI interface
+2. 🌍 Multi-language support
+3. 💰 Online recharge functionality (EasyPay)
+4. 🔍 Support for querying usage quotas with keys (works with [neko-api-key-tool](https://github.com/Calcium-Ion/neko-api-key-tool))
+5. 🔄 Compatible with the original One API database
+6. 💵 Support for pay-per-use model pricing
+7. ⚖️ Support for weighted random channel selection
+8. 📈 Data dashboard (console)
+9. 🔒 Token grouping and model restrictions
+10. 🤖 Support for more authorization login methods (LinuxDO, Telegram, OIDC)
+11. 🔄 Support for Rerank models (Cohere and Jina), [API Documentation](https://docs.newapi.pro/api/jinaai-rerank)
+12. ⚡ Support for OpenAI Realtime API (including Azure channels), [API Documentation](https://docs.newapi.pro/api/openai-realtime)
+13. ⚡ Support for Claude Messages format, [API Documentation](https://docs.newapi.pro/api/anthropic-chat)
+14. Support for entering the chat interface via the `/chat2link` route
+15. 🧠 Support for setting reasoning effort through model name suffixes:
+    1. OpenAI o-series models
+        - Add `-high` suffix for high reasoning effort (e.g.: `o3-mini-high`)
+        - Add `-medium` suffix for medium reasoning effort (e.g.: `o3-mini-medium`)
+        - Add `-low` suffix for low reasoning effort (e.g.: `o3-mini-low`)
+    2. Claude thinking models
+        - Add `-thinking` suffix to enable thinking mode (e.g.: `claude-3-7-sonnet-20250219-thinking`)
+16. 🔄 Thinking-to-content functionality
+17. 🔄 Model rate limiting for users
+18. 💰 Cache billing support, which allows billing at a set ratio when cache is hit:
+    1. Set the `Prompt Cache Ratio` option in `System Settings-Operation Settings`
+    2. Set `Prompt Cache Ratio` in the channel, range 0-1, e.g., setting to 0.5 means billing at 50% when cache is hit
+    3. Supported channels:
+        - [x] OpenAI
+        - [x] Azure
+        - [x] DeepSeek
+        - [x] Claude

 ## Model Support
-This version additionally supports:
-1. Third-party model **gps** (gpt-4-gizmo-*)
-2. [Midjourney-Proxy(Plus)](https://github.com/novicezk/midjourney-proxy) interface, [Integration Guide](Midjourney.md)
-3. Custom channels with full API URL support
-4. [Suno API](https://github.com/Suno-API/Suno-API) interface, [Integration Guide](Suno.md)
-5. Rerank models, supporting [Cohere](https://cohere.ai/) and [Jina](https://jina.ai/), [Integration Guide](Rerank.md)
-6. Dify

-You can add custom models gpt-4-gizmo-* in channels. These are third-party models and cannot be called with official OpenAI keys.
+This version supports multiple models. Please refer to [API Documentation-Relay Interface](https://docs.newapi.pro/api) for details:

-## Additional Configurations Beyond One API
- `GENERATE_DEFAULT_TOKEN`: Generate initial token for new users, default `false`
- `STREAMING_TIMEOUT`: Set streaming response timeout, default 60 seconds
- `DIFY_DEBUG`: Output workflow and node info to client for Dify channel, default `true`
- `FORCE_STREAM_OPTION`: Override client stream_options parameter, default `true`
- `GET_MEDIA_TOKEN`: Calculate image tokens, default `true`
- `GET_MEDIA_TOKEN_NOT_STREAM`: Calculate image tokens in non-stream mode, default `true`
- `UPDATE_TASK`: Update async tasks (Midjourney, Suno), default `true`
- `GEMINI_MODEL_MAP`: Specify Gemini model versions (v1/v1beta), format: "model:version", comma-separated
- `COHERE_SAFETY_SETTING`: Cohere model [safety settings](https://docs.cohere.com/docs/safety-modes#overview), options: `NONE`, `CONTEXTUAL`, `STRICT`, default `NONE`
- `GEMINI_VISION_MAX_IMAGE_NUM`: Gemini model maximum image number, default `16`, set to `-1` to disable
- `MAX_FILE_DOWNLOAD_MB`: Maximum file download size in MB, default `20`
- `CRYPTO_SECRET`: Encryption key for encrypting database content
- `AZURE_DEFAULT_API_VERSION`: Azure channel default API version, if not specified in channel settings, use this version, default `2024-12-01-preview`
- `NOTIFICATION_LIMIT_DURATION_MINUTE`: Duration of notification limit in minutes, default `10`
- `NOTIFY_LIMIT_COUNT`: Maximum number of user notifications in the specified duration, default `2`
+1. Third-party models **gpts** (gpt-4-gizmo-*)
+2. Third-party channel [Midjourney-Proxy(Plus)](https://github.com/novicezk/midjourney-proxy) interface, [API Documentation](https://docs.newapi.pro/api/midjourney-proxy-image)
+3. Third-party channel [Suno API](https://github.com/Suno-API/Suno-API) interface, [API Documentation](https://docs.newapi.pro/api/suno-music)
+4. Custom channels, supporting full call address input
+5. Rerank models ([Cohere](https://cohere.ai/) and [Jina](https://jina.ai/)), [API Documentation](https://docs.newapi.pro/api/jinaai-rerank)
+6. Claude Messages format, [API Documentation](https://docs.newapi.pro/api/anthropic-chat)
+7. Dify, currently only supports chatflow
+
+## Environment Variable Configuration
+
+For detailed configuration instructions, please refer to [Installation Guide-Environment Variables Configuration](https://docs.newapi.pro/installation/environment-variables):
+
+- `GENERATE_DEFAULT_TOKEN`: Whether to generate initial tokens for newly registered users, default is `false`
+- `STREAMING_TIMEOUT`: Streaming response timeout, default is 60 seconds
+- `DIFY_DEBUG`: Whether to output workflow and node information for Dify channels, default is `true`
+- `FORCE_STREAM_OPTION`: Whether to override client stream_options parameter, default is `true`
+- `GET_MEDIA_TOKEN`: Whether to count image tokens, default is `true`
+- `GET_MEDIA_TOKEN_NOT_STREAM`: Whether to count image tokens in non-streaming cases, default is `true`
+- `UPDATE_TASK`: Whether to update asynchronous tasks (Midjourney, Suno), default is `true`
+- `COHERE_SAFETY_SETTING`: Cohere model safety settings, options are `NONE`, `CONTEXTUAL`, `STRICT`, default is `NONE`
+- `GEMINI_VISION_MAX_IMAGE_NUM`: Maximum number of images for Gemini models, default is `16`
+- `MAX_FILE_DOWNLOAD_MB`: Maximum file download size in MB, default is `20`
+- `CRYPTO_SECRET`: Encryption key used for encrypting database content
+- `AZURE_DEFAULT_API_VERSION`: Azure channel default API version, default is `2024-12-01-preview`
+- `NOTIFICATION_LIMIT_DURATION_MINUTE`: Notification limit duration, default is `10` minutes
+- `NOTIFY_LIMIT_COUNT`: Maximum number of user notifications within the specified duration, default is `2`

 ## Deployment

+For detailed deployment guides, please refer to [Installation Guide-Deployment Methods](https://docs.newapi.pro/installation):
+
 > [!TIP]
-> Latest Docker image: `calciumion/new-api:latest`  
-> Default account: root, password: 123456
+> Latest Docker image: `calciumion/new-api:latest`

-### Multi-Server Deployment
- Must set `SESSION_SECRET` environment variable, otherwise login state will not be consistent across multiple servers.
- If using a public Redis, must set `CRYPTO_SECRET` environment variable, otherwise Redis content will not be able to be obtained in multi-server deployment.
+### Multi-machine Deployment Considerations
+- Environment variable `SESSION_SECRET` must be set, otherwise login status will be inconsistent across multiple machines
+- If sharing Redis, `CRYPTO_SECRET` must be set, otherwise Redis content cannot be accessed across multiple machines

-### Requirements
- Local database (default): SQLite (Docker deployment must mount `/data` directory)
- Remote database: MySQL >= 5.7.8, PgSQL >= 9.6
+### Deployment Requirements
+- Local database (default): SQLite (Docker deployment must mount the `/data` directory)
+- Remote database: MySQL version >= 5.7.8, PgSQL version >= 9.6

-### Deployment with BT Panel
-Install BT Panel (**version 9.2.0** or above) from [BT Panel Official Website](https://www.bt.cn/new/download.html), choose the stable version script to download and install.  
-After installation, log in to BT Panel and click Docker in the menu bar. First-time access will prompt to install Docker service. Click Install Now and follow the prompts to complete installation.  
-After installation, find **New-API** in the app store, click install, configure basic options to complete installation.  
-[Pictorial Guide](BT.md)
+### Deployment Methods

-### Docker Deployment
+#### Using BaoTa Panel Docker Feature
+Install BaoTa Panel (version **9.2.0** or above), find **New-API** in the application store and install it.
+[Tutorial with images](./docs/BT.md)

-### Using Docker Compose (Recommended)
+#### Using Docker Compose (Recommended)
 ```shell
-# Clone project
+# Download the project
 git clone https://github.com/Calcium-Ion/new-api.git
 cd new-api
 # Edit docker-compose.yml as needed
-# nano docker-compose.yml
-# vim docker-compose.yml
 # Start
 docker-compose up -d
 ```

-#### Update Version
+#### Using Docker Image Directly
 ```shell
-docker-compose pull
-docker-compose up -d
-```
-
-### Direct Docker Image Usage
-```shell
-# SQLite deployment:
+# Using SQLite
 docker run --name new-api -d --restart always -p 3000:3000 -e TZ=Asia/Shanghai -v /home/ubuntu/data/new-api:/data calciumion/new-api:latest

-# MySQL deployment (add -e SQL_DSN="root:123456@tcp(localhost:3306)/oneapi"), modify database connection parameters as needed
-# Example:
+# Using MySQL
 docker run --name new-api -d --restart always -p 3000:3000 -e SQL_DSN="root:123456@tcp(localhost:3306)/oneapi" -e TZ=Asia/Shanghai -v /home/ubuntu/data/new-api:/data calciumion/new-api:latest
 ```

-#### Update Version
-```shell
-# Pull the latest image
-docker pull calciumion/new-api:latest
-# Stop and remove the old container
-docker stop new-api
-docker rm new-api
-# Run the new container with the same parameters as before
-docker run --name new-api -d --restart always -p 3000:3000 -e TZ=Asia/Shanghai -v /home/ubuntu/data/new-api:/data calciumion/new-api:latest
-```
+## Channel Retry and Cache
+Channel retry functionality has been implemented; you can set the number of retries in `Settings->Operation Settings->General Settings`. It is **recommended to enable caching**.

-Alternatively, you can use Watchtower for automatic updates (not recommended, may cause database incompatibility):
-```shell
-docker run --rm -v /var/run/docker.sock:/var/run/docker.sock containrrr/watchtower -cR
-```
+### Cache Configuration Method
+1. `REDIS_CONN_STRING`: Set Redis as cache
+2. `MEMORY_CACHE_ENABLED`: Enable memory cache (no need to set manually if Redis is set)

-## Channel Retry
-Channel retry is implemented, configurable in `Settings->Operation Settings->General Settings`. **Cache recommended**.  
-First retry uses same priority, second retry uses next priority, and so on.
+## API Documentation

-### Cache Configuration
-1. `REDIS_CONN_STRING`: Use Redis as cache
-    + Example: `REDIS_CONN_STRING=redis://default:redispw@localhost:49153`
-2. `MEMORY_CACHE_ENABLED`: Enable memory cache, default `false`
-    + Example: `MEMORY_CACHE_ENABLED=true`
+For detailed API documentation, please refer to [API Documentation](https://docs.newapi.pro/api):

-### Why Some Errors Don't Retry
-Error codes 400, 504, 524 won't retry
-### To Enable Retry for 400
-In `Channel->Edit`, set `Status Code Override` to:
-```json
-{
-  "400": "500"
-}
-```
-
-## Integration Guides
- [Midjourney Integration](Midjourney.md)
- [Suno Integration](Suno.md)
+- [Chat API](https://docs.newapi.pro/api/openai-chat)
+- [Image API](https://docs.newapi.pro/api/openai-image)
+- [Rerank API](https://docs.newapi.pro/api/jinaai-rerank)
+- [Realtime API](https://docs.newapi.pro/api/openai-realtime)
+- [Claude Chat API (messages)](https://docs.newapi.pro/api/anthropic-chat)

 ## Related Projects
 - [One API](https://github.com/songquanpeng/one-api): Original project
 - [Midjourney-Proxy](https://github.com/novicezk/midjourney-proxy): Midjourney interface support
- [chatnio](https://github.com/Deeptrain-Community/chatnio): Next-gen AI B/C solution
- [neko-api-key-tool](https://github.com/Calcium-Ion/neko-api-key-tool): Query usage quota by key
+- [chatnio](https://github.com/Deeptrain-Community/chatnio): Next-generation AI one-stop B/C-end solution
+- [neko-api-key-tool](https://github.com/Calcium-Ion/neko-api-key-tool): Query usage quota with key
+
+Other projects based on New API:
+- [new-api-horizon](https://github.com/Calcium-Ion/new-api-horizon): High-performance optimized version of New API
+- [VoAPI](https://github.com/VoAPI/VoAPI): Frontend beautified version based on New API
+
+## Help and Support
+
+If you have any questions, please refer to [Help and Support](https://docs.newapi.pro/support):
+- [Community Interaction](https://docs.newapi.pro/support/community-interaction)
+- [Issue Feedback](https://docs.newapi.pro/support/feedback-issues)
+- [FAQ](https://docs.newapi.pro/support/faq)

 ## 🌟 Star History

-[![Star History Chart](https://api.star-history.com/svg?repos=Calcium-Ion/new-api&type=Date)](https://star-history.com/#Calcium-Ion/new-api&Date)
+[![Star History Chart](https://api.star-history.com/svg?repos=Calcium-Ion/new-api&type=Date)](https://star-history.com/#Calcium-Ion/new-api&Date)
--- a/README.md
+++ b/README.md
@@ -7,7 +7,6 @@

 # New API

-
 🍥新一代大模型网关与AI资产管理系统

 <a href="https://trendshift.io/repositories/8227" target="_blank"><img src="https://trendshift.io/api/badge/repositories/8227" alt="Calcium-Ion%2Fnew-api | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
@@ -37,183 +36,154 @@
 > 本项目为开源项目，在[One API](https://github.com/songquanpeng/one-api)的基础上进行二次开发

 > [!IMPORTANT]  
-> - 使用者必须在遵循 OpenAI 的[使用条款](https://openai.com/policies/terms-of-use)以及**法律法规**的情况下使用，不得用于非法用途。
 > - 本项目仅供个人学习使用，不保证稳定性，且不提供任何技术支持。
+> - 使用者必须在遵循 OpenAI 的[使用条款](https://openai.com/policies/terms-of-use)以及**法律法规**的情况下使用，不得用于非法用途。
 > - 根据[《生成式人工智能服务管理暂行办法》](http://www.cac.gov.cn/2023-07/13/c_1690898327029107.htm)的要求，请勿对中国地区公众提供一切未经备案的生成式人工智能服务。

+## 📚 文档
+
+详细文档请访问我们的官方Wiki：[https://docs.newapi.pro/](https://docs.newapi.pro/)
+
 ## ✨ 主要特性

-1. 🎨 全新的UI界面（部分界面还待更新）
-2. 🌍 多语言支持（待完善）
-3. 🎨 添加[Midjourney-Proxy(Plus)](https://github.com/novicezk/midjourney-proxy)接口支持，[对接文档](Midjourney.md)
-4. 💰 支持在线充值功能，可在系统设置中设置：
-    - [x] 易支付
-5. 🔍 支持用key查询使用额度：
-    - 配合项目[neko-api-key-tool](https://github.com/Calcium-Ion/neko-api-key-tool)可实现用key查询使用
-6. 📑 分页支持选择每页显示数量
-7. 🔄 兼容原版One API的数据库，可直接使用原版数据库（one-api.db）
-8. 💵 支持模型按次数收费，可在 系统设置-运营设置 中设置
-9. ⚖️ 支持渠道**加权随机**
-10. 📈 数据看板（控制台）
-11. 🔒 可设置令牌能调用的模型
-12. 🤖 支持Telegram授权登录：
-    1. 系统设置-配置登录注册-允许通过Telegram登录
-    2. 对[@Botfather](https://t.me/botfather)输入指令/setdomain
-    3. 选择你的bot，然后输入http(s)://你的网站地址/login
-    4. Telegram Bot 名称是bot username 去掉@后的字符串
-13. 🎵 添加 [Suno API](https://github.com/Suno-API/Suno-API)接口支持，[对接文档](Suno.md)
-14. 🔄 支持Rerank模型，目前兼容Cohere和Jina，可接入Dify，[对接文档](Rerank.md)
-15. ⚡ **[OpenAI Realtime API](https://platform.openai.com/docs/guides/realtime/integration)** - 支持OpenAI的Realtime API，支持Azure渠道
-16. 支持使用路由/chat2link 进入聊天界面
-17. 🧠 支持通过模型名称后缀设置 reasoning effort：
-    - 添加后缀 `-high` 设置为 high reasoning effort (例如: `o3-mini-high`)
-    - 添加后缀 `-medium` 设置为 medium reasoning effort (例如: `o3-mini-medium`)
-    - 添加后缀 `-low` 设置为 low reasoning effort (例如: `o3-mini-low`)
+New API提供了丰富的功能，详细特性请参考[特性说明](https://docs.newapi.pro/wiki/features-introduction)：
+
+1. 🎨 全新的UI界面
+2. 🌍 多语言支持
+3. 💰 支持在线充值功能（易支付）
+4. 🔍 支持用key查询使用额度（配合[neko-api-key-tool](https://github.com/Calcium-Ion/neko-api-key-tool)）
+5. 🔄 兼容原版One API的数据库
+6. 💵 支持模型按次数收费
+7. ⚖️ 支持渠道加权随机
+8. 📈 数据看板（控制台）
+9. 🔒 令牌分组、模型限制
+10. 🤖 支持更多授权登录方式（LinuxDO、Telegram、OIDC）
+11. 🔄 支持Rerank模型（Cohere和Jina），[接口文档](https://docs.newapi.pro/api/jinaai-rerank)
+12. ⚡ 支持OpenAI Realtime API（包括Azure渠道），[接口文档](https://docs.newapi.pro/api/openai-realtime)
+13. ⚡ 支持Claude Messages 格式，[接口文档](https://docs.newapi.pro/api/anthropic-chat)
+14. 支持使用路由/chat2link进入聊天界面
+15. 🧠 支持通过模型名称后缀设置 reasoning effort：
+    1. OpenAI o系列模型
+        - 添加后缀 `-high` 设置为 high reasoning effort (例如: `o3-mini-high`)
+        - 添加后缀 `-medium` 设置为 medium reasoning effort (例如: `o3-mini-medium`)
+        - 添加后缀 `-low` 设置为 low reasoning effort (例如: `o3-mini-low`)
+    2. Claude 思考模型
+        - 添加后缀 `-thinking` 启用思考模式 (例如: `claude-3-7-sonnet-20250219-thinking`)
+16. 🔄 思考转内容功能
+17. 🔄 针对用户的模型限流功能
+18. 💰 缓存计费支持，开启后可以在缓存命中时按照设定的比例计费：
+    1. 在 `系统设置-运营设置` 中设置 `提示缓存倍率` 选项
+    2. 在渠道中设置 `提示缓存倍率`，范围 0-1，例如设置为 0.5 表示缓存命中时按照 50% 计费
+    3. 支持的渠道：
+        - [x] OpenAI
+        - [x] Azure
+        - [x] DeepSeek
+        - [x] Claude

 ## 模型支持
-此版本额外支持以下模型：
-1. 第三方模型 **gps** （gpt-4-gizmo-*）
-2. [Midjourney-Proxy(Plus)](https://github.com/novicezk/midjourney-proxy)接口，[对接文档](Midjourney.md)
-3. 自定义渠道，支持填入完整调用地址
-4. [Suno API](https://github.com/Suno-API/Suno-API) 接口，[对接文档](Suno.md)
-5. Rerank模型，目前支持[Cohere](https://cohere.ai/)和[Jina](https://jina.ai/)，[对接文档](Rerank.md)
-6. Dify

-您可以在渠道中添加自定义模型gpt-4-gizmo-*，此模型并非OpenAI官方模型，而是第三方模型，使用官方key无法调用。
+此版本支持多种模型，详情请参考[接口文档-中继接口](https://docs.newapi.pro/api)：

-## 比原版One API多出的配置
- `GENERATE_DEFAULT_TOKEN`：是否为新注册用户生成初始令牌，默认为 `false`。
- `STREAMING_TIMEOUT`：设置流式一次回复的超时时间，默认为 60 秒。
- `DIFY_DEBUG`：设置 Dify 渠道是否输出工作流和节点信息到客户端，默认为 `true`。
- `FORCE_STREAM_OPTION`：是否覆盖客户端stream_options参数，请求上游返回流模式usage，默认为 `true`，建议开启，不影响客户端传入stream_options参数返回结果。
- `GET_MEDIA_TOKEN`：是否统计图片token，默认为 `true`，关闭后将不再在本地计算图片token，可能会导致和上游计费不同，此项覆盖 `GET_MEDIA_TOKEN_NOT_STREAM` 选项作用。
- `GET_MEDIA_TOKEN_NOT_STREAM`：是否在非流（`stream=false`）情况下统计图片token，默认为 `true`。
- `UPDATE_TASK`：是否更新异步任务（Midjourney、Suno），默认为 `true`，关闭后将不会更新任务进度。
- `GEMINI_MODEL_MAP`：Gemini模型指定版本(v1/v1beta)，使用"模型:版本"指定，","分隔，例如：-e GEMINI_MODEL_MAP="gemini-1.5-pro-latest:v1beta,gemini-1.5-pro-001:v1beta"，为空则使用默认配置(v1beta)
- `COHERE_SAFETY_SETTING`：Cohere模型[安全设置](https://docs.cohere.com/docs/safety-modes#overview)，可选值为 `NONE`, `CONTEXTUAL`, `STRICT`，默认为 `NONE`。
- `GEMINI_VISION_MAX_IMAGE_NUM`：Gemini模型最大图片数量，默认为 `16`，设置为 `-1` 则不限制。
- `MAX_FILE_DOWNLOAD_MB`: 最大文件下载大小，单位 MB，默认为 `20`。
- `CRYPTO_SECRET`：加密密钥，用于加密数据库内容。
- `AZURE_DEFAULT_API_VERSION`：Azure渠道默认API版本，如果渠道设置中未指定API版本，则使用此版本，默认为 `2024-12-01-preview`
- `NOTIFICATION_LIMIT_DURATION_MINUTE`：通知限制的持续时间（分钟），默认为 `10`。
- `NOTIFY_LIMIT_COUNT`：用户通知在指定持续时间内的最大数量，默认为 `2`。
+1. 第三方模型 **gpts** （gpt-4-gizmo-*）
+2. 第三方渠道[Midjourney-Proxy(Plus)](https://github.com/novicezk/midjourney-proxy)接口，[接口文档](https://docs.newapi.pro/api/midjourney-proxy-image)
+3. 第三方渠道[Suno API](https://github.com/Suno-API/Suno-API)接口，[接口文档](https://docs.newapi.pro/api/suno-music)
+4. 自定义渠道，支持填入完整调用地址
+5. Rerank模型（[Cohere](https://cohere.ai/)和[Jina](https://jina.ai/)），[接口文档](https://docs.newapi.pro/api/jinaai-rerank)
+6. Claude Messages 格式，[接口文档](https://docs.newapi.pro/api/anthropic-chat)
+7. Dify，当前仅支持chatflow
+
+## 环境变量配置
+
+详细配置说明请参考[安装指南-环境变量配置](https://docs.newapi.pro/installation/environment-variables)：
+
+- `GENERATE_DEFAULT_TOKEN`：是否为新注册用户生成初始令牌，默认为 `false`
+- `STREAMING_TIMEOUT`：流式回复超时时间，默认60秒
+- `DIFY_DEBUG`：Dify渠道是否输出工作流和节点信息，默认 `true`
+- `FORCE_STREAM_OPTION`：是否覆盖客户端stream_options参数，默认 `true`
+- `GET_MEDIA_TOKEN`：是否统计图片token，默认 `true`
+- `GET_MEDIA_TOKEN_NOT_STREAM`：非流情况下是否统计图片token，默认 `true`
+- `UPDATE_TASK`：是否更新异步任务（Midjourney、Suno），默认 `true`
+- `COHERE_SAFETY_SETTING`：Cohere模型安全设置，可选值为 `NONE`, `CONTEXTUAL`, `STRICT`，默认 `NONE`
+- `GEMINI_VISION_MAX_IMAGE_NUM`：Gemini模型最大图片数量，默认 `16`
+- `MAX_FILE_DOWNLOAD_MB`: 最大文件下载大小，单位MB，默认 `20`
+- `CRYPTO_SECRET`：加密密钥，用于加密数据库内容
+- `AZURE_DEFAULT_API_VERSION`：Azure渠道默认API版本，默认 `2024-12-01-preview`
+- `NOTIFICATION_LIMIT_DURATION_MINUTE`：通知限制持续时间，默认 `10`分钟
+- `NOTIFY_LIMIT_COUNT`：用户通知在指定持续时间内的最大数量，默认 `2`

 ## 部署

+详细部署指南请参考[安装指南-部署方式](https://docs.newapi.pro/installation)：
+
 > [!TIP]
 > 最新版Docker镜像：`calciumion/new-api:latest`  
-> 默认账号root 密码123456

-### 多机部署
- 必须设置环境变量 `SESSION_SECRET`，否则会导致多机部署时登录状态不一致。
- 如果公用Redis，必须设置 `CRYPTO_SECRET`，否则会导致多机部署时Redis内容无法获取。
+### 多机部署注意事项
+- 必须设置环境变量 `SESSION_SECRET`，否则会导致多机部署时登录状态不一致
+- 如果公用Redis，必须设置 `CRYPTO_SECRET`，否则会导致多机部署时Redis内容无法获取

 ### 部署要求
- 本地数据库（默认）：SQLite（Docker 部署默认使用 SQLite，必须挂载 `/data` 目录到宿主机）
- 远程数据库：MySQL 版本 >= 5.7.8，PgSQL 版本 >= 9.6
+- 本地数据库（默认）：SQLite（Docker部署必须挂载`/data`目录）
+- 远程数据库：MySQL版本 >= 5.7.8，PgSQL版本 >= 9.6

-### 使用宝塔面板Docker功能部署
-安装宝塔面板 (**9.2.0版本**及以上)，前往 [宝塔面板](https://www.bt.cn/new/download.html) 官网，选择正式版的脚本下载安装  
-安装后登录宝塔面板，在菜单栏中点击 Docker ，首次进入会提示安装 Docker 服务，点击立即安装，按提示完成安装  
-安装完成后在应用商店中找到 **New-API** ，点击安装，配置基本选项 即可完成安装  
-[图文教程](BT.md)
+### 部署方式

-### 基于 Docker 进行部署
+#### 使用宝塔面板Docker功能部署
+安装宝塔面板（**9.2.0版本**及以上），在应用商店中找到**New-API**安装即可。
+[图文教程](./docs/BT.md)

-> [!TIP]
-> 默认管理员账号root 密码123456
-
-### 使用 Docker Compose 部署（推荐）
+#### 使用Docker Compose部署（推荐）
 ```shell
 # 下载项目
 git clone https://github.com/Calcium-Ion/new-api.git
 cd new-api
-# 按需编辑 docker-compose.yml
-# nano docker-compose.yml
-# vim docker-compose.yml
+# 按需编辑docker-compose.yml
 # 启动
 docker-compose up -d
 ```

-#### 更新版本
+#### 直接使用Docker镜像
 ```shell
-docker-compose pull
-docker-compose up -d
-```
-
-### 直接使用 Docker 镜像
-```shell
-# 使用 SQLite 的部署命令：
+# 使用SQLite
 docker run --name new-api -d --restart always -p 3000:3000 -e TZ=Asia/Shanghai -v /home/ubuntu/data/new-api:/data calciumion/new-api:latest

-# 使用 MySQL 的部署命令，在上面的基础上添加 `-e SQL_DSN="root:123456@tcp(localhost:3306)/oneapi"`，请自行修改数据库连接参数。
-# 例如：
+# 使用MySQL
 docker run --name new-api -d --restart always -p 3000:3000 -e SQL_DSN="root:123456@tcp(localhost:3306)/oneapi" -e TZ=Asia/Shanghai -v /home/ubuntu/data/new-api:/data calciumion/new-api:latest
 ```

-#### 更新版本
-```shell
-# 拉取最新镜像
-docker pull calciumion/new-api:latest
-# 停止并删除旧容器
-docker stop new-api
-docker rm new-api
-# 使用相同参数运行新容器
-docker run --name new-api -d --restart always -p 3000:3000 -e TZ=Asia/Shanghai -v /home/ubuntu/data/new-api:/data calciumion/new-api:latest
-```
+## 渠道重试与缓存
+渠道重试功能已经实现，可以在`设置->运营设置->通用设置`设置重试次数，**建议开启缓存**功能。

-或者使用 Watchtower 自动更新（不推荐，可能会导致数据库不兼容）：
-```shell
-docker run --rm -v /var/run/docker.sock:/var/run/docker.sock containrrr/watchtower -cR
-```
-
-## 渠道重试
-渠道重试功能已经实现，可以在`设置->运营设置->通用设置`设置重试次数，**建议开启缓存**功能。  
-如果开启了重试功能，第一次重试使用同优先级，第二次重试使用下一个优先级，以此类推。
 ### 缓存设置方法
-1. `REDIS_CONN_STRING`：设置之后将使用 Redis 作为缓存使用。
-    + 例子：`REDIS_CONN_STRING=redis://default:redispw@localhost:49153`
-2. `MEMORY_CACHE_ENABLED`：启用内存缓存（如果设置了`REDIS_CONN_STRING`，则无需手动设置），会导致用户额度的更新存在一定的延迟，可选值为 `true` 和 `false`，未设置则默认为 `false`。
-    + 例子：`MEMORY_CACHE_ENABLED=true`
-### 为什么有的时候没有重试
-这些错误码不会重试：400，504，524
-### 我想让400也重试
-在`渠道->编辑`中，将`状态码复写`改为
-```json
-{
-  "400": "500"
-}
-```
-可以实现400错误转为500错误，从而重试
+1. `REDIS_CONN_STRING`：设置Redis作为缓存
+2. `MEMORY_CACHE_ENABLED`：启用内存缓存（设置了Redis则无需手动设置）

-## Midjourney接口设置文档
-[对接文档](Midjourney.md)
+## 接口文档

-## Suno接口设置文档
-[对接文档](Suno.md)
+详细接口文档请参考[接口文档](https://docs.newapi.pro/api)：

-## 界面截图
-![image](https://github.com/user-attachments/assets/a0dcd349-5df8-4dc8-9acf-ca272b239919)
-
-
-![image](https://github.com/user-attachments/assets/c7d0f7e1-729c-43e2-ac7c-2cb73b0afc8e)
-
-![image](https://github.com/user-attachments/assets/29f81de5-33fc-4fc5-a5ff-f9b54b653c7c)
-
-![image](https://github.com/user-attachments/assets/4fa53e18-d2c5-477a-9b26-b86e44c71e35)
-
-## 交流群
-<img src="https://github.com/user-attachments/assets/9ca0bc82-e057-4230-a28d-9f198fa022e3" width="200">
+- [聊天接口（Chat）](https://docs.newapi.pro/api/openai-chat)
+- [图像接口（Image）](https://docs.newapi.pro/api/openai-image)
+- [重排序接口（Rerank）](https://docs.newapi.pro/api/jinaai-rerank)
+- [实时对话接口（Realtime）](https://docs.newapi.pro/api/openai-realtime)
+- [Claude聊天接口（messages）](https://docs.newapi.pro/api/anthropic-chat)

 ## 相关项目
 - [One API](https://github.com/songquanpeng/one-api)：原版项目
 - [Midjourney-Proxy](https://github.com/novicezk/midjourney-proxy)：Midjourney接口支持
- [chatnio](https://github.com/Deeptrain-Community/chatnio)：下一代 AI 一站式 B/C 端解决方案
+- [chatnio](https://github.com/Deeptrain-Community/chatnio)：下一代AI一站式B/C端解决方案
 - [neko-api-key-tool](https://github.com/Calcium-Ion/neko-api-key-tool)：用key查询使用额度

 其他基于New API的项目：
- [new-api-horizon](https://github.com/Calcium-Ion/new-api-horizon)：New API高性能优化版，并支持Claude格式
- [VoAPI](https://github.com/VoAPI/VoAPI)：基于New API的闭源项目
+- [new-api-horizon](https://github.com/Calcium-Ion/new-api-horizon)：New API高性能优化版
+- [VoAPI](https://github.com/VoAPI/VoAPI)：基于New API的前端美化版本
+
+## 帮助支持
+
+如有问题，请参考[帮助支持](https://docs.newapi.pro/support)：
+- [社区交流](https://docs.newapi.pro/support/community-interaction)
+- [反馈问题](https://docs.newapi.pro/support/feedback-issues)
+- [常见问题](https://docs.newapi.pro/support/faq)

 ## 🌟 Star History

--- a/common/constants.go
+++ b/common/constants.go
@@ -1,8 +1,8 @@
 package common

 import (
-	"os"
-	"strconv"
+	//"os"
+	//"strconv"
 	"sync"
 	"time"

@@ -15,8 +15,9 @@ var SystemName = "New API"
 var Footer = ""
 var Logo = ""
 var TopUpLink = ""
-var ChatLink = ""
-var ChatLink2 = ""
+
+// var ChatLink = ""
+// var ChatLink2 = ""
 var QuotaPerUnit = 500 * 1000.0 // $0.002 / 1K tokens
 var DisplayInCurrencyEnabled = true
 var DisplayTokenStatEnabled = true
@@ -61,9 +62,13 @@ var EmailDomainWhitelist = []string{
 	"yahoo.com",
 	"foxmail.com",
 }
+var EmailLoginAuthServerList = []string{ // SMTP servers for which SendEmail authenticates with LoginAuth
+	"smtp.sendcloud.net",
+	"smtp.azurecomm.net",
+}

-var DebugEnabled = os.Getenv("DEBUG") == "true"
-var MemoryCacheEnabled = os.Getenv("MEMORY_CACHE_ENABLED") == "true"
+var DebugEnabled bool
+var MemoryCacheEnabled bool

 var LogConsumeEnabled = true

@@ -76,7 +81,6 @@ var SMTPToken = ""

 var GitHubClientId = ""
 var GitHubClientSecret = ""
-
 var LinuxDOClientId = ""
 var LinuxDOClientSecret = ""

@@ -103,22 +107,22 @@ var RetryTimes = 0

 //var RootUserEmail = ""

-var IsMasterNode = os.Getenv("NODE_TYPE") != "slave"
+var IsMasterNode bool

-var requestInterval, _ = strconv.Atoi(os.Getenv("POLLING_INTERVAL"))
-var RequestInterval = time.Duration(requestInterval) * time.Second
+var requestInterval int
+var RequestInterval time.Duration

-var SyncFrequency = GetEnvOrDefault("SYNC_FREQUENCY", 60) // unit is second
+var SyncFrequency int // unit is second

 var BatchUpdateEnabled = false
-var BatchUpdateInterval = GetEnvOrDefault("BATCH_UPDATE_INTERVAL", 5)
+var BatchUpdateInterval int

-var RelayTimeout = GetEnvOrDefault("RELAY_TIMEOUT", 0) // unit is second
+var RelayTimeout int // unit is second

-var GeminiSafetySetting = GetEnvOrDefaultString("GEMINI_SAFETY_SETTING", "BLOCK_NONE")
+var GeminiSafetySetting string

 // https://docs.cohere.com/docs/safety-modes Type; NONE/CONTEXTUAL/STRICT
-var CohereSafetySetting = GetEnvOrDefaultString("COHERE_SAFETY_SETTING", "NONE")
+var CohereSafetySetting string

 const (
 	RequestIdKey = "X-Oneapi-Request-Id"
@@ -145,13 +149,13 @@ var (
 // All duration's unit is seconds
 // Shouldn't larger then RateLimitKeyExpirationDuration
 var (
-	GlobalApiRateLimitEnable   = GetEnvOrDefaultBool("GLOBAL_API_RATE_LIMIT_ENABLE", true)
-	GlobalApiRateLimitNum      = GetEnvOrDefault("GLOBAL_API_RATE_LIMIT", 180)
-	GlobalApiRateLimitDuration = int64(GetEnvOrDefault("GLOBAL_API_RATE_LIMIT_DURATION", 180))
+	GlobalApiRateLimitEnable   bool
+	GlobalApiRateLimitNum      int
+	GlobalApiRateLimitDuration int64

-	GlobalWebRateLimitEnable   = GetEnvOrDefaultBool("GLOBAL_WEB_RATE_LIMIT_ENABLE", true)
-	GlobalWebRateLimitNum      = GetEnvOrDefault("GLOBAL_WEB_RATE_LIMIT", 60)
-	GlobalWebRateLimitDuration = int64(GetEnvOrDefault("GLOBAL_WEB_RATE_LIMIT_DURATION", 180))
+	GlobalWebRateLimitEnable   bool
+	GlobalWebRateLimitNum      int
+	GlobalWebRateLimitDuration int64

 	UploadRateLimitNum            = 10
 	UploadRateLimitDuration int64 = 60
@@ -234,6 +238,8 @@ const (
 	ChannelTypeMokaAI         = 44
 	ChannelTypeVolcEngine     = 45
 	ChannelTypeBaiduV2        = 46
+	ChannelTypeXinference     = 47
+	ChannelTypeXai            = 48
 	ChannelTypeDummy          // this one is only for count, do not add any channel after this

 )
@@ -276,7 +282,7 @@ var ChannelBaseURLs = []string{
 	"https://api.cohere.ai",                     //34
 	"https://api.minimax.chat",                  //35
 	"",                                          //36
-	"",                                          //37
+	"https://api.dify.ai",                       //37
 	"https://api.jina.ai",                       //38
 	"https://api.cloudflare.com",                //39
 	"https://api.siliconflow.cn",                //40
@@ -286,4 +292,6 @@ var ChannelBaseURLs = []string{
 	"https://api.moka.ai",                       //44
 	"https://ark.cn-beijing.volces.com",         //45
 	"https://qianfan.baidubce.com",              //46
+	"",                                          //47
+	"https://api.x.ai",                          //48
 }
--- a/common/custom-event.go
+++ b/common/custom-event.go
@@ -44,7 +44,7 @@ var fieldReplacer = strings.NewReplacer(
 	"\r", "\\r")

 var dataReplacer = strings.NewReplacer(
-	"\n", "\ndata:",
+	"\n", "\n",
 	"\r", "\\r")

 type CustomEvent struct {
--- a/common/email.go
+++ b/common/email.go
@@ -5,6 +5,7 @@ import (
 	"encoding/base64"
 	"fmt"
 	"net/smtp"
+	"slices"
 	"strings"
 	"time"
 )
@@ -79,7 +80,7 @@ func SendEmail(subject string, receiver string, content string) error {
 		if err != nil {
 			return err
 		}
-	} else if isOutlookServer(SMTPAccount) || SMTPServer == "smtp.azurecomm.net" {
+	} else if isOutlookServer(SMTPAccount) || slices.Contains(EmailLoginAuthServerList, SMTPServer) {
 		auth = LoginAuth(SMTPAccount, SMTPToken)
 		err = smtp.SendMail(addr, auth, SMTPFrom, to, mail)
 	} else {
--- a/common/gopool.go
+++ b/common/gopool.go
@@ -0,0 +1,35 @@
+package common
+
+import (
+	"context"
+	"fmt"
+	"github.com/bytedance/gopkg/util/gopool"
+	"math"
+)
+
+// relayGoPool is the goroutine pool shared by relay tasks started through
+// RelayCtxGo.
+var relayGoPool gopool.Pool
+
+// init creates the relay pool and installs its panic handler.
+func init() {
+	pool := gopool.NewPool("gopool.RelayPool", math.MaxInt32, gopool.NewConfig())
+	pool.SetPanicHandler(handleRelayPanic)
+	relayGoPool = pool
+}
+
+// handleRelayPanic runs when a task in the relay pool panics. If the task's
+// context carries a "stop_chan" value of type chan bool, true is sent on it
+// via SafeSendBool; the panic value is then written to the system error log.
+func handleRelayPanic(ctx context.Context, recovered interface{}) {
+	if stopChan, ok := ctx.Value("stop_chan").(chan bool); ok {
+		SafeSendBool(stopChan, true)
+	}
+	SysError(fmt.Sprintf("panic in gopool.RelayPool: %v", recovered))
+}
+
+// RelayCtxGo schedules f on the relay pool together with ctx, the context
+// that the pool's panic handler receives if f panics.
+func RelayCtxGo(ctx context.Context, f func()) {
+	relayGoPool.CtxGo(ctx, f)
+}
--- a/common/init.go
+++ b/common/init.go
@@ -6,6 +6,8 @@ import (
 	"log"
 	"os"
 	"path/filepath"
+	"strconv"
+	"time"
 )

 var (
@@ -66,4 +68,31 @@ func LoadEnv() {
 			}
 		}
 	}
+
+	// Initialize variables from constants.go that were using environment variables
+	DebugEnabled = os.Getenv("DEBUG") == "true"
+	MemoryCacheEnabled = os.Getenv("MEMORY_CACHE_ENABLED") == "true"
+	IsMasterNode = os.Getenv("NODE_TYPE") != "slave"
+
+	// Parse requestInterval and set RequestInterval
+	requestInterval, _ = strconv.Atoi(os.Getenv("POLLING_INTERVAL"))
+	RequestInterval = time.Duration(requestInterval) * time.Second
+
+	// Initialize variables with GetEnvOrDefault
+	SyncFrequency = GetEnvOrDefault("SYNC_FREQUENCY", 60)
+	BatchUpdateInterval = GetEnvOrDefault("BATCH_UPDATE_INTERVAL", 5)
+	RelayTimeout = GetEnvOrDefault("RELAY_TIMEOUT", 0)
+
+	// Initialize string variables with GetEnvOrDefaultString
+	GeminiSafetySetting = GetEnvOrDefaultString("GEMINI_SAFETY_SETTING", "BLOCK_NONE")
+	CohereSafetySetting = GetEnvOrDefaultString("COHERE_SAFETY_SETTING", "NONE")
+
+	// Initialize rate limit variables
+	GlobalApiRateLimitEnable = GetEnvOrDefaultBool("GLOBAL_API_RATE_LIMIT_ENABLE", true)
+	GlobalApiRateLimitNum = GetEnvOrDefault("GLOBAL_API_RATE_LIMIT", 180)
+	GlobalApiRateLimitDuration = int64(GetEnvOrDefault("GLOBAL_API_RATE_LIMIT_DURATION", 180))
+
+	GlobalWebRateLimitEnable = GetEnvOrDefaultBool("GLOBAL_WEB_RATE_LIMIT_ENABLE", true)
+	GlobalWebRateLimitNum = GetEnvOrDefault("GLOBAL_WEB_RATE_LIMIT", 60)
+	GlobalWebRateLimitDuration = int64(GetEnvOrDefault("GLOBAL_WEB_RATE_LIMIT_DURATION", 180))
 }
--- a/common/json.go
+++ b/common/json.go
@@ -0,0 +1,25 @@
+package common
+
+import (
+	"bytes"
+	"encoding/json"
+)
+
+// DecodeJson decodes the first JSON value in data into v, which must be a
+// non-nil pointer as required by encoding/json.
+func DecodeJson(data []byte, v any) error {
+	decoder := json.NewDecoder(bytes.NewReader(data))
+	return decoder.Decode(v)
+}
+
+// DecodeJsonStr is DecodeJson for string input; the string is converted with
+// StringToByteSlice before decoding.
+func DecodeJsonStr(data string, v any) error {
+	raw := StringToByteSlice(data)
+	return DecodeJson(raw, v)
+}
+
+// EncodeJson returns the JSON encoding of v as produced by json.Marshal.
+func EncodeJson(v any) ([]byte, error) {
+	return json.Marshal(v)
+}
--- a/common/limiter/limiter.go
+++ b/common/limiter/limiter.go
@@ -0,0 +1,109 @@
+package limiter
+
+import (
+	"context"
+	_ "embed"
+	"fmt"
+	"github.com/go-redis/redis/v8"
+	"one-api/common"
+	"sync"
+)
+
+// rateLimitScript is the Lua source of the token-bucket limiter, embedded at
+// build time and executed on the Redis server.
+//
+//go:embed lua/rate_limit.lua
+var rateLimitScript string
+
+// RedisLimiter is a rate limiter whose per-key bucket state is kept in Redis
+// and updated by rateLimitScript.
+type RedisLimiter struct {
+	client *redis.Client
+	script *redis.Script
+}
+
+var (
+	instance *RedisLimiter
+	once     sync.Once
+)
+
+// New returns the process-wide RedisLimiter, creating it on the first call.
+// Only the first call's ctx and client are used; every later call returns the
+// same instance regardless of its arguments.
+func New(ctx context.Context, r *redis.Client) *RedisLimiter {
+	once.Do(func() {
+		script := redis.NewScript(rateLimitScript)
+		// Preload the script so Allow can normally use EVALSHA. A failure is
+		// only logged: Script.Run falls back to EVAL when the script is not
+		// cached on the server.
+		if err := script.Load(ctx, r).Err(); err != nil {
+			common.SysLog(fmt.Sprintf("Failed to load rate limit script: %v", err))
+		}
+		instance = &RedisLimiter{
+			client: r,
+			script: script,
+		}
+	})
+
+	return instance
+}
+
+// Allow runs the rate-limit script for key and reports whether the request is
+// permitted. Defaults are Capacity 10, Rate 1 and Requested 1; opts override
+// them. A Redis or script failure is returned as a wrapped error together
+// with false.
+func (rl *RedisLimiter) Allow(ctx context.Context, key string, opts ...Option) (bool, error) {
+	// Default configuration.
+	config := &Config{
+		Capacity:  10,
+		Rate:      1,
+		Requested: 1,
+	}
+
+	// Apply functional options.
+	for _, opt := range opts {
+		opt(config)
+	}
+
+	// Script.Run issues EVALSHA and retries with EVAL on a NOSCRIPT reply, so
+	// the limiter keeps working when the initial load failed or the server's
+	// script cache was emptied (for example a restart or SCRIPT FLUSH).
+	result, err := rl.script.Run(
+		ctx,
+		rl.client,
+		[]string{key},
+		config.Requested,
+		config.Rate,
+		config.Capacity,
+	).Int()
+
+	if err != nil {
+		return false, fmt.Errorf("rate limit failed: %w", err)
+	}
+	return result == 1, nil
+}
+
+// Config holds the parameters passed to the rate-limit script.
+type Config struct {
+	Capacity  int64 // bucket capacity (script ARGV[3])
+	Rate      int64 // token refill rate per second (script ARGV[2])
+	Requested int64 // tokens requested by this call (script ARGV[1])
+}
+
+// Option mutates a Config; pass options to Allow to override its defaults.
+type Option func(*Config)
+
+// WithCapacity sets Config.Capacity.
+func WithCapacity(c int64) Option {
+	return func(cfg *Config) { cfg.Capacity = c }
+}
+
+// WithRate sets Config.Rate.
+func WithRate(r int64) Option {
+	return func(cfg *Config) { cfg.Rate = r }
+}
+
+// WithRequested sets Config.Requested.
+func WithRequested(n int64) Option {
+	return func(cfg *Config) { cfg.Requested = n }
+}
--- a/common/limiter/lua/rate_limit.lua
+++ b/common/limiter/lua/rate_limit.lua
@@ -0,0 +1,44 @@
+-- Token-bucket rate limiter.
+-- KEYS[1]: unique identifier of the limiter (bucket key)
+-- ARGV[1]: number of tokens requested (normally 1)
+-- ARGV[2]: token refill rate (tokens per second)
+-- ARGV[3]: bucket capacity
+
+local key = KEYS[1]
+local requested = tonumber(ARGV[1])
+local rate = tonumber(ARGV[2])
+local capacity = tonumber(ARGV[3])
+
+-- Current time taken from the Redis server clock (whole seconds)
+local now = redis.call('TIME')
+local nowInSeconds = tonumber(now[1])
+
+-- Load the stored bucket state
+local bucket = redis.call('HMGET', key, 'tokens', 'last_time')
+local tokens = tonumber(bucket[1])
+local last_time = tonumber(bucket[2])
+
+-- Initialise the bucket (first request, or the key has expired)
+if not tokens or not last_time then
+    tokens = capacity
+    last_time = nowInSeconds
+else
+    -- Refill tokens for the elapsed time, capped at the capacity
+    local elapsed = nowInSeconds - last_time
+    local add_tokens = elapsed * rate
+    tokens = math.min(capacity, tokens + add_tokens)
+    last_time = nowInSeconds
+end
+
+-- Decide whether the request is allowed
+local allowed = false
+if tokens >= requested then
+    tokens = tokens - requested
+    allowed = true
+end
+
+-- Persist the bucket; an idle bucket is full again after capacity/rate seconds, so expiring it then (+60s slack) only frees memory
+redis.call('HMSET', key, 'tokens', tokens, 'last_time', last_time)
+if rate and rate > 0 then redis.call('EXPIRE', key, math.ceil(capacity / rate) + 60) end
+
+return allowed and 1 or 0
--- a/common/model-ratio.go
+++ b/common/model-ratio.go
@@ -1,493 +0,0 @@
-package common
-
-import (
-	"encoding/json"
-	"strings"
-	"sync"
-)
-
-// from songquanpeng/one-api
-const (
-	USD2RMB = 7.3 // 暂定 1 USD = 7.3 RMB
-	USD     = 500 // $0.002 = 1 -> $1 = 500
-	RMB     = USD / USD2RMB
-)
-
-// modelRatio
-// https://platform.openai.com/docs/models/model-endpoint-compatibility
-// https://cloud.baidu.com/doc/WENXINWORKSHOP/s/Blfmc9dlf
-// https://openai.com/pricing
-// TODO: when a new api is enabled, check the pricing here
-// 1 === $0.002 / 1K tokens
-// 1 === ￥0.014 / 1k tokens
-
-var defaultModelRatio = map[string]float64{
-	//"midjourney":                50,
-	"gpt-4-gizmo-*":  15,
-	"gpt-4o-gizmo-*": 2.5,
-	"gpt-4-all":      15,
-	"gpt-4o-all":     15,
-	"gpt-4":          15,
-	//"gpt-4-0314":                   15, //deprecated
-	"gpt-4-0613": 15,
-	"gpt-4-32k":  30,
-	//"gpt-4-32k-0314":               30, //deprecated
-	"gpt-4-32k-0613":                          30,
-	"gpt-4-1106-preview":                      5,    // $10 / 1M tokens
-	"gpt-4-0125-preview":                      5,    // $10 / 1M tokens
-	"gpt-4-turbo-preview":                     5,    // $10 / 1M tokens
-	"gpt-4-vision-preview":                    5,    // $10 / 1M tokens
-	"gpt-4-1106-vision-preview":               5,    // $10 / 1M tokens
-	"chatgpt-4o-latest":                       2.5,  // $5 / 1M tokens
-	"gpt-4o":                                  1.25, // $2.5 / 1M tokens
-	"gpt-4o-audio-preview":                    1.25, // $2.5 / 1M tokens
-	"gpt-4o-audio-preview-2024-10-01":         1.25, // $2.5 / 1M tokens
-	"gpt-4o-2024-05-13":                       2.5,  // $5 / 1M tokens
-	"gpt-4o-2024-08-06":                       1.25, // $2.5 / 1M tokens
-	"gpt-4o-2024-11-20":                       1.25, // $2.5 / 1M tokens
-	"gpt-4o-realtime-preview":                 2.5,
-	"gpt-4o-realtime-preview-2024-10-01":      2.5,
-	"gpt-4o-realtime-preview-2024-12-17":      2.5,
-	"gpt-4o-mini-realtime-preview":            0.3,
-	"gpt-4o-mini-realtime-preview-2024-12-17": 0.3,
-	"o1":                        7.5,
-	"o1-2024-12-17":             7.5,
-	"o1-preview":                7.5,
-	"o1-preview-2024-09-12":     7.5,
-	"o1-mini":                   0.55,
-	"o1-mini-2024-09-12":        0.55,
-	"o3-mini":                   0.55,
-	"o3-mini-2025-01-31":        0.55,
-	"o3-mini-high":              0.55,
-	"o3-mini-2025-01-31-high":   0.55,
-	"o3-mini-low":               0.55,
-	"o3-mini-2025-01-31-low":    0.55,
-	"o3-mini-medium":            0.55,
-	"o3-mini-2025-01-31-medium": 0.55,
-	"gpt-4o-mini":               0.075,
-	"gpt-4o-mini-2024-07-18":    0.075,
-	"gpt-4-turbo":               5, // $0.01 / 1K tokens
-	"gpt-4-turbo-2024-04-09":    5, // $0.01 / 1K tokens
-	//"gpt-3.5-turbo-0301":           0.75, //deprecated
-	"gpt-3.5-turbo":          0.25,
-	"gpt-3.5-turbo-0613":     0.75,
-	"gpt-3.5-turbo-16k":      1.5, // $0.003 / 1K tokens
-	"gpt-3.5-turbo-16k-0613": 1.5,
-	"gpt-3.5-turbo-instruct": 0.75, // $0.0015 / 1K tokens
-	"gpt-3.5-turbo-1106":     0.5,  // $0.001 / 1K tokens
-	"gpt-3.5-turbo-0125":     0.25,
-	"babbage-002":            0.2, // $0.0004 / 1K tokens
-	"davinci-002":            1,   // $0.002 / 1K tokens
-	"text-ada-001":           0.2,
-	"text-babbage-001":       0.25,
-	"text-curie-001":         1,
-	//"text-davinci-002":               10,
-	//"text-davinci-003":               10,
-	"text-davinci-edit-001":          10,
-	"code-davinci-edit-001":          10,
-	"whisper-1":                      15,  // $0.006 / minute -> $0.006 / 150 words -> $0.006 / 200 tokens -> $0.03 / 1k tokens
-	"tts-1":                          7.5, // 1k characters -> $0.015
-	"tts-1-1106":                     7.5, // 1k characters -> $0.015
-	"tts-1-hd":                       15,  // 1k characters -> $0.03
-	"tts-1-hd-1106":                  15,  // 1k characters -> $0.03
-	"davinci":                        10,
-	"curie":                          10,
-	"babbage":                        10,
-	"ada":                            10,
-	"text-embedding-3-small":         0.01,
-	"text-embedding-3-large":         0.065,
-	"text-embedding-ada-002":         0.05,
-	"text-search-ada-doc-001":        10,
-	"text-moderation-stable":         0.1,
-	"text-moderation-latest":         0.1,
-	"claude-instant-1":               0.4,   // $0.8 / 1M tokens
-	"claude-2.0":                     4,     // $8 / 1M tokens
-	"claude-2.1":                     4,     // $8 / 1M tokens
-	"claude-3-haiku-20240307":        0.125, // $0.25 / 1M tokens
-	"claude-3-5-haiku-20241022":      0.5,   // $1 / 1M tokens
-	"claude-3-sonnet-20240229":       1.5,   // $3 / 1M tokens
-	"claude-3-5-sonnet-20240620":     1.5,
-	"claude-3-5-sonnet-20241022":     1.5,
-	"claude-3-opus-20240229":         7.5, // $15 / 1M tokens
-	"ERNIE-4.0-8K":                   0.120 * RMB,
-	"ERNIE-3.5-8K":                   0.012 * RMB,
-	"ERNIE-3.5-8K-0205":              0.024 * RMB,
-	"ERNIE-3.5-8K-1222":              0.012 * RMB,
-	"ERNIE-Bot-8K":                   0.024 * RMB,
-	"ERNIE-3.5-4K-0205":              0.012 * RMB,
-	"ERNIE-Speed-8K":                 0.004 * RMB,
-	"ERNIE-Speed-128K":               0.004 * RMB,
-	"ERNIE-Lite-8K-0922":             0.008 * RMB,
-	"ERNIE-Lite-8K-0308":             0.003 * RMB,
-	"ERNIE-Tiny-8K":                  0.001 * RMB,
-	"BLOOMZ-7B":                      0.004 * RMB,
-	"Embedding-V1":                   0.002 * RMB,
-	"bge-large-zh":                   0.002 * RMB,
-	"bge-large-en":                   0.002 * RMB,
-	"tao-8k":                         0.002 * RMB,
-	"PaLM-2":                         1,
-	"gemini-pro":                     1, // $0.00025 / 1k characters -> $0.001 / 1k tokens
-	"gemini-pro-vision":              1, // $0.00025 / 1k characters -> $0.001 / 1k tokens
-	"gemini-1.0-pro-vision-001":      1,
-	"gemini-1.0-pro-001":             1,
-	"gemini-1.5-pro-latest":          1.75, // $3.5 / 1M tokens
-	"gemini-1.5-pro-exp-0827":        1.75, // $3.5 / 1M tokens
-	"gemini-1.5-flash-latest":        1,
-	"gemini-1.5-flash-exp-0827":      1,
-	"gemini-1.0-pro-latest":          1,
-	"gemini-1.0-pro-vision-latest":   1,
-	"gemini-ultra":                   1,
-	"chatglm_turbo":                  0.3572,     // ￥0.005 / 1k tokens
-	"chatglm_pro":                    0.7143,     // ￥0.01 / 1k tokens
-	"chatglm_std":                    0.3572,     // ￥0.005 / 1k tokens
-	"chatglm_lite":                   0.1429,     // ￥0.002 / 1k tokens
-	"glm-4":                          7.143,      // ￥0.1 / 1k tokens
-	"glm-4v":                         0.05 * RMB, // ￥0.05 / 1k tokens
-	"glm-4-alltools":                 0.1 * RMB,  // ￥0.1 / 1k tokens
-	"glm-3-turbo":                    0.3572,
-	"glm-4-plus":                     0.05 * RMB,
-	"glm-4-0520":                     0.1 * RMB,
-	"glm-4-air":                      0.001 * RMB,
-	"glm-4-airx":                     0.01 * RMB,
-	"glm-4-long":                     0.001 * RMB,
-	"glm-4-flash":                    0,
-	"glm-4v-plus":                    0.01 * RMB,
-	"qwen-turbo":                     0.8572, // ￥0.012 / 1k tokens
-	"qwen-plus":                      10,     // ￥0.14 / 1k tokens
-	"text-embedding-v1":              0.05,   // ￥0.0007 / 1k tokens
-	"SparkDesk-v1.1":                 1.2858, // ￥0.018 / 1k tokens
-	"SparkDesk-v2.1":                 1.2858, // ￥0.018 / 1k tokens
-	"SparkDesk-v3.1":                 1.2858, // ￥0.018 / 1k tokens
-	"SparkDesk-v3.5":                 1.2858, // ￥0.018 / 1k tokens
-	"SparkDesk-v4.0":                 1.2858,
-	"360GPT_S2_V9":                   0.8572, // ¥0.012 / 1k tokens
-	"360gpt-turbo":                   0.0858, // ¥0.0012 / 1k tokens
-	"360gpt-turbo-responsibility-8k": 0.8572, // ¥0.012 / 1k tokens
-	"360gpt-pro":                     0.8572, // ¥0.012 / 1k tokens
-	"360gpt2-pro":                    0.8572, // ¥0.012 / 1k tokens
-	"embedding-bert-512-v1":          0.0715, // ¥0.001 / 1k tokens
-	"embedding_s1_v1":                0.0715, // ¥0.001 / 1k tokens
-	"semantic_similarity_s1_v1":      0.0715, // ¥0.001 / 1k tokens
-	"hunyuan":                        7.143,  // ¥0.1 / 1k tokens  // https://cloud.tencent.com/document/product/1729/97731#e0e6be58-60c8-469f-bdeb-6c264ce3b4d0
-	// https://platform.lingyiwanwu.com/docs#-计费单元
-	// 已经按照 7.2 来换算美元价格
-	"yi-34b-chat-0205":       0.18,
-	"yi-34b-chat-200k":       0.864,
-	"yi-vl-plus":             0.432,
-	"yi-large":               20.0 / 1000 * RMB,
-	"yi-medium":              2.5 / 1000 * RMB,
-	"yi-vision":              6.0 / 1000 * RMB,
-	"yi-medium-200k":         12.0 / 1000 * RMB,
-	"yi-spark":               1.0 / 1000 * RMB,
-	"yi-large-rag":           25.0 / 1000 * RMB,
-	"yi-large-turbo":         12.0 / 1000 * RMB,
-	"yi-large-preview":       20.0 / 1000 * RMB,
-	"yi-large-rag-preview":   25.0 / 1000 * RMB,
-	"command":                0.5,
-	"command-nightly":        0.5,
-	"command-light":          0.5,
-	"command-light-nightly":  0.5,
-	"command-r":              0.25,
-	"command-r-plus":         1.5,
-	"command-r-08-2024":      0.075,
-	"command-r-plus-08-2024": 1.25,
-	"deepseek-chat":          0.27 / 2,
-	"deepseek-coder":         0.27 / 2,
-	"deepseek-reasoner":      0.55 / 2, // 0.55 / 1k tokens
-	// Perplexity online 模型对搜索额外收费，有需要应自行调整，此处不计入搜索费用
-	"llama-3-sonar-small-32k-chat":   0.2 / 1000 * USD,
-	"llama-3-sonar-small-32k-online": 0.2 / 1000 * USD,
-	"llama-3-sonar-large-32k-chat":   1 / 1000 * USD,
-	"llama-3-sonar-large-32k-online": 1 / 1000 * USD,
-}
-
-var defaultModelPrice = map[string]float64{
-	"suno_music":        0.1,
-	"suno_lyrics":       0.01,
-	"dall-e-3":          0.04,
-	"gpt-4-gizmo-*":     0.1,
-	"mj_imagine":        0.1,
-	"mj_variation":      0.1,
-	"mj_reroll":         0.1,
-	"mj_blend":          0.1,
-	"mj_modal":          0.1,
-	"mj_zoom":           0.1,
-	"mj_shorten":        0.1,
-	"mj_high_variation": 0.1,
-	"mj_low_variation":  0.1,
-	"mj_pan":            0.1,
-	"mj_inpaint":        0,
-	"mj_custom_zoom":    0,
-	"mj_describe":       0.05,
-	"mj_upscale":        0.05,
-	"swap_face":         0.05,
-	"mj_upload":         0.05,
-}
-
-var (
-	modelPriceMap      map[string]float64 = nil
-	modelPriceMapMutex                    = sync.RWMutex{}
-)
-var (
-	modelRatioMap      map[string]float64 = nil
-	modelRatioMapMutex                    = sync.RWMutex{}
-)
-
-var (
-	CompletionRatio      map[string]float64 = nil
-	CompletionRatioMutex                    = sync.RWMutex{}
-)
-
-var defaultCompletionRatio = map[string]float64{
-	"gpt-4-gizmo-*":  2,
-	"gpt-4o-gizmo-*": 3,
-	"gpt-4-all":      2,
-}
-
-func GetModelPriceMap() map[string]float64 {
-	modelPriceMapMutex.Lock()
-	defer modelPriceMapMutex.Unlock()
-	if modelPriceMap == nil {
-		modelPriceMap = defaultModelPrice
-	}
-	return modelPriceMap
-}
-
-func ModelPrice2JSONString() string {
-	GetModelPriceMap()
-	jsonBytes, err := json.Marshal(modelPriceMap)
-	if err != nil {
-		SysError("error marshalling model price: " + err.Error())
-	}
-	return string(jsonBytes)
-}
-
-func UpdateModelPriceByJSONString(jsonStr string) error {
-	modelPriceMapMutex.Lock()
-	defer modelPriceMapMutex.Unlock()
-	modelPriceMap = make(map[string]float64)
-	return json.Unmarshal([]byte(jsonStr), &modelPriceMap)
-}
-
-// GetModelPrice 返回模型的价格，如果模型不存在则返回-1，false
-func GetModelPrice(name string, printErr bool) (float64, bool) {
-	GetModelPriceMap()
-	if strings.HasPrefix(name, "gpt-4-gizmo") {
-		name = "gpt-4-gizmo-*"
-	}
-	if strings.HasPrefix(name, "gpt-4o-gizmo") {
-		name = "gpt-4o-gizmo-*"
-	}
-	price, ok := modelPriceMap[name]
-	if !ok {
-		if printErr {
-			SysError("model price not found: " + name)
-		}
-		return -1, false
-	}
-	return price, true
-}
-
-func GetModelRatioMap() map[string]float64 {
-	modelRatioMapMutex.Lock()
-	defer modelRatioMapMutex.Unlock()
-	if modelRatioMap == nil {
-		modelRatioMap = defaultModelRatio
-	}
-	return modelRatioMap
-}
-
-func ModelRatio2JSONString() string {
-	GetModelRatioMap()
-	jsonBytes, err := json.Marshal(modelRatioMap)
-	if err != nil {
-		SysError("error marshalling model ratio: " + err.Error())
-	}
-	return string(jsonBytes)
-}
-
-func UpdateModelRatioByJSONString(jsonStr string) error {
-	modelRatioMapMutex.Lock()
-	defer modelRatioMapMutex.Unlock()
-	modelRatioMap = make(map[string]float64)
-	return json.Unmarshal([]byte(jsonStr), &modelRatioMap)
-}
-
-func GetModelRatio(name string) float64 {
-	GetModelRatioMap()
-	if strings.HasPrefix(name, "gpt-4-gizmo") {
-		name = "gpt-4-gizmo-*"
-	}
-	ratio, ok := modelRatioMap[name]
-	if !ok {
-		SysError("model ratio not found: " + name)
-		return 30
-	}
-	return ratio
-}
-
-func DefaultModelRatio2JSONString() string {
-	jsonBytes, err := json.Marshal(defaultModelRatio)
-	if err != nil {
-		SysError("error marshalling model ratio: " + err.Error())
-	}
-	return string(jsonBytes)
-}
-
-func GetDefaultModelRatioMap() map[string]float64 {
-	return defaultModelRatio
-}
-
-func GetCompletionRatioMap() map[string]float64 {
-	CompletionRatioMutex.Lock()
-	defer CompletionRatioMutex.Unlock()
-	if CompletionRatio == nil {
-		CompletionRatio = defaultCompletionRatio
-	}
-	return CompletionRatio
-}
-
-func CompletionRatio2JSONString() string {
-	GetCompletionRatioMap()
-	jsonBytes, err := json.Marshal(CompletionRatio)
-	if err != nil {
-		SysError("error marshalling completion ratio: " + err.Error())
-	}
-	return string(jsonBytes)
-}
-
-func UpdateCompletionRatioByJSONString(jsonStr string) error {
-	CompletionRatioMutex.Lock()
-	defer CompletionRatioMutex.Unlock()
-	CompletionRatio = make(map[string]float64)
-	return json.Unmarshal([]byte(jsonStr), &CompletionRatio)
-}
-
-func GetCompletionRatio(name string) float64 {
-	GetCompletionRatioMap()
-
-	if strings.Contains(name, "/") {
-		if ratio, ok := CompletionRatio[name]; ok {
-			return ratio
-		}
-	}
-	lowercaseName := strings.ToLower(name)
-	if strings.HasPrefix(name, "gpt-4-gizmo") {
-		name = "gpt-4-gizmo-*"
-	}
-	if strings.HasPrefix(name, "gpt-4o-gizmo") {
-		name = "gpt-4o-gizmo-*"
-	}
-	if strings.HasPrefix(name, "gpt-4") && !strings.HasSuffix(name, "-all") && !strings.HasSuffix(name, "-gizmo-*") {
-		if strings.HasPrefix(name, "gpt-4o") {
-			if name == "gpt-4o-2024-05-13" {
-				return 3
-			}
-			return 4
-		}
-		if strings.HasPrefix(name, "gpt-4-turbo") || strings.HasSuffix(name, "preview") {
-			return 3
-		}
-		return 2
-	}
-	if strings.HasPrefix(name, "o1") || strings.HasPrefix(name, "o3") {
-		return 4
-	}
-	if name == "chatgpt-4o-latest" {
-		return 3
-	}
-	if strings.Contains(name, "claude-instant-1") {
-		return 3
-	} else if strings.Contains(name, "claude-2") {
-		return 3
-	} else if strings.Contains(name, "claude-3") {
-		return 5
-	}
-	if strings.HasPrefix(name, "gpt-3.5") {
-		if name == "gpt-3.5-turbo" || strings.HasSuffix(name, "0125") {
-			// https://openai.com/blog/new-embedding-models-and-api-updates
-			// Updated GPT-3.5 Turbo model and lower pricing
-			return 3
-		}
-		if strings.HasSuffix(name, "1106") {
-			return 2
-		}
-		return 4.0 / 3.0
-	}
-	if strings.HasPrefix(name, "mistral-") {
-		return 3
-	}
-	if strings.HasPrefix(name, "gemini-") {
-		return 4
-	}
-	if strings.HasPrefix(name, "command") {
-		switch name {
-		case "command-r":
-			return 3
-		case "command-r-plus":
-			return 5
-		case "command-r-08-2024":
-			return 4
-		case "command-r-plus-08-2024":
-			return 4
-		default:
-			return 4
-		}
-	}
-	// hint 只给官方上4倍率，由于开源模型供应商自行定价，不对其进行补全倍率进行强制对齐
-	if lowercaseName == "deepseek-chat" || lowercaseName == "deepseek-reasoner" {
-		return 4
-	}
-	if strings.HasPrefix(name, "ERNIE-Speed-") {
-		return 2
-	} else if strings.HasPrefix(name, "ERNIE-Lite-") {
-		return 2
-	} else if strings.HasPrefix(name, "ERNIE-Character") {
-		return 2
-	} else if strings.HasPrefix(name, "ERNIE-Functions") {
-		return 2
-	}
-	switch name {
-	case "llama2-70b-4096":
-		return 0.8 / 0.64
-	case "llama3-8b-8192":
-		return 2
-	case "llama3-70b-8192":
-		return 0.79 / 0.59
-	}
-	if ratio, ok := CompletionRatio[name]; ok {
-		return ratio
-	}
-	return 1
-}
-
-func GetAudioRatio(name string) float64 {
-	if strings.Contains(name, "-realtime") {
-		if strings.HasSuffix(name, "gpt-4o-realtime-preview-2024-12-17") {
-			return 8
-		} else if strings.Contains(name, "mini") {
-			return 10 / 0.6
-		} else {
-			return 20
-		}
-	}
-	if strings.Contains(name, "-audio") {
-		if strings.HasSuffix(name, "gpt-4o-audio-preview-2024-12-17") {
-			return 16
-		} else if strings.Contains(name, "mini") {
-			return 10 / 0.15
-		} else {
-			return 40
-		}
-	}
-	return 20
-}
-
-func GetAudioCompletionRatio(name string) float64 {
-	if strings.HasPrefix(name, "gpt-4o-realtime") {
-		return 2
-	} else if strings.HasPrefix(name, "gpt-4o-mini-realtime") {
-		return 2
-	}
-	return 2
-}
--- a/common/redis.go
+++ b/common/redis.go
@@ -32,6 +32,7 @@ func InitRedisClient() (err error) {
 	if err != nil {
 		FatalLog("failed to parse Redis connection string: " + err.Error())
 	}
+	opt.PoolSize = GetEnvOrDefault("REDIS_POOL_SIZE", 10)
 	RDB = redis.NewClient(opt)

 	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
@@ -41,6 +42,10 @@ func InitRedisClient() (err error) {
 	if err != nil {
 		FatalLog("Redis ping test failed: " + err.Error())
 	}
+	if DebugEnabled {
+		SysLog(fmt.Sprintf("Redis connected to %s", opt.Addr))
+		SysLog(fmt.Sprintf("Redis database: %d", opt.DB))
+	}
 	return err
 }

@@ -53,13 +58,20 @@ func ParseRedisOption() *redis.Options {
 }

 func RedisSet(key string, value string, expiration time.Duration) error {
+	if DebugEnabled {
+		SysLog(fmt.Sprintf("Redis SET: key=%s, value=%s, expiration=%v", key, value, expiration))
+	}
 	ctx := context.Background()
 	return RDB.Set(ctx, key, value, expiration).Err()
 }

 func RedisGet(key string) (string, error) {
+	if DebugEnabled {
+		SysLog(fmt.Sprintf("Redis GET: key=%s", key))
+	}
 	ctx := context.Background()
-	return RDB.Get(ctx, key).Result()
+	val, err := RDB.Get(ctx, key).Result()
+	return val, err
 }

 //func RedisExpire(key string, expiration time.Duration) error {
@@ -73,16 +85,25 @@ func RedisGet(key string) (string, error) {
 //}

 func RedisDel(key string) error {
+	if DebugEnabled {
+		SysLog(fmt.Sprintf("Redis DEL: key=%s", key))
+	}
 	ctx := context.Background()
 	return RDB.Del(ctx, key).Err()
 }

 func RedisHDelObj(key string) error {
+	if DebugEnabled {
+		SysLog(fmt.Sprintf("Redis HDEL: key=%s", key))
+	}
 	ctx := context.Background()
 	return RDB.HDel(ctx, key).Err()
 }

 func RedisHSetObj(key string, obj interface{}, expiration time.Duration) error {
+	if DebugEnabled {
+		SysLog(fmt.Sprintf("Redis HSET: key=%s, obj=%+v, expiration=%v", key, obj, expiration))
+	}
 	ctx := context.Background()

 	data := make(map[string]interface{})
@@ -130,6 +151,9 @@ func RedisHSetObj(key string, obj interface{}, expiration time.Duration) error {
 }

 func RedisHGetObj(key string, obj interface{}) error {
+	if DebugEnabled {
+		SysLog(fmt.Sprintf("Redis HGETALL: key=%s", key))
+	}
 	ctx := context.Background()

 	result, err := RDB.HGetAll(ctx, key).Result()
@@ -208,6 +232,9 @@ func RedisHGetObj(key string, obj interface{}) error {

 // RedisIncr Add this function to handle atomic increments
 func RedisIncr(key string, delta int64) error {
+	if DebugEnabled {
+		SysLog(fmt.Sprintf("Redis INCR: key=%s, delta=%d", key, delta))
+	}
 	// 检查键的剩余生存时间
 	ttlCmd := RDB.TTL(context.Background(), key)
 	ttl, err := ttlCmd.Result()
@@ -238,6 +265,9 @@ func RedisIncr(key string, delta int64) error {
 }

 func RedisHIncrBy(key, field string, delta int64) error {
+	if DebugEnabled {
+		SysLog(fmt.Sprintf("Redis HINCRBY: key=%s, field=%s, delta=%d", key, field, delta))
+	}
 	ttlCmd := RDB.TTL(context.Background(), key)
 	ttl, err := ttlCmd.Result()
 	if err != nil && !errors.Is(err, redis.Nil) {
@@ -262,6 +292,9 @@ func RedisHIncrBy(key, field string, delta int64) error {
 }

 func RedisHSetField(key, field string, value interface{}) error {
+	if DebugEnabled {
+		SysLog(fmt.Sprintf("Redis HSET field: key=%s, field=%s, value=%v", key, field, value))
+	}
 	ttlCmd := RDB.TTL(context.Background(), key)
 	ttl, err := ttlCmd.Result()
 	if err != nil && !errors.Is(err, redis.Nil) {
--- a/common/utils.go
+++ b/common/utils.go
@@ -5,8 +5,8 @@ import (
 	"context"
 	crand "crypto/rand"
 	"encoding/base64"
+	"encoding/json"
 	"fmt"
-	"github.com/pkg/errors"
 	"html/template"
 	"io"
 	"log"
@@ -21,6 +21,7 @@ import (
 	"time"

 	"github.com/google/uuid"
+	"github.com/pkg/errors"
 )

 func OpenBrowser(url string) {
@@ -213,6 +214,24 @@ func RandomSleep() {
 	time.Sleep(time.Duration(rand.Intn(3000)) * time.Millisecond)
 }

+// GetPointer returns a pointer to a fresh copy of v.
+func GetPointer[T any](v T) *T {
+	return &v
+}
+
+// Any2Type converts data to T via a JSON round trip; on failure it returns T's zero value and the error.
+func Any2Type[T any](data any) (T, error) {
+	var converted, zero T
+	encoded, err := json.Marshal(data)
+	if err != nil {
+		return zero, err
+	}
+	if err = json.Unmarshal(encoded, &converted); err != nil {
+		return zero, err
+	}
+	return converted, nil
+}
+
 // SaveTmpFile saves data to a temporary file. The filename would be apppended with a random string.
 func SaveTmpFile(filename string, data io.Reader) (string, error) {
 	f, err := os.CreateTemp(os.TempDir(), filename)
--- a/constant/azure.go
+++ b/constant/azure.go
@@ -0,0 +1,5 @@
+package constant
+
+import "time"
+
+var AzureNoRemoveDotTime = time.Date(2025, time.May, 10, 0, 0, 0, 0, time.UTC).Unix() // Unix seconds of 2025-05-10 00:00 UTC; a channel's creation time is compared against it to decide whether dots are removed from Azure model names
--- a/constant/channel_setting.go
+++ b/constant/channel_setting.go
@@ -1,6 +1,7 @@
 package constant

 var (
-	ForceFormat        = "force_format" // ForceFormat 强制格式化为OpenAI格式
-	ChanelSettingProxy = "proxy"        // Proxy 代理
+	ForceFormat                     = "force_format"        // ForceFormat: force the output to be formatted in the OpenAI format
+	ChanelSettingProxy              = "proxy"               // ChanelSettingProxy: proxy setting
+	ChannelSettingThinkingToContent = "thinking_to_content" // ChannelSettingThinkingToContent: presumably converts "thinking" output into content — TODO confirm against usage
 )
--- a/constant/context_key.go
+++ b/constant/context_key.go
@@ -2,4 +2,9 @@ package constant

 const (
 	ContextKeyRequestStartTime = "request_start_time"
+	ContextKeyUserSetting      = "user_setting"
+	ContextKeyUserQuota        = "user_quota"
+	ContextKeyUserStatus       = "user_status"
+	ContextKeyUserEmail        = "user_email"
+	ContextKeyUserGroup        = "user_group"
 )
--- a/constant/env.go
+++ b/constant/env.go
@@ -1,51 +1,55 @@
 package constant

 import (
-	"fmt"
 	"one-api/common"
-	"os"
-	"strings"
 )

-var StreamingTimeout = common.GetEnvOrDefault("STREAMING_TIMEOUT", 60)
-var DifyDebug = common.GetEnvOrDefaultBool("DIFY_DEBUG", true)
+var StreamingTimeout int
+var DifyDebug bool
+var MaxFileDownloadMB int
+var ForceStreamOption bool
+var GetMediaToken bool
+var GetMediaTokenNotStream bool
+var UpdateTask bool
+var AzureDefaultAPIVersion string
+var GeminiVisionMaxImageNum int
+var NotifyLimitCount int
+var NotificationLimitDurationMinute int
+var GenerateDefaultToken bool
+var ErrorLogEnabled bool

-var MaxFileDownloadMB = common.GetEnvOrDefault("MAX_FILE_DOWNLOAD_MB", 20)
-
-// ForceStreamOption 覆盖请求参数，强制返回usage信息
-var ForceStreamOption = common.GetEnvOrDefaultBool("FORCE_STREAM_OPTION", true)
-
-var GetMediaToken = common.GetEnvOrDefaultBool("GET_MEDIA_TOKEN", true)
-
-var GetMediaTokenNotStream = common.GetEnvOrDefaultBool("GET_MEDIA_TOKEN_NOT_STREAM", true)
-
-var UpdateTask = common.GetEnvOrDefaultBool("UPDATE_TASK", true)
-
-var AzureDefaultAPIVersion = common.GetEnvOrDefaultString("AZURE_DEFAULT_API_VERSION", "2024-12-01-preview")
-
-var GeminiModelMap = map[string]string{
-	"gemini-1.0-pro": "v1",
-}
-
-var GeminiVisionMaxImageNum = common.GetEnvOrDefault("GEMINI_VISION_MAX_IMAGE_NUM", 16)
-
-var NotifyLimitCount = common.GetEnvOrDefault("NOTIFY_LIMIT_COUNT", 2)
-var NotificationLimitDurationMinute = common.GetEnvOrDefault("NOTIFICATION_LIMIT_DURATION_MINUTE", 10)
+//var GeminiModelMap = map[string]string{
+//	"gemini-1.0-pro": "v1",
+//}

 func InitEnv() {
-	modelVersionMapStr := strings.TrimSpace(os.Getenv("GEMINI_MODEL_MAP"))
-	if modelVersionMapStr == "" {
-		return
-	}
-	for _, pair := range strings.Split(modelVersionMapStr, ",") {
-		parts := strings.Split(pair, ":")
-		if len(parts) == 2 {
-			GeminiModelMap[parts[0]] = parts[1]
-		} else {
-			common.SysError(fmt.Sprintf("invalid model version map: %s", pair))
-		}
-	}
-}
+	StreamingTimeout = common.GetEnvOrDefault("STREAMING_TIMEOUT", 60)
+	DifyDebug = common.GetEnvOrDefaultBool("DIFY_DEBUG", true)
+	MaxFileDownloadMB = common.GetEnvOrDefault("MAX_FILE_DOWNLOAD_MB", 20)
+	// ForceStreamOption 覆盖请求参数，强制返回usage信息
+	ForceStreamOption = common.GetEnvOrDefaultBool("FORCE_STREAM_OPTION", true)
+	GetMediaToken = common.GetEnvOrDefaultBool("GET_MEDIA_TOKEN", true)
+	GetMediaTokenNotStream = common.GetEnvOrDefaultBool("GET_MEDIA_TOKEN_NOT_STREAM", true)
+	UpdateTask = common.GetEnvOrDefaultBool("UPDATE_TASK", true)
+	AzureDefaultAPIVersion = common.GetEnvOrDefaultString("AZURE_DEFAULT_API_VERSION", "2024-12-01-preview")
+	GeminiVisionMaxImageNum = common.GetEnvOrDefault("GEMINI_VISION_MAX_IMAGE_NUM", 16)
+	NotifyLimitCount = common.GetEnvOrDefault("NOTIFY_LIMIT_COUNT", 2)
+	NotificationLimitDurationMinute = common.GetEnvOrDefault("NOTIFICATION_LIMIT_DURATION_MINUTE", 10)
+	// GenerateDefaultToken 是否生成初始令牌，默认关闭。
+	GenerateDefaultToken = common.GetEnvOrDefaultBool("GENERATE_DEFAULT_TOKEN", false)
+	// 是否启用错误日志
+	ErrorLogEnabled = common.GetEnvOrDefaultBool("ERROR_LOG_ENABLED", false)

-// GenerateDefaultToken 是否生成初始令牌，默认关闭。
-var GenerateDefaultToken = common.GetEnvOrDefaultBool("GENERATE_DEFAULT_TOKEN", false)
+	//modelVersionMapStr := strings.TrimSpace(os.Getenv("GEMINI_MODEL_MAP"))
+	//if modelVersionMapStr == "" {
+	//	return
+	//}
+	//for _, pair := range strings.Split(modelVersionMapStr, ",") {
+	//	parts := strings.Split(pair, ":")
+	//	if len(parts) == 2 {
+	//		GeminiModelMap[parts[0]] = parts[1]
+	//	} else {
+	//		common.SysError(fmt.Sprintf("invalid model version map: %s", pair))
+	//	}
+	//}
+}
--- a/constant/setup.go
+++ b/constant/setup.go
@@ -0,0 +1,3 @@
+package constant
+
+var Setup = false
--- a/constant/user_setting.go
+++ b/constant/user_setting.go
@@ -1,11 +1,12 @@
 package constant

 var (
-	UserSettingNotifyType            = "notify_type"             // QuotaWarningType 额度预警类型
-	UserSettingQuotaWarningThreshold = "quota_warning_threshold" // QuotaWarningThreshold 额度预警阈值
-	UserSettingWebhookUrl            = "webhook_url"             // WebhookUrl webhook地址
-	UserSettingWebhookSecret         = "webhook_secret"          // WebhookSecret webhook密钥
-	UserSettingNotificationEmail     = "notification_email"      // NotificationEmail 通知邮箱地址
+	UserSettingNotifyType            = "notify_type"                    // QuotaWarningType 额度预警类型
+	UserSettingQuotaWarningThreshold = "quota_warning_threshold"        // QuotaWarningThreshold 额度预警阈值
+	UserSettingWebhookUrl            = "webhook_url"                    // WebhookUrl webhook地址
+	UserSettingWebhookSecret         = "webhook_secret"                 // WebhookSecret webhook密钥
+	UserSettingNotificationEmail     = "notification_email"             // NotificationEmail 通知邮箱地址
+	UserAcceptUnsetRatioModel        = "accept_unset_model_ratio_model" // AcceptUnsetRatioModel 是否接受未设置价格的模型
 )

 var (
--- a/controller/channel-test.go
+++ b/controller/channel-test.go
@@ -17,6 +17,7 @@ import (
 	"one-api/relay"
 	relaycommon "one-api/relay/common"
 	"one-api/relay/constant"
+	"one-api/relay/helper"
 	"one-api/service"
 	"strconv"
 	"strings"
@@ -48,7 +49,7 @@ func testChannel(channel *model.Channel, testModel string) (err error, openAIErr
 	if strings.Contains(strings.ToLower(testModel), "embedding") ||
 		strings.HasPrefix(testModel, "m3e") || // m3e 系列模型
 		strings.Contains(testModel, "bge-") || // bge 系列模型
-		testModel == "text-embedding-v1" ||
+		strings.Contains(testModel, "embed") ||
 		channel.Type == common.ChannelTypeMokaAI { // 其他 embedding 模型
 		requestPath = "/v1/embeddings" // 修改请求路径
 	}
@@ -72,26 +73,29 @@ func testChannel(channel *model.Channel, testModel string) (err error, openAIErr
 		}
 	}

-	modelMapping := *channel.ModelMapping
-	if modelMapping != "" && modelMapping != "{}" {
-		modelMap := make(map[string]string)
-		err := json.Unmarshal([]byte(modelMapping), &modelMap)
-		if err != nil {
-			return err, service.OpenAIErrorWrapperLocal(err, "unmarshal_model_mapping_failed", http.StatusInternalServerError)
-		}
-		if modelMap[testModel] != "" {
-			testModel = modelMap[testModel]
-		}
+	cache, err := model.GetUserCache(1)
+	if err != nil {
+		return err, nil
 	}
+	cache.WriteContext(c)

 	c.Request.Header.Set("Authorization", "Bearer "+channel.Key)
 	c.Request.Header.Set("Content-Type", "application/json")
 	c.Set("channel", channel.Type)
 	c.Set("base_url", channel.GetBaseURL())
+	group, _ := model.GetUserGroup(1, false)
+	c.Set("group", group)

 	middleware.SetupContextForSelectedChannel(c, channel, testModel)

-	meta := relaycommon.GenRelayInfo(c)
+	info := relaycommon.GenRelayInfo(c)
+
+	err = helper.ModelMappedHelper(c, info)
+	if err != nil {
+		return err, nil
+	}
+	testModel = info.UpstreamModelName
+
 	apiType, _ := constant.ChannelType2APIType(channel.Type)
 	adaptor := relay.GetAdaptor(apiType)
 	if adaptor == nil {
@@ -99,12 +103,19 @@ func testChannel(channel *model.Channel, testModel string) (err error, openAIErr
 	}

 	request := buildTestRequest(testModel)
-	meta.UpstreamModelName = testModel
-	common.SysLog(fmt.Sprintf("testing channel %d with model %s , meta %v ", channel.Id, testModel, meta))
+	// 创建一个用于日志的 info 副本，移除 ApiKey
+	logInfo := *info
+	logInfo.ApiKey = ""
+	common.SysLog(fmt.Sprintf("testing channel %d with model %s , info %+v ", channel.Id, testModel, logInfo))

-	adaptor.Init(meta)
+	priceData, err := helper.ModelPriceHelper(c, info, 0, int(request.MaxTokens))
+	if err != nil {
+		return err, nil
+	}

-	convertedRequest, err := adaptor.ConvertRequest(c, meta, request)
+	adaptor.Init(info)
+
+	convertedRequest, err := adaptor.ConvertOpenAIRequest(c, info, request)
 	if err != nil {
 		return err, nil
 	}
@@ -114,7 +125,7 @@ func testChannel(channel *model.Channel, testModel string) (err error, openAIErr
 	}
 	requestBody := bytes.NewBuffer(jsonData)
 	c.Request.Body = io.NopCloser(requestBody)
-	resp, err := adaptor.DoRequest(c, meta, requestBody)
+	resp, err := adaptor.DoRequest(c, info, requestBody)
 	if err != nil {
 		return err, nil
 	}
@@ -122,11 +133,11 @@ func testChannel(channel *model.Channel, testModel string) (err error, openAIErr
 	if resp != nil {
 		httpResp = resp.(*http.Response)
 		if httpResp.StatusCode != http.StatusOK {
-			err := service.RelayErrorHandler(httpResp)
+			err := service.RelayErrorHandler(httpResp, true)
 			return fmt.Errorf("status code %d: %s", httpResp.StatusCode, err.Error.Message), err
 		}
 	}
-	usageA, respErr := adaptor.DoResponse(c, httpResp, meta)
+	usageA, respErr := adaptor.DoResponse(c, httpResp, info)
 	if respErr != nil {
 		return fmt.Errorf("%s", respErr.Error.Message), respErr
 	}
@@ -139,26 +150,25 @@ func testChannel(channel *model.Channel, testModel string) (err error, openAIErr
 	if err != nil {
 		return err, nil
 	}
-	modelPrice, usePrice := common.GetModelPrice(testModel, false)
-	modelRatio := common.GetModelRatio(testModel)
-	completionRatio := common.GetCompletionRatio(testModel)
-	ratio := modelRatio
+	info.PromptTokens = usage.PromptTokens
+
 	quota := 0
-	if !usePrice {
-		quota = usage.PromptTokens + int(math.Round(float64(usage.CompletionTokens)*completionRatio))
-		quota = int(math.Round(float64(quota) * ratio))
-		if ratio != 0 && quota <= 0 {
+	if !priceData.UsePrice {
+		quota = usage.PromptTokens + int(math.Round(float64(usage.CompletionTokens)*priceData.CompletionRatio))
+		quota = int(math.Round(float64(quota) * priceData.ModelRatio))
+		if priceData.ModelRatio != 0 && quota <= 0 {
 			quota = 1
 		}
 	} else {
-		quota = int(modelPrice * common.QuotaPerUnit)
+		quota = int(priceData.ModelPrice * common.QuotaPerUnit)
 	}
 	tok := time.Now()
 	milliseconds := tok.Sub(tik).Milliseconds()
 	consumedTime := float64(milliseconds) / 1000.0
-	other := service.GenerateTextOtherInfo(c, meta, modelRatio, 1, completionRatio, modelPrice)
-	model.RecordConsumeLog(c, 1, channel.Id, usage.PromptTokens, usage.CompletionTokens, testModel, "模型测试",
-		quota, "模型测试", 0, quota, int(consumedTime), false, "default", other)
+	other := service.GenerateTextOtherInfo(c, info, priceData.ModelRatio, priceData.GroupRatio, priceData.CompletionRatio,
+		usage.PromptTokensDetails.CachedTokens, priceData.CacheRatio, priceData.ModelPrice)
+	model.RecordConsumeLog(c, 1, channel.Id, usage.PromptTokens, usage.CompletionTokens, info.OriginModelName, "模型测试",
+		quota, "模型测试", 0, quota, int(consumedTime), false, info.Group, other)
 	common.SysLog(fmt.Sprintf("testing channel #%d, response: \n%s", channel.Id, string(respBody)))
 	return nil, nil
 }
@@ -170,17 +180,23 @@ func buildTestRequest(model string) *dto.GeneralOpenAIRequest {
 	}

 	// 先判断是否为 Embedding 模型
-	if strings.Contains(strings.ToLower(model), "embedding") ||
+	if strings.Contains(strings.ToLower(model), "embedding") || // 其他 embedding 模型
 		strings.HasPrefix(model, "m3e") || // m3e 系列模型
-		strings.Contains(model, "bge-") || // bge 系列模型
-		model == "text-embedding-v1" { // 其他 embedding 模型
+		strings.Contains(model, "bge-") {
+		testRequest.Model = model
 		// Embedding 请求
 		testRequest.Input = []string{"hello world"}
 		return testRequest
 	}
 	// 并非Embedding 模型
-	if strings.HasPrefix(model, "o1") || strings.HasPrefix(model, "o3") {
+	if strings.HasPrefix(model, "o") {
 		testRequest.MaxCompletionTokens = 10
+	} else if strings.Contains(model, "thinking") {
+		if !strings.Contains(model, "claude") {
+			testRequest.MaxTokens = 50
+		}
+	} else if strings.Contains(model, "gemini") {
+		testRequest.MaxTokens = 300
 	} else {
 		testRequest.MaxTokens = 10
 	}
--- a/controller/channel.go
+++ b/controller/channel.go
@@ -119,6 +119,9 @@ func FetchUpstreamModels(c *gin.Context) {
 		baseURL = channel.GetBaseURL()
 	}
 	url := fmt.Sprintf("%s/v1/models", baseURL)
+	if channel.Type == common.ChannelTypeGemini {
+		url = fmt.Sprintf("%s/v1beta/openai/models", baseURL)
+	}
 	body, err := GetResponseBody("GET", url, channel, GetAuthHeader(channel.Key))
 	if err != nil {
 		c.JSON(http.StatusOK, gin.H{
@@ -139,7 +142,11 @@ func FetchUpstreamModels(c *gin.Context) {

 	var ids []string
 	for _, model := range result.Data {
-		ids = append(ids, model.ID)
+		id := model.ID
+		if channel.Type == common.ChannelTypeGemini {
+			id = strings.TrimPrefix(id, "models/")
+		}
+		ids = append(ids, id)
 	}

 	c.JSON(http.StatusOK, gin.H{
--- a/controller/image.go
+++ b/controller/image.go
@@ -0,0 +1,9 @@
+package controller
+
+import (
+	"github.com/gin-gonic/gin"
+)
+
+func GetImage(c *gin.Context) {
+
+}
--- a/controller/log.go
+++ b/controller/log.go
@@ -196,7 +196,7 @@ func DeleteHistoryLogs(c *gin.Context) {
 		})
 		return
 	}
-	count, err := model.DeleteOldLog(targetTimestamp)
+	count, err := model.DeleteOldLog(c.Request.Context(), targetTimestamp, 100)
 	if err != nil {
 		c.JSON(http.StatusOK, gin.H{
 			"success": false,
--- a/controller/midjourney.go
+++ b/controller/midjourney.go
@@ -159,7 +159,7 @@ func UpdateMidjourneyTaskBulk() {
 					common.LogError(ctx, "UpdateMidjourneyTask task error: "+err.Error())
 				} else {
 					if shouldReturnQuota {
-						err = model.IncreaseUserQuota(task.UserId, task.Quota)
+						err = model.IncreaseUserQuota(task.UserId, task.Quota, false)
 						if err != nil {
 							common.LogError(ctx, "fail to increase user quota: "+err.Error())
 						}
--- a/controller/misc.go
+++ b/controller/misc.go
@@ -5,8 +5,11 @@ import (
 	"fmt"
 	"net/http"
 	"one-api/common"
+	"one-api/constant"
 	"one-api/model"
 	"one-api/setting"
+	"one-api/setting/operation_setting"
+	"one-api/setting/system_setting"
 	"strings"

 	"github.com/gin-gonic/gin"
@@ -33,40 +36,44 @@ func GetStatus(c *gin.Context) {
 		"success": true,
 		"message": "",
 		"data": gin.H{
-			"version":                  common.Version,
-			"start_time":               common.StartTime,
-			"email_verification":       common.EmailVerificationEnabled,
-			"github_oauth":             common.GitHubOAuthEnabled,
-			"github_client_id":         common.GitHubClientId,
-			"linuxdo_oauth":            common.LinuxDOOAuthEnabled,
-			"linuxdo_client_id":        common.LinuxDOClientId,
-			"telegram_oauth":           common.TelegramOAuthEnabled,
-			"telegram_bot_name":        common.TelegramBotName,
-			"system_name":              common.SystemName,
-			"logo":                     common.Logo,
-			"footer_html":              common.Footer,
-			"wechat_qrcode":            common.WeChatAccountQRCodeImageURL,
-			"wechat_login":             common.WeChatAuthEnabled,
-			"server_address":           setting.ServerAddress,
-			"price":                    setting.Price,
-			"min_topup":                setting.MinTopUp,
-			"turnstile_check":          common.TurnstileCheckEnabled,
-			"turnstile_site_key":       common.TurnstileSiteKey,
-			"top_up_link":              common.TopUpLink,
-			"chat_link":                common.ChatLink,
-			"chat_link2":               common.ChatLink2,
-			"quota_per_unit":           common.QuotaPerUnit,
-			"display_in_currency":      common.DisplayInCurrencyEnabled,
-			"enable_batch_update":      common.BatchUpdateEnabled,
-			"enable_drawing":           common.DrawingEnabled,
-			"enable_task":              common.TaskEnabled,
-			"enable_data_export":       common.DataExportEnabled,
-			"data_export_default_time": common.DataExportDefaultTime,
-			"default_collapse_sidebar": common.DefaultCollapseSidebar,
-			"enable_online_topup":      setting.PayAddress != "" && setting.EpayId != "" && setting.EpayKey != "",
-			"mj_notify_enabled":        setting.MjNotifyEnabled,
-			"chats":                    setting.Chats,
-			"demo_site_enabled":        setting.DemoSiteEnabled,
+			"version":                     common.Version,
+			"start_time":                  common.StartTime,
+			"email_verification":          common.EmailVerificationEnabled,
+			"github_oauth":                common.GitHubOAuthEnabled,
+			"github_client_id":            common.GitHubClientId,
+			"linuxdo_oauth":               common.LinuxDOOAuthEnabled,
+			"linuxdo_client_id":           common.LinuxDOClientId,
+			"telegram_oauth":              common.TelegramOAuthEnabled,
+			"telegram_bot_name":           common.TelegramBotName,
+			"system_name":                 common.SystemName,
+			"logo":                        common.Logo,
+			"footer_html":                 common.Footer,
+			"wechat_qrcode":               common.WeChatAccountQRCodeImageURL,
+			"wechat_login":                common.WeChatAuthEnabled,
+			"server_address":              setting.ServerAddress,
+			"price":                       setting.Price,
+			"min_topup":                   setting.MinTopUp,
+			"turnstile_check":             common.TurnstileCheckEnabled,
+			"turnstile_site_key":          common.TurnstileSiteKey,
+			"top_up_link":                 common.TopUpLink,
+			"docs_link":                   operation_setting.GetGeneralSetting().DocsLink,
+			"quota_per_unit":              common.QuotaPerUnit,
+			"display_in_currency":         common.DisplayInCurrencyEnabled,
+			"enable_batch_update":         common.BatchUpdateEnabled,
+			"enable_drawing":              common.DrawingEnabled,
+			"enable_task":                 common.TaskEnabled,
+			"enable_data_export":          common.DataExportEnabled,
+			"data_export_default_time":    common.DataExportDefaultTime,
+			"default_collapse_sidebar":    common.DefaultCollapseSidebar,
+			"enable_online_topup":         setting.PayAddress != "" && setting.EpayId != "" && setting.EpayKey != "",
+			"mj_notify_enabled":           setting.MjNotifyEnabled,
+			"chats":                       setting.Chats,
+			"demo_site_enabled":           operation_setting.DemoSiteEnabled,
+			"self_use_mode_enabled":       operation_setting.SelfUseModeEnabled,
+			"oidc_enabled":                system_setting.GetOIDCSettings().Enabled,
+			"oidc_client_id":              system_setting.GetOIDCSettings().ClientId,
+			"oidc_authorization_endpoint": system_setting.GetOIDCSettings().AuthorizationEndpoint,
+			"setup":                       constant.Setup,
 		},
 	})
 	return
--- a/controller/model.go
+++ b/controller/model.go
@@ -216,6 +216,13 @@ func DashboardListModels(c *gin.Context) {
 	})
 }

+func EnabledListModels(c *gin.Context) {
+	c.JSON(200, gin.H{
+		"success": true,
+		"data":    model.GetEnabledModels(),
+	})
+}
+
 func RetrieveModel(c *gin.Context) {
 	modelId := c.Param("model")
 	if aiModel, ok := openAIModelsMap[modelId]; ok {
--- a/controller/oidc.go
+++ b/controller/oidc.go
@@ -0,0 +1,240 @@
+package controller
+
+import (
+	"encoding/json"
+	"errors"
+	"fmt"
+	"net/http"
+	"net/url"
+	"one-api/common"
+	"one-api/model"
+	"one-api/setting"
+	"one-api/setting/system_setting"
+	"strconv"
+	"strings"
+	"time"
+
+	"github.com/gin-contrib/sessions"
+	"github.com/gin-gonic/gin"
+)
+
+type OidcResponse struct {
+	AccessToken  string `json:"access_token"`
+	IDToken      string `json:"id_token"`
+	RefreshToken string `json:"refresh_token"`
+	TokenType    string `json:"token_type"`
+	ExpiresIn    int    `json:"expires_in"`
+	Scope        string `json:"scope"`
+}
+
+type OidcUser struct {
+	OpenID            string `json:"sub"`
+	Email             string `json:"email"`
+	Name              string `json:"name"`
+	PreferredUsername string `json:"preferred_username"`
+	Picture           string `json:"picture"`
+}
+
+func getOidcUserInfoByCode(code string) (*OidcUser, error) {
+	if code == "" {
+		return nil, errors.New("无效的参数")
+	}
+
+	values := url.Values{}
+	values.Set("client_id", system_setting.GetOIDCSettings().ClientId)
+	values.Set("client_secret", system_setting.GetOIDCSettings().ClientSecret)
+	values.Set("code", code)
+	values.Set("grant_type", "authorization_code")
+	values.Set("redirect_uri", fmt.Sprintf("%s/oauth/oidc", setting.ServerAddress))
+	formData := values.Encode()
+	req, err := http.NewRequest("POST", system_setting.GetOIDCSettings().TokenEndpoint, strings.NewReader(formData))
+	if err != nil {
+		return nil, err
+	}
+	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
+	req.Header.Set("Accept", "application/json")
+	client := http.Client{
+		Timeout: 5 * time.Second,
+	}
+	res, err := client.Do(req)
+	if err != nil {
+		common.SysLog(err.Error())
+		return nil, errors.New("无法连接至 OIDC 服务器，请稍后重试！")
+	}
+	defer res.Body.Close()
+	var oidcResponse OidcResponse
+	err = json.NewDecoder(res.Body).Decode(&oidcResponse)
+	if err != nil {
+		return nil, err
+	}
+
+	if oidcResponse.AccessToken == "" {
+		common.SysError("OIDC 获取 Token 失败，请检查设置！")
+		return nil, errors.New("OIDC 获取 Token 失败，请检查设置！")
+	}
+
+	req, err = http.NewRequest("GET", system_setting.GetOIDCSettings().UserInfoEndpoint, nil)
+	if err != nil {
+		return nil, err
+	}
+	req.Header.Set("Authorization", "Bearer "+oidcResponse.AccessToken)
+	res2, err := client.Do(req)
+	if err != nil {
+		common.SysLog(err.Error())
+		return nil, errors.New("无法连接至 OIDC 服务器，请稍后重试！")
+	}
+	defer res2.Body.Close()
+	if res2.StatusCode != http.StatusOK {
+		common.SysError("OIDC 获取用户信息失败！请检查设置！")
+		return nil, errors.New("OIDC 获取用户信息失败！请检查设置！")
+	}
+
+	var oidcUser OidcUser
+	err = json.NewDecoder(res2.Body).Decode(&oidcUser)
+	if err != nil {
+		return nil, err
+	}
+	if oidcUser.OpenID == "" || oidcUser.Email == "" {
+		common.SysError("OIDC 获取用户信息为空！请检查设置！")
+		return nil, errors.New("OIDC 获取用户信息为空！请检查设置！")
+	}
+	return &oidcUser, nil
+}
+
+func OidcAuth(c *gin.Context) {
+	session := sessions.Default(c)
+	state := c.Query("state")
+	if state == "" || session.Get("oauth_state") == nil || state != session.Get("oauth_state").(string) {
+		c.JSON(http.StatusForbidden, gin.H{
+			"success": false,
+			"message": "state is empty or not same",
+		})
+		return
+	}
+	username := session.Get("username")
+	if username != nil {
+		OidcBind(c)
+		return
+	}
+	if !system_setting.GetOIDCSettings().Enabled {
+		c.JSON(http.StatusOK, gin.H{
+			"success": false,
+			"message": "管理员未开启通过 OIDC 登录以及注册",
+		})
+		return
+	}
+	code := c.Query("code")
+	oidcUser, err := getOidcUserInfoByCode(code)
+	if err != nil {
+		c.JSON(http.StatusOK, gin.H{
+			"success": false,
+			"message": err.Error(),
+		})
+		return
+	}
+	user := model.User{
+		OidcId: oidcUser.OpenID,
+	}
+	if model.IsOidcIdAlreadyTaken(user.OidcId) {
+		err := user.FillUserByOidcId()
+		if err != nil {
+			c.JSON(http.StatusOK, gin.H{
+				"success": false,
+				"message": err.Error(),
+			})
+			return
+		}
+	} else {
+		if common.RegisterEnabled {
+			user.Email = oidcUser.Email
+			if oidcUser.PreferredUsername != "" {
+				user.Username = oidcUser.PreferredUsername
+			} else {
+				user.Username = "oidc_" + strconv.Itoa(model.GetMaxUserId()+1)
+			}
+			if oidcUser.Name != "" {
+				user.DisplayName = oidcUser.Name
+			} else {
+				user.DisplayName = "OIDC User"
+			}
+			err := user.Insert(0)
+			if err != nil {
+				c.JSON(http.StatusOK, gin.H{
+					"success": false,
+					"message": err.Error(),
+				})
+				return
+			}
+		} else {
+			c.JSON(http.StatusOK, gin.H{
+				"success": false,
+				"message": "管理员关闭了新用户注册",
+			})
+			return
+		}
+	}
+
+	if user.Status != common.UserStatusEnabled {
+		c.JSON(http.StatusOK, gin.H{
+			"message": "用户已被封禁",
+			"success": false,
+		})
+		return
+	}
+	setupLogin(&user, c)
+}
+
+func OidcBind(c *gin.Context) {
+	if !system_setting.GetOIDCSettings().Enabled {
+		c.JSON(http.StatusOK, gin.H{
+			"success": false,
+			"message": "管理员未开启通过 OIDC 登录以及注册",
+		})
+		return
+	}
+	code := c.Query("code")
+	oidcUser, err := getOidcUserInfoByCode(code)
+	if err != nil {
+		c.JSON(http.StatusOK, gin.H{
+			"success": false,
+			"message": err.Error(),
+		})
+		return
+	}
+	user := model.User{
+		OidcId: oidcUser.OpenID,
+	}
+	if model.IsOidcIdAlreadyTaken(user.OidcId) {
+		c.JSON(http.StatusOK, gin.H{
+			"success": false,
+			"message": "该 OIDC 账户已被绑定",
+		})
+		return
+	}
+	session := sessions.Default(c)
+	id := session.Get("id")
+	// id := c.GetInt("id")  // critical bug!
+	user.Id = id.(int)
+	err = user.FillUserById()
+	if err != nil {
+		c.JSON(http.StatusOK, gin.H{
+			"success": false,
+			"message": err.Error(),
+		})
+		return
+	}
+	user.OidcId = oidcUser.OpenID
+	err = user.Update(false)
+	if err != nil {
+		c.JSON(http.StatusOK, gin.H{
+			"success": false,
+			"message": err.Error(),
+		})
+		return
+	}
+	c.JSON(http.StatusOK, gin.H{
+		"success": true,
+		"message": "bind",
+	})
+	return
+}
--- a/controller/option.go
+++ b/controller/option.go
@@ -6,6 +6,7 @@ import (
 	"one-api/common"
 	"one-api/model"
 	"one-api/setting"
+	"one-api/setting/system_setting"
 	"strings"

 	"github.com/gin-gonic/gin"
@@ -51,6 +52,14 @@ func UpdateOption(c *gin.Context) {
 			})
 			return
 		}
+	case "oidc.enabled":
+		if option.Value == "true" && system_setting.GetOIDCSettings().ClientId == "" {
+			c.JSON(http.StatusOK, gin.H{
+				"success": false,
+				"message": "无法启用 OIDC 登录，请先填入 OIDC Client Id 以及 OIDC Client Secret！",
+			})
+			return
+		}
 	case "LinuxDOOAuthEnabled":
 		if option.Value == "true" && common.LinuxDOClientId == "" {
 			c.JSON(http.StatusOK, gin.H{
@@ -81,6 +90,15 @@ func UpdateOption(c *gin.Context) {
 				"success": false,
 				"message": "无法启用 Turnstile 校验，请先填入 Turnstile 校验相关配置信息！",
 			})
+
+			return
+		}
+	case "TelegramOAuthEnabled":
+		if option.Value == "true" && common.TelegramBotToken == "" {
+			c.JSON(http.StatusOK, gin.H{
+				"success": false,
+				"message": "无法启用 Telegram OAuth，请先填入 Telegram Bot Token！",
+			})
 			return
 		}
 	case "GroupRatio":
@@ -92,6 +110,7 @@ func UpdateOption(c *gin.Context) {
 			})
 			return
 		}
+
 	}
 	err = model.UpdateOption(option.Key, option.Value)
 	if err != nil {
--- a/controller/pricing.go
+++ b/controller/pricing.go
@@ -2,9 +2,9 @@ package controller

 import (
 	"github.com/gin-gonic/gin"
-	"one-api/common"
 	"one-api/model"
 	"one-api/setting"
+	"one-api/setting/operation_setting"
 )

 func GetPricing(c *gin.Context) {
@@ -40,7 +40,7 @@ func GetPricing(c *gin.Context) {
 }

 func ResetModelRatio(c *gin.Context) {
-	defaultStr := common.DefaultModelRatio2JSONString()
+	defaultStr := operation_setting.DefaultModelRatio2JSONString()
 	err := model.UpdateOption("ModelRatio", defaultStr)
 	if err != nil {
 		c.JSON(200, gin.H{
@@ -49,7 +49,7 @@ func ResetModelRatio(c *gin.Context) {
 		})
 		return
 	}
-	err = common.UpdateModelRatioByJSONString(defaultStr)
+	err = operation_setting.UpdateModelRatioByJSONString(defaultStr)
 	if err != nil {
 		c.JSON(200, gin.H{
 			"success": false,
--- a/controller/relay.go
+++ b/controller/relay.go
@@ -4,26 +4,29 @@ import (
 	"bytes"
 	"errors"
 	"fmt"
-	"github.com/gin-gonic/gin"
-	"github.com/gorilla/websocket"
 	"io"
 	"log"
 	"net/http"
 	"one-api/common"
+	constant2 "one-api/constant"
 	"one-api/dto"
 	"one-api/middleware"
 	"one-api/model"
 	"one-api/relay"
 	"one-api/relay/constant"
 	relayconstant "one-api/relay/constant"
+	"one-api/relay/helper"
 	"one-api/service"
 	"strings"
+
+	"github.com/gin-gonic/gin"
+	"github.com/gorilla/websocket"
 )

 func relayHandler(c *gin.Context, relayMode int) *dto.OpenAIErrorWithStatusCode {
 	var err *dto.OpenAIErrorWithStatusCode
 	switch relayMode {
-	case relayconstant.RelayModeImagesGenerations:
+	case relayconstant.RelayModeImagesGenerations, relayconstant.RelayModeImagesEdits:
 		err = relay.ImageHelper(c)
 	case relayconstant.RelayModeAudioSpeech:
 		fallthrough
@@ -35,18 +38,31 @@ func relayHandler(c *gin.Context, relayMode int) *dto.OpenAIErrorWithStatusCode
 		err = relay.RerankHelper(c, relayMode)
 	case relayconstant.RelayModeEmbeddings:
 		err = relay.EmbeddingHelper(c)
+	case relayconstant.RelayModeResponses:
+		err = relay.ResponsesHelper(c)
 	default:
 		err = relay.TextHelper(c)
 	}
-	return err
-}

-func wsHandler(c *gin.Context, ws *websocket.Conn, relayMode int) *dto.OpenAIErrorWithStatusCode {
-	var err *dto.OpenAIErrorWithStatusCode
-	switch relayMode {
-	default:
-		err = relay.TextHelper(c)
+	if constant2.ErrorLogEnabled && err != nil {
+		// 保存错误日志到mysql中
+		userId := c.GetInt("id")
+		tokenName := c.GetString("token_name")
+		modelName := c.GetString("original_model")
+		tokenId := c.GetInt("token_id")
+		userGroup := c.GetString("group")
+		channelId := c.GetInt("channel_id")
+		other := make(map[string]interface{})
+		other["error_type"] = err.Error.Type
+		other["error_code"] = err.Error.Code
+		other["status_code"] = err.StatusCode
+		other["channel_id"] = channelId
+		other["channel_name"] = c.GetString("channel_name")
+		other["channel_type"] = c.GetInt("channel_type")
+
+		model.RecordErrorLog(c, userId, channelId, modelName, tokenName, err.Error.Message, tokenId, 0, false, userGroup, other)
 	}
+
 	return err
 }

@@ -85,6 +101,7 @@ func Relay(c *gin.Context) {

 	if openaiErr != nil {
 		if openaiErr.StatusCode == http.StatusTooManyRequests {
+			common.LogError(c, fmt.Sprintf("origin 429 error: %s", openaiErr.Error.Message))
 			openaiErr.Error.Message = "当前分组上游负载已饱和，请稍后再试"
 		}
 		openaiErr.Error.Message = common.MessageWithRequestId(openaiErr.Error.Message, requestId)
@@ -109,7 +126,7 @@ func WssRelay(c *gin.Context) {

 	if err != nil {
 		openaiErr := service.OpenAIErrorWrapper(err, "get_channel_failed", http.StatusInternalServerError)
-		service.WssError(c, ws, openaiErr.Error)
+		helper.WssError(c, ws, openaiErr.Error)
 		return
 	}

@@ -151,7 +168,51 @@ func WssRelay(c *gin.Context) {
 			openaiErr.Error.Message = "当前分组上游负载已饱和，请稍后再试"
 		}
 		openaiErr.Error.Message = common.MessageWithRequestId(openaiErr.Error.Message, requestId)
-		service.WssError(c, ws, openaiErr.Error)
+		helper.WssError(c, ws, openaiErr.Error)
+	}
+}
+
+func RelayClaude(c *gin.Context) {
+	//relayMode := constant.Path2RelayMode(c.Request.URL.Path)
+	requestId := c.GetString(common.RequestIdKey)
+	group := c.GetString("group")
+	originalModel := c.GetString("original_model")
+	var claudeErr *dto.ClaudeErrorWithStatusCode
+
+	for i := 0; i <= common.RetryTimes; i++ {
+		channel, err := getChannel(c, group, originalModel, i)
+		if err != nil {
+			common.LogError(c, err.Error())
+			claudeErr = service.ClaudeErrorWrapperLocal(err, "get_channel_failed", http.StatusInternalServerError)
+			break
+		}
+
+		claudeErr = claudeRequest(c, channel)
+
+		if claudeErr == nil {
+			return // 成功处理请求，直接返回
+		}
+
+		openaiErr := service.ClaudeErrorToOpenAIError(claudeErr)
+
+		go processChannelError(c, channel.Id, channel.Type, channel.Name, channel.GetAutoBan(), openaiErr)
+
+		if !shouldRetry(c, openaiErr, common.RetryTimes-i) {
+			break
+		}
+	}
+	useChannel := c.GetStringSlice("use_channel")
+	if len(useChannel) > 1 {
+		retryLogStr := fmt.Sprintf("重试：%s", strings.Trim(strings.Join(strings.Fields(fmt.Sprint(useChannel)), "->"), "[]"))
+		common.LogInfo(c, retryLogStr)
+	}
+
+	if claudeErr != nil {
+		claudeErr.Error.Message = common.MessageWithRequestId(claudeErr.Error.Message, requestId)
+		c.JSON(claudeErr.StatusCode, gin.H{
+			"type":  "error",
+			"error": claudeErr.Error,
+		})
 	}
 }

@@ -169,6 +230,13 @@ func wssRequest(c *gin.Context, ws *websocket.Conn, relayMode int, channel *mode
 	return relay.WssHelper(c, ws)
 }

+func claudeRequest(c *gin.Context, channel *model.Channel) *dto.ClaudeErrorWithStatusCode {
+	addUsedChannel(c, channel.Id)
+	requestBody, _ := common.GetRequestBody(c)
+	c.Request.Body = io.NopCloser(bytes.NewBuffer(requestBody))
+	return relay.ClaudeHelper(c)
+}
+
 func addUsedChannel(c *gin.Context, channelId int) {
 	useChannel := c.GetStringSlice("use_channel")
 	useChannel = append(useChannel, fmt.Sprintf("%d", channelId))
--- a/controller/setup.go
+++ b/controller/setup.go
@@ -0,0 +1,173 @@
+package controller
+
+import (
+	"github.com/gin-gonic/gin"
+	"one-api/common"
+	"one-api/constant"
+	"one-api/model"
+	"one-api/setting/operation_setting"
+	"time"
+)
+
+type Setup struct {
+	Status       bool   `json:"status"`        // mirrors constant.Setup
+	RootInit     bool   `json:"root_init"`     // model.RootUserExists(); GetSetup fills it only while setup is pending
+	DatabaseType string `json:"database_type"` // "mysql", "postgres" or "sqlite"; GetSetup fills it only while setup is pending
+}
+
+type SetupRequest struct {
+	Username           string `json:"username"`           // root account name; used only when no root user exists yet
+	Password           string `json:"password"`           // PostSetup requires len(Password) >= 8 when creating the root user
+	ConfirmPassword    string `json:"confirmPassword"`    // must equal Password
+	SelfUseModeEnabled bool   `json:"SelfUseModeEnabled"` // saved under the "SelfUseModeEnabled" option
+	DemoSiteEnabled    bool   `json:"DemoSiteEnabled"`    // saved under the "DemoSiteEnabled" option
+}
+
+func GetSetup(c *gin.Context) {
+	// GetSetup reports the setup state. Once setup is complete only Status is
+	// populated; the root-user and database details are filled in solely while
+	// the system is still waiting to be initialized.
+	info := Setup{
+		Status: constant.Setup,
+	}
+	if !info.Status {
+		info.RootInit = model.RootUserExists()
+		// Cases are ordered so that SQLite wins over PostgreSQL, which wins over
+		// MySQL, should more than one flag be set.
+		switch {
+		case common.UsingSQLite:
+			info.DatabaseType = "sqlite"
+		case common.UsingPostgreSQL:
+			info.DatabaseType = "postgres"
+		case common.UsingMySQL:
+			info.DatabaseType = "mysql"
+		}
+	}
+
+	c.JSON(200, gin.H{
+		"success": true,
+		"data":    info,
+	})
+}
+
+func PostSetup(c *gin.Context) {
+	// PostSetup performs first-run initialization; reject the call once setup is complete
+	if constant.Setup {
+		c.JSON(400, gin.H{
+			"success": false,
+			"message": "系统已经初始化完成",
+		})
+		return
+	}
+
+	// An existing root user means the account-creation step below is skipped
+	rootExists := model.RootUserExists()
+
+	var req SetupRequest
+	err := c.ShouldBindJSON(&req)
+	if err != nil {
+		c.JSON(400, gin.H{
+			"success": false,
+			"message": "请求参数有误",
+		})
+		return
+	}
+
+	// If root doesn't exist, validate and create admin account
+	if !rootExists {
+		// Both password fields must match
+		if req.Password != req.ConfirmPassword {
+			c.JSON(400, gin.H{
+				"success": false,
+				"message": "两次输入的密码不一致",
+			})
+			return
+		}
+
+		if len(req.Password) < 8 {
+			c.JSON(400, gin.H{
+				"success": false,
+				"message": "密码长度至少为8个字符",
+			})
+			return
+		}
+
+		// Create root user; only the password hash is stored
+		hashedPassword, err := common.Password2Hash(req.Password)
+		if err != nil {
+			c.JSON(500, gin.H{
+				"success": false,
+				"message": "系统错误: " + err.Error(),
+			})
+			return
+		}
+		rootUser := model.User{
+			Username:    req.Username,
+			Password:    hashedPassword,
+			Role:        common.RoleRootUser,
+			Status:      common.UserStatusEnabled,
+			DisplayName: "Root User",
+			AccessToken: nil,
+			Quota:       100000000,
+		}
+		err = model.DB.Create(&rootUser).Error
+		if err != nil {
+			c.JSON(500, gin.H{
+				"success": false,
+				"message": "创建管理员账号失败: " + err.Error(),
+			})
+			return
+		}
+	}
+
+	// Apply the operation modes to the running process
+	operation_setting.SelfUseModeEnabled = req.SelfUseModeEnabled
+	operation_setting.DemoSiteEnabled = req.DemoSiteEnabled
+
+	// Save operation modes to database for persistence
+	err = model.UpdateOption("SelfUseModeEnabled", boolToString(req.SelfUseModeEnabled))
+	if err != nil {
+		c.JSON(500, gin.H{
+			"success": false,
+			"message": "保存自用模式设置失败: " + err.Error(),
+		})
+		return
+	}
+
+	err = model.UpdateOption("DemoSiteEnabled", boolToString(req.DemoSiteEnabled))
+	if err != nil {
+		c.JSON(500, gin.H{
+			"success": false,
+			"message": "保存演示站点模式设置失败: " + err.Error(),
+		})
+		return
+	}
+
+	// Persist the setup record before flipping the in-memory flag
+	setup := model.Setup{
+		Version:       common.Version,
+		InitializedAt: time.Now().Unix(),
+	}
+	err = model.DB.Create(&setup).Error
+	if err != nil {
+		c.JSON(500, gin.H{
+			"success": false,
+			"message": "系统初始化失败: " + err.Error(),
+		})
+		return
+	}
+	// Only now mark setup as done, so a failed insert leaves the request retryable
+	constant.Setup = true
+
+	c.JSON(200, gin.H{
+		"success": true,
+		"message": "系统初始化成功",
+	})
+}
+
+func boolToString(b bool) string {
+	if !b {
+		return "false"
+	}
+	return "true"
+}
--- a/controller/task.go
+++ b/controller/task.go
@@ -159,7 +159,7 @@ func updateSunoTaskAll(ctx context.Context, channelId int, taskIds []string, tas
 			} else {
 				quota := task.Quota
 				if quota != 0 {
-					err = model.IncreaseUserQuota(task.UserId, quota)
+					err = model.IncreaseUserQuota(task.UserId, quota, false)
 					if err != nil {
 						common.LogError(ctx, "fail to increase user quota: "+err.Error())
 					}
--- a/controller/topup.go
+++ b/controller/topup.go
@@ -2,9 +2,6 @@ package controller

 import (
 	"fmt"
-	"github.com/Calcium-Ion/go-epay/epay"
-	"github.com/gin-gonic/gin"
-	"github.com/samber/lo"
 	"log"
 	"net/url"
 	"one-api/common"
@@ -14,16 +11,21 @@ import (
 	"strconv"
 	"sync"
 	"time"
+
+	"github.com/Calcium-Ion/go-epay/epay"
+	"github.com/gin-gonic/gin"
+	"github.com/samber/lo"
+	"github.com/shopspring/decimal"
 )

 type EpayRequest struct {
-	Amount        int    `json:"amount"`
+	Amount        int64  `json:"amount"`
 	PaymentMethod string `json:"payment_method"`
 	TopUpCode     string `json:"top_up_code"`
 }

 type AmountRequest struct {
-	Amount    int    `json:"amount"`
+	Amount    int64  `json:"amount"`
 	TopUpCode string `json:"top_up_code"`
 }

@@ -41,25 +43,35 @@ func GetEpayClient() *epay.Client {
 	return withUrl
 }

-func getPayMoney(amount float64, group string) float64 {
+func getPayMoney(amount int64, group string) float64 {
+	dAmount := decimal.NewFromInt(amount)
+
 	if !common.DisplayInCurrencyEnabled {
-		amount = amount / common.QuotaPerUnit
+		dQuotaPerUnit := decimal.NewFromFloat(common.QuotaPerUnit)
+		dAmount = dAmount.Div(dQuotaPerUnit)
 	}
-	// 别问为什么用float64，问就是这么点钱没必要
+
 	topupGroupRatio := common.GetTopupGroupRatio(group)
 	if topupGroupRatio == 0 {
 		topupGroupRatio = 1
 	}
-	payMoney := amount * setting.Price * topupGroupRatio
-	return payMoney
+
+	dTopupGroupRatio := decimal.NewFromFloat(topupGroupRatio)
+	dPrice := decimal.NewFromFloat(setting.Price)
+
+	payMoney := dAmount.Mul(dPrice).Mul(dTopupGroupRatio)
+
+	return payMoney.InexactFloat64()
 }

-func getMinTopup() int {
+func getMinTopup() int64 {
 	minTopup := setting.MinTopUp
 	if !common.DisplayInCurrencyEnabled {
-		minTopup = minTopup * int(common.QuotaPerUnit)
+		dMinTopup := decimal.NewFromInt(int64(minTopup))
+		dQuotaPerUnit := decimal.NewFromFloat(common.QuotaPerUnit)
+		minTopup = int(dMinTopup.Mul(dQuotaPerUnit).IntPart())
 	}
-	return minTopup
+	return int64(minTopup)
 }

 func RequestEpay(c *gin.Context) {
@@ -80,7 +92,7 @@ func RequestEpay(c *gin.Context) {
 		c.JSON(200, gin.H{"message": "error", "data": "获取用户分组失败"})
 		return
 	}
-	payMoney := getPayMoney(float64(req.Amount), group)
+	payMoney := getPayMoney(req.Amount, group)
 	if payMoney < 0.01 {
 		c.JSON(200, gin.H{"message": "error", "data": "充值金额过低"})
 		return
@@ -118,7 +130,9 @@ func RequestEpay(c *gin.Context) {
 	}
 	amount := req.Amount
 	if !common.DisplayInCurrencyEnabled {
-		amount = amount / int(common.QuotaPerUnit)
+		dAmount := decimal.NewFromInt(int64(amount))
+		dQuotaPerUnit := decimal.NewFromFloat(common.QuotaPerUnit)
+		amount = dAmount.Div(dQuotaPerUnit).IntPart()
 	}
 	topUp := &model.TopUp{
 		UserId:     id,
@@ -210,13 +224,16 @@ func EpayNotify(c *gin.Context) {
 			}
 			//user, _ := model.GetUserById(topUp.UserId, false)
 			//user.Quota += topUp.Amount * 500000
-			err = model.IncreaseUserQuota(topUp.UserId, topUp.Amount*int(common.QuotaPerUnit))
+			dAmount := decimal.NewFromInt(int64(topUp.Amount))
+			dQuotaPerUnit := decimal.NewFromFloat(common.QuotaPerUnit)
+			quotaToAdd := int(dAmount.Mul(dQuotaPerUnit).IntPart())
+			err = model.IncreaseUserQuota(topUp.UserId, quotaToAdd, true)
 			if err != nil {
 				log.Printf("易支付回调更新用户失败: %v", topUp)
 				return
 			}
 			log.Printf("易支付回调更新用户成功 %v", topUp)
-			model.RecordLog(topUp.UserId, model.LogTypeTopup, fmt.Sprintf("使用在线充值成功，充值金额: %v，支付金额：%f", common.LogQuota(topUp.Amount*int(common.QuotaPerUnit)), topUp.Money))
+			model.RecordLog(topUp.UserId, model.LogTypeTopup, fmt.Sprintf("使用在线充值成功，充值金额: %v，支付金额：%f", common.LogQuota(quotaToAdd), topUp.Money))
 		}
 	} else {
 		log.Printf("易支付异常回调: %v", verifyInfo)
@@ -241,7 +258,7 @@ func RequestAmount(c *gin.Context) {
 		c.JSON(200, gin.H{"message": "error", "data": "获取用户分组失败"})
 		return
 	}
-	payMoney := getPayMoney(float64(req.Amount), group)
+	payMoney := getPayMoney(req.Amount, group)
 	if payMoney <= 0.01 {
 		c.JSON(200, gin.H{"message": "error", "data": "充值金额过低"})
 		return
--- a/controller/user.go
+++ b/controller/user.go
@@ -592,7 +592,14 @@ func UpdateSelf(c *gin.Context) {
 		user.Password = "" // rollback to what it should be
 		cleanUser.Password = ""
 	}
-	updatePassword := user.Password != ""
+	updatePassword, err := checkUpdatePassword(user.OriginalPassword, user.Password, cleanUser.Id)
+	if err != nil {
+		c.JSON(http.StatusOK, gin.H{
+			"success": false,
+			"message": err.Error(),
+		})
+		return
+	}
 	if err := cleanUser.Update(updatePassword); err != nil {
 		c.JSON(http.StatusOK, gin.H{
 			"success": false,
@@ -608,6 +615,23 @@ func UpdateSelf(c *gin.Context) {
 	return
 }

+func checkUpdatePassword(originalPassword string, newPassword string, userId int) (updatePassword bool, err error) {
+	// Load the current user record to compare against the supplied original password.
+	currentUser, err := model.GetUserById(userId, true)
+	if err != nil {
+		return false, err
+	}
+	// Reject the update when the original password does not match.
+	if !common.ValidatePasswordAndHash(originalPassword, currentUser.Password) {
+		return false, fmt.Errorf("原密码错误")
+	}
+	// An empty new password means the password itself is left unchanged.
+	if newPassword == "" {
+		return false, nil
+	}
+	return true, nil
+}
+
 func DeleteUser(c *gin.Context) {
 	id, err := strconv.Atoi(c.Param("id"))
 	if err != nil {
@@ -913,11 +937,12 @@ func TopUp(c *gin.Context) {
 }

 type UpdateUserSettingRequest struct {
-	QuotaWarningType      string `json:"notify_type"`
-	QuotaWarningThreshold int    `json:"quota_warning_threshold"`
-	WebhookUrl            string `json:"webhook_url,omitempty"`
-	WebhookSecret         string `json:"webhook_secret,omitempty"`
-	NotificationEmail     string `json:"notification_email,omitempty"`
+	QuotaWarningType           string  `json:"notify_type"`
+	QuotaWarningThreshold      float64 `json:"quota_warning_threshold"`
+	WebhookUrl                 string  `json:"webhook_url,omitempty"`
+	WebhookSecret              string  `json:"webhook_secret,omitempty"`
+	NotificationEmail          string  `json:"notification_email,omitempty"`
+	AcceptUnsetModelRatioModel bool    `json:"accept_unset_model_ratio_model"`
 }

 func UpdateUserSetting(c *gin.Context) {
@@ -993,6 +1018,7 @@ func UpdateUserSetting(c *gin.Context) {
 	settings := map[string]interface{}{
 		constant.UserSettingNotifyType:            req.QuotaWarningType,
 		constant.UserSettingQuotaWarningThreshold: req.QuotaWarningThreshold,
+		"accept_unset_model_ratio_model":          req.AcceptUnsetModelRatioModel,
 	}

 	// 如果是webhook类型,添加webhook相关设置
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -15,6 +15,8 @@ services:
      - SQL_DSN=root:123456@tcp(mysql:3306)/new-api  # Point to the mysql service
      - REDIS_CONN_STRING=redis://redis
      - TZ=Asia/Shanghai
+      - ERROR_LOG_ENABLED=true # 是否启用错误日志记录
+    #      - TIKTOKEN_CACHE_DIR=./tiktoken_cache  # 如果需要使用tiktoken_cache，请取消注释
    #      - SESSION_SECRET=random_string  # 多机部署时设置，必须修改这个随机字符串！！！！！！！
    #      - NODE_TYPE=slave  # Uncomment for slave node in multi-node deployment
    #      - SYNC_FREQUENCY=60  # Uncomment if regular database syncing is needed
--- a/docs/channel/other_setting.md
+++ b/docs/channel/other_setting.md
@@ -10,6 +10,10 @@
    - 用于配置网络代理
    - 类型为字符串，填写代理地址（例如 socks5 协议的代理地址）

+3. thinking_to_content
+   - 用于标识是否将思考内容`reasoning_content`转换为`<think>`标签拼接到内容中返回
+   - 类型为布尔值，设置为 true 时启用思考内容转换
+
 --------------------------------------------------------------

 ## JSON 格式示例
@@ -19,10 +23,11 @@
 ```json
 {
    "force_format": true,
+   "thinking_to_content": true,
    "proxy": "socks5://xxxxxxx"
 }
 ```

 --------------------------------------------------------------

-通过调整上述 JSON 配置中的值，可以灵活控制渠道的额外行为，比如是否进行格式化以及使用特定的网络代理。
+通过调整上述 JSON 配置中的值，可以灵活控制渠道的额外行为，比如是否进行格式化以及使用特定的网络代理。
--- a/docs/installation/BT.md
+++ b/docs/installation/BT.md
@@ -1,3 +1,3 @@
-密钥为环境变量SESSION_SECRET
-
-![8285bba413e770fe9620f1bf9b40d44e](https://github.com/user-attachments/assets/7a6fc03e-c457-45e4-b8f9-184508fc26b0)
+密钥为环境变量SESSION_SECRET
+
+![8285bba413e770fe9620f1bf9b40d44e](https://github.com/user-attachments/assets/7a6fc03e-c457-45e4-b8f9-184508fc26b0)
--- a/docs/models/Midjourney.md
+++ b/docs/models/Midjourney.md
--- a/docs/models/Rerank.md
+++ b/docs/models/Rerank.md
@@ -13,7 +13,7 @@ Request:

 ```json
 {
-  "model": "rerank-multilingual-v3.0",
+  "model": "jina-reranker-v2-base-multilingual",
  "query": "What is the capital of the United States?",
  "top_n": 3,
  "documents": [
--- a/docs/models/Suno.md
+++ b/docs/models/Suno.md
--- a/dto/claude.go
+++ b/dto/claude.go
@@ -0,0 +1,218 @@
+package dto
+
+import "encoding/json"
+
+type ClaudeMetadata struct {
+	UserId string `json:"user_id"`
+}
+
+type ClaudeMediaMessage struct {
+	Type        string               `json:"type,omitempty"`
+	Text        *string              `json:"text,omitempty"`
+	Model       string               `json:"model,omitempty"`
+	Source      *ClaudeMessageSource `json:"source,omitempty"`
+	Usage       *ClaudeUsage         `json:"usage,omitempty"`
+	StopReason  *string              `json:"stop_reason,omitempty"`
+	PartialJson *string              `json:"partial_json,omitempty"`
+	Role        string               `json:"role,omitempty"`
+	Thinking    string               `json:"thinking,omitempty"`
+	Signature   string               `json:"signature,omitempty"`
+	Delta       string               `json:"delta,omitempty"`
+	// tool_calls
+	Id        string          `json:"id,omitempty"`
+	Name      string          `json:"name,omitempty"`
+	Input     any             `json:"input,omitempty"`
+	Content   json.RawMessage `json:"content,omitempty"`
+	ToolUseId string          `json:"tool_use_id,omitempty"`
+}
+
+func (c *ClaudeMediaMessage) SetText(s string) {
+	c.Text = &s
+}
+
+func (c *ClaudeMediaMessage) GetText() string {
+	if c.Text != nil {
+		return *c.Text
+	}
+	return "" // unset Text reads as the empty string
+}
+
+func (c *ClaudeMediaMessage) IsStringContent() bool {
+	// Content counts as a string when it decodes cleanly into a Go string.
+	return json.Unmarshal(c.Content, new(string)) == nil
+}
+
+func (c *ClaudeMediaMessage) GetStringContent() string {
+	// Decode Content as a JSON string; anything that fails to decode yields "".
+	var text string
+	if json.Unmarshal(c.Content, &text) != nil {
+		return ""
+	}
+	return text
+}
+
+func (c *ClaudeMediaMessage) GetJsonRowString() string {
+	jsonContent, _ := json.Marshal(c)
+	return string(jsonContent)
+}
+
+func (c *ClaudeMediaMessage) SetContent(content any) {
+	jsonContent, _ := json.Marshal(content)
+	c.Content = jsonContent
+}
+
+func (c *ClaudeMediaMessage) ParseMediaContent() []ClaudeMediaMessage {
+	// Decode Content as a list of blocks; on failure fall back to an empty, non-nil slice.
+	var blocks []ClaudeMediaMessage
+	if json.Unmarshal(c.Content, &blocks) != nil {
+		return make([]ClaudeMediaMessage, 0)
+	}
+	return blocks
+}
+
+type ClaudeMessageSource struct {
+	Type      string `json:"type"`
+	MediaType string `json:"media_type,omitempty"`
+	Data      any    `json:"data,omitempty"`
+	Url       string `json:"url,omitempty"`
+}
+
+type ClaudeMessage struct {
+	Role    string `json:"role"`
+	Content any    `json:"content"`
+}
+
+func (c *ClaudeMessage) IsStringContent() bool {
+	_, ok := c.Content.(string)
+	return ok
+}
+
+func (c *ClaudeMessage) GetStringContent() string {
+	// A failed comma-ok assertion leaves text at its zero value, so any
+	// non-string Content (including nil) produces "".
+	text, _ := c.Content.(string)
+	return text
+}
+
+func (c *ClaudeMessage) SetStringContent(content string) {
+	c.Content = content
+}
+
+func (c *ClaudeMessage) ParseContent() ([]ClaudeMediaMessage, error) {
+	// Round-trip Content through JSON to turn the untyped value into []ClaudeMediaMessage.
+	// Content that does not decode as a list (e.g. a plain string) yields an empty slice and the decode error.
+	jsonContent, _ := json.Marshal(c.Content)
+	var contentList []ClaudeMediaMessage
+	err := json.Unmarshal(jsonContent, &contentList)
+	if err != nil {
+		return make([]ClaudeMediaMessage, 0), err
+	}
+	return contentList, nil
+}
+
+type Tool struct {
+	Name        string                 `json:"name"`
+	Description string                 `json:"description,omitempty"`
+	InputSchema map[string]interface{} `json:"input_schema"`
+}
+
+type InputSchema struct {
+	Type       string `json:"type"`
+	Properties any    `json:"properties,omitempty"`
+	Required   any    `json:"required,omitempty"`
+}
+
+type ClaudeRequest struct {
+	Model             string          `json:"model"`
+	Prompt            string          `json:"prompt,omitempty"`
+	System            any             `json:"system,omitempty"`
+	Messages          []ClaudeMessage `json:"messages,omitempty"`
+	MaxTokens         uint            `json:"max_tokens,omitempty"`
+	MaxTokensToSample uint            `json:"max_tokens_to_sample,omitempty"`
+	StopSequences     []string        `json:"stop_sequences,omitempty"`
+	Temperature       *float64        `json:"temperature,omitempty"`
+	TopP              float64         `json:"top_p,omitempty"`
+	TopK              int             `json:"top_k,omitempty"`
+	//ClaudeMetadata    `json:"metadata,omitempty"`
+	Stream     bool      `json:"stream,omitempty"`
+	Tools      any       `json:"tools,omitempty"`
+	ToolChoice any       `json:"tool_choice,omitempty"`
+	Thinking   *Thinking `json:"thinking,omitempty"`
+}
+
+type Thinking struct {
+	Type         string `json:"type"`
+	BudgetTokens int    `json:"budget_tokens"`
+}
+
+func (c *ClaudeRequest) IsStringSystem() bool {
+	_, ok := c.System.(string)
+	return ok
+}
+
+func (c *ClaudeRequest) GetStringSystem() string {
+	// A failed comma-ok assertion leaves prompt at its zero value, so any
+	// non-string System (including nil) produces "".
+	prompt, _ := c.System.(string)
+	return prompt
+}
+
+func (c *ClaudeRequest) SetStringSystem(system string) {
+	c.System = system
+}
+
+func (c *ClaudeRequest) ParseSystem() []ClaudeMediaMessage {
+	// Round-trip System through JSON to turn the untyped value into []ClaudeMediaMessage.
+	// A System value that does not decode as a list (e.g. a plain string) yields an empty, non-nil slice.
+	jsonContent, _ := json.Marshal(c.System)
+	var contentList []ClaudeMediaMessage
+	if err := json.Unmarshal(jsonContent, &contentList); err == nil {
+		return contentList
+	}
+	return make([]ClaudeMediaMessage, 0)
+}
+
+type ClaudeError struct {
+	Type    string `json:"type,omitempty"`
+	Message string `json:"message,omitempty"`
+}
+
+type ClaudeErrorWithStatusCode struct {
+	Error      ClaudeError `json:"error"`
+	StatusCode int         `json:"status_code"`
+	LocalError bool
+}
+
+type ClaudeResponse struct {
+	Id           string               `json:"id,omitempty"`
+	Type         string               `json:"type"`
+	Role         string               `json:"role,omitempty"`
+	Content      []ClaudeMediaMessage `json:"content,omitempty"`
+	Completion   string               `json:"completion,omitempty"`
+	StopReason   string               `json:"stop_reason,omitempty"`
+	Model        string               `json:"model,omitempty"`
+	Error        *ClaudeError         `json:"error,omitempty"`
+	Usage        *ClaudeUsage         `json:"usage,omitempty"`
+	Index        *int                 `json:"index,omitempty"`
+	ContentBlock *ClaudeMediaMessage  `json:"content_block,omitempty"`
+	Delta        *ClaudeMediaMessage  `json:"delta,omitempty"`
+	Message      *ClaudeMediaMessage  `json:"message,omitempty"`
+}
+
+// SetIndex stores a pointer to a copy of i in the response's Index field.
+func (c *ClaudeResponse) SetIndex(i int) {
+	c.Index = &i
+}
+
+// GetIndex returns the stored Index, or 0 when none has been set.
+func (c *ClaudeResponse) GetIndex() int {
+	if c.Index != nil {
+		return *c.Index
+	}
+	return 0
+}
+
+type ClaudeUsage struct {
+	InputTokens              int `json:"input_tokens"`
+	CacheCreationInputTokens int `json:"cache_creation_input_tokens"`
+	CacheReadInputTokens     int `json:"cache_read_input_tokens"`
+	OutputTokens             int `json:"output_tokens"`
+}
--- a/dto/dalle.go
+++ b/dto/dalle.go
@@ -1,14 +1,17 @@
 package dto

+import "encoding/json"
+
 type ImageRequest struct {
-	Model          string `json:"model"`
-	Prompt         string `json:"prompt" binding:"required"`
-	N              int    `json:"n,omitempty"`
-	Size           string `json:"size,omitempty"`
-	Quality        string `json:"quality,omitempty"`
-	ResponseFormat string `json:"response_format,omitempty"`
-	Style          string `json:"style,omitempty"`
-	User           string `json:"user,omitempty"`
+	Model          string          `json:"model"`
+	Prompt         string          `json:"prompt" binding:"required"`
+	N              int             `json:"n,omitempty"`
+	Size           string          `json:"size,omitempty"`
+	Quality        string          `json:"quality,omitempty"`
+	ResponseFormat string          `json:"response_format,omitempty"`
+	Style          string          `json:"style,omitempty"`
+	User           string          `json:"user,omitempty"`
+	ExtraFields    json.RawMessage `json:"extra_fields,omitempty"`
 }

 type ImageResponse struct {
--- a/dto/openai_request.go
+++ b/dto/openai_request.go
@@ -1,6 +1,9 @@
 package dto

-import "encoding/json"
+import (
+	"encoding/json"
+	"strings"
+)

 type ResponseFormat struct {
 	Type       string            `json:"type,omitempty"`
@@ -15,49 +18,54 @@ type FormatJsonSchema struct {
 }

 type GeneralOpenAIRequest struct {
-	Model               string          `json:"model,omitempty"`
-	Messages            []Message       `json:"messages,omitempty"`
-	Prompt              any             `json:"prompt,omitempty"`
-	Prefix              any             `json:"prefix,omitempty"`
-	Suffix              any             `json:"suffix,omitempty"`
-	Stream              bool            `json:"stream,omitempty"`
-	StreamOptions       *StreamOptions  `json:"stream_options,omitempty"`
-	MaxTokens           uint            `json:"max_tokens,omitempty"`
-	MaxCompletionTokens uint            `json:"max_completion_tokens,omitempty"`
-	ReasoningEffort     string          `json:"reasoning_effort,omitempty"`
-	Temperature         *float64        `json:"temperature,omitempty"`
-	TopP                float64         `json:"top_p,omitempty"`
-	TopK                int             `json:"top_k,omitempty"`
-	Stop                any             `json:"stop,omitempty"`
-	N                   int             `json:"n,omitempty"`
-	Input               any             `json:"input,omitempty"`
-	Instruction         string          `json:"instruction,omitempty"`
-	Size                string          `json:"size,omitempty"`
-	Functions           any             `json:"functions,omitempty"`
-	FrequencyPenalty    float64         `json:"frequency_penalty,omitempty"`
-	PresencePenalty     float64         `json:"presence_penalty,omitempty"`
-	ResponseFormat      *ResponseFormat `json:"response_format,omitempty"`
-	EncodingFormat      any             `json:"encoding_format,omitempty"`
-	Seed                float64         `json:"seed,omitempty"`
-	Tools               []ToolCall      `json:"tools,omitempty"`
-	ToolChoice          any             `json:"tool_choice,omitempty"`
-	User                string          `json:"user,omitempty"`
-	LogProbs            bool            `json:"logprobs,omitempty"`
-	TopLogProbs         int             `json:"top_logprobs,omitempty"`
-	Dimensions          int             `json:"dimensions,omitempty"`
-	Modalities          any             `json:"modalities,omitempty"`
-	Audio               any             `json:"audio,omitempty"`
+	Model               string         `json:"model,omitempty"`
+	Messages            []Message      `json:"messages,omitempty"`
+	Prompt              any            `json:"prompt,omitempty"`
+	Prefix              any            `json:"prefix,omitempty"`
+	Suffix              any            `json:"suffix,omitempty"`
+	Stream              bool           `json:"stream,omitempty"`
+	StreamOptions       *StreamOptions `json:"stream_options,omitempty"`
+	MaxTokens           uint           `json:"max_tokens,omitempty"`
+	MaxCompletionTokens uint           `json:"max_completion_tokens,omitempty"`
+	ReasoningEffort     string         `json:"reasoning_effort,omitempty"`
+	//Reasoning           json.RawMessage   `json:"reasoning,omitempty"`
+	Temperature      *float64          `json:"temperature,omitempty"`
+	TopP             float64           `json:"top_p,omitempty"`
+	TopK             int               `json:"top_k,omitempty"`
+	Stop             any               `json:"stop,omitempty"`
+	N                int               `json:"n,omitempty"`
+	Input            any               `json:"input,omitempty"`
+	Instruction      string            `json:"instruction,omitempty"`
+	Size             string            `json:"size,omitempty"`
+	Functions        any               `json:"functions,omitempty"`
+	FrequencyPenalty float64           `json:"frequency_penalty,omitempty"`
+	PresencePenalty  float64           `json:"presence_penalty,omitempty"`
+	ResponseFormat   *ResponseFormat   `json:"response_format,omitempty"`
+	EncodingFormat   any               `json:"encoding_format,omitempty"`
+	Seed             float64           `json:"seed,omitempty"`
+	Tools            []ToolCallRequest `json:"tools,omitempty"`
+	ToolChoice       any               `json:"tool_choice,omitempty"`
+	User             string            `json:"user,omitempty"`
+	LogProbs         bool              `json:"logprobs,omitempty"`
+	TopLogProbs      int               `json:"top_logprobs,omitempty"`
+	Dimensions       int               `json:"dimensions,omitempty"`
+	Modalities       any               `json:"modalities,omitempty"`
+	Audio            any               `json:"audio,omitempty"`
+	EnableThinking   any               `json:"enable_thinking,omitempty"` // ali
+	ExtraBody        any               `json:"extra_body,omitempty"`
 }

-type OpenAITools struct {
-	Type     string         `json:"type"`
-	Function OpenAIFunction `json:"function"`
+type ToolCallRequest struct {
+	ID       string          `json:"id,omitempty"`
+	Type     string          `json:"type"`
+	Function FunctionRequest `json:"function"`
 }

-type OpenAIFunction struct {
+type FunctionRequest struct {
 	Description string `json:"description,omitempty"`
 	Name        string `json:"name"`
 	Parameters  any    `json:"parameters,omitempty"`
+	Arguments   string `json:"arguments,omitempty"`
 }

 type StreamOptions struct {
@@ -93,6 +101,7 @@ type Message struct {
 	Name                *string         `json:"name,omitempty"`
 	Prefix              *bool           `json:"prefix,omitempty"`
 	ReasoningContent    string          `json:"reasoning_content,omitempty"`
+	Reasoning           string          `json:"reasoning,omitempty"`
 	ToolCalls           json.RawMessage `json:"tool_calls,omitempty"`
 	ToolCallId          string          `json:"tool_call_id,omitempty"`
 	parsedContent       []MediaContent
@@ -101,14 +110,42 @@ type Message struct {

 type MediaContent struct {
 	Type       string `json:"type"`
-	Text       string `json:"text"`
+	Text       string `json:"text,omitempty"`
 	ImageUrl   any    `json:"image_url,omitempty"`
 	InputAudio any    `json:"input_audio,omitempty"`
+	File       any    `json:"file,omitempty"`
+	VideoUrl   any    `json:"video_url,omitempty"`
+}
+
+func (m *MediaContent) GetImageMedia() *MessageImageUrl {
+	// Comma-ok assertion: returns nil instead of panicking when ImageUrl is nil
+	// or holds some other dynamic type (e.g. a map produced by plain JSON decoding).
+	img, _ := m.ImageUrl.(*MessageImageUrl)
+	return img
+}
+
+func (m *MediaContent) GetInputAudio() *MessageInputAudio {
+	// Comma-ok assertion: returns nil instead of panicking when InputAudio is nil
+	// or holds some other dynamic type (e.g. a map produced by plain JSON decoding).
+	audio, _ := m.InputAudio.(*MessageInputAudio)
+	return audio
+}
+
+func (m *MediaContent) GetFile() *MessageFile {
+	// Comma-ok assertion: returns nil instead of panicking when File is nil
+	// or holds some other dynamic type (e.g. a map produced by plain JSON decoding).
+	file, _ := m.File.(*MessageFile)
+	return file
 }

 type MessageImageUrl struct {
-	Url    string `json:"url"`
-	Detail string `json:"detail"`
+	Url      string `json:"url"`
+	Detail   string `json:"detail"`
+	MimeType string
+}
+
+func (m *MessageImageUrl) IsRemoteImage() bool {
+	return strings.HasPrefix(m.Url, "http") // prefix test only: matches "http://", "https://" and any other Url starting with "http"
 }

 type MessageInputAudio struct {
@@ -116,10 +153,22 @@ type MessageInputAudio struct {
 	Format string `json:"format"`
 }

+type MessageFile struct {
+	FileName string `json:"filename,omitempty"`
+	FileData string `json:"file_data,omitempty"`
+	FileId   string `json:"file_id,omitempty"`
+}
+
+type MessageVideoUrl struct {
+	Url string `json:"url"`
+}
+
 const (
 	ContentTypeText       = "text"
 	ContentTypeImageURL   = "image_url"
 	ContentTypeInputAudio = "input_audio"
+	ContentTypeFile       = "file"
+	ContentTypeVideoUrl   = "video_url" // 阿里百炼视频识别
 )

 func (m *Message) GetPrefix() bool {
@@ -133,11 +182,11 @@ func (m *Message) SetPrefix(prefix bool) {
 	m.Prefix = &prefix
 }

-func (m *Message) ParseToolCalls() []ToolCall {
+func (m *Message) ParseToolCalls() []ToolCallRequest {
 	if m.ToolCalls == nil {
 		return nil
 	}
-	var toolCalls []ToolCall
+	var toolCalls []ToolCallRequest
 	if err := json.Unmarshal(m.ToolCalls, &toolCalls); err == nil {
 		return toolCalls
 	}
@@ -153,11 +202,30 @@ func (m *Message) StringContent() string {
 	if m.parsedStringContent != nil {
 		return *m.parsedStringContent
 	}
+
 	var stringContent string
 	if err := json.Unmarshal(m.Content, &stringContent); err == nil {
+		m.parsedStringContent = &stringContent
 		return stringContent
 	}
-	return string(m.Content)
+
+	contentStr := new(strings.Builder)
+	arrayContent := m.ParseContent()
+	for _, content := range arrayContent {
+		if content.Type == ContentTypeText {
+			contentStr.WriteString(content.Text)
+		}
+	}
+	stringContent = contentStr.String()
+	m.parsedStringContent = &stringContent
+
+	return stringContent
+}
+
+func (m *Message) SetNullContent() {
+	m.Content = nil
+	m.parsedStringContent = nil
+	m.parsedContent = nil
 }

 func (m *Message) SetStringContent(content string) {
@@ -224,46 +292,75 @@ func (m *Message) ParseContent() []MediaContent {

 			case ContentTypeImageURL:
 				imageUrl := contentItem["image_url"]
+				temp := &MessageImageUrl{
+					Detail: "high",
+				}
 				switch v := imageUrl.(type) {
 				case string:
-					contentList = append(contentList, MediaContent{
-						Type: ContentTypeImageURL,
-						ImageUrl: MessageImageUrl{
-							Url:    v,
-							Detail: "high",
-						},
-					})
+					temp.Url = v
 				case map[string]interface{}:
 					url, ok1 := v["url"].(string)
 					detail, ok2 := v["detail"].(string)
-					if !ok2 {
-						detail = "high"
+					if ok2 {
+						temp.Detail = detail
 					}
 					if ok1 {
-						contentList = append(contentList, MediaContent{
-							Type: ContentTypeImageURL,
-							ImageUrl: MessageImageUrl{
-								Url:    url,
-								Detail: detail,
-							},
-						})
+						temp.Url = url
 					}
 				}
+				contentList = append(contentList, MediaContent{
+					Type:     ContentTypeImageURL,
+					ImageUrl: temp,
+				})

 			case ContentTypeInputAudio:
 				if audioData, ok := contentItem["input_audio"].(map[string]interface{}); ok {
 					data, ok1 := audioData["data"].(string)
 					format, ok2 := audioData["format"].(string)
 					if ok1 && ok2 {
+						temp := &MessageInputAudio{
+							Data:   data,
+							Format: format,
+						}
 						contentList = append(contentList, MediaContent{
-							Type: ContentTypeInputAudio,
-							InputAudio: MessageInputAudio{
-								Data:   data,
-								Format: format,
-							},
+							Type:       ContentTypeInputAudio,
+							InputAudio: temp,
 						})
 					}
 				}
+			case ContentTypeFile:
+				if fileData, ok := contentItem["file"].(map[string]interface{}); ok {
+					fileId, ok3 := fileData["file_id"].(string)
+					if ok3 {
+						contentList = append(contentList, MediaContent{
+							Type: ContentTypeFile,
+							File: &MessageFile{
+								FileId: fileId,
+							},
+						})
+					} else {
+						fileName, ok1 := fileData["filename"].(string)
+						fileDataStr, ok2 := fileData["file_data"].(string)
+						if ok1 && ok2 {
+							contentList = append(contentList, MediaContent{
+								Type: ContentTypeFile,
+								File: &MessageFile{
+									FileName: fileName,
+									FileData: fileDataStr,
+								},
+							})
+						}
+					}
+				}
+			case ContentTypeVideoUrl:
+				if videoUrl, ok := contentItem["video_url"].(string); ok {
+					contentList = append(contentList, MediaContent{
+						Type: ContentTypeVideoUrl,
+						VideoUrl: &MessageVideoUrl{
+							Url: videoUrl,
+						},
+					})
+				}
 			}
 		}
 	}
@@ -273,3 +370,49 @@ func (m *Message) ParseContent() []MediaContent {
 	}
 	return contentList
 }
+
+type OpenAIResponsesRequest struct {
+	Model              string               `json:"model"`
+	Input              json.RawMessage      `json:"input,omitempty"`
+	Include            json.RawMessage      `json:"include,omitempty"`
+	Instructions       json.RawMessage      `json:"instructions,omitempty"`
+	MaxOutputTokens    uint                 `json:"max_output_tokens,omitempty"`
+	Metadata           json.RawMessage      `json:"metadata,omitempty"`
+	ParallelToolCalls  bool                 `json:"parallel_tool_calls,omitempty"` // NOTE(review): omitempty drops an explicit false when this struct is marshalled — confirm that is intended
+	PreviousResponseID string               `json:"previous_response_id,omitempty"`
+	Reasoning          *Reasoning           `json:"reasoning,omitempty"`
+	ServiceTier        string               `json:"service_tier,omitempty"`
+	Store              bool                 `json:"store,omitempty"` // NOTE(review): omitempty drops an explicit false when this struct is marshalled — confirm that is intended
+	Stream             bool                 `json:"stream,omitempty"`
+	Temperature        float64              `json:"temperature,omitempty"` // NOTE(review): omitempty drops an explicit 0 when this struct is marshalled — confirm that is intended
+	Text               json.RawMessage      `json:"text,omitempty"`
+	ToolChoice         json.RawMessage      `json:"tool_choice,omitempty"`
+	Tools              []ResponsesToolsCall `json:"tools,omitempty"`
+	TopP               float64              `json:"top_p,omitempty"`
+	Truncation         string               `json:"truncation,omitempty"`
+	User               string               `json:"user,omitempty"`
+}
+
+type Reasoning struct {
+	Effort  string `json:"effort,omitempty"`
+	Summary string `json:"summary,omitempty"`
+}
+
+type ResponsesToolsCall struct {
+	Type string `json:"type"`
+	// Web Search
+	UserLocation      json.RawMessage `json:"user_location,omitempty"`
+	SearchContextSize string          `json:"search_context_size,omitempty"`
+	// File Search
+	VectorStoreIds []string        `json:"vector_store_ids,omitempty"`
+	MaxNumResults  uint            `json:"max_num_results,omitempty"`
+	Filters        json.RawMessage `json:"filters,omitempty"`
+	// Computer Use
+	DisplayWidth  uint   `json:"display_width,omitempty"`
+	DisplayHeight uint   `json:"display_height,omitempty"`
+	Environment   string `json:"environment,omitempty"`
+	// Function
+	Name        string          `json:"name,omitempty"`
+	Description string          `json:"description,omitempty"`
+	Parameters  json.RawMessage `json:"parameters,omitempty"`
+}
--- a/dto/openai_response.go
+++ b/dto/openai_response.go
@@ -1,20 +1,10 @@
 package dto

-type TextResponseWithError struct {
-	Id      string                        `json:"id"`
-	Object  string                        `json:"object"`
-	Created int64                         `json:"created"`
-	Choices []OpenAITextResponseChoice    `json:"choices"`
-	Data    []OpenAIEmbeddingResponseItem `json:"data"`
-	Model   string                        `json:"model"`
-	Usage   `json:"usage"`
-	Error   OpenAIError `json:"error"`
-}
+import "encoding/json"

 type SimpleResponse struct {
-	Usage   `json:"usage"`
-	Error   OpenAIError                `json:"error"`
-	Choices []OpenAITextResponseChoice `json:"choices"`
+	Usage `json:"usage"`
+	Error *OpenAIError `json:"error"`
 }

 type TextResponse struct {
@@ -38,6 +28,7 @@ type OpenAITextResponse struct {
 	Object  string                     `json:"object"`
 	Created int64                      `json:"created"`
 	Choices []OpenAITextResponseChoice `json:"choices"`
+	Error   *OpenAIError               `json:"error,omitempty"`
 	Usage   `json:"usage"`
 }

@@ -62,10 +53,11 @@ type ChatCompletionsStreamResponseChoice struct {
 }

 type ChatCompletionsStreamResponseChoiceDelta struct {
-	Content          *string    `json:"content,omitempty"`
-	ReasoningContent *string    `json:"reasoning_content,omitempty"`
-	Role             string     `json:"role,omitempty"`
-	ToolCalls        []ToolCall `json:"tool_calls,omitempty"`
+	Content          *string            `json:"content,omitempty"`
+	ReasoningContent *string            `json:"reasoning_content,omitempty"`
+	Reasoning        *string            `json:"reasoning,omitempty"`
+	Role             string             `json:"role,omitempty"`
+	ToolCalls        []ToolCallResponse `json:"tool_calls,omitempty"`
 }

 func (c *ChatCompletionsStreamResponseChoiceDelta) SetContentString(s string) {
@@ -80,30 +72,38 @@ func (c *ChatCompletionsStreamResponseChoiceDelta) GetContentString() string {
 }

 func (c *ChatCompletionsStreamResponseChoiceDelta) GetReasoningContent() string {
-	if c.ReasoningContent == nil {
+	if c.ReasoningContent == nil && c.Reasoning == nil {
 		return ""
 	}
-	return *c.ReasoningContent
+	if c.ReasoningContent != nil {
+		return *c.ReasoningContent
+	}
+	return *c.Reasoning
 }

-type ToolCall struct {
+func (c *ChatCompletionsStreamResponseChoiceDelta) SetReasoningContent(s string) {
+	c.ReasoningContent = &s
+	c.Reasoning = &s
+}
+
+type ToolCallResponse struct {
 	// Index is not nil only in chat completion chunk object
-	Index    *int         `json:"index,omitempty"`
-	ID       string       `json:"id,omitempty"`
-	Type     any          `json:"type"`
-	Function FunctionCall `json:"function"`
+	Index    *int             `json:"index,omitempty"`
+	ID       string           `json:"id,omitempty"`
+	Type     any              `json:"type"`
+	Function FunctionResponse `json:"function"`
 }

-func (c *ToolCall) SetIndex(i int) {
+func (c *ToolCallResponse) SetIndex(i int) {
 	c.Index = &i
 }

-type FunctionCall struct {
+type FunctionResponse struct {
 	Description string `json:"description,omitempty"`
 	Name        string `json:"name,omitempty"`
 	// call function with arguments in JSON format
 	Parameters any    `json:"parameters,omitempty"` // request
-	Arguments  string `json:"arguments,omitempty"`
+	Arguments  string `json:"arguments"`            // response
 }

 type ChatCompletionsStreamResponse struct {
@@ -116,6 +116,34 @@ type ChatCompletionsStreamResponse struct {
 	Usage             *Usage                                `json:"usage"`
 }

+// IsToolCall reports whether the first choice's delta carries at least one
+// tool call. It returns false when the response has no choices, so the
+// first element is only indexed once the slice is known to be non-empty.
+func (c *ChatCompletionsStreamResponse) IsToolCall() bool {
+	return len(c.Choices) > 0 && len(c.Choices[0].Delta.ToolCalls) > 0
+}
+
+func (c *ChatCompletionsStreamResponse) GetFirstToolCall() *ToolCallResponse {
+	if c.IsToolCall() {
+		return &c.Choices[0].Delta.ToolCalls[0] // pointer into c's own ToolCalls slice, not a copy
+	}
+	return nil
+}
+
+func (c *ChatCompletionsStreamResponse) Copy() *ChatCompletionsStreamResponse {
+	choices := make([]ChatCompletionsStreamResponseChoice, len(c.Choices))
+	copy(choices, c.Choices) // element-wise value copy: pointer and slice fields inside each choice stay shared with c
+	return &ChatCompletionsStreamResponse{
+		Id:                c.Id,
+		Object:            c.Object,
+		Created:           c.Created,
+		Model:             c.Model,
+		SystemFingerprint: c.SystemFingerprint,
+		Choices:           choices,
+		Usage:             c.Usage, // same *Usage pointer as the receiver, not a clone
+	}
+}
+
 func (c *ChatCompletionsStreamResponse) GetSystemFingerprint() string {
 	if c.SystemFingerprint == nil {
 		return ""
@@ -140,9 +168,93 @@ type CompletionsStreamResponse struct {
 }

 type Usage struct {
-	PromptTokens           int                `json:"prompt_tokens"`
-	CompletionTokens       int                `json:"completion_tokens"`
-	TotalTokens            int                `json:"total_tokens"`
+	PromptTokens         int `json:"prompt_tokens"`
+	CompletionTokens     int `json:"completion_tokens"`
+	TotalTokens          int `json:"total_tokens"`
+	PromptCacheHitTokens int `json:"prompt_cache_hit_tokens,omitempty"`
+
 	PromptTokensDetails    InputTokenDetails  `json:"prompt_tokens_details"`
 	CompletionTokenDetails OutputTokenDetails `json:"completion_tokens_details"`
+	InputTokens            int                `json:"input_tokens"`
+	OutputTokens           int                `json:"output_tokens"`
+	InputTokensDetails     *InputTokenDetails `json:"input_tokens_details"`
+}
+
+type InputTokenDetails struct {
+	CachedTokens         int `json:"cached_tokens"`
+	CachedCreationTokens int `json:"-"`
+	TextTokens           int `json:"text_tokens"`
+	AudioTokens          int `json:"audio_tokens"`
+	ImageTokens          int `json:"image_tokens"`
+}
+
+type OutputTokenDetails struct {
+	TextTokens      int `json:"text_tokens"`
+	AudioTokens     int `json:"audio_tokens"`
+	ReasoningTokens int `json:"reasoning_tokens"`
+}
+
+type OpenAIResponsesResponse struct {
+	ID                 string               `json:"id"`
+	Object             string               `json:"object"`
+	CreatedAt          int                  `json:"created_at"`
+	Status             string               `json:"status"`
+	Error              *OpenAIError         `json:"error,omitempty"`
+	IncompleteDetails  *IncompleteDetails   `json:"incomplete_details,omitempty"`
+	Instructions       string               `json:"instructions"`
+	MaxOutputTokens    int                  `json:"max_output_tokens"`
+	Model              string               `json:"model"`
+	Output             []ResponsesOutput    `json:"output"`
+	ParallelToolCalls  bool                 `json:"parallel_tool_calls"`
+	PreviousResponseID string               `json:"previous_response_id"`
+	Reasoning          *Reasoning           `json:"reasoning"`
+	Store              bool                 `json:"store"`
+	Temperature        float64              `json:"temperature"`
+	ToolChoice         string               `json:"tool_choice"`
+	Tools              []ResponsesToolsCall `json:"tools"`
+	TopP               float64              `json:"top_p"`
+	Truncation         string               `json:"truncation"`
+	Usage              *Usage               `json:"usage"`
+	User               json.RawMessage      `json:"user"`
+	Metadata           json.RawMessage      `json:"metadata"`
+}
+
+type IncompleteDetails struct {
+	Reasoning string `json:"reasoning"`
+}
+
+type ResponsesOutput struct {
+	Type    string                   `json:"type"`
+	ID      string                   `json:"id"`
+	Status  string                   `json:"status"`
+	Role    string                   `json:"role"`
+	Content []ResponsesOutputContent `json:"content"`
+}
+
+type ResponsesOutputContent struct {
+	Type        string        `json:"type"`
+	Text        string        `json:"text"`
+	Annotations []interface{} `json:"annotations"`
+}
+
+const (
+	BuildInToolWebSearchPreview = "web_search_preview"
+	BuildInToolFileSearch       = "file_search"
+)
+
+const (
+	BuildInCallWebSearchCall = "web_search_call"
+)
+
+const (
+	ResponsesOutputTypeItemAdded = "response.output_item.added"
+	ResponsesOutputTypeItemDone  = "response.output_item.done"
+)
+
+// ResponsesStreamResponse is used to handle /v1/responses streaming responses.
+type ResponsesStreamResponse struct {
+	Type     string                   `json:"type"`
+	Response *OpenAIResponsesResponse `json:"response,omitempty"`
+	Delta    string                   `json:"delta,omitempty"`
+	Item     *ResponsesOutput         `json:"item,omitempty"`
 }
--- a/dto/realtime.go
+++ b/dto/realtime.go
@@ -43,18 +43,6 @@ type RealtimeUsage struct {
 	OutputTokenDetails OutputTokenDetails `json:"output_token_details"`
 }

-type InputTokenDetails struct {
-	CachedTokens int `json:"cached_tokens"`
-	TextTokens   int `json:"text_tokens"`
-	AudioTokens  int `json:"audio_tokens"`
-	ImageTokens  int `json:"image_tokens"`
-}
-
-type OutputTokenDetails struct {
-	TextTokens  int `json:"text_tokens"`
-	AudioTokens int `json:"audio_tokens"`
-}
-
 type RealtimeSession struct {
 	Modalities              []string                `json:"modalities"`
 	Instructions            string                  `json:"instructions"`
--- a/dto/rerank.go
+++ b/dto/rerank.go
@@ -5,18 +5,29 @@ type RerankRequest struct {
 	Query           string `json:"query"`
 	Model           string `json:"model"`
 	TopN            int    `json:"top_n"`
-	ReturnDocuments bool   `json:"return_documents,omitempty"`
+	ReturnDocuments *bool  `json:"return_documents,omitempty"`
 	MaxChunkPerDoc  int    `json:"max_chunk_per_doc,omitempty"`
 	OverLapTokens   int    `json:"overlap_tokens,omitempty"`
 }

-type RerankResponseDocument struct {
+// GetReturnDocuments returns the value of the optional ReturnDocuments
+// flag of the request. A nil ReturnDocuments pointer is reported as false;
+// otherwise the pointed-to value is returned unchanged.
+func (r *RerankRequest) GetReturnDocuments() bool {
+	return r.ReturnDocuments != nil && *r.ReturnDocuments
+}
+
+type RerankResponseResult struct {
 	Document       any     `json:"document,omitempty"`
 	Index          int     `json:"index"`
 	RelevanceScore float64 `json:"relevance_score"`
 }

-type RerankResponse struct {
-	Results []RerankResponseDocument `json:"results"`
-	Usage   Usage                    `json:"usage"`
+type RerankDocument struct {
+	Text any `json:"text"`
+}
+
+type RerankResponse struct {
+	Results []RerankResponseResult `json:"results"`
+	Usage   Usage                  `json:"usage"`
 }
--- a/go.mod
+++ b/go.mod
@@ -11,6 +11,7 @@ require (
 	github.com/aws/aws-sdk-go-v2/credentials v1.17.11
 	github.com/aws/aws-sdk-go-v2/service/bedrockruntime v1.7.4
 	github.com/bytedance/gopkg v0.0.0-20220118071334-3db87571198b
+	github.com/bytedance/sonic v1.11.6
 	github.com/gin-contrib/cors v1.7.2
 	github.com/gin-contrib/gzip v0.0.6
 	github.com/gin-contrib/sessions v0.0.5
@@ -22,15 +23,15 @@ require (
 	github.com/golang-jwt/jwt v3.2.2+incompatible
 	github.com/google/uuid v1.6.0
 	github.com/gorilla/websocket v1.5.0
-	github.com/jinzhu/copier v0.4.0
 	github.com/joho/godotenv v1.5.1
 	github.com/pkg/errors v0.9.1
 	github.com/pkoukk/tiktoken-go v0.1.7
 	github.com/samber/lo v1.39.0
 	github.com/shirou/gopsutil v3.21.11+incompatible
-	golang.org/x/crypto v0.27.0
+	github.com/shopspring/decimal v1.4.0
+	golang.org/x/crypto v0.35.0
 	golang.org/x/image v0.23.0
-	golang.org/x/net v0.28.0
+	golang.org/x/net v0.35.0
 	gorm.io/driver/mysql v1.4.3
 	gorm.io/driver/postgres v1.5.2
 	gorm.io/gorm v1.25.2
@@ -42,7 +43,6 @@ require (
 	github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.5 // indirect
 	github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.5 // indirect
 	github.com/aws/smithy-go v1.20.2 // indirect
-	github.com/bytedance/sonic v1.11.6 // indirect
 	github.com/bytedance/sonic/loader v0.1.1 // indirect
 	github.com/cespare/xxhash/v2 v2.3.0 // indirect
 	github.com/cloudwego/base64x v0.1.4 // indirect
@@ -84,9 +84,9 @@ require (
 	github.com/yusufpapurcu/wmi v1.2.3 // indirect
 	golang.org/x/arch v0.12.0 // indirect
 	golang.org/x/exp v0.0.0-20240404231335-c0f41cb1a7a0 // indirect
-	golang.org/x/sync v0.10.0 // indirect
-	golang.org/x/sys v0.27.0 // indirect
-	golang.org/x/text v0.21.0 // indirect
+	golang.org/x/sync v0.11.0 // indirect
+	golang.org/x/sys v0.30.0 // indirect
+	golang.org/x/text v0.22.0 // indirect
 	google.golang.org/protobuf v1.34.2 // indirect
 	gopkg.in/yaml.v3 v3.0.1 // indirect
 	modernc.org/libc v1.22.5 // indirect
--- a/go.sum
+++ b/go.sum
@@ -117,8 +117,6 @@ github.com/jackc/pgx/v5 v5.7.1 h1:x7SYsPBYDkHDksogeSmZZ5xzThcTgRz++I5E+ePFUcs=
 github.com/jackc/pgx/v5 v5.7.1/go.mod h1:e7O26IywZZ+naJtWWos6i6fvWK+29etgITqrqHLfoZA=
 github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo=
 github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
-github.com/jinzhu/copier v0.4.0 h1:w3ciUoD19shMCRargcpm0cm91ytaBhDvuRpz1ODO/U8=
-github.com/jinzhu/copier v0.4.0/go.mod h1:DfbEm0FYsaqBcKcFuvmOZb218JkPGtvSHsKg8S8hyyg=
 github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E=
 github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkryuEj+Srlc=
 github.com/jinzhu/now v1.1.4/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8=
@@ -183,6 +181,8 @@ github.com/samber/lo v1.39.0 h1:4gTz1wUhNYLhFSKl6O+8peW0v2F4BCY034GRpU9WnuA=
 github.com/samber/lo v1.39.0/go.mod h1:+m/ZKRl6ClXCE2Lgf3MsQlWfh4bn1bz6CXEOxnEXnEA=
 github.com/shirou/gopsutil v3.21.11+incompatible h1:+1+c1VGhc88SSonWP6foOcLhvnKlUeu/erjjvaPEYiI=
 github.com/shirou/gopsutil v3.21.11+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA=
+github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k=
+github.com/shopspring/decimal v1.4.0/go.mod h1:gawqmDU56v4yIKSwfBSFip1HdCCXN8/+DMd9qYNcwME=
 github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
 github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
 github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
@@ -217,18 +217,18 @@ golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUu
 golang.org/x/arch v0.12.0 h1:UsYJhbzPYGsT0HbEdmYcqtCv8UNGvnaL561NnIUvaKg=
 golang.org/x/arch v0.12.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys=
 golang.org/x/crypto v0.0.0-20210711020723-a769d52b0f97/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
-golang.org/x/crypto v0.27.0 h1:GXm2NjJrPaiv/h1tb2UH8QfgC/hOf/+z0p6PT8o1w7A=
-golang.org/x/crypto v0.27.0/go.mod h1:1Xngt8kV6Dvbssa53Ziq6Eqn0HqbZi5Z6R0ZpwQzt70=
+golang.org/x/crypto v0.35.0 h1:b15kiHdrGCHrP6LvwaQ3c03kgNhhiMgvlhxHQhmg2Xs=
+golang.org/x/crypto v0.35.0/go.mod h1:dy7dXNW32cAb/6/PRuTNsix8T+vJAqvuIy5Bli/x0YQ=
 golang.org/x/exp v0.0.0-20240404231335-c0f41cb1a7a0 h1:985EYyeCOxTpcgOTJpflJUwOeEz0CQOdPt73OzpE9F8=
 golang.org/x/exp v0.0.0-20240404231335-c0f41cb1a7a0/go.mod h1:/lliqkxwWAhPjf5oSOIJup2XcqJaw8RGS6k3TGEc7GI=
 golang.org/x/image v0.23.0 h1:HseQ7c2OpPKTPVzNjG5fwJsOTCiiwS4QdsYi5XU6H68=
 golang.org/x/image v0.23.0/go.mod h1:wJJBTdLfCCf3tiHa1fNxpZmUI4mmoZvwMCPP0ddoNKY=
 golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
-golang.org/x/net v0.28.0 h1:a9JDOJc5GMUJ0+UDqmLT86WiEy7iWyIhz8gz8E4e5hE=
-golang.org/x/net v0.28.0/go.mod h1:yqtgsTWOOnlGLG9GFRrK3++bGOUEkNBoHZc8MEDWPNg=
+golang.org/x/net v0.35.0 h1:T5GQRQb2y08kTAByq9L4/bz8cipCdA8FbRTXewonqY8=
+golang.org/x/net v0.35.0/go.mod h1:EglIi67kWsHKlRzzVMUD93VMSWGFOMSZgxFjparz1Qk=
 golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ=
-golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
+golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w=
+golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
 golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
@@ -239,14 +239,14 @@ golang.org/x/sys v0.0.0-20220110181412-a018aaa089fe/go.mod h1:oPkhp1MJrh7nUepCBc
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.27.0 h1:wBqf8DvsY9Y/2P8gAfPDEYNuS30J4lPHJxXSb/nJZ+s=
-golang.org/x/sys v0.27.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc=
+golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
 golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
 golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
 golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
 golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
-golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo=
-golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ=
+golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM=
+golang.org/x/text v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY=
 golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
 golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
--- a/main.go
+++ b/main.go
@@ -12,6 +12,7 @@ import (
 	"one-api/model"
 	"one-api/router"
 	"one-api/service"
+	"one-api/setting/operation_setting"
 	"os"
 	"strconv"

@@ -33,7 +34,7 @@ var indexPage []byte
 func main() {
 	err := godotenv.Load(".env")
 	if err != nil {
-		common.SysLog("Support for .env file is disabled")
+		common.SysLog("Support for .env file is disabled: " + err.Error())
 	}

 	common.LoadEnv()
@@ -51,6 +52,9 @@ func main() {
 	if err != nil {
 		common.FatalLog("failed to initialize database: " + err.Error())
 	}
+
+	model.CheckSetup()
+
 	// Initialize SQL Database
 	err = model.InitLogDB()
 	if err != nil {
@@ -69,10 +73,15 @@ func main() {
 		common.FatalLog("failed to initialize Redis: " + err.Error())
 	}

+	// Initialize model settings
+	operation_setting.InitRatioSettings()
 	// Initialize constants
 	constant.InitEnv()
 	// Initialize options
 	model.InitOptionMap()
+
+	service.InitTokenEncoders()
+
 	if common.RedisEnabled {
 		// for compatibility with old versions
 		common.MemoryCacheEnabled = true
@@ -126,8 +135,6 @@ func main() {
 		common.SysLog("pprof enabled")
 	}

-	service.InitTokenEncoders()
-
 	// Initialize HTTP server
 	server := gin.New()
 	server.Use(gin.CustomRecovery(func(c *gin.Context, err any) {
--- a/middleware/auth.go
+++ b/middleware/auth.go
@@ -174,6 +174,14 @@ func TokenAuth() func(c *gin.Context) {
 			}
 			c.Request.Header.Set("Authorization", "Bearer "+key)
 		}
+		// 检查path包含/v1/messages
+		if strings.Contains(c.Request.URL.Path, "/v1/messages") {
+			// 从x-api-key中获取key
+			key := c.Request.Header.Get("x-api-key")
+			if key != "" {
+				c.Request.Header.Set("Authorization", "Bearer "+key)
+			}
+		}
 		key := c.Request.Header.Get("Authorization")
 		parts := make([]string, 0)
 		key = strings.TrimPrefix(key, "Bearer ")
@@ -199,15 +207,19 @@ func TokenAuth() func(c *gin.Context) {
 			abortWithOpenAiMessage(c, http.StatusUnauthorized, err.Error())
 			return
 		}
-		userEnabled, err := model.IsUserEnabled(token.UserId, false)
+		userCache, err := model.GetUserCache(token.UserId)
 		if err != nil {
 			abortWithOpenAiMessage(c, http.StatusInternalServerError, err.Error())
 			return
 		}
+		userEnabled := userCache.Status == common.UserStatusEnabled
 		if !userEnabled {
 			abortWithOpenAiMessage(c, http.StatusForbidden, "用户已被封禁")
 			return
 		}
+
+		userCache.WriteContext(c)
+
 		c.Set("id", token.UserId)
 		c.Set("token_id", token.Id)
 		c.Set("token_key", token.Key)
--- a/middleware/distributor.go
+++ b/middleware/distributor.go
@@ -32,7 +32,6 @@ func Distribute() func(c *gin.Context) {
 				return
 			}
 		}
-		userId := c.GetInt("id")
 		var channel *model.Channel
 		channelId, ok := c.Get("specific_channel_id")
 		modelRequest, shouldSelectChannel, err := getModelRequest(c)
@@ -40,7 +39,7 @@ func Distribute() func(c *gin.Context) {
 			abortWithOpenAiMessage(c, http.StatusBadRequest, "Invalid request, "+err.Error())
 			return
 		}
-		userGroup, _ := model.GetUserGroup(userId, false)
+		userGroup := c.GetString(constant.ContextKeyUserGroup)
 		tokenGroup := c.GetString("token_group")
 		if tokenGroup != "" {
 			// check common.UserUsableGroups[userGroup]
@@ -163,7 +162,7 @@ func getModelRequest(c *gin.Context) (*ModelRequest, bool, error) {
 		}
 		c.Set("platform", string(constant.TaskPlatformSuno))
 		c.Set("relay_mode", relayMode)
-	} else if !strings.HasPrefix(c.Request.URL.Path, "/v1/audio/transcriptions") {
+	} else if !strings.HasPrefix(c.Request.URL.Path, "/v1/audio/transcriptions") && !strings.HasPrefix(c.Request.URL.Path, "/v1/images/edits") {
 		err = common.UnmarshalBodyReusable(c, &modelRequest)
 	}
 	if err != nil {
@@ -185,6 +184,8 @@ func getModelRequest(c *gin.Context) (*ModelRequest, bool, error) {
 	}
 	if strings.HasPrefix(c.Request.URL.Path, "/v1/images/generations") {
 		modelRequest.Model = common.GetStringIfEmpty(modelRequest.Model, "dall-e")
+	} else if strings.HasPrefix(c.Request.URL.Path, "/v1/images/edits") {
+		modelRequest.Model = common.GetStringIfEmpty(modelRequest.Model, "gpt-image-1")
 	}
 	if strings.HasPrefix(c.Request.URL.Path, "/v1/audio") {
 		relayMode := relayconstant.RelayModeAudioSpeech
@@ -212,7 +213,9 @@ func SetupContextForSelectedChannel(c *gin.Context, channel *model.Channel, mode
 	c.Set("channel_id", channel.Id)
 	c.Set("channel_name", channel.Name)
 	c.Set("channel_type", channel.Type)
+	c.Set("channel_create_time", channel.CreatedTime)
 	c.Set("channel_setting", channel.GetSetting())
+	c.Set("param_override", channel.GetParamOverride())
 	if nil != channel.OpenAIOrganization && "" != *channel.OpenAIOrganization {
 		c.Set("channel_organization", *channel.OpenAIOrganization)
 	}
--- a/middleware/model-rate-limit.go
+++ b/middleware/model-rate-limit.go
@@ -0,0 +1,185 @@
+package middleware
+
+import (
+	"context"
+	"fmt"
+	"net/http"
+	"one-api/common"
+	"one-api/common/limiter"
+	"one-api/setting"
+	"strconv"
+	"time"
+
+	"github.com/gin-gonic/gin"
+	"github.com/go-redis/redis/v8"
+)
+
+const (
+	ModelRequestRateLimitCountMark        = "MRRL"
+	ModelRequestRateLimitSuccessCountMark = "MRRLS"
+)
+
+// checkRedisRateLimit checks the request limit stored in the Redis list at key.
+func checkRedisRateLimit(ctx context.Context, rdb *redis.Client, key string, maxCount int, duration int64) (bool, error) {
+	// A maxCount of 0 means no limit.
+	if maxCount == 0 {
+		return true, nil
+	}
+
+	// Fetch the current count (the length of the list).
+	length, err := rdb.LLen(ctx, key).Result()
+	if err != nil {
+		return false, err
+	}
+
+	// Below the limit: allow the request.
+	if length < int64(maxCount) {
+		return true, nil
+	}
+
+	// Check the time window using the last list entry (the LIndex error is discarded here).
+	oldTimeStr, _ := rdb.LIndex(ctx, key, -1).Result()
+	oldTime, err := time.Parse(timeFormat, oldTimeStr)
+	if err != nil {
+		return false, err
+	}
+
+	nowTimeStr := time.Now().Format(timeFormat)
+	nowTime, err := time.Parse(timeFormat, nowTimeStr)
+	if err != nil {
+		return false, err
+	}
+	// Limit already reached inside the time window: refresh the key expiry and reject.
+	subTime := nowTime.Sub(oldTime).Seconds()
+	if int64(subTime) < duration {
+		rdb.Expire(ctx, key, time.Duration(setting.ModelRequestRateLimitDurationMinutes)*time.Minute)
+		return false, nil
+	}
+
+	return true, nil
+}
+
+// recordRedisRequest pushes the current time onto the Redis list at key, trims it to maxCount entries and refreshes its expiry.
+func recordRedisRequest(ctx context.Context, rdb *redis.Client, key string, maxCount int) {
+	// A maxCount of 0 means nothing is recorded.
+	if maxCount == 0 {
+		return
+	}
+
+	now := time.Now().Format(timeFormat)
+	rdb.LPush(ctx, key, now)
+	rdb.LTrim(ctx, key, 0, int64(maxCount-1))
+	rdb.Expire(ctx, key, time.Duration(setting.ModelRequestRateLimitDurationMinutes)*time.Minute)
+}
+
+// redisRateLimitHandler returns the Redis-backed rate limit handler; every rejection path aborts and returns.
+func redisRateLimitHandler(duration int64, totalMaxCount, successMaxCount int) gin.HandlerFunc {
+	return func(c *gin.Context) {
+		userId := strconv.Itoa(c.GetInt("id"))
+		ctx := context.Background()
+		rdb := common.RDB
+
+		// 1. Check the successful request limit.
+		successKey := fmt.Sprintf("rateLimit:%s:%s", ModelRequestRateLimitSuccessCountMark, userId)
+		allowed, err := checkRedisRateLimit(ctx, rdb, successKey, successMaxCount, duration)
+		if err != nil {
+			fmt.Println("检查成功请求数限制失败:", err.Error())
+			abortWithOpenAiMessage(c, http.StatusInternalServerError, "rate_limit_check_failed")
+			return
+		}
+		if !allowed {
+			abortWithOpenAiMessage(c, http.StatusTooManyRequests, fmt.Sprintf("您已达到请求数限制：%d分钟内最多请求%d次", setting.ModelRequestRateLimitDurationMinutes, successMaxCount))
+			return
+		}
+
+		// 2. Check the total request limit through the limiter package; skipped when totalMaxCount is 0.
+		if totalMaxCount > 0 {
+			totalKey := fmt.Sprintf("rateLimit:%s", userId)
+			tb := limiter.New(ctx, rdb)
+			allowed, err = tb.Allow(
+				ctx,
+				totalKey,
+				limiter.WithCapacity(int64(totalMaxCount)*duration),
+				limiter.WithRate(int64(totalMaxCount)),
+				limiter.WithRequested(duration),
+			)
+			if err != nil {
+				fmt.Println("检查总请求数限制失败:", err.Error())
+				abortWithOpenAiMessage(c, http.StatusInternalServerError, "rate_limit_check_failed")
+				return
+			}
+
+			if !allowed {
+				abortWithOpenAiMessage(c, http.StatusTooManyRequests, fmt.Sprintf("您已达到总请求数限制：%d分钟内最多请求%d次，包括失败次数，请检查您的请求是否正确", setting.ModelRequestRateLimitDurationMinutes, totalMaxCount))
+				// Stop here like every other rejection path instead of falling through to c.Next().
+				return
+			}
+		}
+
+		// 3. Handle the request.
+		c.Next()
+
+		// 4. If the request succeeded (status < 400), record it as a successful request.
+		if c.Writer.Status() < 400 {
+			recordRedisRequest(ctx, rdb, successKey, successMaxCount)
+		}
+	}
+}
+
+// memoryRateLimitHandler returns the in-memory rate limit handler.
+func memoryRateLimitHandler(duration int64, totalMaxCount, successMaxCount int) gin.HandlerFunc {
+	inMemoryRateLimiter.Init(time.Duration(setting.ModelRequestRateLimitDurationMinutes) * time.Minute)
+
+	return func(c *gin.Context) {
+		userId := strconv.Itoa(c.GetInt("id"))
+		totalKey := ModelRequestRateLimitCountMark + userId
+		successKey := ModelRequestRateLimitSuccessCountMark + userId
+
+		// 1. Check the total request limit (skipped when totalMaxCount is 0).
+		if totalMaxCount > 0 && !inMemoryRateLimiter.Request(totalKey, totalMaxCount, duration) {
+			c.Status(http.StatusTooManyRequests)
+			c.Abort()
+			return
+		}
+
+		// 2. Check the successful request limit.
+		// A temporary key is used for the check so that nothing is recorded under successKey itself.
+		checkKey := successKey + "_check"
+		if !inMemoryRateLimiter.Request(checkKey, successMaxCount, duration) {
+			c.Status(http.StatusTooManyRequests)
+			c.Abort()
+			return
+		}
+
+		// 3. Handle the request.
+		c.Next()
+
+		// 4. If the request succeeded (status < 400), record it under the real success key.
+		if c.Writer.Status() < 400 {
+			inMemoryRateLimiter.Request(successKey, successMaxCount, duration)
+		}
+	}
+}
+
+// ModelRequestRateLimit is the model request rate limiting middleware.
+func ModelRequestRateLimit() func(c *gin.Context) {
+	return func(c *gin.Context) {
+		// Check on every request whether rate limiting is enabled.
+		if !setting.ModelRequestRateLimitEnabled {
+			c.Next()
+			return
+		}
+
+		// Compute the rate limit parameters (duration is the configured minutes converted to seconds).
+		duration := int64(setting.ModelRequestRateLimitDurationMinutes * 60)
+		totalMaxCount := setting.ModelRequestRateLimitCount
+		successMaxCount := setting.ModelRequestRateLimitSuccessCount
+
+		// Pick and run the handler that matches the storage backend.
+		if common.RedisEnabled {
+			redisRateLimitHandler(duration, totalMaxCount, successMaxCount)(c)
+		} else {
+			memoryRateLimitHandler(duration, totalMaxCount, successMaxCount)(c)
+		}
+	}
+}
--- a/model/cache.go
+++ b/model/cache.go
@@ -84,9 +84,11 @@ func CacheGetRandomSatisfiedChannel(group string, model string, retry int) (*Cha
 	if !common.MemoryCacheEnabled {
 		return GetRandomSatisfiedChannel(group, model, retry)
 	}
+	
 	channelSyncLock.RLock()
-	defer channelSyncLock.RUnlock()
 	channels := group2model2channels[group][model]
+	channelSyncLock.RUnlock()
+	
 	if len(channels) == 0 {
 		return nil, errors.New("channel not found")
 	}
--- a/model/channel.go
+++ b/model/channel.go
@@ -35,7 +35,8 @@ type Channel struct {
 	AutoBan           *int    `json:"auto_ban" gorm:"default:1"`
 	OtherInfo         string  `json:"other_info"`
 	Tag               *string `json:"tag" gorm:"index"`
-	Setting           string  `json:"setting" gorm:"type:text"`
+	Setting           *string `json:"setting" gorm:"type:text"`
+	ParamOverride     *string `json:"param_override" gorm:"type:text"`
 }

 func (channel *Channel) GetModels() []string {
@@ -118,10 +119,15 @@ func SearchChannels(keyword string, group string, model string, idSort bool) ([]

 	// 如果是 PostgreSQL，使用双引号
 	if common.UsingPostgreSQL {
-		keyCol = `"key"`
 		modelsCol = `"models"`
 	}

+	baseURLCol := "`base_url`"
+	// 如果是 PostgreSQL，使用双引号
+	if common.UsingPostgreSQL {
+		baseURLCol = `"base_url"`
+	}
+
 	order := "priority desc"
 	if idSort {
 		order = "id desc"
@@ -141,11 +147,11 @@ func SearchChannels(keyword string, group string, model string, idSort bool) ([]
 			// sqlite, PostgreSQL
 			groupCondition = `(',' || ` + groupCol + ` || ',') LIKE ?`
 		}
-		whereClause = "(id = ? OR name LIKE ? OR " + keyCol + " = ?) AND " + modelsCol + ` LIKE ? AND ` + groupCondition
-		args = append(args, common.String2Int(keyword), "%"+keyword+"%", keyword, "%"+model+"%", "%,"+group+",%")
+		whereClause = "(id = ? OR name LIKE ? OR " + keyCol + " = ? OR " + baseURLCol + " LIKE ?) AND " + modelsCol + ` LIKE ? AND ` + groupCondition
+		args = append(args, common.String2Int(keyword), "%"+keyword+"%", keyword, "%"+keyword+"%", "%"+model+"%", "%,"+group+",%")
 	} else {
-		whereClause = "(id = ? OR name LIKE ? OR " + keyCol + " = ?) AND " + modelsCol + " LIKE ?"
-		args = append(args, common.String2Int(keyword), "%"+keyword+"%", keyword, "%"+model+"%")
+		whereClause = "(id = ? OR name LIKE ? OR " + keyCol + " = ? OR " + baseURLCol + " LIKE ?) AND " + modelsCol + " LIKE ?"
+		args = append(args, common.String2Int(keyword), "%"+keyword+"%", keyword, "%"+keyword+"%", "%"+model+"%")
 	}

 	// 执行查询
@@ -290,35 +296,42 @@ func (channel *Channel) Delete() error {

 var channelStatusLock sync.Mutex

-func UpdateChannelStatusById(id int, status int, reason string) {
+func UpdateChannelStatusById(id int, status int, reason string) bool {
 	if common.MemoryCacheEnabled {
 		channelStatusLock.Lock()
+		defer channelStatusLock.Unlock()
+
 		channelCache, _ := CacheGetChannel(id)
 		// 如果缓存渠道存在，且状态已是目标状态，直接返回
 		if channelCache != nil && channelCache.Status == status {
-			channelStatusLock.Unlock()
-			return
+			return false
 		}
 		// 如果缓存渠道不存在(说明已经被禁用)，且要设置的状态不为启用，直接返回
 		if channelCache == nil && status != common.ChannelStatusEnabled {
-			channelStatusLock.Unlock()
-			return
+			return false
 		}
 		CacheUpdateChannelStatus(id, status)
-		channelStatusLock.Unlock()
 	}
 	err := UpdateAbilityStatus(id, status == common.ChannelStatusEnabled)
 	if err != nil {
 		common.SysError("failed to update ability status: " + err.Error())
+		return false
 	}
 	channel, err := GetChannelById(id, true)
 	if err != nil {
 		// find channel by id error, directly update status
-		err = DB.Model(&Channel{}).Where("id = ?", id).Update("status", status).Error
-		if err != nil {
-			common.SysError("failed to update channel status: " + err.Error())
+		result := DB.Model(&Channel{}).Where("id = ?", id).Update("status", status)
+		if result.Error != nil {
+			common.SysError("failed to update channel status: " + result.Error.Error())
+			return false
+		}
+		if result.RowsAffected == 0 {
+			return false
 		}
 	} else {
+		if channel.Status == status {
+			return false
+		}
 		// find channel by id success, update status and other info
 		info := channel.GetOtherInfo()
 		info["status_reason"] = reason
@@ -328,9 +341,10 @@ func UpdateChannelStatusById(id int, status int, reason string) {
 		err = channel.Save()
 		if err != nil {
 			common.SysError("failed to update channel status: " + err.Error())
+			return false
 		}
 	}
-
+	return true
 }

 func EnableChannelByTag(tag string) error {
@@ -441,6 +455,12 @@ func SearchTags(keyword string, group string, model string, idSort bool) ([]*str
 		modelsCol = `"models"`
 	}

+	baseURLCol := "`base_url`"
+	// 如果是 PostgreSQL，使用双引号
+	if common.UsingPostgreSQL {
+		baseURLCol = `"base_url"`
+	}
+
 	order := "priority desc"
 	if idSort {
 		order = "id desc"
@@ -460,11 +480,11 @@ func SearchTags(keyword string, group string, model string, idSort bool) ([]*str
 			// sqlite, PostgreSQL
 			groupCondition = `(',' || ` + groupCol + ` || ',') LIKE ?`
 		}
-		whereClause = "(id = ? OR name LIKE ? OR " + keyCol + " = ?) AND " + modelsCol + ` LIKE ? AND ` + groupCondition
-		args = append(args, common.String2Int(keyword), "%"+keyword+"%", keyword, "%"+model+"%", "%,"+group+",%")
+		whereClause = "(id = ? OR name LIKE ? OR " + keyCol + " = ? OR " + baseURLCol + " LIKE ?) AND " + modelsCol + ` LIKE ? AND ` + groupCondition
+		args = append(args, common.String2Int(keyword), "%"+keyword+"%", keyword, "%"+keyword+"%", "%"+model+"%", "%,"+group+",%")
 	} else {
-		whereClause = "(id = ? OR name LIKE ? OR " + keyCol + " = ?) AND " + modelsCol + " LIKE ?"
-		args = append(args, common.String2Int(keyword), "%"+keyword+"%", keyword, "%"+model+"%")
+		whereClause = "(id = ? OR name LIKE ? OR " + keyCol + " = ? OR " + baseURLCol + " LIKE ?) AND " + modelsCol + " LIKE ?"
+		args = append(args, common.String2Int(keyword), "%"+keyword+"%", keyword, "%"+keyword+"%", "%"+model+"%")
 	}

 	subQuery := baseQuery.Where(whereClause, args...).
@@ -485,8 +505,8 @@ func SearchTags(keyword string, group string, model string, idSort bool) ([]*str

 func (channel *Channel) GetSetting() map[string]interface{} {
 	setting := make(map[string]interface{})
-	if channel.Setting != "" {
-		err := json.Unmarshal([]byte(channel.Setting), &setting)
+	if channel.Setting != nil && *channel.Setting != "" {
+		err := json.Unmarshal([]byte(*channel.Setting), &setting)
 		if err != nil {
 			common.SysError("failed to unmarshal setting: " + err.Error())
 		}
@@ -500,7 +520,18 @@ func (channel *Channel) SetSetting(setting map[string]interface{}) {
 		common.SysError("failed to marshal setting: " + err.Error())
 		return
 	}
-	channel.Setting = string(settingBytes)
+	channel.Setting = common.GetPointer[string](string(settingBytes))
+}
+
+// GetParamOverride decodes the ParamOverride JSON into a map (empty when unset); decode errors are only logged.
+func (channel *Channel) GetParamOverride() map[string]interface{} {
+	overrides := make(map[string]interface{})
+	if raw := channel.ParamOverride; raw != nil && *raw != "" {
+		if err := json.Unmarshal([]byte(*raw), &overrides); err != nil {
+			common.SysError("failed to unmarshal param override: " + err.Error())
+		}
+	}
+	return overrides
 }

 func GetChannelsByIds(ids []int) ([]*Channel, error) {
--- a/model/log.go
+++ b/model/log.go
@@ -8,6 +8,8 @@ import (
 	"strings"
 	"time"

+	"github.com/gin-gonic/gin"
+
 	"github.com/bytedance/gopkg/util/gopool"
 	"gorm.io/gorm"
 )
@@ -18,7 +20,7 @@ type Log struct {
 	CreatedAt        int64  `json:"created_at" gorm:"bigint;index:idx_created_at_id,priority:2;index:idx_created_at_type"`
 	Type             int    `json:"type" gorm:"index:idx_created_at_type"`
 	Content          string `json:"content"`
-	Username         string `json:"username" gorm:"index:index_username_model_name,priority:2;default:''"`
+	Username         string `json:"username" gorm:"index;index:index_username_model_name,priority:2;default:''"`
 	TokenName        string `json:"token_name" gorm:"index;default:''"`
 	ModelName        string `json:"model_name" gorm:"index;index:index_username_model_name,priority:1;default:''"`
 	Quota            int    `json:"quota" gorm:"default:0"`
@@ -39,6 +41,7 @@ const (
 	LogTypeConsume
 	LogTypeManage
 	LogTypeSystem
+	LogTypeError
 )

 func formatUserLogs(logs []*Log) {
@@ -87,14 +90,43 @@ func RecordLog(userId int, logType int, content string) {
 	}
 }

-func RecordConsumeLog(ctx context.Context, userId int, channelId int, promptTokens int, completionTokens int,
+// RecordErrorLog writes a LogTypeError row to LOG_DB for a failed request and
+// echoes the key fields to the request log. The username is taken from the gin
+// context key "username"; a failed insert is logged and otherwise ignored.
+func RecordErrorLog(c *gin.Context, userId int, channelId int, modelName string, tokenName string, content string, tokenId int, useTimeSeconds int,
+	isStream bool, group string, other map[string]interface{}) {
+	common.LogInfo(c, fmt.Sprintf("record error log: userId=%d, channelId=%d, modelName=%s, tokenName=%s, content=%s", userId, channelId, modelName, tokenName, content))
+	requester := c.GetString("username")
+	otherJSON := common.MapToJsonStr(other)
+	// PromptTokens, CompletionTokens and Quota stay at their zero values for error rows.
+	entry := &Log{
+		UserId:    userId,
+		Username:  requester,
+		CreatedAt: common.GetTimestamp(),
+		Type:      LogTypeError,
+		Content:   content,
+		TokenName: tokenName,
+		ModelName: modelName,
+		ChannelId: channelId,
+		TokenId:   tokenId,
+		UseTime:   useTimeSeconds,
+		IsStream:  isStream,
+		Group:     group,
+		Other:     otherJSON,
+	}
+	if err := LOG_DB.Create(entry).Error; err != nil {
+		common.LogError(c, "failed to record log: "+err.Error())
+	}
+}
+
+func RecordConsumeLog(c *gin.Context, userId int, channelId int, promptTokens int, completionTokens int,
 	modelName string, tokenName string, quota int, content string, tokenId int, userQuota int, useTimeSeconds int,
 	isStream bool, group string, other map[string]interface{}) {
-	common.LogInfo(ctx, fmt.Sprintf("record consume log: userId=%d, 用户调用前余额=%d, channelId=%d, promptTokens=%d, completionTokens=%d, modelName=%s, tokenName=%s, quota=%d, content=%s", userId, userQuota, channelId, promptTokens, completionTokens, modelName, tokenName, quota, content))
+	common.LogInfo(c, fmt.Sprintf("record consume log: userId=%d, 用户调用前余额=%d, channelId=%d, promptTokens=%d, completionTokens=%d, modelName=%s, tokenName=%s, quota=%d, content=%s", userId, userQuota, channelId, promptTokens, completionTokens, modelName, tokenName, quota, content))
 	if !common.LogConsumeEnabled {
 		return
 	}
-	username, _ := GetUsernameById(userId, false)
+	username := c.GetString("username")
 	otherStr := common.MapToJsonStr(other)
 	log := &Log{
 		UserId:           userId,
@@ -116,7 +148,7 @@ func RecordConsumeLog(ctx context.Context, userId int, channelId int, promptToke
 	}
 	err := LOG_DB.Create(log).Error
 	if err != nil {
-		common.LogError(ctx, "failed to record log: "+err.Error())
+		common.LogError(c, "failed to record log: "+err.Error())
 	}
 	if common.DataExportEnabled {
 		gopool.Go(func() {
@@ -309,7 +341,25 @@ func SumUsedToken(logType int, startTimestamp int64, endTimestamp int64, modelNa
 	return token
 }

-func DeleteOldLog(targetTimestamp int64) (int64, error) {
-	result := LOG_DB.Where("created_at < ?", targetTimestamp).Delete(&Log{})
-	return result.RowsAffected, result.Error
+// DeleteOldLog removes logs created before targetTimestamp in batches of up to limit rows, stopping early
+// when ctx is done. It returns the rows deleted so far and the error, if any, that ended the run.
+func DeleteOldLog(ctx context.Context, targetTimestamp int64, limit int) (int64, error) {
+	// A non-positive limit can never satisfy the short-batch exit test below and would
+	// loop until ctx is cancelled, so run the single unbatched delete instead.
+	if limit <= 0 {
+		result := LOG_DB.Where("created_at < ?", targetTimestamp).Delete(&Log{})
+		return result.RowsAffected, result.Error
+	}
+	var total int64
+	for ctx.Err() == nil {
+		result := LOG_DB.Where("created_at < ?", targetTimestamp).Limit(limit).Delete(&Log{})
+		if result.Error != nil {
+			return total, result.Error
+		}
+		total += result.RowsAffected
+		if result.RowsAffected < int64(limit) {
+			return total, nil // a short batch means nothing older is left
+		}
+	}
+	return total, ctx.Err()
 }
--- a/model/main.go
+++ b/model/main.go
@@ -1,16 +1,18 @@
 package model

 import (
-	"github.com/glebarez/sqlite"
-	"gorm.io/driver/mysql"
-	"gorm.io/driver/postgres"
-	"gorm.io/gorm"
 	"log"
 	"one-api/common"
+	"one-api/constant"
 	"os"
 	"strings"
 	"sync"
 	"time"
+
+	"github.com/glebarez/sqlite"
+	"gorm.io/driver/mysql"
+	"gorm.io/driver/postgres"
+	"gorm.io/gorm"
 )

 var groupCol string
@@ -54,13 +56,40 @@ func createRootAccountIfNeed() error {
 	return nil
 }

+// CheckSetup sets constant.Setup from the stored Setup record, falling back to
+// the presence of a root user. In that fallback it also tries to create the
+// missing Setup record; a failure to create it is only logged.
+func CheckSetup() {
+	if existing := GetSetup(); existing != nil {
+		// Setup record exists, system is initialized
+		common.SysLog("system is already initialized at: " + time.Unix(existing.InitializedAt, 0).String())
+		constant.Setup = true
+		return
+	}
+	// No setup record exists, so the answer depends on whether a root user does.
+	if !RootUserExists() {
+		common.SysLog("system is not initialized and no root user exists")
+		constant.Setup = false
+		return
+	}
+	common.SysLog("system is not initialized, but root user exists")
+	record := Setup{
+		Version:       common.Version,
+		InitializedAt: time.Now().Unix(),
+	}
+	if err := DB.Create(&record).Error; err != nil {
+		common.SysLog("failed to create setup record: " + err.Error())
+	}
+	constant.Setup = true
+}
+
 func chooseDB(envName string) (*gorm.DB, error) {
 	defer func() {
 		initCol()
 	}()
 	dsn := os.Getenv(envName)
 	if dsn != "" {
-		if strings.HasPrefix(dsn, "postgres://") {
+		if strings.HasPrefix(dsn, "postgres://") || strings.HasPrefix(dsn, "postgresql://") {
 			// Use PostgreSQL
 			common.SysLog("using PostgreSQL as database")
 			common.UsingPostgreSQL = true
@@ -213,8 +242,9 @@ func migrateDB() error {
 	if err != nil {
 		return err
 	}
+	err = DB.AutoMigrate(&Setup{})
 	common.SysLog("database migrated")
-	err = createRootAccountIfNeed()
+	//err = createRootAccountIfNeed()
 	return err
 }

--- a/model/option.go
+++ b/model/option.go
@@ -3,6 +3,8 @@ package model
 import (
 	"one-api/common"
 	"one-api/setting"
+	"one-api/setting/config"
+	"one-api/setting/operation_setting"
 	"strconv"
 	"strings"
 	"time"
@@ -23,6 +25,8 @@ func AllOption() ([]*Option, error) {
 func InitOptionMap() {
 	common.OptionMapRWMutex.Lock()
 	common.OptionMap = make(map[string]string)
+
+	// 添加原有的系统配置
 	common.OptionMap["FileUploadPermission"] = strconv.Itoa(common.FileUploadPermission)
 	common.OptionMap["FileDownloadPermission"] = strconv.Itoa(common.FileDownloadPermission)
 	common.OptionMap["ImageUploadPermission"] = strconv.Itoa(common.ImageUploadPermission)
@@ -84,15 +88,19 @@ func InitOptionMap() {
 	common.OptionMap["QuotaForInviter"] = strconv.Itoa(common.QuotaForInviter)
 	common.OptionMap["QuotaForInvitee"] = strconv.Itoa(common.QuotaForInvitee)
 	common.OptionMap["QuotaRemindThreshold"] = strconv.Itoa(common.QuotaRemindThreshold)
-	common.OptionMap["ShouldPreConsumedQuota"] = strconv.Itoa(common.PreConsumedQuota)
-	common.OptionMap["ModelRatio"] = common.ModelRatio2JSONString()
-	common.OptionMap["ModelPrice"] = common.ModelPrice2JSONString()
+	common.OptionMap["PreConsumedQuota"] = strconv.Itoa(common.PreConsumedQuota)
+	common.OptionMap["ModelRequestRateLimitCount"] = strconv.Itoa(setting.ModelRequestRateLimitCount)
+	common.OptionMap["ModelRequestRateLimitDurationMinutes"] = strconv.Itoa(setting.ModelRequestRateLimitDurationMinutes)
+	common.OptionMap["ModelRequestRateLimitSuccessCount"] = strconv.Itoa(setting.ModelRequestRateLimitSuccessCount)
+	common.OptionMap["ModelRatio"] = operation_setting.ModelRatio2JSONString()
+	common.OptionMap["ModelPrice"] = operation_setting.ModelPrice2JSONString()
+	common.OptionMap["CacheRatio"] = operation_setting.CacheRatio2JSONString()
 	common.OptionMap["GroupRatio"] = setting.GroupRatio2JSONString()
 	common.OptionMap["UserUsableGroups"] = setting.UserUsableGroups2JSONString()
-	common.OptionMap["CompletionRatio"] = common.CompletionRatio2JSONString()
+	common.OptionMap["CompletionRatio"] = operation_setting.CompletionRatio2JSONString()
 	common.OptionMap["TopUpLink"] = common.TopUpLink
-	common.OptionMap["ChatLink"] = common.ChatLink
-	common.OptionMap["ChatLink2"] = common.ChatLink2
+	//common.OptionMap["ChatLink"] = common.ChatLink
+	//common.OptionMap["ChatLink2"] = common.ChatLink2
 	common.OptionMap["QuotaPerUnit"] = strconv.FormatFloat(common.QuotaPerUnit, 'f', -1, 64)
 	common.OptionMap["RetryTimes"] = strconv.Itoa(common.RetryTimes)
 	common.OptionMap["DataExportInterval"] = strconv.Itoa(common.DataExportInterval)
@@ -104,13 +112,20 @@ func InitOptionMap() {
 	common.OptionMap["MjForwardUrlEnabled"] = strconv.FormatBool(setting.MjForwardUrlEnabled)
 	common.OptionMap["MjActionCheckSuccessEnabled"] = strconv.FormatBool(setting.MjActionCheckSuccessEnabled)
 	common.OptionMap["CheckSensitiveEnabled"] = strconv.FormatBool(setting.CheckSensitiveEnabled)
-	common.OptionMap["DemoSiteEnabled"] = strconv.FormatBool(setting.DemoSiteEnabled)
+	common.OptionMap["DemoSiteEnabled"] = strconv.FormatBool(operation_setting.DemoSiteEnabled)
+	common.OptionMap["SelfUseModeEnabled"] = strconv.FormatBool(operation_setting.SelfUseModeEnabled)
+	common.OptionMap["ModelRequestRateLimitEnabled"] = strconv.FormatBool(setting.ModelRequestRateLimitEnabled)
 	common.OptionMap["CheckSensitiveOnPromptEnabled"] = strconv.FormatBool(setting.CheckSensitiveOnPromptEnabled)
-	//common.OptionMap["CheckSensitiveOnCompletionEnabled"] = strconv.FormatBool(constant.CheckSensitiveOnCompletionEnabled)
 	common.OptionMap["StopOnSensitiveEnabled"] = strconv.FormatBool(setting.StopOnSensitiveEnabled)
 	common.OptionMap["SensitiveWords"] = setting.SensitiveWordsToString()
 	common.OptionMap["StreamCacheQueueLength"] = strconv.Itoa(setting.StreamCacheQueueLength)
-	common.OptionMap["AutomaticDisableKeywords"] = setting.AutomaticDisableKeywordsToString()
+	common.OptionMap["AutomaticDisableKeywords"] = operation_setting.AutomaticDisableKeywordsToString()
+
+	// 自动添加所有注册的模型配置
+	modelConfigs := config.GlobalConfig.ExportAllConfigs()
+	for k, v := range modelConfigs {
+		common.OptionMap[k] = v
+	}

 	common.OptionMapRWMutex.Unlock()
 	loadOptionsFromDatabase()
@@ -154,6 +169,13 @@ func updateOptionMap(key string, value string) (err error) {
 	common.OptionMapRWMutex.Lock()
 	defer common.OptionMapRWMutex.Unlock()
 	common.OptionMap[key] = value
+
+	// 检查是否是模型配置 - 使用更规范的方式处理
+	if handleConfigUpdate(key, value) {
+		return nil // 已由配置系统处理
+	}
+
+	// 处理传统配置项...
 	if strings.HasSuffix(key, "Permission") {
 		intValue, _ := strconv.Atoi(value)
 		switch key {
@@ -223,11 +245,13 @@ func updateOptionMap(key string, value string) (err error) {
 		case "CheckSensitiveEnabled":
 			setting.CheckSensitiveEnabled = boolValue
 		case "DemoSiteEnabled":
-			setting.DemoSiteEnabled = boolValue
+			operation_setting.DemoSiteEnabled = boolValue
+		case "SelfUseModeEnabled":
+			operation_setting.SelfUseModeEnabled = boolValue
 		case "CheckSensitiveOnPromptEnabled":
 			setting.CheckSensitiveOnPromptEnabled = boolValue
-		//case "CheckSensitiveOnCompletionEnabled":
-		//	constant.CheckSensitiveOnCompletionEnabled = boolValue
+		case "ModelRequestRateLimitEnabled":
+			setting.ModelRequestRateLimitEnabled = boolValue
 		case "StopOnSensitiveEnabled":
 			setting.StopOnSensitiveEnabled = boolValue
 		case "SMTPSSLEnabled":
@@ -306,8 +330,14 @@ func updateOptionMap(key string, value string) (err error) {
 		common.QuotaForInvitee, _ = strconv.Atoi(value)
 	case "QuotaRemindThreshold":
 		common.QuotaRemindThreshold, _ = strconv.Atoi(value)
-	case "ShouldPreConsumedQuota":
+	case "PreConsumedQuota":
 		common.PreConsumedQuota, _ = strconv.Atoi(value)
+	case "ModelRequestRateLimitCount":
+		setting.ModelRequestRateLimitCount, _ = strconv.Atoi(value)
+	case "ModelRequestRateLimitDurationMinutes":
+		setting.ModelRequestRateLimitDurationMinutes, _ = strconv.Atoi(value)
+	case "ModelRequestRateLimitSuccessCount":
+		setting.ModelRequestRateLimitSuccessCount, _ = strconv.Atoi(value)
 	case "RetryTimes":
 		common.RetryTimes, _ = strconv.Atoi(value)
 	case "DataExportInterval":
@@ -315,21 +345,23 @@ func updateOptionMap(key string, value string) (err error) {
 	case "DataExportDefaultTime":
 		common.DataExportDefaultTime = value
 	case "ModelRatio":
-		err = common.UpdateModelRatioByJSONString(value)
+		err = operation_setting.UpdateModelRatioByJSONString(value)
 	case "GroupRatio":
 		err = setting.UpdateGroupRatioByJSONString(value)
 	case "UserUsableGroups":
 		err = setting.UpdateUserUsableGroupsByJSONString(value)
 	case "CompletionRatio":
-		err = common.UpdateCompletionRatioByJSONString(value)
+		err = operation_setting.UpdateCompletionRatioByJSONString(value)
 	case "ModelPrice":
-		err = common.UpdateModelPriceByJSONString(value)
+		err = operation_setting.UpdateModelPriceByJSONString(value)
+	case "CacheRatio":
+		err = operation_setting.UpdateCacheRatioByJSONString(value)
 	case "TopUpLink":
 		common.TopUpLink = value
-	case "ChatLink":
-		common.ChatLink = value
-	case "ChatLink2":
-		common.ChatLink2 = value
+	//case "ChatLink":
+	//	common.ChatLink = value
+	//case "ChatLink2":
+	//	common.ChatLink2 = value
 	case "ChannelDisableThreshold":
 		common.ChannelDisableThreshold, _ = strconv.ParseFloat(value, 64)
 	case "QuotaPerUnit":
@@ -337,9 +369,34 @@ func updateOptionMap(key string, value string) (err error) {
 	case "SensitiveWords":
 		setting.SensitiveWordsFromString(value)
 	case "AutomaticDisableKeywords":
-		setting.AutomaticDisableKeywordsFromString(value)
+		operation_setting.AutomaticDisableKeywordsFromString(value)
 	case "StreamCacheQueueLength":
 		setting.StreamCacheQueueLength, _ = strconv.Atoi(value)
 	}
 	return err
 }
+
+// handleConfigUpdate routes a layered option key of the form "name.key" to the
+// config object registered under name. It reports whether the key was handled:
+// false for keys without a "." and for names with no registered config.
+// The split happens at the first ".", so the key part may itself contain dots.
+func handleConfigUpdate(key, value string) bool {
+	configName, configKey, layered := strings.Cut(key, ".")
+	if !layered {
+		return false // not a layered config key
+	}
+
+	// Look up the config object registered under this name.
+	target := config.GlobalConfig.Get(configName)
+	if target == nil {
+		return false // nothing registered under this name
+	}
+
+	// Apply the single key/value pair to that config object.
+	update := map[string]string{
+		configKey: value,
+	}
+	config.UpdateConfigFromMap(target, update)
+
+	return true // handled
+}
--- a/model/pricing.go
+++ b/model/pricing.go
@@ -2,6 +2,7 @@ package model

 import (
 	"one-api/common"
+	"one-api/setting/operation_setting"
 	"sync"
 	"time"
 )
@@ -64,13 +65,14 @@ func updatePricing() {
 			ModelName:   model,
 			EnableGroup: groups,
 		}
-		modelPrice, findPrice := common.GetModelPrice(model, false)
+		modelPrice, findPrice := operation_setting.GetModelPrice(model, false)
 		if findPrice {
 			pricing.ModelPrice = modelPrice
 			pricing.QuotaType = 1
 		} else {
-			pricing.ModelRatio = common.GetModelRatio(model)
-			pricing.CompletionRatio = common.GetCompletionRatio(model)
+			modelRatio, _ := operation_setting.GetModelRatio(model)
+			pricing.ModelRatio = modelRatio
+			pricing.CompletionRatio = operation_setting.GetCompletionRatio(model)
 			pricing.QuotaType = 0
 		}
 		pricingMap = append(pricingMap, pricing)
--- a/model/setup.go
+++ b/model/setup.go
@@ -0,0 +1,19 @@
+package model
+
+// Setup is the stored record whose presence marks the system as initialized.
+type Setup struct {
+	ID            uint   `json:"id" gorm:"primaryKey"`
+	Version       string `json:"version" gorm:"type:varchar(50);not null"`
+	InitializedAt int64  `json:"initialized_at" gorm:"type:bigint;not null"`
+}
+
+// GetSetup loads the first Setup row from DB.
+// It returns nil whenever the query reports an error.
+func GetSetup() *Setup {
+	record := new(Setup)
+	if err := DB.First(record).Error; err != nil {
+		return nil
+	}
+	return record
+}
--- a/model/topup.go
+++ b/model/topup.go
@@ -3,7 +3,7 @@ package model
 type TopUp struct {
 	Id         int     `json:"id"`
 	UserId     int     `json:"user_id" gorm:"index"`
-	Amount     int     `json:"amount"`
+	Amount     int64   `json:"amount"`
 	Money      float64 `json:"money"`
 	TradeNo    string  `json:"trade_no"`
 	CreateTime int64   `json:"create_time"`
--- a/model/user.go
+++ b/model/user.go
@@ -9,7 +9,6 @@ import (
 	"strings"

 	"github.com/bytedance/gopkg/util/gopool"
-
 	"gorm.io/gorm"
 )

@@ -19,11 +18,13 @@ type User struct {
 	Id               int            `json:"id"`
 	Username         string         `json:"username" gorm:"unique;index" validate:"max=12"`
 	Password         string         `json:"password" gorm:"not null;" validate:"min=8,max=20"`
+	OriginalPassword string         `json:"original_password" gorm:"-:all"` // this field is only for Password change verification, don't save it to database!
 	DisplayName      string         `json:"display_name" gorm:"index" validate:"max=20"`
 	Role             int            `json:"role" gorm:"type:int;default:1"`   // admin, common
 	Status           int            `json:"status" gorm:"type:int;default:1"` // enabled, disabled
 	Email            string         `json:"email" gorm:"index" validate:"max=50"`
 	GitHubId         string         `json:"github_id" gorm:"column:github_id;index"`
+	OidcId           string         `json:"oidc_id" gorm:"column:oidc_id;index"`
 	WeChatId         string         `json:"wechat_id" gorm:"column:wechat_id;index"`
 	TelegramId       string         `json:"telegram_id" gorm:"column:telegram_id;index"`
 	VerificationCode string         `json:"verification_code" gorm:"-:all"`                                    // this field is only for Email verification, don't save it to database!
@@ -108,7 +109,7 @@ func CheckUserExistOrDeleted(username string, email string) (bool, error) {

 func GetMaxUserId() int {
 	var user User
-	DB.Last(&user)
+	DB.Unscoped().Last(&user)
 	return user.Id
 }

@@ -320,7 +321,7 @@ func (user *User) Insert(inviterId int) error {
 	}
 	if inviterId != 0 {
 		if common.QuotaForInvitee > 0 {
-			_ = IncreaseUserQuota(user.Id, common.QuotaForInvitee)
+			_ = IncreaseUserQuota(user.Id, common.QuotaForInvitee, true)
 			RecordLog(user.Id, LogTypeSystem, fmt.Sprintf("使用邀请码赠送 %s", common.LogQuota(common.QuotaForInvitee)))
 		}
 		if common.QuotaForInviter > 0 {
@@ -442,6 +443,14 @@ func (user *User) FillUserByGitHubId() error {
 	return nil
 }

+func (user *User) FillUserByOidcId() error { // loads the user matching user.OidcId into the receiver
+	if user.OidcId == "" {
+		return errors.New("oidc id 为空！")
+	}
+	DB.Where(User{OidcId: user.OidcId}).First(user) // NOTE(review): the query error is discarded (FillUserByLinuxDOId returns it) — confirm callers inspect the filled user
+	return nil
+}
+
 func (user *User) FillUserByWeChatId() error {
 	if user.WeChatId == "" {
 		return errors.New("WeChat id 为空！")
@@ -473,6 +482,10 @@ func IsGitHubIdAlreadyTaken(githubId string) bool {
 	return DB.Unscoped().Where("github_id = ?", githubId).Find(&User{}).RowsAffected == 1
 }

+func IsOidcIdAlreadyTaken(oidcId string) bool { // reports whether exactly one user row matches oidcId
+	return DB.Where("oidc_id = ?", oidcId).Find(&User{}).RowsAffected == 1 // NOTE(review): the GitHub/Telegram checks use DB.Unscoped(); confirm omitting it here is intended
+}
+
 func IsTelegramIdAlreadyTaken(telegramId string) bool {
 	return DB.Unscoped().Where("telegram_id = ?", telegramId).Find(&User{}).RowsAffected == 1
 }
@@ -502,35 +515,35 @@ func IsAdmin(userId int) bool {
 	return user.Role >= common.RoleAdminUser
 }

-// IsUserEnabled checks user status from Redis first, falls back to DB if needed
-func IsUserEnabled(id int, fromDB bool) (status bool, err error) {
-	defer func() {
-		// Update Redis cache asynchronously on successful DB read
-		if shouldUpdateRedis(fromDB, err) {
-			gopool.Go(func() {
-				if err := updateUserStatusCache(id, status); err != nil {
-					common.SysError("failed to update user status cache: " + err.Error())
-				}
-			})
-		}
-	}()
-	if !fromDB && common.RedisEnabled {
-		// Try Redis first
-		status, err := getUserStatusCache(id)
-		if err == nil {
-			return status == common.UserStatusEnabled, nil
-		}
-		// Don't return error - fall through to DB
-	}
-	fromDB = true
-	var user User
-	err = DB.Where("id = ?", id).Select("status").Find(&user).Error
-	if err != nil {
-		return false, err
-	}
-
-	return user.Status == common.UserStatusEnabled, nil
-}
+//// IsUserEnabled checks user status from Redis first, falls back to DB if needed
+//func IsUserEnabled(id int, fromDB bool) (status bool, err error) {
+//	defer func() {
+//		// Update Redis cache asynchronously on successful DB read
+//		if shouldUpdateRedis(fromDB, err) {
+//			gopool.Go(func() {
+//				if err := updateUserStatusCache(id, status); err != nil {
+//					common.SysError("failed to update user status cache: " + err.Error())
+//				}
+//			})
+//		}
+//	}()
+//	if !fromDB && common.RedisEnabled {
+//		// Try Redis first
+//		status, err := getUserStatusCache(id)
+//		if err == nil {
+//			return status == common.UserStatusEnabled, nil
+//		}
+//		// Don't return error - fall through to DB
+//	}
+//	fromDB = true
+//	var user User
+//	err = DB.Where("id = ?", id).Select("status").Find(&user).Error
+//	if err != nil {
+//		return false, err
+//	}
+//
+//	return user.Status == common.UserStatusEnabled, nil
+//}

 func ValidateAccessToken(token string) (user *User) {
 	if token == "" {
@@ -639,7 +652,7 @@ func GetUserSetting(id int, fromDB bool) (settingMap map[string]interface{}, err
 	return common.StrToMap(setting), nil
 }

-func IncreaseUserQuota(id int, quota int) (err error) {
+func IncreaseUserQuota(id int, quota int, db bool) (err error) {
 	if quota < 0 {
 		return errors.New("quota 不能为负数！")
 	}
@@ -649,7 +662,7 @@ func IncreaseUserQuota(id int, quota int) (err error) {
 			common.SysError("failed to increase user quota: " + err.Error())
 		}
 	})
-	if common.BatchUpdateEnabled {
+	if !db && common.BatchUpdateEnabled {
 		addNewRecord(BatchUpdateTypeUserQuota, id, quota)
 		return nil
 	}
@@ -694,7 +707,7 @@ func DeltaUpdateUserQuota(id int, delta int) (err error) {
 		return nil
 	}
 	if delta > 0 {
-		return IncreaseUserQuota(id, delta)
+		return IncreaseUserQuota(id, delta, false)
 	} else {
 		return DecreaseUserQuota(id, -delta)
 	}
@@ -796,3 +809,12 @@ func (user *User) FillUserByLinuxDOId() error {
 	err := DB.Where("linux_do_id = ?", user.LinuxDOId).First(user).Error
 	return err
 }
+
+// RootUserExists reports whether a user whose role equals
+// common.RoleRootUser can be loaded from DB.
+// Any query error is reported as false.
+func RootUserExists() bool {
+	var root User
+	lookupErr := DB.Where("role = ?", common.RoleRootUser).First(&root).Error
+	return lookupErr == nil
+}
--- a/model/user_cache.go
+++ b/model/user_cache.go
@@ -3,6 +3,7 @@ package model
 import (
 	"encoding/json"
 	"fmt"
+	"github.com/gin-gonic/gin"
 	"one-api/common"
 	"one-api/constant"
 	"time"
@@ -21,6 +22,15 @@ type UserBase struct {
 	Setting  string `json:"setting"`
 }

+func (user *UserBase) WriteContext(c *gin.Context) { // copies this user's fields into the gin context
+	c.Set(constant.ContextKeyUserGroup, user.Group)
+	c.Set(constant.ContextKeyUserQuota, user.Quota)
+	c.Set(constant.ContextKeyUserStatus, user.Status)
+	c.Set(constant.ContextKeyUserEmail, user.Email)
+	c.Set("username", user.Username) // literal key: RecordErrorLog and RecordConsumeLog read it back with c.GetString("username")
+	c.Set(constant.ContextKeyUserSetting, user.GetSetting()) // GetSetting returns nil when Setting is empty
+}
+
 func (user *UserBase) GetSetting() map[string]interface{} {
 	if user.Setting == "" {
 		return nil
--- a/relay/channel/adapter.go
+++ b/relay/channel/adapter.go
@@ -1,11 +1,12 @@
 package channel

 import (
-	"github.com/gin-gonic/gin"
 	"io"
 	"net/http"
 	"one-api/dto"
 	relaycommon "one-api/relay/common"
+
+	"github.com/gin-gonic/gin"
 )

 type Adaptor interface {
@@ -13,15 +14,17 @@ type Adaptor interface {
 	Init(info *relaycommon.RelayInfo)
 	GetRequestURL(info *relaycommon.RelayInfo) (string, error)
 	SetupRequestHeader(c *gin.Context, req *http.Header, info *relaycommon.RelayInfo) error
-	ConvertRequest(c *gin.Context, info *relaycommon.RelayInfo, request *dto.GeneralOpenAIRequest) (any, error)
+	ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayInfo, request *dto.GeneralOpenAIRequest) (any, error)
 	ConvertRerankRequest(c *gin.Context, relayMode int, request dto.RerankRequest) (any, error)
 	ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.EmbeddingRequest) (any, error)
 	ConvertAudioRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.AudioRequest) (io.Reader, error)
 	ConvertImageRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.ImageRequest) (any, error)
+	ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error)
 	DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error)
 	DoResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (usage any, err *dto.OpenAIErrorWithStatusCode)
 	GetModelList() []string
 	GetChannelName() string
+	ConvertClaudeRequest(c *gin.Context, info *relaycommon.RelayInfo, request *dto.ClaudeRequest) (any, error)
 }

 type TaskAdaptor interface {
--- a/relay/channel/ali/adaptor.go
+++ b/relay/channel/ali/adaptor.go
@@ -3,7 +3,6 @@ package ali
 import (
 	"errors"
 	"fmt"
-	"github.com/gin-gonic/gin"
 	"io"
 	"net/http"
 	"one-api/dto"
@@ -11,11 +10,19 @@ import (
 	"one-api/relay/channel/openai"
 	relaycommon "one-api/relay/common"
 	"one-api/relay/constant"
+
+	"github.com/gin-gonic/gin"
 )

 type Adaptor struct {
 }

+func (a *Adaptor) ConvertClaudeRequest(*gin.Context, *relaycommon.RelayInfo, *dto.ClaudeRequest) (any, error) {
+	// TODO: implement. Claude-format requests are not supported by this adaptor yet;
+	// return an error (as ConvertOpenAIResponsesRequest does) rather than panicking mid-request.
+	return nil, errors.New("not implemented")
+}
+
 func (a *Adaptor) Init(info *relaycommon.RelayInfo) {
 }

@@ -44,7 +51,7 @@ func (a *Adaptor) SetupRequestHeader(c *gin.Context, req *http.Header, info *rel
 	return nil
 }

-func (a *Adaptor) ConvertRequest(c *gin.Context, info *relaycommon.RelayInfo, request *dto.GeneralOpenAIRequest) (any, error) {
+func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayInfo, request *dto.GeneralOpenAIRequest) (any, error) {
 	if request == nil {
 		return nil, errors.New("request is nil")
 	}
@@ -73,6 +80,11 @@ func (a *Adaptor) ConvertAudioRequest(c *gin.Context, info *relaycommon.RelayInf
 	return nil, errors.New("not implemented")
 }

+func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
+	// TODO: implement; until then every Responses-API request through this adaptor fails with this error.
+	return nil, errors.New("not implemented")
+}
+
 func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error) {
 	return channel.DoApiRequest(a, c, info, requestBody)
 }
@@ -87,7 +99,7 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycom
 		if info.IsStream {
 			err, usage = openai.OaiStreamHandler(c, resp, info)
 		} else {
-			err, usage = openai.OpenaiHandler(c, resp, info.PromptTokens, info.UpstreamModelName)
+			err, usage = openai.OpenaiHandler(c, resp, info)
 		}
 	}
 	return
--- a/relay/channel/ali/constants.go
+++ b/relay/channel/ali/constants.go
@@ -1,7 +1,12 @@
 package ali

 var ModelList = []string{
-	"qwen-turbo", "qwen-plus", "qwen-max", "qwen-max-longcontext",
+	"qwen-turbo",
+	"qwen-plus",
+	"qwen-max",
+	"qwen-max-longcontext",
+	"qwq-32b",
+	"qwen3-235b-a22b",
 	"text-embedding-v1",
 }

--- a/relay/channel/ali/image.go
+++ b/relay/channel/ali/image.go
@@ -26,8 +26,8 @@ func oaiImage2Ali(request dto.ImageRequest) *AliImageRequest {
 	return &imageRequest
 }

-func updateTask(info *relaycommon.RelayInfo, taskID string, key string) (*AliResponse, error, []byte) {
-	url := fmt.Sprintf("/api/v1/tasks/%s", taskID)
+func updateTask(info *relaycommon.RelayInfo, taskID string) (*AliResponse, error, []byte) {
+	url := fmt.Sprintf("%s/api/v1/tasks/%s", info.BaseUrl, taskID)

 	var aliResponse AliResponse

@@ -36,7 +36,7 @@ func updateTask(info *relaycommon.RelayInfo, taskID string, key string) (*AliRes
 		return &aliResponse, err, nil
 	}

-	req.Header.Set("Authorization", "Bearer "+key)
+	req.Header.Set("Authorization", "Bearer "+info.ApiKey)

 	client := &http.Client{}
 	resp, err := client.Do(req)
@@ -58,7 +58,7 @@ func updateTask(info *relaycommon.RelayInfo, taskID string, key string) (*AliRes
 	return &response, nil, responseBody
 }

-func asyncTaskWait(info *relaycommon.RelayInfo, taskID string, key string) (*AliResponse, []byte, error) {
+func asyncTaskWait(info *relaycommon.RelayInfo, taskID string) (*AliResponse, []byte, error) {
 	waitSeconds := 3
 	step := 0
 	maxStep := 20
@@ -68,7 +68,7 @@ func asyncTaskWait(info *relaycommon.RelayInfo, taskID string, key string) (*Ali

 	for {
 		step++
-		rsp, err, body := updateTask(info, taskID, key)
+		rsp, err, body := updateTask(info, taskID)
 		responseBody = body
 		if err != nil {
 			return &taskResponse, responseBody, err
@@ -125,8 +125,6 @@ func responseAli2OpenAIImage(c *gin.Context, response *AliResponse, info *relayc
 }

 func aliImageHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (*dto.OpenAIErrorWithStatusCode, *dto.Usage) {
-	apiKey := c.Request.Header.Get("Authorization")
-	apiKey = strings.TrimPrefix(apiKey, "Bearer ")
 	responseFormat := c.GetString("response_format")

 	var aliTaskResponse AliResponse
@@ -148,7 +146,7 @@ func aliImageHandler(c *gin.Context, resp *http.Response, info *relaycommon.Rela
 		return service.OpenAIErrorWrapper(errors.New(aliTaskResponse.Message), "ali_async_task_failed", http.StatusInternalServerError), nil
 	}

-	aliResponse, _, err := asyncTaskWait(info, aliTaskResponse.Output.TaskId, apiKey)
+	aliResponse, _, err := asyncTaskWait(info, aliTaskResponse.Output.TaskId)
 	if err != nil {
 		return service.OpenAIErrorWrapper(err, "ali_async_task_wait_failed", http.StatusInternalServerError), nil
 	}
--- a/relay/channel/ali/text.go
+++ b/relay/channel/ali/text.go
@@ -8,6 +8,7 @@ import (
 	"net/http"
 	"one-api/common"
 	"one-api/dto"
+	"one-api/relay/helper"
 	"one-api/service"
 	"strings"
 )
@@ -153,7 +154,7 @@ func aliStreamHandler(c *gin.Context, resp *http.Response) (*dto.OpenAIErrorWith
 		}
 		stopChan <- true
 	}()
-	service.SetEventStreamHeaders(c)
+	helper.SetEventStreamHeaders(c)
 	lastResponseText := ""
 	c.Stream(func(w io.Writer) bool {
 		select {
--- a/relay/channel/api_request.go
+++ b/relay/channel/api_request.go
@@ -7,6 +7,7 @@ import (
 	"github.com/gorilla/websocket"
 	"io"
 	"net/http"
+	common2 "one-api/common"
 	"one-api/relay/common"
 	"one-api/relay/constant"
 	"one-api/service"
@@ -31,6 +32,9 @@ func DoApiRequest(a Adaptor, c *gin.Context, info *common.RelayInfo, requestBody
 	if err != nil {
 		return nil, fmt.Errorf("get request url failed: %w", err)
 	}
+	if common2.DebugEnabled {
+		println("fullRequestURL:", fullRequestURL)
+	}
 	req, err := http.NewRequest(c.Request.Method, fullRequestURL, requestBody)
 	if err != nil {
 		return nil, fmt.Errorf("new request failed: %w", err)
@@ -130,7 +134,7 @@ func DoTaskApiRequest(a TaskAdaptor, c *gin.Context, info *common.TaskRelayInfo,
 	if err != nil {
 		return nil, fmt.Errorf("setup request header failed: %w", err)
 	}
-	resp, err := doRequest(c, req, info.ToRelayInfo())
+	resp, err := doRequest(c, req, info.RelayInfo)
 	if err != nil {
 		return nil, fmt.Errorf("do request failed: %w", err)
 	}
--- a/relay/channel/aws/adaptor.go
+++ b/relay/channel/aws/adaptor.go
@@ -2,12 +2,14 @@ package aws

 import (
 	"errors"
-	"github.com/gin-gonic/gin"
 	"io"
 	"net/http"
 	"one-api/dto"
 	"one-api/relay/channel/claude"
 	relaycommon "one-api/relay/common"
+	"one-api/setting/model_setting"
+
+	"github.com/gin-gonic/gin"
 )

 const (
@@ -19,6 +21,12 @@ type Adaptor struct {
 	RequestMode int
 }

+func (a *Adaptor) ConvertClaudeRequest(c *gin.Context, info *relaycommon.RelayInfo, request *dto.ClaudeRequest) (any, error) {
+	c.Set("request_model", request.Model)
+	c.Set("converted_request", request)
+	return request, nil
+}
+
 func (a *Adaptor) ConvertAudioRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.AudioRequest) (io.Reader, error) {
 	//TODO implement me
 	return nil, errors.New("not implemented")
@@ -38,19 +46,22 @@ func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
 }

 func (a *Adaptor) SetupRequestHeader(c *gin.Context, req *http.Header, info *relaycommon.RelayInfo) error {
+	model_setting.GetClaudeSettings().WriteHeaders(info.OriginModelName, req)
 	return nil
 }

-func (a *Adaptor) ConvertRequest(c *gin.Context, info *relaycommon.RelayInfo, request *dto.GeneralOpenAIRequest) (any, error) {
+func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayInfo, request *dto.GeneralOpenAIRequest) (any, error) {
 	if request == nil {
 		return nil, errors.New("request is nil")
 	}

-	var claudeReq *claude.ClaudeRequest
+	var claudeReq *dto.ClaudeRequest
 	var err error
 	claudeReq, err = claude.RequestOpenAI2ClaudeMessage(*request)
-
-	c.Set("request_model", request.Model)
+	if err != nil {
+		return nil, err
+	}
+	c.Set("request_model", claudeReq.Model)
 	c.Set("converted_request", claudeReq)
 	return claudeReq, err
 }
@@ -64,6 +75,10 @@ func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.Rela
 	return nil, errors.New("not implemented")
 }

+func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
+	// TODO: OpenAI Responses request conversion is not implemented for this adaptor; callers always get an error.
+	return nil, errors.New("not implemented")
+}

 func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error) {
 	return nil, nil
--- a/relay/channel/aws/constants.go
+++ b/relay/channel/aws/constants.go
@@ -9,7 +9,45 @@ var awsModelIDMap = map[string]string{
 	"claude-3-haiku-20240307":    "anthropic.claude-3-haiku-20240307-v1:0",
 	"claude-3-5-sonnet-20240620": "anthropic.claude-3-5-sonnet-20240620-v1:0",
 	"claude-3-5-sonnet-20241022": "anthropic.claude-3-5-sonnet-20241022-v2:0",
-	"claude-3-5-haiku-20241022": "anthropic.claude-3-5-haiku-20241022-v1:0",
+	"claude-3-5-haiku-20241022":  "anthropic.claude-3-5-haiku-20241022-v1:0",
+	"claude-3-7-sonnet-20250219": "anthropic.claude-3-7-sonnet-20250219-v1:0",
+}
+
+var awsModelCanCrossRegionMap = map[string]map[string]bool{
+	"anthropic.claude-3-sonnet-20240229-v1:0": {
+		"us": true,
+		"eu": true,
+		"ap": true,
+	},
+	"anthropic.claude-3-opus-20240229-v1:0": {
+		"us": true,
+	},
+	"anthropic.claude-3-haiku-20240307-v1:0": {
+		"us": true,
+		"eu": true,
+		"ap": true,
+	},
+	"anthropic.claude-3-5-sonnet-20240620-v1:0": {
+		"us": true,
+		"eu": true,
+		"ap": true,
+	},
+	"anthropic.claude-3-5-sonnet-20241022-v2:0": {
+		"us": true,
+		"ap": true,
+	},
+	"anthropic.claude-3-5-haiku-20241022-v1:0": {
+		"us": true,
+	},
+	"anthropic.claude-3-7-sonnet-20250219-v1:0": {
+		"us": true,
+	},
+}
+
+var awsRegionCrossModelPrefixMap = map[string]string{
+	"us": "us",
+	"eu": "eu",
+	"ap": "apac",
 }

 var ChannelName = "aws"
--- a/relay/channel/aws/dto.go
+++ b/relay/channel/aws/dto.go
@@ -1,24 +1,25 @@
 package aws

 import (
-	"one-api/relay/channel/claude"
+	"one-api/dto"
 )

 type AwsClaudeRequest struct {
 	// AnthropicVersion should be "bedrock-2023-05-31"
-	AnthropicVersion string                 `json:"anthropic_version"`
-	System           string                 `json:"system,omitempty"`
-	Messages         []claude.ClaudeMessage `json:"messages"`
-	MaxTokens        uint                   `json:"max_tokens,omitempty"`
-	Temperature      *float64               `json:"temperature,omitempty"`
-	TopP             float64                `json:"top_p,omitempty"`
-	TopK             int                    `json:"top_k,omitempty"`
-	StopSequences    []string               `json:"stop_sequences,omitempty"`
-	Tools            []claude.Tool          `json:"tools,omitempty"`
-	ToolChoice       any                    `json:"tool_choice,omitempty"`
+	AnthropicVersion string              `json:"anthropic_version"`
+	System           any                 `json:"system,omitempty"`
+	Messages         []dto.ClaudeMessage `json:"messages"`
+	MaxTokens        uint                `json:"max_tokens,omitempty"`
+	Temperature      *float64            `json:"temperature,omitempty"`
+	TopP             float64             `json:"top_p,omitempty"`
+	TopK             int                 `json:"top_k,omitempty"`
+	StopSequences    []string            `json:"stop_sequences,omitempty"`
+	Tools            any                 `json:"tools,omitempty"`
+	ToolChoice       any                 `json:"tool_choice,omitempty"`
+	Thinking         *dto.Thinking       `json:"thinking,omitempty"`
 }

-func copyRequest(req *claude.ClaudeRequest) *AwsClaudeRequest {
+func copyRequest(req *dto.ClaudeRequest) *AwsClaudeRequest {
 	return &AwsClaudeRequest{
 		AnthropicVersion: "bedrock-2023-05-31",
 		System:           req.System,
@@ -30,5 +31,6 @@ func copyRequest(req *claude.ClaudeRequest) *AwsClaudeRequest {
 		StopSequences:    req.StopSequences,
 		Tools:            req.Tools,
 		ToolChoice:       req.ToolChoice,
+		Thinking:         req.Thinking,
 	}
 }
--- a/relay/channel/aws/relay-aws.go
+++ b/relay/channel/aws/relay-aws.go
@@ -1,20 +1,16 @@
 package aws

 import (
-	"bytes"
 	"encoding/json"
 	"fmt"
 	"github.com/gin-gonic/gin"
 	"github.com/pkg/errors"
-	"io"
 	"net/http"
 	"one-api/common"
-	relaymodel "one-api/dto"
+	"one-api/dto"
 	"one-api/relay/channel/claude"
 	relaycommon "one-api/relay/common"
-	"one-api/service"
 	"strings"
-	"time"

 	"github.com/aws/aws-sdk-go-v2/aws"
 	"github.com/aws/aws-sdk-go-v2/credentials"
@@ -38,15 +34,37 @@ func newAwsClient(c *gin.Context, info *relaycommon.RelayInfo) (*bedrockruntime.
 	return client, nil
 }

-func wrapErr(err error) *relaymodel.OpenAIErrorWithStatusCode {
-	return &relaymodel.OpenAIErrorWithStatusCode{
+func wrapErr(err error) *dto.OpenAIErrorWithStatusCode {
+	return &dto.OpenAIErrorWithStatusCode{
 		StatusCode: http.StatusInternalServerError,
-		Error: relaymodel.OpenAIError{
+		Error: dto.OpenAIError{
 			Message: fmt.Sprintf("%s", err.Error()),
 		},
 	}
 }

+// awsRegionPrefix returns the part of awsRegionId before its first "-",
+// e.g. "us" for "us-east-1". An ID without "-" is returned whole.
+func awsRegionPrefix(awsRegionId string) string {
+	// strings.Split with a non-empty separator always yields at least one
+	// element, so taking index 0 cannot go out of range.
+	segments := strings.Split(awsRegionId, "-")
+	return segments[0]
+}
+
+func awsModelCanCrossRegion(awsModelId, awsRegionPrefix string) bool {
+	// Indexing a missing model (nil inner map) or a missing region both yield false, so no existence check is needed.
+	return awsModelCanCrossRegionMap[awsModelId][awsRegionPrefix]
+}
+
+// awsModelCrossRegion prepends the region's mapped model prefix and a "." to awsModelId; unmapped regions return it unchanged.
+func awsModelCrossRegion(awsModelId, awsRegionPrefix string) string {
+	if crossPrefix, ok := awsRegionCrossModelPrefixMap[awsRegionPrefix]; ok {
+		return crossPrefix + "." + awsModelId
+	}
+	return awsModelId
+}
+
 func awsModelID(requestModel string) (string, error) {
 	if awsModelID, ok := awsModelIDMap[requestModel]; ok {
 		return awsModelID, nil
@@ -55,7 +73,7 @@ func awsModelID(requestModel string) (string, error) {
 	return requestModel, nil
 }

-func awsHandler(c *gin.Context, info *relaycommon.RelayInfo, requestMode int) (*relaymodel.OpenAIErrorWithStatusCode, *relaymodel.Usage) {
+func awsHandler(c *gin.Context, info *relaycommon.RelayInfo, requestMode int) (*dto.OpenAIErrorWithStatusCode, *dto.Usage) {
 	awsCli, err := newAwsClient(c, info)
 	if err != nil {
 		return wrapErr(errors.Wrap(err, "newAwsClient")), nil
@@ -66,6 +84,12 @@ func awsHandler(c *gin.Context, info *relaycommon.RelayInfo, requestMode int) (*
 		return wrapErr(errors.Wrap(err, "awsModelID")), nil
 	}

+	awsRegionPrefix := awsRegionPrefix(awsCli.Options().Region)
+	canCrossRegion := awsModelCanCrossRegion(awsModelId, awsRegionPrefix)
+	if canCrossRegion {
+		awsModelId = awsModelCrossRegion(awsModelId, awsRegionPrefix)
+	}
+
 	awsReq := &bedrockruntime.InvokeModelInput{
 		ModelId:     aws.String(awsModelId),
 		Accept:      aws.String("application/json"),
@@ -76,7 +100,7 @@ func awsHandler(c *gin.Context, info *relaycommon.RelayInfo, requestMode int) (*
 	if !ok {
 		return wrapErr(errors.New("request not found")), nil
 	}
-	claudeReq := claudeReq_.(*claude.ClaudeRequest)
+	claudeReq := claudeReq_.(*dto.ClaudeRequest)
 	awsClaudeReq := copyRequest(claudeReq)
 	awsReq.Body, err = json.Marshal(awsClaudeReq)
 	if err != nil {
@@ -88,25 +112,19 @@ func awsHandler(c *gin.Context, info *relaycommon.RelayInfo, requestMode int) (*
 		return wrapErr(errors.Wrap(err, "InvokeModel")), nil
 	}

-	claudeResponse := new(claude.ClaudeResponse)
-	err = json.Unmarshal(awsResp.Body, claudeResponse)
-	if err != nil {
-		return wrapErr(errors.Wrap(err, "unmarshal response")), nil
+	claudeInfo := &claude.ClaudeResponseInfo{
+		ResponseId:   fmt.Sprintf("chatcmpl-%s", common.GetUUID()),
+		Created:      common.GetTimestamp(),
+		Model:        info.UpstreamModelName,
+		ResponseText: strings.Builder{},
+		Usage:        &dto.Usage{},
 	}

-	openaiResp := claude.ResponseClaude2OpenAI(requestMode, claudeResponse)
-	usage := relaymodel.Usage{
-		PromptTokens:     claudeResponse.Usage.InputTokens,
-		CompletionTokens: claudeResponse.Usage.OutputTokens,
-		TotalTokens:      claudeResponse.Usage.InputTokens + claudeResponse.Usage.OutputTokens,
-	}
-	openaiResp.Usage = usage
-
-	c.JSON(http.StatusOK, openaiResp)
-	return nil, &usage
+	claude.HandleClaudeResponseData(c, info, claudeInfo, awsResp.Body, RequestModeMessage)
+	return nil, claudeInfo.Usage
 }

-func awsStreamHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo, requestMode int) (*relaymodel.OpenAIErrorWithStatusCode, *relaymodel.Usage) {
+func awsStreamHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo, requestMode int) (*dto.OpenAIErrorWithStatusCode, *dto.Usage) {
 	awsCli, err := newAwsClient(c, info)
 	if err != nil {
 		return wrapErr(errors.Wrap(err, "newAwsClient")), nil
@@ -117,6 +135,12 @@ func awsStreamHandler(c *gin.Context, resp *http.Response, info *relaycommon.Rel
 		return wrapErr(errors.Wrap(err, "awsModelID")), nil
 	}

+	awsRegionPrefix := awsRegionPrefix(awsCli.Options().Region)
+	canCrossRegion := awsModelCanCrossRegion(awsModelId, awsRegionPrefix)
+	if canCrossRegion {
+		awsModelId = awsModelCrossRegion(awsModelId, awsRegionPrefix)
+	}
+
 	awsReq := &bedrockruntime.InvokeModelWithResponseStreamInput{
 		ModelId:     aws.String(awsModelId),
 		Accept:      aws.String("application/json"),
@@ -127,7 +151,7 @@ func awsStreamHandler(c *gin.Context, resp *http.Response, info *relaycommon.Rel
 	if !ok {
 		return wrapErr(errors.New("request not found")), nil
 	}
-	claudeReq := claudeReq_.(*claude.ClaudeRequest)
+	claudeReq := claudeReq_.(*dto.ClaudeRequest)

 	awsClaudeReq := copyRequest(claudeReq)
 	awsReq.Body, err = json.Marshal(awsClaudeReq)
@@ -142,79 +166,31 @@ func awsStreamHandler(c *gin.Context, resp *http.Response, info *relaycommon.Rel
 	stream := awsResp.GetStream()
 	defer stream.Close()

-	c.Writer.Header().Set("Content-Type", "text/event-stream")
-	var usage relaymodel.Usage
-	var id string
-	var model string
-	isFirst := true
-	createdTime := common.GetTimestamp()
-	c.Stream(func(w io.Writer) bool {
-		event, ok := <-stream.Events()
-		if !ok {
-			return false
-		}
+	claudeInfo := &claude.ClaudeResponseInfo{
+		ResponseId:   fmt.Sprintf("chatcmpl-%s", common.GetUUID()),
+		Created:      common.GetTimestamp(),
+		Model:        info.UpstreamModelName,
+		ResponseText: strings.Builder{},
+		Usage:        &dto.Usage{},
+	}

+	for event := range stream.Events() {
 		switch v := event.(type) {
 		case *types.ResponseStreamMemberChunk:
-			if isFirst {
-				isFirst = false
-				info.FirstResponseTime = time.Now()
+			info.SetFirstResponseTime()
+			respErr := claude.HandleStreamResponseData(c, info, claudeInfo, string(v.Value.Bytes), RequestModeMessage)
+			if respErr != nil {
+				return respErr, nil
 			}
-			claudeResp := new(claude.ClaudeResponse)
-			err := json.NewDecoder(bytes.NewReader(v.Value.Bytes)).Decode(claudeResp)
-			if err != nil {
-				common.SysError("error unmarshalling stream response: " + err.Error())
-				return false
-			}
-
-			response, claudeUsage := claude.StreamResponseClaude2OpenAI(requestMode, claudeResp)
-			if claudeUsage != nil {
-				usage.PromptTokens += claudeUsage.InputTokens
-				usage.CompletionTokens += claudeUsage.OutputTokens
-			}
-
-			if response == nil {
-				return true
-			}
-
-			if response.Id != "" {
-				id = response.Id
-			}
-			if response.Model != "" {
-				model = response.Model
-			}
-			response.Created = createdTime
-			response.Id = id
-			response.Model = model
-
-			jsonStr, err := json.Marshal(response)
-			if err != nil {
-				common.SysError("error marshalling stream response: " + err.Error())
-				return true
-			}
-			c.Render(-1, common.CustomEvent{Data: "data: " + string(jsonStr)})
-			return true
 		case *types.UnknownUnionMember:
 			fmt.Println("unknown tag:", v.Tag)
-			return false
+			return wrapErr(errors.New("unknown response type")), nil
 		default:
 			fmt.Println("union is nil or unknown type")
-			return false
-		}
-	})
-	if info.ShouldIncludeUsage {
-		response := service.GenerateFinalUsageResponse(id, createdTime, info.UpstreamModelName, usage)
-		err := service.ObjectData(c, response)
-		if err != nil {
-			common.SysError("send final response failed: " + err.Error())
+			return wrapErr(errors.New("nil or unknown response type")), nil
 		}
 	}
-	service.Done(c)
-	if resp != nil {
-		err = resp.Body.Close()
-		if err != nil {
-			return service.OpenAIErrorWrapperLocal(err, "close_response_body_failed", http.StatusInternalServerError), nil
-		}
-	}
-	return nil, &usage
+
+	claude.HandleStreamFinalResponse(c, info, claudeInfo, RequestModeMessage)
+	return nil, claudeInfo.Usage
 }
--- a/relay/channel/baidu/adaptor.go
+++ b/relay/channel/baidu/adaptor.go
@@ -3,7 +3,6 @@ package baidu
 import (
 	"errors"
 	"fmt"
-	"github.com/gin-gonic/gin"
 	"io"
 	"net/http"
 	"one-api/dto"
@@ -11,11 +10,19 @@ import (
 	relaycommon "one-api/relay/common"
 	"one-api/relay/constant"
 	"strings"
+
+	"github.com/gin-gonic/gin"
 )

 type Adaptor struct {
 }

+func (a *Adaptor) ConvertClaudeRequest(*gin.Context, *relaycommon.RelayInfo, *dto.ClaudeRequest) (any, error) {
+	// TODO: Claude-format requests are not supported by this channel yet.
+	// Report an error instead of panicking so a request cannot crash its handler.
+	return nil, errors.New("not implemented")
+}
+
 func (a *Adaptor) ConvertAudioRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.AudioRequest) (io.Reader, error) {
 	//TODO implement me
 	return nil, errors.New("not implemented")
@@ -104,7 +111,7 @@ func (a *Adaptor) SetupRequestHeader(c *gin.Context, req *http.Header, info *rel
 	return nil
 }

-func (a *Adaptor) ConvertRequest(c *gin.Context, info *relaycommon.RelayInfo, request *dto.GeneralOpenAIRequest) (any, error) {
+func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayInfo, request *dto.GeneralOpenAIRequest) (any, error) {
 	if request == nil {
 		return nil, errors.New("request is nil")
 	}
@@ -124,6 +131,11 @@ func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.Rela
 	return baiduEmbeddingRequest, nil
 }

+func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
+	// TODO: OpenAI Responses request conversion is not implemented for this adaptor; callers always get an error.
+	return nil, errors.New("not implemented")
+}
+
 func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error) {
 	return channel.DoApiRequest(a, c, info, requestBody)
 }
--- a/relay/channel/baidu/relay-baidu.go
+++ b/relay/channel/baidu/relay-baidu.go
@@ -11,6 +11,7 @@ import (
 	"one-api/common"
 	"one-api/constant"
 	"one-api/dto"
+	"one-api/relay/helper"
 	"one-api/service"
 	"strings"
 	"sync"
@@ -138,7 +139,7 @@ func baiduStreamHandler(c *gin.Context, resp *http.Response) (*dto.OpenAIErrorWi
 		}
 		stopChan <- true
 	}()
-	service.SetEventStreamHeaders(c)
+	helper.SetEventStreamHeaders(c)
 	c.Stream(func(w io.Writer) bool {
 		select {
 		case data := <-dataChan:
--- a/relay/channel/baidu_v2/adaptor.go
+++ b/relay/channel/baidu_v2/adaptor.go
@@ -3,18 +3,25 @@ package baidu_v2
 import (
 	"errors"
 	"fmt"
-	"github.com/gin-gonic/gin"
 	"io"
 	"net/http"
 	"one-api/dto"
 	"one-api/relay/channel"
 	"one-api/relay/channel/openai"
 	relaycommon "one-api/relay/common"
+
+	"github.com/gin-gonic/gin"
 )

 type Adaptor struct {
 }

+func (a *Adaptor) ConvertClaudeRequest(*gin.Context, *relaycommon.RelayInfo, *dto.ClaudeRequest) (any, error) {
+	// TODO: Claude-format requests are not supported by this channel yet.
+	// Report an error instead of panicking so a request cannot crash its handler.
+	return nil, errors.New("not implemented")
+}
+
 func (a *Adaptor) ConvertAudioRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.AudioRequest) (io.Reader, error) {
 	//TODO implement me
 	return nil, errors.New("not implemented")
@@ -38,7 +45,7 @@ func (a *Adaptor) SetupRequestHeader(c *gin.Context, req *http.Header, info *rel
 	return nil
 }

-func (a *Adaptor) ConvertRequest(c *gin.Context, info *relaycommon.RelayInfo, request *dto.GeneralOpenAIRequest) (any, error) {
+func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayInfo, request *dto.GeneralOpenAIRequest) (any, error) {
 	if request == nil {
 		return nil, errors.New("request is nil")
 	}
@@ -54,6 +61,11 @@ func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.Rela
 	return nil, errors.New("not implemented")
 }

+func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
+	// TODO: OpenAI Responses request conversion is not implemented for this adaptor; callers always get an error.
+	return nil, errors.New("not implemented")
+}
+
 func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error) {
 	return channel.DoApiRequest(a, c, info, requestBody)
 }
@@ -62,7 +74,7 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycom
 	if info.IsStream {
 		err, usage = openai.OaiStreamHandler(c, resp, info)
 	} else {
-		err, usage = openai.OpenaiHandler(c, resp, info.PromptTokens, info.UpstreamModelName)
+		err, usage = openai.OpenaiHandler(c, resp, info)
 	}
 	return
 }
--- a/relay/channel/claude/adaptor.go
+++ b/relay/channel/claude/adaptor.go
@@ -3,13 +3,15 @@ package claude
 import (
 	"errors"
 	"fmt"
-	"github.com/gin-gonic/gin"
 	"io"
 	"net/http"
 	"one-api/dto"
 	"one-api/relay/channel"
 	relaycommon "one-api/relay/common"
+	"one-api/setting/model_setting"
 	"strings"
+
+	"github.com/gin-gonic/gin"
 )

 const (
@@ -21,6 +23,10 @@ type Adaptor struct {
 	RequestMode int
 }

+func (a *Adaptor) ConvertClaudeRequest(c *gin.Context, info *relaycommon.RelayInfo, request *dto.ClaudeRequest) (any, error) {
+	return request, nil
+}
+
 func (a *Adaptor) ConvertAudioRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.AudioRequest) (io.Reader, error) {
 	//TODO implement me
 	return nil, errors.New("not implemented")
@@ -55,10 +61,11 @@ func (a *Adaptor) SetupRequestHeader(c *gin.Context, req *http.Header, info *rel
 		anthropicVersion = "2023-06-01"
 	}
 	req.Set("anthropic-version", anthropicVersion)
+	model_setting.GetClaudeSettings().WriteHeaders(info.OriginModelName, req)
 	return nil
 }

-func (a *Adaptor) ConvertRequest(c *gin.Context, info *relaycommon.RelayInfo, request *dto.GeneralOpenAIRequest) (any, error) {
+func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayInfo, request *dto.GeneralOpenAIRequest) (any, error) {
 	if request == nil {
 		return nil, errors.New("request is nil")
 	}
@@ -78,6 +85,11 @@ func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.Rela
 	return nil, errors.New("not implemented")
 }

+func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
+	// TODO: OpenAI Responses request conversion is not implemented for this adaptor; callers always get an error.
+	return nil, errors.New("not implemented")
+}
+
 func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error) {
 	return channel.DoApiRequest(a, c, info, requestBody)
 }
--- a/relay/channel/claude/constants.go
+++ b/relay/channel/claude/constants.go
@@ -11,6 +11,8 @@ var ModelList = []string{
 	"claude-3-5-haiku-20241022",
 	"claude-3-5-sonnet-20240620",
 	"claude-3-5-sonnet-20241022",
+	"claude-3-7-sonnet-20250219",
+	"claude-3-7-sonnet-20250219-thinking",
 }

 var ChannelName = "claude"
--- a/relay/channel/claude/dto.go
+++ b/relay/channel/claude/dto.go
@@ -1,85 +1,95 @@
 package claude

-type ClaudeMetadata struct {
-	UserId string `json:"user_id"`
-}
-
-type ClaudeMediaMessage struct {
-	Type        string               `json:"type"`
-	Text        string               `json:"text,omitempty"`
-	Source      *ClaudeMessageSource `json:"source,omitempty"`
-	Usage       *ClaudeUsage         `json:"usage,omitempty"`
-	StopReason  *string              `json:"stop_reason,omitempty"`
-	PartialJson string               `json:"partial_json,omitempty"`
-	// tool_calls
-	Id        string `json:"id,omitempty"`
-	Name      string `json:"name,omitempty"`
-	Input     any    `json:"input,omitempty"`
-	Content   string `json:"content,omitempty"`
-	ToolUseId string `json:"tool_use_id,omitempty"`
-}
-
-type ClaudeMessageSource struct {
-	Type      string `json:"type"`
-	MediaType string `json:"media_type"`
-	Data      string `json:"data"`
-}
-
-type ClaudeMessage struct {
-	Role    string `json:"role"`
-	Content any    `json:"content"`
-}
-
-type Tool struct {
-	Name        string                 `json:"name"`
-	Description string                 `json:"description,omitempty"`
-	InputSchema map[string]interface{} `json:"input_schema"`
-}
-
-type InputSchema struct {
-	Type       string `json:"type"`
-	Properties any    `json:"properties,omitempty"`
-	Required   any    `json:"required,omitempty"`
-}
-
-type ClaudeRequest struct {
-	Model             string          `json:"model"`
-	Prompt            string          `json:"prompt,omitempty"`
-	System            string          `json:"system,omitempty"`
-	Messages          []ClaudeMessage `json:"messages,omitempty"`
-	MaxTokens         uint            `json:"max_tokens,omitempty"`
-	MaxTokensToSample uint            `json:"max_tokens_to_sample,omitempty"`
-	StopSequences     []string        `json:"stop_sequences,omitempty"`
-	Temperature       *float64        `json:"temperature,omitempty"`
-	TopP              float64         `json:"top_p,omitempty"`
-	TopK              int             `json:"top_k,omitempty"`
-	//ClaudeMetadata    `json:"metadata,omitempty"`
-	Stream     bool   `json:"stream,omitempty"`
-	Tools      []Tool `json:"tools,omitempty"`
-	ToolChoice any    `json:"tool_choice,omitempty"`
-}
-
-type ClaudeError struct {
-	Type    string `json:"type"`
-	Message string `json:"message"`
-}
-
-type ClaudeResponse struct {
-	Id           string               `json:"id"`
-	Type         string               `json:"type"`
-	Content      []ClaudeMediaMessage `json:"content"`
-	Completion   string               `json:"completion"`
-	StopReason   string               `json:"stop_reason"`
-	Model        string               `json:"model"`
-	Error        ClaudeError          `json:"error"`
-	Usage        ClaudeUsage          `json:"usage"`
-	Index        int                  `json:"index"` // stream only
-	ContentBlock *ClaudeMediaMessage  `json:"content_block"`
-	Delta        *ClaudeMediaMessage  `json:"delta"`   // stream only
-	Message      *ClaudeResponse      `json:"message"` // stream only: message_start
-}
-
-type ClaudeUsage struct {
-	InputTokens  int `json:"input_tokens"`
-	OutputTokens int `json:"output_tokens"`
-}
+//
+//type ClaudeMetadata struct {
+//	UserId string `json:"user_id"`
+//}
+//
+//type ClaudeMediaMessage struct {
+//	Type        string               `json:"type"`
+//	Text        string               `json:"text,omitempty"`
+//	Source      *ClaudeMessageSource `json:"source,omitempty"`
+//	Usage       *ClaudeUsage         `json:"usage,omitempty"`
+//	StopReason  *string              `json:"stop_reason,omitempty"`
+//	PartialJson string               `json:"partial_json,omitempty"`
+//	Thinking    string               `json:"thinking,omitempty"`
+//	Signature   string               `json:"signature,omitempty"`
+//	Delta       string               `json:"delta,omitempty"`
+//	// tool_calls
+//	Id        string `json:"id,omitempty"`
+//	Name      string `json:"name,omitempty"`
+//	Input     any    `json:"input,omitempty"`
+//	Content   string `json:"content,omitempty"`
+//	ToolUseId string `json:"tool_use_id,omitempty"`
+//}
+//
+//type ClaudeMessageSource struct {
+//	Type      string `json:"type"`
+//	MediaType string `json:"media_type"`
+//	Data      string `json:"data"`
+//}
+//
+//type ClaudeMessage struct {
+//	Role    string `json:"role"`
+//	Content any    `json:"content"`
+//}
+//
+//type Tool struct {
+//	Name        string                 `json:"name"`
+//	Description string                 `json:"description,omitempty"`
+//	InputSchema map[string]interface{} `json:"input_schema"`
+//}
+//
+//type InputSchema struct {
+//	Type       string `json:"type"`
+//	Properties any    `json:"properties,omitempty"`
+//	Required   any    `json:"required,omitempty"`
+//}
+//
+//type ClaudeRequest struct {
+//	Model             string          `json:"model"`
+//	Prompt            string          `json:"prompt,omitempty"`
+//	System            string          `json:"system,omitempty"`
+//	Messages          []ClaudeMessage `json:"messages,omitempty"`
+//	MaxTokens         uint            `json:"max_tokens,omitempty"`
+//	MaxTokensToSample uint            `json:"max_tokens_to_sample,omitempty"`
+//	StopSequences     []string        `json:"stop_sequences,omitempty"`
+//	Temperature       *float64        `json:"temperature,omitempty"`
+//	TopP              float64         `json:"top_p,omitempty"`
+//	TopK              int             `json:"top_k,omitempty"`
+//	//ClaudeMetadata    `json:"metadata,omitempty"`
+//	Stream     bool      `json:"stream,omitempty"`
+//	Tools      any       `json:"tools,omitempty"`
+//	ToolChoice any       `json:"tool_choice,omitempty"`
+//	Thinking   *Thinking `json:"thinking,omitempty"`
+//}
+//
+//type Thinking struct {
+//	Type         string `json:"type"`
+//	BudgetTokens int    `json:"budget_tokens"`
+//}
+//
+//type ClaudeError struct {
+//	Type    string `json:"type"`
+//	Message string `json:"message"`
+//}
+//
+//type ClaudeResponse struct {
+//	Id           string               `json:"id"`
+//	Type         string               `json:"type"`
+//	Content      []ClaudeMediaMessage `json:"content"`
+//	Completion   string               `json:"completion"`
+//	StopReason   string               `json:"stop_reason"`
+//	Model        string               `json:"model"`
+//	Error        ClaudeError          `json:"error"`
+//	Usage        ClaudeUsage          `json:"usage"`
+//	Index        int                  `json:"index"` // stream only
+//	ContentBlock *ClaudeMediaMessage  `json:"content_block"`
+//	Delta        *ClaudeMediaMessage  `json:"delta"`   // stream only
+//	Message      *ClaudeResponse      `json:"message"` // stream only: message_start
+//}
+//
+//type ClaudeUsage struct {
+//	InputTokens  int `json:"input_tokens"`
+//	OutputTokens int `json:"output_tokens"`
+//}
--- a/relay/channel/claude/relay-claude.go
+++ b/relay/channel/claude/relay-claude.go
@@ -1,7 +1,6 @@
 package claude

 import (
-	"bufio"
 	"encoding/json"
 	"fmt"
 	"io"
@@ -9,7 +8,9 @@ import (
 	"one-api/common"
 	"one-api/dto"
 	relaycommon "one-api/relay/common"
+	"one-api/relay/helper"
 	"one-api/service"
+	"one-api/setting/model_setting"
 	"strings"

 	"github.com/gin-gonic/gin"
@@ -23,14 +24,16 @@ func stopReasonClaude2OpenAI(reason string) string {
 		return "stop"
 	case "max_tokens":
 		return "max_tokens"
+	case "tool_use":
+		return "tool_calls"
 	default:
 		return reason
 	}
 }

-func RequestOpenAI2ClaudeComplete(textRequest dto.GeneralOpenAIRequest) *ClaudeRequest {
+func RequestOpenAI2ClaudeComplete(textRequest dto.GeneralOpenAIRequest) *dto.ClaudeRequest {

-	claudeRequest := ClaudeRequest{
+	claudeRequest := dto.ClaudeRequest{
 		Model:         textRequest.Model,
 		Prompt:        "",
 		StopSequences: nil,
@@ -59,17 +62,19 @@ func RequestOpenAI2ClaudeComplete(textRequest dto.GeneralOpenAIRequest) *ClaudeR
 	return &claudeRequest
 }

-func RequestOpenAI2ClaudeMessage(textRequest dto.GeneralOpenAIRequest) (*ClaudeRequest, error) {
-	claudeTools := make([]Tool, 0, len(textRequest.Tools))
+func RequestOpenAI2ClaudeMessage(textRequest dto.GeneralOpenAIRequest) (*dto.ClaudeRequest, error) {
+	claudeTools := make([]dto.Tool, 0, len(textRequest.Tools))

 	for _, tool := range textRequest.Tools {
 		if params, ok := tool.Function.Parameters.(map[string]any); ok {
-			claudeTool := Tool{
+			claudeTool := dto.Tool{
 				Name:        tool.Function.Name,
 				Description: tool.Function.Description,
 			}
 			claudeTool.InputSchema = make(map[string]interface{})
-			claudeTool.InputSchema["type"] = params["type"].(string)
+			if params["type"] != nil {
+				claudeTool.InputSchema["type"] = params["type"].(string)
+			}
 			claudeTool.InputSchema["properties"] = params["properties"]
 			claudeTool.InputSchema["required"] = params["required"]
 			for s, a := range params {
@@ -82,7 +87,7 @@ func RequestOpenAI2ClaudeMessage(textRequest dto.GeneralOpenAIRequest) (*ClaudeR
 		}
 	}

-	claudeRequest := ClaudeRequest{
+	claudeRequest := dto.ClaudeRequest{
 		Model:         textRequest.Model,
 		MaxTokens:     textRequest.MaxTokens,
 		StopSequences: nil,
@@ -92,9 +97,31 @@ func RequestOpenAI2ClaudeMessage(textRequest dto.GeneralOpenAIRequest) (*ClaudeR
 		Stream:        textRequest.Stream,
 		Tools:         claudeTools,
 	}
+
 	if claudeRequest.MaxTokens == 0 {
-		claudeRequest.MaxTokens = 4096
+		claudeRequest.MaxTokens = uint(model_setting.GetClaudeSettings().GetDefaultMaxTokens(textRequest.Model))
 	}
+
+	if model_setting.GetClaudeSettings().ThinkingAdapterEnabled &&
+		strings.HasSuffix(textRequest.Model, "-thinking") {
+
+		// 因为BudgetTokens 必须大于1024
+		if claudeRequest.MaxTokens < 1280 {
+			claudeRequest.MaxTokens = 1280
+		}
+
+		// BudgetTokens is MaxTokens scaled by the configured ThinkingAdapterBudgetTokensPercentage (not a fixed 80%)
+		claudeRequest.Thinking = &dto.Thinking{
+			Type:         "enabled",
+			BudgetTokens: int(float64(claudeRequest.MaxTokens) * model_setting.GetClaudeSettings().ThinkingAdapterBudgetTokensPercentage),
+		}
+		// TODO: 临时处理
+		// https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#important-considerations-when-using-extended-thinking
+		claudeRequest.TopP = 0
+		claudeRequest.Temperature = common.GetPointer[float64](1.0)
+		claudeRequest.Model = strings.TrimSuffix(textRequest.Model, "-thinking")
+	}
+
 	if textRequest.Stop != nil {
 		// stop maybe string/array string, convert to array string
 		switch textRequest.Stop.(type) {
@@ -142,7 +169,7 @@ func RequestOpenAI2ClaudeMessage(textRequest dto.GeneralOpenAIRequest) (*ClaudeR
 		lastMessage = fmtMessage
 	}

-	claudeMessages := make([]ClaudeMessage, 0)
+	claudeMessages := make([]dto.ClaudeMessage, 0)
 	isFirstMessage := true
 	for _, message := range formatMessages {
 		if message.Role == "system" {
@@ -163,63 +190,63 @@ func RequestOpenAI2ClaudeMessage(textRequest dto.GeneralOpenAIRequest) (*ClaudeR
 				isFirstMessage = false
 				if message.Role != "user" {
 					// fix: first message is assistant, add user message
-					claudeMessage := ClaudeMessage{
+					claudeMessage := dto.ClaudeMessage{
 						Role: "user",
-						Content: []ClaudeMediaMessage{
+						Content: []dto.ClaudeMediaMessage{
 							{
 								Type: "text",
-								Text: "...",
+								Text: common.GetPointer[string]("..."),
 							},
 						},
 					}
 					claudeMessages = append(claudeMessages, claudeMessage)
 				}
 			}
-			claudeMessage := ClaudeMessage{
+			claudeMessage := dto.ClaudeMessage{
 				Role: message.Role,
 			}
 			if message.Role == "tool" {
 				if len(claudeMessages) > 0 && claudeMessages[len(claudeMessages)-1].Role == "user" {
 					lastMessage := claudeMessages[len(claudeMessages)-1]
 					if content, ok := lastMessage.Content.(string); ok {
-						lastMessage.Content = []ClaudeMediaMessage{
+						lastMessage.Content = []dto.ClaudeMediaMessage{
 							{
 								Type: "text",
-								Text: content,
+								Text: common.GetPointer[string](content),
 							},
 						}
 					}
-					lastMessage.Content = append(lastMessage.Content.([]ClaudeMediaMessage), ClaudeMediaMessage{
+					lastMessage.Content = append(lastMessage.Content.([]dto.ClaudeMediaMessage), dto.ClaudeMediaMessage{
 						Type:      "tool_result",
 						ToolUseId: message.ToolCallId,
-						Content:   message.StringContent(),
+						Content:   message.Content,
 					})
 					claudeMessages[len(claudeMessages)-1] = lastMessage
 					continue
 				} else {
 					claudeMessage.Role = "user"
-					claudeMessage.Content = []ClaudeMediaMessage{
+					claudeMessage.Content = []dto.ClaudeMediaMessage{
 						{
 							Type:      "tool_result",
 							ToolUseId: message.ToolCallId,
-							Content:   message.StringContent(),
+							Content:   message.Content,
 						},
 					}
 				}
 			} else if message.IsStringContent() && message.ToolCalls == nil {
 				claudeMessage.Content = message.StringContent()
 			} else {
-				claudeMediaMessages := make([]ClaudeMediaMessage, 0)
+				claudeMediaMessages := make([]dto.ClaudeMediaMessage, 0)
 				for _, mediaMessage := range message.ParseContent() {
-					claudeMediaMessage := ClaudeMediaMessage{
+					claudeMediaMessage := dto.ClaudeMediaMessage{
 						Type: mediaMessage.Type,
 					}
 					if mediaMessage.Type == "text" {
-						claudeMediaMessage.Text = mediaMessage.Text
+						claudeMediaMessage.Text = common.GetPointer[string](mediaMessage.Text)
 					} else {
-						imageUrl := mediaMessage.ImageUrl.(dto.MessageImageUrl)
+						imageUrl := mediaMessage.GetImageMedia()
 						claudeMediaMessage.Type = "image"
-						claudeMediaMessage.Source = &ClaudeMessageSource{
+						claudeMediaMessage.Source = &dto.ClaudeMessageSource{
 							Type: "base64",
 						}
 						// 判断是否是url
@@ -249,7 +276,7 @@ func RequestOpenAI2ClaudeMessage(textRequest dto.GeneralOpenAIRequest) (*ClaudeR
 							common.SysError("tool call function arguments is not a map[string]any: " + fmt.Sprintf("%v", toolCall.Function.Arguments))
 							continue
 						}
-						claudeMediaMessages = append(claudeMediaMessages, ClaudeMediaMessage{
+						claudeMediaMessages = append(claudeMediaMessages, dto.ClaudeMediaMessage{
 							Type:  "tool_use",
 							Id:    toolCall.ID,
 							Name:  toolCall.Function.Name,
@@ -267,13 +294,19 @@ func RequestOpenAI2ClaudeMessage(textRequest dto.GeneralOpenAIRequest) (*ClaudeR
 	return &claudeRequest, nil
 }

-func StreamResponseClaude2OpenAI(reqMode int, claudeResponse *ClaudeResponse) (*dto.ChatCompletionsStreamResponse, *ClaudeUsage) {
+func StreamResponseClaude2OpenAI(reqMode int, claudeResponse *dto.ClaudeResponse) *dto.ChatCompletionsStreamResponse {
 	var response dto.ChatCompletionsStreamResponse
-	var claudeUsage *ClaudeUsage
 	response.Object = "chat.completion.chunk"
 	response.Model = claudeResponse.Model
 	response.Choices = make([]dto.ChatCompletionsStreamResponseChoice, 0)
-	tools := make([]dto.ToolCall, 0)
+	tools := make([]dto.ToolCallResponse, 0)
+	fcIdx := 0
+	if claudeResponse.Index != nil {
+		fcIdx = *claudeResponse.Index - 1
+		if fcIdx < 0 {
+			fcIdx = 0
+		}
+	}
 	var choice dto.ChatCompletionsStreamResponseChoice
 	if reqMode == RequestModeCompletion {
 		choice.Delta.SetContentString(claudeResponse.Completion)
@@ -285,35 +318,45 @@ func StreamResponseClaude2OpenAI(reqMode int, claudeResponse *ClaudeResponse) (*
 		if claudeResponse.Type == "message_start" {
 			response.Id = claudeResponse.Message.Id
 			response.Model = claudeResponse.Message.Model
-			claudeUsage = &claudeResponse.Message.Usage
+			//claudeUsage = &claudeResponse.Message.Usage
 			choice.Delta.SetContentString("")
 			choice.Delta.Role = "assistant"
 		} else if claudeResponse.Type == "content_block_start" {
 			if claudeResponse.ContentBlock != nil {
 				//choice.Delta.SetContentString(claudeResponse.ContentBlock.Text)
 				if claudeResponse.ContentBlock.Type == "tool_use" {
-					tools = append(tools, dto.ToolCall{
-						ID:   claudeResponse.ContentBlock.Id,
-						Type: "function",
-						Function: dto.FunctionCall{
+					tools = append(tools, dto.ToolCallResponse{
+						Index: common.GetPointer(fcIdx),
+						ID:    claudeResponse.ContentBlock.Id,
+						Type:  "function",
+						Function: dto.FunctionResponse{
 							Name:      claudeResponse.ContentBlock.Name,
 							Arguments: "",
 						},
 					})
 				}
 			} else {
-				return nil, nil
+				return nil
 			}
 		} else if claudeResponse.Type == "content_block_delta" {
 			if claudeResponse.Delta != nil {
-				choice.Index = claudeResponse.Index
-				choice.Delta.SetContentString(claudeResponse.Delta.Text)
-				if claudeResponse.Delta.Type == "input_json_delta" {
-					tools = append(tools, dto.ToolCall{
-						Function: dto.FunctionCall{
-							Arguments: claudeResponse.Delta.PartialJson,
+				choice.Delta.Content = claudeResponse.Delta.Text
+				switch claudeResponse.Delta.Type {
+				case "input_json_delta":
+					tools = append(tools, dto.ToolCallResponse{
+						Type:  "function",
+						Index: common.GetPointer(fcIdx),
+						Function: dto.FunctionResponse{
+							Arguments: *claudeResponse.Delta.PartialJson,
 						},
 					})
+				case "signature_delta":
+					// The encrypted signature itself is not forwarded; a bare newline is emitted as reasoning content instead
+					signatureContent := "\n"
+					choice.Delta.ReasoningContent = &signatureContent
+				case "thinking_delta":
+					thinkingContent := claudeResponse.Delta.Thinking
+					choice.Delta.ReasoningContent = &thinkingContent
 				}
 			}
 		} else if claudeResponse.Type == "message_delta" {
@@ -321,26 +364,23 @@ func StreamResponseClaude2OpenAI(reqMode int, claudeResponse *ClaudeResponse) (*
 			if finishReason != "null" {
 				choice.FinishReason = &finishReason
 			}
-			claudeUsage = &claudeResponse.Usage
+			//claudeUsage = &claudeResponse.Usage
 		} else if claudeResponse.Type == "message_stop" {
-			return nil, nil
+			return nil
 		} else {
-			return nil, nil
+			return nil
 		}
 	}
-	if claudeUsage == nil {
-		claudeUsage = &ClaudeUsage{}
-	}
 	if len(tools) > 0 {
 		choice.Delta.Content = nil // compatible with other OpenAI derivative applications, like LobeOpenAICompatibleFactory ...
 		choice.Delta.ToolCalls = tools
 	}
 	response.Choices = append(response.Choices, choice)

-	return &response, claudeUsage
+	return &response
 }

-func ResponseClaude2OpenAI(reqMode int, claudeResponse *ClaudeResponse) *dto.OpenAITextResponse {
+func ResponseClaude2OpenAI(reqMode int, claudeResponse *dto.ClaudeResponse) *dto.OpenAITextResponse {
 	choices := make([]dto.OpenAITextResponseChoice, 0)
 	fullTextResponse := dto.OpenAITextResponse{
 		Id:      fmt.Sprintf("chatcmpl-%s", common.GetUUID()),
@@ -348,10 +388,14 @@ func ResponseClaude2OpenAI(reqMode int, claudeResponse *ClaudeResponse) *dto.Ope
 		Created: common.GetTimestamp(),
 	}
 	var responseText string
+	var responseThinking string
 	if len(claudeResponse.Content) > 0 {
-		responseText = claudeResponse.Content[0].Text
+		responseText = claudeResponse.Content[0].GetText()
+		responseThinking = claudeResponse.Content[0].Thinking
 	}
-	tools := make([]dto.ToolCall, 0)
+	tools := make([]dto.ToolCallResponse, 0)
+	thinkingContent := ""
+
 	if reqMode == RequestModeCompletion {
 		content, _ := json.Marshal(strings.TrimPrefix(claudeResponse.Completion, " "))
 		choice := dto.OpenAITextResponseChoice{
@@ -367,16 +411,22 @@ func ResponseClaude2OpenAI(reqMode int, claudeResponse *ClaudeResponse) *dto.Ope
 	} else {
 		fullTextResponse.Id = claudeResponse.Id
 		for _, message := range claudeResponse.Content {
-			if message.Type == "tool_use" {
+			switch message.Type {
+			case "tool_use":
 				args, _ := json.Marshal(message.Input)
-				tools = append(tools, dto.ToolCall{
+				tools = append(tools, dto.ToolCallResponse{
 					ID:   message.Id,
 					Type: "function", // compatible with other OpenAI derivative applications
-					Function: dto.FunctionCall{
+					Function: dto.FunctionResponse{
 						Name:      message.Name,
 						Arguments: string(args),
 					},
 				})
+			case "thinking":
+				// 加密的不管， 只输出明文的推理过程
+				thinkingContent = message.Thinking
+			case "text":
+				responseText = message.GetText()
 			}
 		}
 	}
@@ -388,144 +438,241 @@ func ResponseClaude2OpenAI(reqMode int, claudeResponse *ClaudeResponse) *dto.Ope
 		FinishReason: stopReasonClaude2OpenAI(claudeResponse.StopReason),
 	}
 	choice.SetStringContent(responseText)
+	if len(responseThinking) > 0 {
+		choice.ReasoningContent = responseThinking
+	}
 	if len(tools) > 0 {
 		choice.Message.SetToolCalls(tools)
 	}
+	choice.Message.ReasoningContent = thinkingContent
 	fullTextResponse.Model = claudeResponse.Model
 	choices = append(choices, choice)
 	fullTextResponse.Choices = choices
 	return &fullTextResponse
 }

-func ClaudeStreamHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo, requestMode int) (*dto.OpenAIErrorWithStatusCode, *dto.Usage) {
-	responseId := fmt.Sprintf("chatcmpl-%s", common.GetUUID())
-	var usage *dto.Usage
-	usage = &dto.Usage{}
-	responseText := ""
-	createdTime := common.GetTimestamp()
-	scanner := bufio.NewScanner(resp.Body)
-	scanner.Split(bufio.ScanLines)
-	service.SetEventStreamHeaders(c)
+type ClaudeResponseInfo struct {
+	ResponseId   string
+	Created      int64
+	Model        string
+	ResponseText strings.Builder
+	Usage        *dto.Usage
+}

-	for scanner.Scan() {
-		data := scanner.Text()
-		info.SetFirstResponseTime()
-		if len(data) < 6 || !strings.HasPrefix(data, "data:") {
-			continue
-		}
-		data = strings.TrimPrefix(data, "data:")
-		data = strings.TrimSpace(data)
-		var claudeResponse ClaudeResponse
-		err := json.Unmarshal([]byte(data), &claudeResponse)
-		if err != nil {
-			common.SysError("error unmarshalling stream response: " + err.Error())
-			continue
+func FormatClaudeResponseInfo(requestMode int, claudeResponse *dto.ClaudeResponse, oaiResponse *dto.ChatCompletionsStreamResponse, claudeInfo *ClaudeResponseInfo) bool {
+	if requestMode == RequestModeCompletion {
+		claudeInfo.ResponseText.WriteString(claudeResponse.Completion)
+	} else {
+		if claudeResponse.Type == "message_start" {
+			// message_start, 获取usage
+			claudeInfo.ResponseId = claudeResponse.Message.Id
+			claudeInfo.Model = claudeResponse.Message.Model
+			claudeInfo.Usage.PromptTokens = claudeResponse.Message.Usage.InputTokens
+		} else if claudeResponse.Type == "content_block_delta" {
+			if claudeResponse.Delta.Text != nil {
+				claudeInfo.ResponseText.WriteString(*claudeResponse.Delta.Text)
+			}
+		} else if claudeResponse.Type == "message_delta" {
+			claudeInfo.Usage.CompletionTokens = claudeResponse.Usage.OutputTokens
+			if claudeResponse.Usage.InputTokens > 0 {
+				claudeInfo.Usage.PromptTokens = claudeResponse.Usage.InputTokens
+			}
+			claudeInfo.Usage.TotalTokens = claudeInfo.Usage.PromptTokens + claudeResponse.Usage.OutputTokens
+		} else if claudeResponse.Type == "content_block_start" {
+		} else {
+			return false
 		}
+	}
+	if oaiResponse != nil {
+		oaiResponse.Id = claudeInfo.ResponseId
+		oaiResponse.Created = claudeInfo.Created
+		oaiResponse.Model = claudeInfo.Model
+	}
+	return true
+}

-		response, claudeUsage := StreamResponseClaude2OpenAI(requestMode, &claudeResponse)
-		if response == nil {
-			continue
+func HandleStreamResponseData(c *gin.Context, info *relaycommon.RelayInfo, claudeInfo *ClaudeResponseInfo, data string, requestMode int) *dto.OpenAIErrorWithStatusCode {
+	var claudeResponse dto.ClaudeResponse
+	err := common.DecodeJsonStr(data, &claudeResponse)
+	if err != nil {
+		common.SysError("error unmarshalling stream response: " + err.Error())
+		return service.OpenAIErrorWrapper(err, "stream_response_error", http.StatusInternalServerError)
+	}
+	if claudeResponse.Error != nil && claudeResponse.Error.Type != "" {
+		return &dto.OpenAIErrorWithStatusCode{
+			Error: dto.OpenAIError{
+				Code:    "stream_response_error",
+				Type:    claudeResponse.Error.Type,
+				Message: claudeResponse.Error.Message,
+			},
+			StatusCode: http.StatusInternalServerError,
 		}
+	}
+	if info.RelayFormat == relaycommon.RelayFormatClaude {
 		if requestMode == RequestModeCompletion {
-			responseText += claudeResponse.Completion
-			responseId = response.Id
+			claudeInfo.ResponseText.WriteString(claudeResponse.Completion)
 		} else {
 			if claudeResponse.Type == "message_start" {
 				// message_start, 获取usage
-				responseId = claudeResponse.Message.Id
 				info.UpstreamModelName = claudeResponse.Message.Model
-				usage.PromptTokens = claudeUsage.InputTokens
+				claudeInfo.Usage.PromptTokens = claudeResponse.Message.Usage.InputTokens
+				claudeInfo.Usage.PromptTokensDetails.CachedTokens = claudeResponse.Message.Usage.CacheReadInputTokens
+				claudeInfo.Usage.PromptTokensDetails.CachedCreationTokens = claudeResponse.Message.Usage.CacheCreationInputTokens
+				claudeInfo.Usage.CompletionTokens = claudeResponse.Message.Usage.OutputTokens
 			} else if claudeResponse.Type == "content_block_delta" {
-				responseText += claudeResponse.Delta.Text
+				claudeInfo.ResponseText.WriteString(claudeResponse.Delta.GetText())
 			} else if claudeResponse.Type == "message_delta" {
-				usage.CompletionTokens = claudeUsage.OutputTokens
-				usage.TotalTokens = claudeUsage.InputTokens + claudeUsage.OutputTokens
-			} else if claudeResponse.Type == "content_block_start" {
-
-			} else {
-				continue
+				if claudeResponse.Usage.InputTokens > 0 {
+					// 不叠加，只取最新的
+					claudeInfo.Usage.PromptTokens = claudeResponse.Usage.InputTokens
+				}
+				claudeInfo.Usage.CompletionTokens = claudeResponse.Usage.OutputTokens
+				claudeInfo.Usage.TotalTokens = claudeInfo.Usage.PromptTokens + claudeInfo.Usage.CompletionTokens
 			}
 		}
-		//response.Id = responseId
-		response.Id = responseId
-		response.Created = createdTime
-		response.Model = info.UpstreamModelName
+		helper.ClaudeChunkData(c, claudeResponse, data)
+	} else if info.RelayFormat == relaycommon.RelayFormatOpenAI {
+		response := StreamResponseClaude2OpenAI(requestMode, &claudeResponse)

-		err = service.ObjectData(c, response)
+		if !FormatClaudeResponseInfo(requestMode, &claudeResponse, response, claudeInfo) {
+			return nil
+		}
+
+		err = helper.ObjectData(c, response)
 		if err != nil {
 			common.LogError(c, "send_stream_response_failed: "+err.Error())
 		}
 	}
-
-	if requestMode == RequestModeCompletion {
-		usage, _ = service.ResponseText2Usage(responseText, info.UpstreamModelName, info.PromptTokens)
-	} else {
-		if usage.PromptTokens == 0 {
-			usage.PromptTokens = info.PromptTokens
-		}
-		if usage.CompletionTokens == 0 {
-			usage, _ = service.ResponseText2Usage(responseText, info.UpstreamModelName, usage.PromptTokens)
-		}
-	}
-	if info.ShouldIncludeUsage {
-		response := service.GenerateFinalUsageResponse(responseId, createdTime, info.UpstreamModelName, *usage)
-		err := service.ObjectData(c, response)
-		if err != nil {
-			common.SysError("send final response failed: " + err.Error())
-		}
-	}
-	service.Done(c)
-	resp.Body.Close()
-	return nil, usage
+	return nil
 }

-func ClaudeHandler(c *gin.Context, resp *http.Response, requestMode int, info *relaycommon.RelayInfo) (*dto.OpenAIErrorWithStatusCode, *dto.Usage) {
-	responseBody, err := io.ReadAll(resp.Body)
-	if err != nil {
-		return service.OpenAIErrorWrapper(err, "read_response_body_failed", http.StatusInternalServerError), nil
+func HandleStreamFinalResponse(c *gin.Context, info *relaycommon.RelayInfo, claudeInfo *ClaudeResponseInfo, requestMode int) {
+	if info.RelayFormat == relaycommon.RelayFormatClaude {
+		if requestMode == RequestModeCompletion {
+			claudeInfo.Usage, _ = service.ResponseText2Usage(claudeInfo.ResponseText.String(), info.UpstreamModelName, info.PromptTokens)
+		} else {
+			// 说明流模式建立失败，可能为官方出错
+			if claudeInfo.Usage.PromptTokens == 0 {
+				//usage.PromptTokens = info.PromptTokens
+			}
+			if claudeInfo.Usage.CompletionTokens == 0 {
+				claudeInfo.Usage, _ = service.ResponseText2Usage(claudeInfo.ResponseText.String(), info.UpstreamModelName, claudeInfo.Usage.PromptTokens)
+			}
+		}
+	} else if info.RelayFormat == relaycommon.RelayFormatOpenAI {
+		if requestMode == RequestModeCompletion {
+			claudeInfo.Usage, _ = service.ResponseText2Usage(claudeInfo.ResponseText.String(), info.UpstreamModelName, info.PromptTokens)
+		} else {
+			if claudeInfo.Usage.PromptTokens == 0 {
+				//上游出错
+			}
+			if claudeInfo.Usage.CompletionTokens == 0 {
+				claudeInfo.Usage, _ = service.ResponseText2Usage(claudeInfo.ResponseText.String(), info.UpstreamModelName, claudeInfo.Usage.PromptTokens)
+			}
+		}
+		if info.ShouldIncludeUsage {
+			response := helper.GenerateFinalUsageResponse(claudeInfo.ResponseId, claudeInfo.Created, info.UpstreamModelName, *claudeInfo.Usage)
+			err := helper.ObjectData(c, response)
+			if err != nil {
+				common.SysError("send final response failed: " + err.Error())
+			}
+		}
+		helper.Done(c)
 	}
-	err = resp.Body.Close()
-	if err != nil {
-		return service.OpenAIErrorWrapper(err, "close_response_body_failed", http.StatusInternalServerError), nil
+}
+
+func ClaudeStreamHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo, requestMode int) (*dto.OpenAIErrorWithStatusCode, *dto.Usage) {
+	claudeInfo := &ClaudeResponseInfo{
+		ResponseId:   fmt.Sprintf("chatcmpl-%s", common.GetUUID()),
+		Created:      common.GetTimestamp(),
+		Model:        info.UpstreamModelName,
+		ResponseText: strings.Builder{},
+		Usage:        &dto.Usage{},
 	}
-	var claudeResponse ClaudeResponse
-	err = json.Unmarshal(responseBody, &claudeResponse)
+	var err *dto.OpenAIErrorWithStatusCode
+	helper.StreamScannerHandler(c, resp, info, func(data string) bool {
+		err = HandleStreamResponseData(c, info, claudeInfo, data, requestMode)
+		if err != nil {
+			return false
+		}
+		return true
+	})
 	if err != nil {
-		return service.OpenAIErrorWrapper(err, "unmarshal_response_body_failed", http.StatusInternalServerError), nil
+		return err, nil
 	}
-	if claudeResponse.Error.Type != "" {
+
+	HandleStreamFinalResponse(c, info, claudeInfo, requestMode)
+	return nil, claudeInfo.Usage
+}
+
+func HandleClaudeResponseData(c *gin.Context, info *relaycommon.RelayInfo, claudeInfo *ClaudeResponseInfo, data []byte, requestMode int) *dto.OpenAIErrorWithStatusCode {
+	var claudeResponse dto.ClaudeResponse
+	err := common.DecodeJson(data, &claudeResponse)
+	if err != nil {
+		return service.OpenAIErrorWrapper(err, "unmarshal_claude_response_failed", http.StatusInternalServerError)
+	}
+	if claudeResponse.Error != nil && claudeResponse.Error.Type != "" {
 		return &dto.OpenAIErrorWithStatusCode{
 			Error: dto.OpenAIError{
 				Message: claudeResponse.Error.Message,
 				Type:    claudeResponse.Error.Type,
-				Param:   "",
 				Code:    claudeResponse.Error.Type,
 			},
-			StatusCode: resp.StatusCode,
-		}, nil
+			StatusCode: http.StatusInternalServerError,
+		}
 	}
-	fullTextResponse := ResponseClaude2OpenAI(requestMode, &claudeResponse)
-	completionTokens, err := service.CountTextToken(claudeResponse.Completion, info.OriginModelName)
-	if err != nil {
-		return service.OpenAIErrorWrapper(err, "count_token_text_failed", http.StatusInternalServerError), nil
-	}
-	usage := dto.Usage{}
 	if requestMode == RequestModeCompletion {
-		usage.PromptTokens = info.PromptTokens
-		usage.CompletionTokens = completionTokens
-		usage.TotalTokens = info.PromptTokens + completionTokens
+		completionTokens, err := service.CountTextToken(claudeResponse.Completion, info.OriginModelName)
+		if err != nil {
+			return service.OpenAIErrorWrapper(err, "count_token_text_failed", http.StatusInternalServerError)
+		}
+		claudeInfo.Usage.PromptTokens = info.PromptTokens
+		claudeInfo.Usage.CompletionTokens = completionTokens
+		claudeInfo.Usage.TotalTokens = info.PromptTokens + completionTokens
 	} else {
-		usage.PromptTokens = claudeResponse.Usage.InputTokens
-		usage.CompletionTokens = claudeResponse.Usage.OutputTokens
-		usage.TotalTokens = claudeResponse.Usage.InputTokens + claudeResponse.Usage.OutputTokens
+		claudeInfo.Usage.PromptTokens = claudeResponse.Usage.InputTokens
+		claudeInfo.Usage.CompletionTokens = claudeResponse.Usage.OutputTokens
+		claudeInfo.Usage.TotalTokens = claudeResponse.Usage.InputTokens + claudeResponse.Usage.OutputTokens
+		claudeInfo.Usage.PromptTokensDetails.CachedTokens = claudeResponse.Usage.CacheReadInputTokens
+		claudeInfo.Usage.PromptTokensDetails.CachedCreationTokens = claudeResponse.Usage.CacheCreationInputTokens
 	}
-	fullTextResponse.Usage = usage
-	jsonResponse, err := json.Marshal(fullTextResponse)
-	if err != nil {
-		return service.OpenAIErrorWrapper(err, "marshal_response_body_failed", http.StatusInternalServerError), nil
+	var responseData []byte
+	switch info.RelayFormat {
+	case relaycommon.RelayFormatOpenAI:
+		openaiResponse := ResponseClaude2OpenAI(requestMode, &claudeResponse)
+		openaiResponse.Usage = *claudeInfo.Usage
+		responseData, err = json.Marshal(openaiResponse)
+		if err != nil {
+			return service.OpenAIErrorWrapper(err, "marshal_response_body_failed", http.StatusInternalServerError)
+		}
+	case relaycommon.RelayFormatClaude:
+		responseData = data
 	}
 	c.Writer.Header().Set("Content-Type", "application/json")
-	c.Writer.WriteHeader(resp.StatusCode)
-	_, err = c.Writer.Write(jsonResponse)
-	return nil, &usage
+	c.Writer.WriteHeader(http.StatusOK)
+	_, err = c.Writer.Write(responseData)
+	return nil
+}
+
+func ClaudeHandler(c *gin.Context, resp *http.Response, requestMode int, info *relaycommon.RelayInfo) (*dto.OpenAIErrorWithStatusCode, *dto.Usage) {
+	claudeInfo := &ClaudeResponseInfo{
+		ResponseId:   fmt.Sprintf("chatcmpl-%s", common.GetUUID()),
+		Created:      common.GetTimestamp(),
+		Model:        info.UpstreamModelName,
+		ResponseText: strings.Builder{},
+		Usage:        &dto.Usage{},
+	}
+	responseBody, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return service.OpenAIErrorWrapper(err, "read_response_body_failed", http.StatusInternalServerError), nil
+	}
+	resp.Body.Close()
+	if common.DebugEnabled {
+		println("responseBody: ", string(responseBody))
+	}
+	handleErr := HandleClaudeResponseData(c, info, claudeInfo, responseBody, requestMode)
+	if handleErr != nil {
+		return handleErr, nil
+	}
+	return nil, claudeInfo.Usage
 }
--- a/relay/channel/cloudflare/adaptor.go
+++ b/relay/channel/cloudflare/adaptor.go
@@ -17,6 +17,12 @@ import (
 type Adaptor struct {
 }

+func (a *Adaptor) ConvertClaudeRequest(*gin.Context, *relaycommon.RelayInfo, *dto.ClaudeRequest) (any, error) {
+	//TODO implement me
+	panic("implement me")
+	return nil, nil
+}
+
 func (a *Adaptor) Init(info *relaycommon.RelayInfo) {
 }

@@ -37,7 +43,7 @@ func (a *Adaptor) SetupRequestHeader(c *gin.Context, req *http.Header, info *rel
 	return nil
 }

-func (a *Adaptor) ConvertRequest(c *gin.Context, info *relaycommon.RelayInfo, request *dto.GeneralOpenAIRequest) (any, error) {
+func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayInfo, request *dto.GeneralOpenAIRequest) (any, error) {
 	if request == nil {
 		return nil, errors.New("request is nil")
 	}
@@ -49,6 +55,11 @@ func (a *Adaptor) ConvertRequest(c *gin.Context, info *relaycommon.RelayInfo, re
 	}
 }

+func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
+	// TODO implement me
+	return nil, errors.New("not implemented")
+}
+
 func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error) {
 	return channel.DoApiRequest(a, c, info, requestBody)
 }
--- a/relay/channel/cloudflare/relay_cloudflare.go
+++ b/relay/channel/cloudflare/relay_cloudflare.go
@@ -9,6 +9,7 @@ import (
 	"one-api/common"
 	"one-api/dto"
 	relaycommon "one-api/relay/common"
+	"one-api/relay/helper"
 	"one-api/service"
 	"strings"
 	"time"
@@ -28,8 +29,8 @@ func cfStreamHandler(c *gin.Context, resp *http.Response, info *relaycommon.Rela
 	scanner := bufio.NewScanner(resp.Body)
 	scanner.Split(bufio.ScanLines)

-	service.SetEventStreamHeaders(c)
-	id := service.GetResponseID(c)
+	helper.SetEventStreamHeaders(c)
+	id := helper.GetResponseID(c)
 	var responseText string
 	isFirst := true

@@ -57,7 +58,7 @@ func cfStreamHandler(c *gin.Context, resp *http.Response, info *relaycommon.Rela
 		}
 		response.Id = id
 		response.Model = info.UpstreamModelName
-		err = service.ObjectData(c, response)
+		err = helper.ObjectData(c, response)
 		if isFirst {
 			isFirst = false
 			info.FirstResponseTime = time.Now()
@@ -72,13 +73,13 @@ func cfStreamHandler(c *gin.Context, resp *http.Response, info *relaycommon.Rela
 	}
 	usage, _ := service.ResponseText2Usage(responseText, info.UpstreamModelName, info.PromptTokens)
 	if info.ShouldIncludeUsage {
-		response := service.GenerateFinalUsageResponse(id, info.StartTime.Unix(), info.UpstreamModelName, *usage)
-		err := service.ObjectData(c, response)
+		response := helper.GenerateFinalUsageResponse(id, info.StartTime.Unix(), info.UpstreamModelName, *usage)
+		err := helper.ObjectData(c, response)
 		if err != nil {
 			common.LogError(c, "error_rendering_final_usage_response: "+err.Error())
 		}
 	}
-	service.Done(c)
+	helper.Done(c)

 	err := resp.Body.Close()
 	if err != nil {
@@ -109,7 +110,7 @@ func cfHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo)
 	}
 	usage, _ := service.ResponseText2Usage(responseText, info.UpstreamModelName, info.PromptTokens)
 	response.Usage = *usage
-	response.Id = service.GetResponseID(c)
+	response.Id = helper.GetResponseID(c)
 	jsonResponse, err := json.Marshal(response)
 	if err != nil {
 		return service.OpenAIErrorWrapper(err, "marshal_response_body_failed", http.StatusInternalServerError), nil
--- a/relay/channel/cohere/adaptor.go
+++ b/relay/channel/cohere/adaptor.go
@@ -3,18 +3,25 @@ package cohere
 import (
 	"errors"
 	"fmt"
-	"github.com/gin-gonic/gin"
 	"io"
 	"net/http"
 	"one-api/dto"
 	"one-api/relay/channel"
 	relaycommon "one-api/relay/common"
 	"one-api/relay/constant"
+
+	"github.com/gin-gonic/gin"
 )

 type Adaptor struct {
 }

+func (a *Adaptor) ConvertClaudeRequest(*gin.Context, *relaycommon.RelayInfo, *dto.ClaudeRequest) (any, error) {
+	//TODO implement me
+	panic("implement me")
+	return nil, nil
+}
+
 func (a *Adaptor) ConvertAudioRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.AudioRequest) (io.Reader, error) {
 	//TODO implement me
 	return nil, errors.New("not implemented")
@@ -42,10 +49,15 @@ func (a *Adaptor) SetupRequestHeader(c *gin.Context, req *http.Header, info *rel
 	return nil
 }

-func (a *Adaptor) ConvertRequest(c *gin.Context, info *relaycommon.RelayInfo, request *dto.GeneralOpenAIRequest) (any, error) {
+func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayInfo, request *dto.GeneralOpenAIRequest) (any, error) {
 	return requestOpenAI2Cohere(*request), nil
 }

+func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
+	// TODO implement me
+	return nil, errors.New("not implemented")
+}
+
 func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error) {
 	return channel.DoApiRequest(a, c, info, requestBody)
 }
@@ -59,7 +71,6 @@ func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.Rela
 	return nil, errors.New("not implemented")
 }

-
 func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (usage any, err *dto.OpenAIErrorWithStatusCode) {
 	if info.RelayMode == constant.RelayModeRerank {
 		err, usage = cohereRerankHandler(c, resp, info)
--- a/relay/channel/cohere/constant.go
+++ b/relay/channel/cohere/constant.go
@@ -1,6 +1,7 @@
 package cohere

 var ModelList = []string{
+	"command-a-03-2025",
 	"command-r", "command-r-plus",
 	"command-r-08-2024", "command-r-plus-08-2024",
 	"c4ai-aya-23-35b", "c4ai-aya-23-8b",
--- a/relay/channel/cohere/dto.go
+++ b/relay/channel/cohere/dto.go
@@ -40,8 +40,8 @@ type CohereRerankRequest struct {
 }

 type CohereRerankResponseResult struct {
-	Results []dto.RerankResponseDocument `json:"results"`
-	Meta    CohereMeta                   `json:"meta"`
+	Results []dto.RerankResponseResult `json:"results"`
+	Meta    CohereMeta                 `json:"meta"`
 }

 type CohereMeta struct {
--- a/relay/channel/cohere/relay-cohere.go
+++ b/relay/channel/cohere/relay-cohere.go
@@ -10,6 +10,7 @@ import (
 	"one-api/common"
 	"one-api/dto"
 	relaycommon "one-api/relay/common"
+	"one-api/relay/helper"
 	"one-api/service"
 	"strings"
 	"time"
@@ -103,7 +104,7 @@ func cohereStreamHandler(c *gin.Context, resp *http.Response, info *relaycommon.
 		}
 		stopChan <- true
 	}()
-	service.SetEventStreamHeaders(c)
+	helper.SetEventStreamHeaders(c)
 	isFirst := true
 	c.Stream(func(w io.Writer) bool {
 		select {
--- a/relay/channel/deepseek/adaptor.go
+++ b/relay/channel/deepseek/adaptor.go
@@ -3,7 +3,6 @@ package deepseek
 import (
 	"errors"
 	"fmt"
-	"github.com/gin-gonic/gin"
 	"io"
 	"net/http"
 	"one-api/dto"
@@ -11,11 +10,20 @@ import (
 	"one-api/relay/channel/openai"
 	relaycommon "one-api/relay/common"
 	"one-api/relay/constant"
+	"strings"
+
+	"github.com/gin-gonic/gin"
 )

 type Adaptor struct {
 }

+func (a *Adaptor) ConvertClaudeRequest(*gin.Context, *relaycommon.RelayInfo, *dto.ClaudeRequest) (any, error) {
+	// TODO implement me. Until then report the gap as an error, like the other
+	// unimplemented converters in this file, rather than panicking mid-request.
+	return nil, errors.New("not implemented")
+}
+
 func (a *Adaptor) ConvertAudioRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.AudioRequest) (io.Reader, error) {
 	//TODO implement me
 	return nil, errors.New("not implemented")
@@ -30,9 +38,13 @@ func (a *Adaptor) Init(info *relaycommon.RelayInfo) {
 }

 func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
+	fimBaseUrl := info.BaseUrl
+	if !strings.HasSuffix(info.BaseUrl, "/beta") {
+		fimBaseUrl += "/beta"
+	}
 	switch info.RelayMode {
 	case constant.RelayModeCompletions:
-		return fmt.Sprintf("%s/beta/completions", info.BaseUrl), nil
+		return fmt.Sprintf("%s/completions", fimBaseUrl), nil
 	default:
 		return fmt.Sprintf("%s/v1/chat/completions", info.BaseUrl), nil
 	}
@@ -44,7 +56,7 @@ func (a *Adaptor) SetupRequestHeader(c *gin.Context, req *http.Header, info *rel
 	return nil
 }

-func (a *Adaptor) ConvertRequest(c *gin.Context, info *relaycommon.RelayInfo, request *dto.GeneralOpenAIRequest) (any, error) {
+func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayInfo, request *dto.GeneralOpenAIRequest) (any, error) {
 	if request == nil {
 		return nil, errors.New("request is nil")
 	}
@@ -60,6 +72,11 @@ func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.Rela
 	return nil, errors.New("not implemented")
 }

+func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
+	// TODO implement me
+	return nil, errors.New("not implemented")
+}
+
 func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error) {
 	return channel.DoApiRequest(a, c, info, requestBody)
 }
@@ -68,7 +85,7 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycom
 	if info.IsStream {
 		err, usage = openai.OaiStreamHandler(c, resp, info)
 	} else {
-		err, usage = openai.OpenaiHandler(c, resp, info.PromptTokens, info.UpstreamModelName)
+		err, usage = openai.OpenaiHandler(c, resp, info)
 	}
 	return
 }
--- a/relay/channel/dify/adaptor.go
+++ b/relay/channel/dify/adaptor.go
@@ -3,15 +3,30 @@ package dify
 import (
 	"errors"
 	"fmt"
-	"github.com/gin-gonic/gin"
 	"io"
 	"net/http"
 	"one-api/dto"
 	"one-api/relay/channel"
 	relaycommon "one-api/relay/common"
+
+	"github.com/gin-gonic/gin"
+)
+
+const (
+	BotTypeChatFlow   = 1 // chatflow default
+	BotTypeAgent      = 2
+	BotTypeWorkFlow   = 3
+	BotTypeCompletion = 4
 )

 type Adaptor struct {
+	BotType int
+}
+
+func (a *Adaptor) ConvertClaudeRequest(*gin.Context, *relaycommon.RelayInfo, *dto.ClaudeRequest) (any, error) {
+	// TODO implement me. Until then report the gap as an error, like the other
+	// unimplemented converters in this file, rather than panicking mid-request.
+	return nil, errors.New("not implemented")
 }

 func (a *Adaptor) ConvertAudioRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.AudioRequest) (io.Reader, error) {
@@ -25,10 +40,29 @@ func (a *Adaptor) ConvertImageRequest(c *gin.Context, info *relaycommon.RelayInf
 }

 func (a *Adaptor) Init(info *relaycommon.RelayInfo) {
+	//if strings.HasPrefix(info.UpstreamModelName, "agent") {
+	//	a.BotType = BotTypeAgent
+	//} else if strings.HasPrefix(info.UpstreamModelName, "workflow") {
+	//	a.BotType = BotTypeWorkFlow
+	//} else if strings.HasPrefix(info.UpstreamModelName, "chat") {
+	//	a.BotType = BotTypeCompletion
+	//} else {
+	//}
+	a.BotType = BotTypeChatFlow
+
 }

 func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
-	return fmt.Sprintf("%s/v1/chat-messages", info.BaseUrl), nil
+	switch a.BotType {
+	case BotTypeWorkFlow:
+		return fmt.Sprintf("%s/v1/workflows/run", info.BaseUrl), nil
+	case BotTypeCompletion:
+		return fmt.Sprintf("%s/v1/completion-messages", info.BaseUrl), nil
+	case BotTypeAgent:
+		fallthrough
+	default:
+		return fmt.Sprintf("%s/v1/chat-messages", info.BaseUrl), nil
+	}
 }

 func (a *Adaptor) SetupRequestHeader(c *gin.Context, req *http.Header, info *relaycommon.RelayInfo) error {
@@ -37,11 +71,11 @@ func (a *Adaptor) SetupRequestHeader(c *gin.Context, req *http.Header, info *rel
 	return nil
 }

-func (a *Adaptor) ConvertRequest(c *gin.Context, info *relaycommon.RelayInfo, request *dto.GeneralOpenAIRequest) (any, error) {
+func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayInfo, request *dto.GeneralOpenAIRequest) (any, error) {
 	if request == nil {
 		return nil, errors.New("request is nil")
 	}
-	return requestOpenAI2Dify(*request), nil
+	return requestOpenAI2Dify(c, info, *request), nil
 }

 func (a *Adaptor) ConvertRerankRequest(c *gin.Context, relayMode int, request dto.RerankRequest) (any, error) {
@@ -53,6 +87,10 @@ func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.Rela
 	return nil, errors.New("not implemented")
 }

+func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
+	// TODO implement me
+	return nil, errors.New("not implemented")
+}

 func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error) {
 	return channel.DoApiRequest(a, c, info, requestBody)
--- a/relay/channel/dify/dto.go
+++ b/relay/channel/dify/dto.go
@@ -8,6 +8,14 @@ type DifyChatRequest struct {
 	ResponseMode     string                 `json:"response_mode"`
 	User             string                 `json:"user"`
 	AutoGenerateName bool                   `json:"auto_generate_name"`
+	Files            []DifyFile             `json:"files"`
+}
+
+type DifyFile struct {
+	Type         string `json:"type"`
+	TransferMode string `json:"transfer_mode"`
+	URL          string `json:"url,omitempty"`
+	UploadFileId string `json:"upload_file_id,omitempty"`
 }

 type DifyMetaData struct {
@@ -17,6 +25,8 @@ type DifyMetaData struct {
 type DifyData struct {
 	WorkflowId string `json:"workflow_id"`
 	NodeId     string `json:"node_id"`
+	NodeType   string `json:"node_type"`
+	Status     string `json:"status"`
 }

 type DifyChatCompletionResponse struct {
--- a/relay/channel/dify/relay-dify.go
+++ b/relay/channel/dify/relay-dify.go
@@ -1,45 +1,176 @@
 package dify

 import (
-	"bufio"
+	"bytes"
+	"encoding/base64"
 	"encoding/json"
-	"github.com/gin-gonic/gin"
+	"fmt"
 	"io"
+	"mime/multipart"
 	"net/http"
 	"one-api/common"
 	"one-api/constant"
 	"one-api/dto"
 	relaycommon "one-api/relay/common"
+	"one-api/relay/helper"
 	"one-api/service"
+	"os"
 	"strings"
+
+	"github.com/gin-gonic/gin"
 )

-func requestOpenAI2Dify(request dto.GeneralOpenAIRequest) *DifyChatRequest {
-	content := ""
-	for _, message := range request.Messages {
-		if message.Role == "system" {
-			content += "SYSTEM: \n" + message.StringContent() + "\n"
-		} else if message.Role == "assistant" {
-			content += "ASSISTANT: \n" + message.StringContent() + "\n"
-		} else {
-			content += "USER: \n" + message.StringContent() + "\n"
+// uploadDifyFile uploads a base64-encoded image to Dify's /v1/files/upload
+// endpoint on behalf of user and returns a DifyFile referencing the upload by
+// ID. It returns nil when media is not an image, or when decoding, building
+// the form, sending the request or reading the reply fails; every failure is
+// logged through common.SysError.
+func uploadDifyFile(c *gin.Context, info *relaycommon.RelayInfo, user string, media dto.MediaContent) *DifyFile {
+	uploadUrl := fmt.Sprintf("%s/v1/files/upload", info.BaseUrl)
+	switch media.Type {
+	case dto.ContentTypeImageURL:
+		imageMedia := media.GetImageMedia()
+		base64Data := imageMedia.Url
+		// Strip a data-URL prefix if present (e.g., "data:image/jpeg;base64,").
+		if idx := strings.Index(base64Data, ","); idx != -1 {
+			base64Data = base64Data[idx+1:]
+		}
+
+		decodedData, err := base64.StdEncoding.DecodeString(base64Data)
+		if err != nil {
+			common.SysError("failed to decode base64: " + err.Error())
+			return nil
+		}
+
+		// NOTE(review): the temp file is written but never read back; the
+		// multipart body below is filled directly from decodedData.
+		tempFile, err := os.CreateTemp("", "dify-upload-*")
+		if err != nil {
+			common.SysError("failed to create temp file: " + err.Error())
+			return nil
+		}
+		// Deferred calls run last-in-first-out: close the file, then remove it.
+		defer os.Remove(tempFile.Name())
+		defer tempFile.Close()
+		if _, err := tempFile.Write(decodedData); err != nil {
+			common.SysError("failed to write to temp file: " + err.Error())
+			return nil
+		}
+
+		// Build the multipart form: a "user" field plus the image as "file".
+		body := &bytes.Buffer{}
+		writer := multipart.NewWriter(body)
+		if err := writer.WriteField("user", user); err != nil {
+			common.SysError("failed to add user field: " + err.Error())
+			return nil
+		}
+
+		mimeType := imageMedia.MimeType
+		if mimeType == "" {
+			mimeType = "image/jpeg" // default mime type
+		}
+		part, err := writer.CreateFormFile("file", fmt.Sprintf("image.%s", strings.TrimPrefix(mimeType, "image/")))
+		if err != nil {
+			common.SysError("failed to create form file: " + err.Error())
+			return nil
+		}
+		if _, err = io.Copy(part, bytes.NewReader(decodedData)); err != nil {
+			common.SysError("failed to copy file content: " + err.Error())
+			return nil
+		}
+		// Close writes the trailing boundary; without it the body is incomplete.
+		if err := writer.Close(); err != nil {
+			common.SysError("failed to close multipart writer: " + err.Error())
+			return nil
+		}
+
+		req, err := http.NewRequest("POST", uploadUrl, body)
+		if err != nil {
+			common.SysError("failed to create request: " + err.Error())
+			return nil
+		}
+		req.Header.Set("Content-Type", writer.FormDataContentType())
+		req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", info.ApiKey))
+
+		client := service.GetImpatientHttpClient()
+		resp, err := client.Do(req)
+		if err != nil {
+			common.SysError("failed to send request: " + err.Error())
+			return nil
+		}
+		defer resp.Body.Close()
+
+		var result struct {
+			Id string `json:"id"`
+		}
+		if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
+			common.SysError("failed to decode response: " + err.Error())
+			return nil
+		}
+		// A reply without an ID (e.g. a JSON error body) cannot be referenced.
+		if result.Id == "" {
+			common.SysError(fmt.Sprintf("dify file upload returned no file id, status %d", resp.StatusCode))
+			return nil
+		}
+		return &DifyFile{
+			UploadFileId: result.Id,
+			Type:         "image",
+			TransferMode: "local_file",
 		}
 	}
+	return nil
+}
+
+// requestOpenAI2Dify converts an OpenAI chat request into a Dify chat request.
+// Messages are flattened into one role-prefixed query; user image parts become
+// Dify files (remote URLs are referenced directly, other images are uploaded).
+func requestOpenAI2Dify(c *gin.Context, info *relaycommon.RelayInfo, request dto.GeneralOpenAIRequest) *DifyChatRequest {
+	difyReq := DifyChatRequest{
+		Inputs:           make(map[string]interface{}),
+		AutoGenerateName: false,
+	}
+	user := request.User
+	if user == "" {
+		user = helper.GetResponseID(c)
+	}
+	difyReq.User = user
+	files := make([]DifyFile, 0)
+	var content strings.Builder
+	for _, message := range request.Messages {
+		if message.Role == "system" {
+			content.WriteString("SYSTEM: \n" + message.StringContent() + "\n")
+		} else if message.Role == "assistant" {
+			content.WriteString("ASSISTANT: \n" + message.StringContent() + "\n")
+		} else {
+			parseContent := message.ParseContent()
+			for _, mediaContent := range parseContent {
+				switch mediaContent.Type {
+				case dto.ContentTypeText:
+					content.WriteString("USER: \n" + mediaContent.Text + "\n")
+				case dto.ContentTypeImageURL:
+					media := mediaContent.GetImageMedia()
+					var file *DifyFile
+					if media.IsRemoteImage() {
+						// Allocate here: file is a nil pointer until assigned.
+						file = &DifyFile{Type: media.MimeType, TransferMode: "remote_url", URL: media.Url}
+					} else {
+						file = uploadDifyFile(c, info, difyReq.User, mediaContent)
+					}
+					if file != nil {
+						files = append(files, *file)
+					}
+				}
+			}
+		}
+	}
+	difyReq.Query = content.String()
+	difyReq.Files = files
 	mode := "blocking"
 	if request.Stream {
 		mode = "streaming"
 	}
-	user := request.User
-	if user == "" {
-		user = "api-user"
-	}
-	return &DifyChatRequest{
-		Inputs:           make(map[string]interface{}),
-		Query:            content,
-		ResponseMode:     mode,
-		User:             user,
-		AutoGenerateName: false,
-	}
+	difyReq.ResponseMode = mode
+	return &difyReq
 }

 func streamResponseDify2OpenAI(difyResponse DifyChunkChatCompletionResponse) *dto.ChatCompletionsStreamResponse {
@@ -49,11 +180,29 @@ func streamResponseDify2OpenAI(difyResponse DifyChunkChatCompletionResponse) *dt
 		Model:   "dify",
 	}
 	var choice dto.ChatCompletionsStreamResponseChoice
-	if constant.DifyDebug && difyResponse.Event == "workflow_started" {
-		choice.Delta.SetContentString("Workflow: " + difyResponse.Data.WorkflowId + "\n")
-	} else if constant.DifyDebug && difyResponse.Event == "node_started" {
-		choice.Delta.SetContentString("Node: " + difyResponse.Data.NodeId + "\n")
+	if strings.HasPrefix(difyResponse.Event, "workflow_") {
+		if constant.DifyDebug {
+			text := "Workflow: " + difyResponse.Data.WorkflowId
+			if difyResponse.Event == "workflow_finished" {
+				text += " " + difyResponse.Data.Status
+			}
+			choice.Delta.SetReasoningContent(text + "\n")
+		}
+	} else if strings.HasPrefix(difyResponse.Event, "node_") {
+		if constant.DifyDebug {
+			text := "Node: " + difyResponse.Data.NodeType
+			if difyResponse.Event == "node_finished" {
+				text += " " + difyResponse.Data.Status
+			}
+			choice.Delta.SetReasoningContent(text + "\n")
+		}
 	} else if difyResponse.Event == "message" || difyResponse.Event == "agent_message" {
+		if difyResponse.Answer == "<details style=\"color:gray;background-color: #f8f8f8;padding: 8px;border-radius: 4px;\" open> <summary> Thinking... </summary>\n" {
+			difyResponse.Answer = "<think>"
+		} else if difyResponse.Answer == "</details>" {
+			difyResponse.Answer = "</think>"
+		}
+
 		choice.Delta.SetContentString(difyResponse.Answer)
 	}
 	response.Choices = append(response.Choices, choice)
@@ -63,47 +212,40 @@ func streamResponseDify2OpenAI(difyResponse DifyChunkChatCompletionResponse) *dt
 func difyStreamHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (*dto.OpenAIErrorWithStatusCode, *dto.Usage) {
 	var responseText string
 	usage := &dto.Usage{}
-	scanner := bufio.NewScanner(resp.Body)
-	scanner.Split(bufio.ScanLines)
-
-	service.SetEventStreamHeaders(c)
-
-	for scanner.Scan() {
-		data := scanner.Text()
-		if len(data) < 5 || !strings.HasPrefix(data, "data:") {
-			continue
-		}
-		data = strings.TrimPrefix(data, "data:")
+	var nodeToken int
+	helper.SetEventStreamHeaders(c)
+	helper.StreamScannerHandler(c, resp, info, func(data string) bool {
 		var difyResponse DifyChunkChatCompletionResponse
 		err := json.Unmarshal([]byte(data), &difyResponse)
 		if err != nil {
 			common.SysError("error unmarshalling stream response: " + err.Error())
-			continue
+			return true
 		}
 		var openaiResponse dto.ChatCompletionsStreamResponse
 		if difyResponse.Event == "message_end" {
 			usage = &difyResponse.MetaData.Usage
-			break
+			return false
 		} else if difyResponse.Event == "error" {
-			break
+			return false
 		} else {
 			openaiResponse = *streamResponseDify2OpenAI(difyResponse)
 			if len(openaiResponse.Choices) != 0 {
 				responseText += openaiResponse.Choices[0].Delta.GetContentString()
+				if openaiResponse.Choices[0].Delta.ReasoningContent != nil {
+					nodeToken += 1
+				}
 			}
 		}
-		err = service.ObjectData(c, openaiResponse)
+		err = helper.ObjectData(c, openaiResponse)
 		if err != nil {
 			common.SysError(err.Error())
 		}
-	}
-	if err := scanner.Err(); err != nil {
-		common.SysError("error reading stream: " + err.Error())
-	}
-	service.Done(c)
+		return true
+	})
+	helper.Done(c)
 	err := resp.Body.Close()
 	if err != nil {
-		//return service.OpenAIErrorWrapper(err, "close_response_body_failed", http.StatusInternalServerError), nil
+		// return service.OpenAIErrorWrapper(err, "close_response_body_failed", http.StatusInternalServerError), nil
 		common.SysError("close_response_body_failed: " + err.Error())
 	}
 	if usage.TotalTokens == 0 {
@@ -111,6 +253,7 @@ func difyStreamHandler(c *gin.Context, resp *http.Response, info *relaycommon.Re
 		usage.CompletionTokens, _ = service.CountTextToken("gpt-3.5-turbo", responseText)
 		usage.TotalTokens = usage.PromptTokens + usage.CompletionTokens
 	}
+	usage.CompletionTokens += nodeToken
 	return nil, usage
 }

--- a/relay/channel/gemini/adaptor.go
+++ b/relay/channel/gemini/adaptor.go
@@ -7,12 +7,11 @@ import (
 	"io"
 	"net/http"
 	"one-api/common"
-	"one-api/constant"
 	"one-api/dto"
 	"one-api/relay/channel"
 	relaycommon "one-api/relay/common"
 	"one-api/service"
-
+	"one-api/setting/model_setting"
 	"strings"

 	"github.com/gin-gonic/gin"
@@ -21,6 +20,12 @@ import (
 type Adaptor struct {
 }

+func (a *Adaptor) ConvertClaudeRequest(*gin.Context, *relaycommon.RelayInfo, *dto.ClaudeRequest) (any, error) {
+	// TODO implement me. Until then report the gap as an error, like the other
+	// unimplemented converters in this file, rather than panicking mid-request.
+	return nil, errors.New("not implemented")
+}
+
 func (a *Adaptor) ConvertAudioRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.AudioRequest) (io.Reader, error) {
 	//TODO implement me
 	return nil, errors.New("not implemented")
@@ -64,20 +69,28 @@ func (a *Adaptor) Init(info *relaycommon.RelayInfo) {
 }

 func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
-	// 从映射中获取模型名称对应的版本，如果找不到就使用 info.ApiVersion 或默认的版本 "v1beta"
-	version, beta := constant.GeminiModelMap[info.UpstreamModelName]
-	if !beta {
-		if info.ApiVersion != "" {
-			version = info.ApiVersion
-		} else {
-			version = "v1beta"
+
+	if model_setting.GetGeminiSettings().ThinkingAdapterEnabled {
+		// suffix -thinking and -nothinking
+		if strings.HasSuffix(info.OriginModelName, "-thinking") {
+			info.UpstreamModelName = strings.TrimSuffix(info.UpstreamModelName, "-thinking")
+		} else if strings.HasSuffix(info.OriginModelName, "-nothinking") {
+			info.UpstreamModelName = strings.TrimSuffix(info.UpstreamModelName, "-nothinking")
 		}
 	}

+	version := model_setting.GetGeminiVersionSetting(info.UpstreamModelName)
+
 	if strings.HasPrefix(info.UpstreamModelName, "imagen") {
 		return fmt.Sprintf("%s/%s/models/%s:predict", info.BaseUrl, version, info.UpstreamModelName), nil
 	}

+	if strings.HasPrefix(info.UpstreamModelName, "text-embedding") ||
+		strings.HasPrefix(info.UpstreamModelName, "embedding") ||
+		strings.HasPrefix(info.UpstreamModelName, "gemini-embedding") {
+		return fmt.Sprintf("%s/%s/models/%s:embedContent", info.BaseUrl, version, info.UpstreamModelName), nil
+	}
+
 	action := "generateContent"
 	if info.IsStream {
 		action = "streamGenerateContent?alt=sse"
@@ -91,15 +104,17 @@ func (a *Adaptor) SetupRequestHeader(c *gin.Context, req *http.Header, info *rel
 	return nil
 }

-func (a *Adaptor) ConvertRequest(c *gin.Context, info *relaycommon.RelayInfo, request *dto.GeneralOpenAIRequest) (any, error) {
+func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayInfo, request *dto.GeneralOpenAIRequest) (any, error) {
 	if request == nil {
 		return nil, errors.New("request is nil")
 	}
-	ai, err := CovertGemini2OpenAI(*request)
+
+	geminiRequest, err := CovertGemini2OpenAI(*request, info)
 	if err != nil {
 		return nil, err
 	}
-	return ai, nil
+
+	return geminiRequest, nil
 }

 func (a *Adaptor) ConvertRerankRequest(c *gin.Context, relayMode int, request dto.RerankRequest) (any, error) {
@@ -107,7 +122,41 @@ func (a *Adaptor) ConvertRerankRequest(c *gin.Context, relayMode int, request dt
 }

+// ConvertEmbeddingRequest builds a Gemini embedContent request from the first
+// input of an OpenAI-style embedding request; it errors when no input is given.
 func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.EmbeddingRequest) (any, error) {
-	//TODO implement me
+	if request.Input == nil {
+		return nil, errors.New("input is required")
+	}
+	inputs := request.ParseInput()
+	if len(inputs) == 0 {
+		return nil, errors.New("input is empty")
+	}
+
+	// Only the first input is sent; any further inputs are ignored.
+	geminiRequest := GeminiEmbeddingRequest{
+		Content: GeminiChatContent{
+			Parts: []GeminiPart{
+				{
+					Text: inputs[0],
+				},
+			},
+		},
+	}
+
+	// Set model-specific parameters. API reference:
+	// https://ai.google.dev/api/embeddings?hl=zh-cn#method:-models.embedcontent
+	switch info.UpstreamModelName {
+	case "text-embedding-004":
+		// NOTE(review): presumably every model except embedding-001 accepts `OutputDimensionality`, yet only this one is handled — confirm.
+		if request.Dimensions > 0 {
+			geminiRequest.OutputDimensionality = request.Dimensions
+		}
+	}
+	return geminiRequest, nil
+}
+
+func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
+	// TODO implement me
 	return nil, errors.New("not implemented")
 }

@@ -120,11 +169,30 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycom
 		return GeminiImageHandler(c, resp, info)
 	}

+	// check if the model is an embedding model
+	if strings.HasPrefix(info.UpstreamModelName, "text-embedding") ||
+		strings.HasPrefix(info.UpstreamModelName, "embedding") ||
+		strings.HasPrefix(info.UpstreamModelName, "gemini-embedding") {
+		return GeminiEmbeddingHandler(c, resp, info)
+	}
+
 	if info.IsStream {
 		err, usage = GeminiChatStreamHandler(c, resp, info)
 	} else {
 		err, usage = GeminiChatHandler(c, resp, info)
 	}
+
+	//if usage.(*dto.Usage).CompletionTokenDetails.ReasoningTokens > 100 {
+	//	// 没有请求-thinking的情况下，产生思考token，则按照思考模型计费
+	//	if !strings.HasSuffix(info.OriginModelName, "-thinking") &&
+	//		!strings.HasSuffix(info.OriginModelName, "-nothinking") {
+	//		thinkingModelName := info.OriginModelName + "-thinking"
+	//		if operation_setting.SelfUseModeEnabled || helper.ContainPriceOrRatio(thinkingModelName) {
+	//			info.OriginModelName = thinkingModelName
+	//		}
+	//	}
+	//}
+
 	return
 }

--- a/relay/channel/gemini/constant.go
+++ b/relay/channel/gemini/constant.go
@@ -16,8 +16,22 @@ var ModelList = []string{
 	"gemini-2.0-pro-exp",
 	// thinking exp
 	"gemini-2.0-flash-thinking-exp",
+	"gemini-2.5-pro-exp-03-25",
+	"gemini-2.5-pro-preview-03-25",
 	// imagen models
 	"imagen-3.0-generate-002",
+	// embedding models
+	"gemini-embedding-exp-03-07",
+	"text-embedding-004",
+	"embedding-001",
+}
+
+var SafetySettingList = []string{
+	"HARM_CATEGORY_HARASSMENT",
+	"HARM_CATEGORY_HATE_SPEECH",
+	"HARM_CATEGORY_SEXUALLY_EXPLICIT",
+	"HARM_CATEGORY_DANGEROUS_CONTENT",
+	"HARM_CATEGORY_CIVIC_INTEGRITY",
 }

 var ChannelName = "google gemini"
--- a/relay/channel/gemini/dto.go
+++ b/relay/channel/gemini/dto.go
@@ -8,6 +8,15 @@ type GeminiChatRequest struct {
 	SystemInstructions *GeminiChatContent         `json:"system_instruction,omitempty"`
 }

+type GeminiThinkingConfig struct {
+	IncludeThoughts bool `json:"includeThoughts,omitempty"`
+	ThinkingBudget  *int `json:"thinkingBudget,omitempty"`
+}
+
+func (c *GeminiThinkingConfig) SetThinkingBudget(budget int) {
+	c.ThinkingBudget = &budget
+}
+
 type GeminiInlineData struct {
 	MimeType string `json:"mimeType"`
 	Data     string `json:"data"`
@@ -71,15 +80,17 @@ type GeminiChatTool struct {
 }

 type GeminiChatGenerationConfig struct {
-	Temperature      *float64 `json:"temperature,omitempty"`
-	TopP             float64  `json:"topP,omitempty"`
-	TopK             float64  `json:"topK,omitempty"`
-	MaxOutputTokens  uint     `json:"maxOutputTokens,omitempty"`
-	CandidateCount   int      `json:"candidateCount,omitempty"`
-	StopSequences    []string `json:"stopSequences,omitempty"`
-	ResponseMimeType string   `json:"responseMimeType,omitempty"`
-	ResponseSchema   any      `json:"responseSchema,omitempty"`
-	Seed             int64    `json:"seed,omitempty"`
+	Temperature        *float64              `json:"temperature,omitempty"`
+	TopP               float64               `json:"topP,omitempty"`
+	TopK               float64               `json:"topK,omitempty"`
+	MaxOutputTokens    uint                  `json:"maxOutputTokens,omitempty"`
+	CandidateCount     int                   `json:"candidateCount,omitempty"`
+	StopSequences      []string              `json:"stopSequences,omitempty"`
+	ResponseMimeType   string                `json:"responseMimeType,omitempty"`
+	ResponseSchema     any                   `json:"responseSchema,omitempty"`
+	Seed               int64                 `json:"seed,omitempty"`
+	ResponseModalities []string              `json:"responseModalities,omitempty"`
+	ThinkingConfig     *GeminiThinkingConfig `json:"thinkingConfig,omitempty"`
 }

 type GeminiChatCandidate struct {
@@ -108,6 +119,7 @@ type GeminiUsageMetadata struct {
 	PromptTokenCount     int `json:"promptTokenCount"`
 	CandidatesTokenCount int `json:"candidatesTokenCount"`
 	TotalTokenCount      int `json:"totalTokenCount"`
+	ThoughtsTokenCount   int `json:"thoughtsTokenCount"`
 }

 // Imagen related structs
@@ -136,3 +148,19 @@ type GeminiImagePrediction struct {
 	RaiFilteredReason  string `json:"raiFilteredReason,omitempty"`
 	SafetyAttributes   any    `json:"safetyAttributes,omitempty"`
 }
+
+// Embedding related structs
+type GeminiEmbeddingRequest struct {
+	Content              GeminiChatContent `json:"content"`
+	TaskType             string            `json:"taskType,omitempty"`
+	Title                string            `json:"title,omitempty"`
+	OutputDimensionality int               `json:"outputDimensionality,omitempty"`
+}
+
+type GeminiEmbeddingResponse struct {
+	Embedding ContentEmbedding `json:"embedding"`
+}
+
+type ContentEmbedding struct {
+	Values []float64 `json:"values"`
+}
--- a/relay/channel/gemini/relay-gemini.go
+++ b/relay/channel/gemini/relay-gemini.go
@@ -1,7 +1,6 @@
 package gemini

 import (
-	"bufio"
 	"encoding/json"
 	"fmt"
 	"io"
@@ -10,7 +9,9 @@ import (
 	"one-api/constant"
 	"one-api/dto"
 	relaycommon "one-api/relay/common"
+	"one-api/relay/helper"
 	"one-api/service"
+	"one-api/setting/model_setting"
 	"strings"
 	"unicode/utf8"

@@ -18,32 +19,10 @@ import (
 )

 // Setting safety to the lowest possible values since Gemini is already powerless enough
-func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest) (*GeminiChatRequest, error) {
+func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest, info *relaycommon.RelayInfo) (*GeminiChatRequest, error) {

 	geminiRequest := GeminiChatRequest{
 		Contents: make([]GeminiChatContent, 0, len(textRequest.Messages)),
-		SafetySettings: []GeminiChatSafetySettings{
-			{
-				Category:  "HARM_CATEGORY_HARASSMENT",
-				Threshold: common.GeminiSafetySetting,
-			},
-			{
-				Category:  "HARM_CATEGORY_HATE_SPEECH",
-				Threshold: common.GeminiSafetySetting,
-			},
-			{
-				Category:  "HARM_CATEGORY_SEXUALLY_EXPLICIT",
-				Threshold: common.GeminiSafetySetting,
-			},
-			{
-				Category:  "HARM_CATEGORY_DANGEROUS_CONTENT",
-				Threshold: common.GeminiSafetySetting,
-			},
-			{
-				Category:  "HARM_CATEGORY_CIVIC_INTEGRITY",
-				Threshold: common.GeminiSafetySetting,
-			},
-		},
 		GenerationConfig: GeminiChatGenerationConfig{
 			Temperature:     textRequest.Temperature,
 			TopP:            textRequest.TopP,
@@ -52,9 +31,42 @@ func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest) (*GeminiChatReque
 		},
 	}

+	if model_setting.IsGeminiModelSupportImagine(info.UpstreamModelName) {
+		geminiRequest.GenerationConfig.ResponseModalities = []string{
+			"TEXT",
+			"IMAGE",
+		}
+	}
+
+	if model_setting.GetGeminiSettings().ThinkingAdapterEnabled {
+		if strings.HasSuffix(info.OriginModelName, "-thinking") {
+			budgetTokens := model_setting.GetGeminiSettings().ThinkingAdapterBudgetTokensPercentage * float64(geminiRequest.GenerationConfig.MaxOutputTokens)
+			if budgetTokens == 0 || budgetTokens > 24576 {
+				budgetTokens = 24576
+			}
+			geminiRequest.GenerationConfig.ThinkingConfig = &GeminiThinkingConfig{
+				ThinkingBudget:  common.GetPointer(int(budgetTokens)),
+				IncludeThoughts: true,
+			}
+		} else if strings.HasSuffix(info.OriginModelName, "-nothinking") {
+			geminiRequest.GenerationConfig.ThinkingConfig = &GeminiThinkingConfig{
+				ThinkingBudget: common.GetPointer(0),
+			}
+		}
+	}
+
+	safetySettings := make([]GeminiChatSafetySettings, 0, len(SafetySettingList))
+	for _, category := range SafetySettingList {
+		safetySettings = append(safetySettings, GeminiChatSafetySettings{
+			Category:  category,
+			Threshold: model_setting.GetGeminiSafetySetting(category),
+		})
+	}
+	geminiRequest.SafetySettings = safetySettings
+
 	// openaiContent.FuncToToolCalls()
 	if textRequest.Tools != nil {
-		functions := make([]dto.FunctionCall, 0, len(textRequest.Tools))
+		functions := make([]dto.FunctionRequest, 0, len(textRequest.Tools))
 		googleSearch := false
 		codeExecution := false
 		for _, tool := range textRequest.Tools {
@@ -67,6 +79,7 @@ func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest) (*GeminiChatReque
 				continue
 			}
 			if tool.Function.Parameters != nil {
+
 				params, ok := tool.Function.Parameters.(map[string]interface{})
 				if ok {
 					if props, hasProps := params["properties"].(map[string]interface{}); hasProps {
@@ -76,6 +89,9 @@ func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest) (*GeminiChatReque
 					}
 				}
 			}
+			// Clean the parameters before appending
+			cleanedParams := cleanFunctionParameters(tool.Function.Parameters)
+			tool.Function.Parameters = cleanedParams
 			functions = append(functions, tool.Function)
 		}
 		if codeExecution {
@@ -97,11 +113,11 @@ func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest) (*GeminiChatReque
 		// json_data, _ := json.Marshal(geminiRequest.Tools)
 		// common.SysLog("tools_json: " + string(json_data))
 	} else if textRequest.Functions != nil {
-		geminiRequest.Tools = []GeminiChatTool{
-			{
-				FunctionDeclarations: textRequest.Functions,
-			},
-		}
+		//geminiRequest.Tools = []GeminiChatTool{
+		//	{
+		//		FunctionDeclarations: textRequest.Functions,
+		//	},
+		//}
 	}

 	if textRequest.ResponseFormat != nil && (textRequest.ResponseFormat.Type == "json_schema" || textRequest.ResponseFormat.Type == "json_object") {
@@ -191,9 +207,9 @@ func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest) (*GeminiChatReque
 					return nil, fmt.Errorf("too many images in the message, max allowed is %d", constant.GeminiVisionMaxImageNum)
 				}
 				// 判断是否是url
-				if strings.HasPrefix(part.ImageUrl.(dto.MessageImageUrl).Url, "http") {
+				if strings.HasPrefix(part.GetImageMedia().Url, "http") {
 					// 是url，获取图片的类型和base64编码的数据
-					fileData, err := service.GetFileBase64FromUrl(part.ImageUrl.(dto.MessageImageUrl).Url)
+					fileData, err := service.GetFileBase64FromUrl(part.GetImageMedia().Url)
 					if err != nil {
 						return nil, fmt.Errorf("get file base64 from url failed: %s", err.Error())
 					}
@@ -204,7 +220,7 @@ func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest) (*GeminiChatReque
 						},
 					})
 				} else {
-					format, base64String, err := service.DecodeBase64FileData(part.ImageUrl.(dto.MessageImageUrl).Url)
+					format, base64String, err := service.DecodeBase64FileData(part.GetImageMedia().Url)
 					if err != nil {
 						return nil, fmt.Errorf("decode base64 image data failed: %s", err.Error())
 					}
@@ -215,6 +231,34 @@ func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest) (*GeminiChatReque
 						},
 					})
 				}
+			} else if part.Type == dto.ContentTypeFile {
+				if part.GetFile().FileId != "" {
+					return nil, fmt.Errorf("only base64 file is supported in gemini")
+				}
+				format, base64String, err := service.DecodeBase64FileData(part.GetFile().FileData)
+				if err != nil {
+					return nil, fmt.Errorf("decode base64 file data failed: %s", err.Error())
+				}
+				parts = append(parts, GeminiPart{
+					InlineData: &GeminiInlineData{
+						MimeType: format,
+						Data:     base64String,
+					},
+				})
+			} else if part.Type == dto.ContentTypeInputAudio {
+				if part.GetInputAudio().Data == "" {
+					return nil, fmt.Errorf("only base64 audio is supported in gemini")
+				}
+				format, base64String, err := service.DecodeBase64FileData(part.GetInputAudio().Data)
+				if err != nil {
+					return nil, fmt.Errorf("decode base64 audio data failed: %s", err.Error())
+				}
+				parts = append(parts, GeminiPart{
+					InlineData: &GeminiInlineData{
+						MimeType: format,
+						Data:     base64String,
+					},
+				})
 			}
 		}

@@ -240,6 +284,102 @@ func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest) (*GeminiChatReque
 	return &geminiRequest, nil
 }

+// cleanFunctionParameters returns a copy of a JSON-schema-like parameter
+// definition with the keywords that are stripped for Gemini function
+// declarations removed.
+//
+// The same cleaning is applied to every schema it can reach: the root, each
+// entry of "properties", "items" (a single schema or a list of schemas) and
+// every member of "allOf" / "anyOf" / "oneOf". For each schema map it:
+//   - drops "default", "exclusiveMaximum", "exclusiveMinimum", "$schema" and
+//     "additionalProperties";
+//   - drops "format" on string-typed schemas unless it is "enum" or "date-time".
+//
+// Values that are not map[string]interface{} (nil, slices, primitives) are
+// returned unchanged. Input maps are never mutated; a fresh map is built for
+// every level that is cleaned.
+func cleanFunctionParameters(params interface{}) interface{} {
+	if params == nil {
+		return nil
+	}
+
+	paramMap, ok := params.(map[string]interface{})
+	if !ok {
+		// Not a schema object (e.g. an array or primitive): nothing to clean.
+		return params
+	}
+
+	// Work on a shallow copy so the caller's map is left untouched.
+	cleanedMap := make(map[string]interface{}, len(paramMap))
+	for k, v := range paramMap {
+		cleanedMap[k] = v
+	}
+
+	// Keywords removed at every schema level.
+	for _, key := range []string{"default", "exclusiveMaximum", "exclusiveMinimum", "$schema", "additionalProperties"} {
+		delete(cleanedMap, key)
+	}
+
+	// String schemas keep only the "enum" and "date-time" formats. Checking here,
+	// rather than only for direct properties, also covers array items and
+	// allOf/anyOf/oneOf members.
+	if schemaType, isString := cleanedMap["type"].(string); isString && schemaType == "string" {
+		if format, hasFormat := cleanedMap["format"].(string); hasFormat && format != "enum" && format != "date-time" {
+			delete(cleanedMap, "format")
+		}
+	}
+
+	// Clean every property schema; non-map property values pass through as is.
+	if props, ok := cleanedMap["properties"].(map[string]interface{}); ok && props != nil {
+		cleanedProps := make(map[string]interface{}, len(props))
+		for propName, propValue := range props {
+			cleanedProps[propName] = cleanFunctionParameters(propValue)
+		}
+		cleanedMap["properties"] = cleanedProps
+	}
+
+	// "items" is either a single schema or a list of schemas.
+	switch items := cleanedMap["items"].(type) {
+	case map[string]interface{}:
+		if items != nil {
+			cleanedMap["items"] = cleanFunctionParameters(items)
+		}
+	case []interface{}:
+		cleanedItems := make([]interface{}, len(items))
+		for i, item := range items {
+			cleanedItems[i] = cleanFunctionParameters(item)
+		}
+		cleanedMap["items"] = cleanedItems
+	}
+
+	// Schema composition keywords hold lists of sub-schemas.
+	for _, field := range []string{"allOf", "anyOf", "oneOf"} {
+		nested, ok := cleanedMap[field].([]interface{})
+		if !ok {
+			continue
+		}
+		cleanedNested := make([]interface{}, len(nested))
+		for i, item := range nested {
+			cleanedNested[i] = cleanFunctionParameters(item)
+		}
+		cleanedMap[field] = cleanedNested
+	}
+
+	return cleanedMap
+}
+
+
 func removeAdditionalPropertiesWithDepth(schema interface{}, depth int) interface{} {
 	if depth >= 5 {
 		return schema
@@ -251,6 +391,7 @@ func removeAdditionalPropertiesWithDepth(schema interface{}, depth int) interfac
 	}
 	// 删除所有的title字段
 	delete(v, "title")
+	delete(v, "$schema")
 	// 如果type不为object和array，则直接返回
 	if typeVal, exists := v["type"]; !exists || (typeVal != "object" && typeVal != "array") {
 		return schema
@@ -349,7 +490,7 @@ func unescapeMapOrSlice(data interface{}) interface{} {
 	return data
 }

-func getToolCall(item *GeminiPart) *dto.ToolCall {
+func getResponseToolCall(item *GeminiPart) *dto.ToolCallResponse {
 	var argsBytes []byte
 	var err error
 	if result, ok := item.FunctionCall.Arguments.(map[string]interface{}); ok {
@@ -361,10 +502,10 @@ func getToolCall(item *GeminiPart) *dto.ToolCall {
 	if err != nil {
 		return nil
 	}
-	return &dto.ToolCall{
+	return &dto.ToolCallResponse{
 		ID:   fmt.Sprintf("call_%s", common.GetUUID()),
 		Type: "function",
-		Function: dto.FunctionCall{
+		Function: dto.FunctionResponse{
 			Arguments: string(argsBytes),
 			Name:      item.FunctionCall.FunctionName,
 		},
@@ -379,7 +520,7 @@ func responseGeminiChat2OpenAI(response *GeminiChatResponse) *dto.OpenAITextResp
 		Choices: make([]dto.OpenAITextResponseChoice, 0, len(response.Candidates)),
 	}
 	content, _ := json.Marshal("")
-	is_tool_call := false
+	isToolCall := false
 	for _, candidate := range response.Candidates {
 		choice := dto.OpenAITextResponseChoice{
 			Index: int(candidate.Index),
@@ -391,12 +532,12 @@ func responseGeminiChat2OpenAI(response *GeminiChatResponse) *dto.OpenAITextResp
 		}
 		if len(candidate.Content.Parts) > 0 {
 			var texts []string
-			var tool_calls []dto.ToolCall
+			var toolCalls []dto.ToolCallResponse
 			for _, part := range candidate.Content.Parts {
 				if part.FunctionCall != nil {
 					choice.FinishReason = constant.FinishReasonToolCalls
-					if call := getToolCall(&part); call != nil {
-						tool_calls = append(tool_calls, *call)
+					if call := getResponseToolCall(&part); call != nil {
+						toolCalls = append(toolCalls, *call)
 					}
 				} else {
 					if part.ExecutableCode != nil {
@@ -411,9 +552,9 @@ func responseGeminiChat2OpenAI(response *GeminiChatResponse) *dto.OpenAITextResp
 					}
 				}
 			}
-			if len(tool_calls) > 0 {
-				choice.Message.SetToolCalls(tool_calls)
-				is_tool_call = true
+			if len(toolCalls) > 0 {
+				choice.Message.SetToolCalls(toolCalls)
+				isToolCall = true
 			}

 			choice.Message.SetStringContent(strings.Join(texts, "\n"))
@@ -429,7 +570,7 @@ func responseGeminiChat2OpenAI(response *GeminiChatResponse) *dto.OpenAITextResp
 				choice.FinishReason = constant.FinishReasonContentFilter
 			}
 		}
-		if is_tool_call {
+		if isToolCall {
 			choice.FinishReason = constant.FinishReasonToolCalls
 		}

@@ -438,12 +579,13 @@ func responseGeminiChat2OpenAI(response *GeminiChatResponse) *dto.OpenAITextResp
 	return &fullTextResponse
 }

-func streamResponseGeminiChat2OpenAI(geminiResponse *GeminiChatResponse) (*dto.ChatCompletionsStreamResponse, bool) {
+func streamResponseGeminiChat2OpenAI(geminiResponse *GeminiChatResponse) (*dto.ChatCompletionsStreamResponse, bool, bool) {
 	choices := make([]dto.ChatCompletionsStreamResponseChoice, 0, len(geminiResponse.Candidates))
-	is_stop := false
+	isStop := false
+	hasImage := false
 	for _, candidate := range geminiResponse.Candidates {
 		if candidate.FinishReason != nil && *candidate.FinishReason == "STOP" {
-			is_stop = true
+			isStop = true
 			candidate.FinishReason = nil
 		}
 		choice := dto.ChatCompletionsStreamResponseChoice{
@@ -466,9 +608,15 @@ func streamResponseGeminiChat2OpenAI(geminiResponse *GeminiChatResponse) (*dto.C
 			}
 		}
 		for _, part := range candidate.Content.Parts {
-			if part.FunctionCall != nil {
+			if part.InlineData != nil {
+				if strings.HasPrefix(part.InlineData.MimeType, "image") {
+					imgText := "![image](data:" + part.InlineData.MimeType + ";base64," + part.InlineData.Data + ")"
+					texts = append(texts, imgText)
+					hasImage = true
+				}
+			} else if part.FunctionCall != nil {
 				isTools = true
-				if call := getToolCall(&part); call != nil {
+				if call := getResponseToolCall(&part); call != nil {
 					call.SetIndex(len(choice.Delta.ToolCalls))
 					choice.Delta.ToolCalls = append(choice.Delta.ToolCalls, *call)
 				}
@@ -493,9 +641,8 @@ func streamResponseGeminiChat2OpenAI(geminiResponse *GeminiChatResponse) (*dto.C

 	var response dto.ChatCompletionsStreamResponse
 	response.Object = "chat.completion.chunk"
-	response.Model = "gemini"
 	response.Choices = choices
-	return &response, is_stop
+	return &response, isStop, hasImage
 }

 func GeminiChatStreamHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (*dto.OpenAIErrorWithStatusCode, *dto.Usage) {
@@ -503,60 +650,60 @@ func GeminiChatStreamHandler(c *gin.Context, resp *http.Response, info *relaycom
 	id := fmt.Sprintf("chatcmpl-%s", common.GetUUID())
 	createAt := common.GetTimestamp()
 	var usage = &dto.Usage{}
-	scanner := bufio.NewScanner(resp.Body)
-	scanner.Split(bufio.ScanLines)
+	var imageCount int

-	service.SetEventStreamHeaders(c)
-	for scanner.Scan() {
-		data := scanner.Text()
-		info.SetFirstResponseTime()
-		data = strings.TrimSpace(data)
-		if !strings.HasPrefix(data, "data: ") {
-			continue
-		}
-		data = strings.TrimPrefix(data, "data: ")
-		data = strings.TrimSuffix(data, "\"")
+	helper.StreamScannerHandler(c, resp, info, func(data string) bool {
 		var geminiResponse GeminiChatResponse
-		err := json.Unmarshal([]byte(data), &geminiResponse)
+		err := common.DecodeJsonStr(data, &geminiResponse)
 		if err != nil {
 			common.LogError(c, "error unmarshalling stream response: "+err.Error())
-			continue
+			return false
 		}

-		response, is_stop := streamResponseGeminiChat2OpenAI(&geminiResponse)
+		response, isStop, hasImage := streamResponseGeminiChat2OpenAI(&geminiResponse)
+		if hasImage {
+			imageCount++
+		}
 		response.Id = id
 		response.Created = createAt
 		response.Model = info.UpstreamModelName
-		// responseText += response.Choices[0].Delta.GetContentString()
 		if geminiResponse.UsageMetadata.TotalTokenCount != 0 {
 			usage.PromptTokens = geminiResponse.UsageMetadata.PromptTokenCount
 			usage.CompletionTokens = geminiResponse.UsageMetadata.CandidatesTokenCount
+			usage.CompletionTokenDetails.ReasoningTokens = geminiResponse.UsageMetadata.ThoughtsTokenCount
+			usage.TotalTokens = geminiResponse.UsageMetadata.TotalTokenCount
 		}
-		err = service.ObjectData(c, response)
+		err = helper.ObjectData(c, response)
 		if err != nil {
 			common.LogError(c, err.Error())
 		}
-		if is_stop {
-			response := service.GenerateStopResponse(id, createAt, info.UpstreamModelName, constant.FinishReasonStop)
-			service.ObjectData(c, response)
+		if isStop {
+			response := helper.GenerateStopResponse(id, createAt, info.UpstreamModelName, constant.FinishReasonStop)
+			helper.ObjectData(c, response)
 		}
-	}
+		return true
+	})

 	var response *dto.ChatCompletionsStreamResponse

-	usage.TotalTokens = usage.PromptTokens + usage.CompletionTokens
+	if imageCount != 0 {
+		if usage.CompletionTokens == 0 {
+			usage.CompletionTokens = imageCount * 258
+		}
+	}
+
 	usage.PromptTokensDetails.TextTokens = usage.PromptTokens
-	usage.CompletionTokenDetails.TextTokens = usage.CompletionTokens
+	usage.CompletionTokens = usage.TotalTokens - usage.PromptTokens

 	if info.ShouldIncludeUsage {
-		response = service.GenerateFinalUsageResponse(id, createAt, info.UpstreamModelName, *usage)
-		err := service.ObjectData(c, response)
+		response = helper.GenerateFinalUsageResponse(id, createAt, info.UpstreamModelName, *usage)
+		err := helper.ObjectData(c, response)
 		if err != nil {
 			common.SysError("send final response failed: " + err.Error())
 		}
 	}
-	service.Done(c)
-	resp.Body.Close()
+	helper.Done(c)
+	//resp.Body.Close()
 	return nil, usage
 }

@@ -592,6 +739,10 @@ func GeminiChatHandler(c *gin.Context, resp *http.Response, info *relaycommon.Re
 		CompletionTokens: geminiResponse.UsageMetadata.CandidatesTokenCount,
 		TotalTokens:      geminiResponse.UsageMetadata.TotalTokenCount,
 	}
+
+	usage.CompletionTokenDetails.ReasoningTokens = geminiResponse.UsageMetadata.ThoughtsTokenCount
+	usage.CompletionTokens = usage.TotalTokens - usage.PromptTokens
+
 	fullTextResponse.Usage = usage
 	jsonResponse, err := json.Marshal(fullTextResponse)
 	if err != nil {
@@ -602,3 +753,52 @@ func GeminiChatHandler(c *gin.Context, resp *http.Response, info *relaycommon.Re
 	_, err = c.Writer.Write(jsonResponse)
 	return nil, &usage
 }
+
+// GeminiEmbeddingHandler reads a Gemini embedding response from resp, rewrites
+// it as an OpenAI-style embedding response (a single item at index 0 carrying
+// the Gemini embedding values, with the model set to info.UpstreamModelName)
+// and writes it to the client with the upstream status code.
+//
+// It returns the usage as a *dto.Usage, or an OpenAI-style error when the body
+// cannot be read, decoded or re-encoded.
+func GeminiEmbeddingHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (usage any, err *dto.OpenAIErrorWithStatusCode) {
+	responseBody, readErr := io.ReadAll(resp.Body)
+	// Close before inspecting the read error so a failed read does not leak the body.
+	_ = resp.Body.Close()
+	if readErr != nil {
+		return nil, service.OpenAIErrorWrapper(readErr, "read_response_body_failed", http.StatusInternalServerError)
+	}
+
+	var geminiResponse GeminiEmbeddingResponse
+	if jsonErr := json.Unmarshal(responseBody, &geminiResponse); jsonErr != nil {
+		return nil, service.OpenAIErrorWrapper(jsonErr, "unmarshal_response_body_failed", http.StatusInternalServerError)
+	}
+
+	// calculate usage
+	// https://ai.google.dev/gemini-api/docs/pricing?hl=zh-cn#text-embedding-004
+	// Google has not yet clarified how embedding models will be billed
+	// refer to openai billing method to use input tokens billing
+	// https://platform.openai.com/docs/guides/embeddings#what-are-embeddings
+	embeddingUsage := &dto.Usage{
+		PromptTokens:     info.PromptTokens,
+		CompletionTokens: 0,
+		TotalTokens:      info.PromptTokens,
+	}
+
+	// convert to openai format response
+	openAIResponse := dto.OpenAIEmbeddingResponse{
+		Object: "list",
+		Data: []dto.OpenAIEmbeddingResponseItem{
+			{
+				Object:    "embedding",
+				Embedding: geminiResponse.Embedding.Values,
+				Index:     0,
+			},
+		},
+		Model: info.UpstreamModelName,
+		Usage: *embeddingUsage,
+	}
+
+	jsonResponse, jsonErr := json.Marshal(openAIResponse)
+	if jsonErr != nil {
+		return nil, service.OpenAIErrorWrapper(jsonErr, "marshal_response_failed", http.StatusInternalServerError)
+	}
+
+	c.Writer.Header().Set("Content-Type", "application/json")
+	c.Writer.WriteHeader(resp.StatusCode)
+	// Best effort: the status line is already sent, so a write failure cannot be reported.
+	_, _ = c.Writer.Write(jsonResponse)
+
+	return embeddingUsage, nil
+}
--- a/relay/channel/jina/adaptor.go
+++ b/relay/channel/jina/adaptor.go
@@ -3,18 +3,27 @@ package jina
 import (
 	"errors"
 	"fmt"
-	"github.com/gin-gonic/gin"
 	"io"
 	"net/http"
 	"one-api/dto"
 	"one-api/relay/channel"
+	"one-api/relay/channel/openai"
 	relaycommon "one-api/relay/common"
+	"one-api/relay/common_handler"
 	"one-api/relay/constant"
+
+	"github.com/gin-gonic/gin"
 )

 type Adaptor struct {
 }

+// ConvertClaudeRequest is not implemented for the Jina adaptor. It returns an
+// error instead of panicking, in line with ConvertAudioRequest in this file.
+func (a *Adaptor) ConvertClaudeRequest(*gin.Context, *relaycommon.RelayInfo, *dto.ClaudeRequest) (any, error) {
+	// TODO implement me
+	return nil, errors.New("not implemented")
+}
+
 func (a *Adaptor) ConvertAudioRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.AudioRequest) (io.Reader, error) {
 	//TODO implement me
 	return nil, errors.New("not implemented")
@@ -43,10 +52,15 @@ func (a *Adaptor) SetupRequestHeader(c *gin.Context, req *http.Header, info *rel
 	return nil
 }

-func (a *Adaptor) ConvertRequest(c *gin.Context, info *relaycommon.RelayInfo, request *dto.GeneralOpenAIRequest) (any, error) {
+func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayInfo, request *dto.GeneralOpenAIRequest) (any, error) {
 	return request, nil
 }

+// ConvertOpenAIResponsesRequest is not implemented for the Jina adaptor and
+// always returns a "not implemented" error.
+func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
+	// TODO implement me
+	return nil, errors.New("not implemented")
+}
+
 func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error) {
 	return channel.DoApiRequest(a, c, info, requestBody)
 }
@@ -61,9 +75,9 @@ func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.Rela

 func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (usage any, err *dto.OpenAIErrorWithStatusCode) {
 	if info.RelayMode == constant.RelayModeRerank {
-		err, usage = jinaRerankHandler(c, resp)
+		err, usage = common_handler.RerankHandler(c, info, resp)
 	} else if info.RelayMode == constant.RelayModeEmbeddings {
-		err, usage = jinaEmbeddingHandler(c, resp)
+		err, usage = openai.OpenaiHandler(c, resp, info)
 	}
 	return
 }
--- a/relay/channel/jina/relay-jina.go
+++ b/relay/channel/jina/relay-jina.go
@@ -1,60 +1 @@
 package jina
-
-import (
-	"encoding/json"
-	"github.com/gin-gonic/gin"
-	"io"
-	"net/http"
-	"one-api/dto"
-	"one-api/service"
-)
-
-func jinaRerankHandler(c *gin.Context, resp *http.Response) (*dto.OpenAIErrorWithStatusCode, *dto.Usage) {
-	responseBody, err := io.ReadAll(resp.Body)
-	if err != nil {
-		return service.OpenAIErrorWrapper(err, "read_response_body_failed", http.StatusInternalServerError), nil
-	}
-	err = resp.Body.Close()
-	if err != nil {
-		return service.OpenAIErrorWrapper(err, "close_response_body_failed", http.StatusInternalServerError), nil
-	}
-	var jinaResp dto.RerankResponse
-	err = json.Unmarshal(responseBody, &jinaResp)
-	if err != nil {
-		return service.OpenAIErrorWrapper(err, "unmarshal_response_body_failed", http.StatusInternalServerError), nil
-	}
-
-	jsonResponse, err := json.Marshal(jinaResp)
-	if err != nil {
-		return service.OpenAIErrorWrapper(err, "marshal_response_body_failed", http.StatusInternalServerError), nil
-	}
-	c.Writer.Header().Set("Content-Type", "application/json")
-	c.Writer.WriteHeader(resp.StatusCode)
-	_, err = c.Writer.Write(jsonResponse)
-	return nil, &jinaResp.Usage
-}
-
-func jinaEmbeddingHandler(c *gin.Context, resp *http.Response) (*dto.OpenAIErrorWithStatusCode, *dto.Usage) {
-	responseBody, err := io.ReadAll(resp.Body)
-	if err != nil {
-		return service.OpenAIErrorWrapper(err, "read_response_body_failed", http.StatusInternalServerError), nil
-	}
-	err = resp.Body.Close()
-	if err != nil {
-		return service.OpenAIErrorWrapper(err, "close_response_body_failed", http.StatusInternalServerError), nil
-	}
-	var jinaResp dto.OpenAIEmbeddingResponse
-	err = json.Unmarshal(responseBody, &jinaResp)
-	if err != nil {
-		return service.OpenAIErrorWrapper(err, "unmarshal_response_body_failed", http.StatusInternalServerError), nil
-	}
-
-	jsonResponse, err := json.Marshal(jinaResp)
-	if err != nil {
-		return service.OpenAIErrorWrapper(err, "marshal_response_body_failed", http.StatusInternalServerError), nil
-	}
-	c.Writer.Header().Set("Content-Type", "application/json")
-	c.Writer.WriteHeader(resp.StatusCode)
-	_, err = c.Writer.Write(jsonResponse)
-	return nil, &jinaResp.Usage
-}
--- a/relay/channel/mistral/adaptor.go
+++ b/relay/channel/mistral/adaptor.go
@@ -2,18 +2,25 @@ package mistral

 import (
 	"errors"
-	"github.com/gin-gonic/gin"
 	"io"
 	"net/http"
 	"one-api/dto"
 	"one-api/relay/channel"
 	"one-api/relay/channel/openai"
 	relaycommon "one-api/relay/common"
+
+	"github.com/gin-gonic/gin"
 )

 type Adaptor struct {
 }

+// ConvertClaudeRequest is not implemented for the Mistral adaptor. It returns
+// an error instead of panicking, in line with ConvertAudioRequest in this file.
+func (a *Adaptor) ConvertClaudeRequest(*gin.Context, *relaycommon.RelayInfo, *dto.ClaudeRequest) (any, error) {
+	// TODO implement me
+	return nil, errors.New("not implemented")
+}
+
 func (a *Adaptor) ConvertAudioRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.AudioRequest) (io.Reader, error) {
 	//TODO implement me
 	return nil, errors.New("not implemented")
@@ -37,13 +44,11 @@ func (a *Adaptor) SetupRequestHeader(c *gin.Context, req *http.Header, info *rel
 	return nil
 }

-func (a *Adaptor) ConvertRequest(c *gin.Context, info *relaycommon.RelayInfo, request *dto.GeneralOpenAIRequest) (any, error) {
+func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayInfo, request *dto.GeneralOpenAIRequest) (any, error) {
 	if request == nil {
 		return nil, errors.New("request is nil")
 	}
-	mistralReq := requestOpenAI2Mistral(*request)
-	//common.LogJson(c, "body", mistralReq)
-	return mistralReq, nil
+	return requestOpenAI2Mistral(request), nil
 }

 func (a *Adaptor) ConvertRerankRequest(c *gin.Context, relayMode int, request dto.RerankRequest) (any, error) {
@@ -55,6 +60,10 @@ func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.Rela
 	return nil, errors.New("not implemented")
 }

+// ConvertOpenAIResponsesRequest is not implemented for the Mistral adaptor and
+// always returns a "not implemented" error.
+func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
+	// TODO implement me
+	return nil, errors.New("not implemented")
+}

 func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error) {
 	return channel.DoApiRequest(a, c, info, requestBody)
@@ -64,7 +73,7 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycom
 	if info.IsStream {
 		err, usage = openai.OaiStreamHandler(c, resp, info)
 	} else {
-		err, usage = openai.OpenaiHandler(c, resp, info.PromptTokens, info.UpstreamModelName)
+		err, usage = openai.OpenaiHandler(c, resp, info)
 	}
 	return
 }
--- a/Show More
+++ b/Show More