Compare commits

...

131 Commits

Author SHA1 Message Date
thirking
8d67c571e4 fix: remove unnecessary unescapeMapOrSlice call in Gemini relay
The JSON serialization/deserialization already handles escape characters
correctly, so the unescapeMapOrSlice function is redundant.
2026-02-03 11:47:45 +08:00
Seefs
cbebd15692 fix: vertex maas api addr (#2810)
* fix: vertex maas api addr
2026-02-03 00:09:45 +08:00
Calcium-Ion
afa9efa037 feat: default enable channel affinity (#2809) 2026-02-03 00:05:23 +08:00
Seefs
760fbeb6e6 Merge pull request #2811 from seefs001/fix/openrouter-claude-cache-usage
fix: openrouter claude cache usage
2026-02-03 00:03:19 +08:00
Seefs
ee0487806c Merge pull request #2764 from feitianbubu/pr/4baa0f472b6f35ce3426cd8ea0d13f38e2f4eb81
feat: auto-adapt video modal
2026-02-02 22:28:12 +08:00
Seefs
c50eff53d4 feat: default enable channel affinity 2026-02-02 22:21:49 +08:00
CaIon
16d8055397 feat: add support for jfif image format in file decoder 2026-02-02 21:36:08 +08:00
Calcium-Ion
5d2e45a147 Merge pull request #2803 from seefs001/feature/qwen-responses
feat: /v1/responses qwen3 max && perplexity
2026-02-02 21:22:25 +08:00
Calcium-Ion
1788fb290e fix: claude panic (#2804) 2026-02-02 21:22:07 +08:00
Seefs
4978fead3a Merge pull request #2805 from lanfunoe/fix/make-channel-Host-override-take-effect
fix: make channel Host override take effect
2026-02-02 21:20:31 +08:00
Seefs
57b9905539 fix: claude panic 2026-02-02 15:03:30 +08:00
lanfunoe
0d5ae12ebc fix: make channel Host override take effect 2026-02-02 14:59:36 +08:00
Seefs
b6dc75cb86 feat: /v1/responses perplexity 2026-02-02 14:48:45 +08:00
Seefs
2c29993cfc feat: /v1/responses qwen3 max 2026-02-02 14:41:27 +08:00
Seefs
540cf6c991 fix: channel affinity (#2799)
* fix: channel affinity log styles

* fix: Issue with incorrect data storage when switching key sources

* feat: support not retrying after a single rule configuration fails

* fix: render channel affinity tooltip as multiline content

* feat: channel affinity cache hit

* fix: prevent ChannelAffinityUsageCacheModal infinite loading and hide data before fetch

* chore: format backend with gofmt and frontend with prettier/eslint autofix
2026-02-02 14:37:31 +08:00
Seefs
6c0e9403a2 Merge pull request #2759 from KiGamji/fix-group-colors
fix(ui): use distinct color palette for group tags
2026-02-02 13:24:15 +08:00
Seefs
76050e66ca Merge pull request #2798 from RedwindA/feat/GeminiCacheBilling
feat(gemini): support cached token billing
2026-02-02 13:21:10 +08:00
Seefs
99745e7e38 Merge pull request #2783 from feitianbubu/pr/9f3276d5637873b7b97dbdbd98ade9b372bd6f63
feat: CodeViewer click link and auto wrap
2026-02-02 13:17:01 +08:00
Seefs
621938699b Merge pull request #2733 from feitianbubu/pr/92eee074a8105d7331d0987d96dc78bae181e331
feat: task pre consume modelPrice default use setting value
2026-02-02 13:16:13 +08:00
Seefs
2d9b408fda Merge pull request #2756 from feitianbubu/pr/bae1c6025ed4c65bf72572ff8684dd7ef068e576
feat: doubao add first and last image to video
2026-02-02 13:15:47 +08:00
Seefs
63b642f39a Merge pull request #2745 from mehunk/feat/custom-stripe-url
feat: Support customizing the success and cancel url of Stripe.
2026-02-02 13:15:05 +08:00
CaIon
ff41e65d9b fix: FreeBSD build failure due to type mismatch in Statfs_t fields (#2793)
Explicitly cast Blocks, Bavail, and Bfree to uint64 for cross-platform compatibility,
as these fields are int64 on FreeBSD but uint64 on Linux.
2026-02-02 00:36:36 +08:00
RedwindA
e3f96120bc feat(gemini): support cached token billing 2026-02-01 22:50:47 +08:00
feitianbubu
ac8a92655e feat: CodeViewer click link and auto wrap 2026-01-30 11:18:58 +08:00
Calcium-Ion
1c983a04d3 feat: disk request body cache (#2780)
* feat: 引入通用 HTTP BodyStorage/DiskCache 缓存配置与管理

- 新增 common/body_storage.go 提供 HTTP 请求体存储抽象和文件缓存能力
- 增加 common/disk_cache_config.go 支持全局磁盘缓存配置
- main.go 挂载缓存初始化流程
- 新增和补充 controller/performance.go (及 unix/windows) 用于缓存性能监控接口
- middleware/body_cleanup.go 自动清理缓存文件
- router 挂载相关接口
- 前端 settings 页面新增性能监控设置 PerformanceSetting
- 优化缓存开关状态和模块热插拔能力
- 其他相关文件同步适配缓存扩展

* fix: 修复 BodyStorage 并发安全和错误处理问题

- 修复 diskStorage.Close() 竞态条件,先获取锁再执行 CAS
- 为 memoryStorage 添加互斥锁和 closed 状态检查
- 修复 CreateBodyStorageFromReader 在磁盘存储失败时的回退逻辑
- 添加缓存命中统计调用 (IncrementDiskCacheHits/IncrementMemoryCacheHits)
- 修复 gin.go 中 Seek 错误被忽略的问题
- 在 api-router 添加 BodyStorageCleanup 中间件
- 修复前端 formatBytes 对异常值的处理

Co-authored-by: Cursor <cursoragent@cursor.com>

---------

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-01-30 01:00:49 +08:00
Calcium-Ion
1e9b567fc0 Merge pull request #2766 from seefs001/fix/response-compact-price
fix: /v1/responses/compact default billing
2026-01-29 23:24:31 +08:00
Calcium-Ion
8d5ac479f5 Merge pull request #2765 from seefs001/fix/2763
fix: remove disable_parallel_tool_use if tool_choice=none
2026-01-29 23:24:11 +08:00
Calcium-Ion
9d0fde91f7 Merge pull request #2779 from RedwindA/feat/gemini2oaiSTOP
feat(gemini): map OpenAI stop to Gemini stopSequences
2026-01-29 23:21:35 +08:00
RedwindA
826d4b9190 feat(gemini): map OpenAI stop to Gemini stopSequences 2026-01-29 21:30:27 +08:00
CaIon
b44c1304a0 Update readme docs links 2026-01-28 22:50:47 +08:00
CaIon
8464363855 Update readme 2026-01-28 22:38:14 +08:00
Seefs
b1842b908e fix: /v1/responses/compact default billing 2026-01-28 14:22:34 +08:00
Seefs
41b33e85db fix: disable_parallel_tool_use parameter should be removed for tool_choice=none: 2026-01-28 13:31:14 +08:00
CaIon
ef66cd864c docs: add HelloGitHub and Product Hunt badges to README 2026-01-28 13:29:22 +08:00
feitianbubu
8c4d2f2c2f feat: auto-adapt video modal 2026-01-28 12:55:08 +08:00
KiGamji
ca81de39c9 fix(ui): use distinct color palette for group tags 2026-01-27 17:27:35 +05:00
feitianbubu
df465ca8fd feat: doubao add first and last image to video 2026-01-27 17:33:00 +08:00
mehunk
65fd33e3ef feat: Add trusted redirect domains. 2026-01-26 22:18:04 +09:00
Calcium-Ion
d72cfc8590 Add link to GitHub Security Advisories for reporting
Updated the reporting method for security issues to include a link to the draft security advisory.
2026-01-26 20:43:12 +08:00
CaIon
04de79ac43 feat: add CODE_OF_CONDUCT and SECURITY.md files for community guidelines and vulnerability reporting 2026-01-26 20:37:50 +08:00
Calcium-Ion
e74b92276e Update LICENSE file 2026-01-26 20:24:28 +08:00
Seefs
478f1871d6 feat: grok Usage Guidelines Violation Fee (#2753)
* feat: grok Usage Guidelines Violation Fee ui setting

* feat: grok Usage Guidelines Violation Fee consume log

* fix: grok Usage Guidelines Violation Fee log detail
2026-01-26 20:20:30 +08:00
Seefs
cc1da72d10 feat: openai response /v1/response/compact (#2644)
* feat: openai response /v1/response/compact

* feat: /v1/response/compact bill

* feat: /v1/response/compact

* feat: /v1/responses/compact -> codex channel

* feat: /v1/responses/compact -> codex channel

* feat: /v1/responses/compact -> codex channel

* feat: codex channel default models

* feat: compact model price

* feat: /v1/responses/comapct test
2026-01-26 20:20:16 +08:00
Seefs
d7d3a2f763 feat: channel affinity (#2669)
* feat: channel affinity

* feat: channel affinity -> model setting

* fix: channel affinity

* feat: channel affinity op

* feat: channel_type setting

* feat: clean

* feat: cache supports both memory and Redis.

* feat: Optimise ui/ux

* feat: Optimise ui/ux

* feat: Optimise codex usage ui/ux

* feat: Optimise ui/ux

* feat: Optimise ui/ux

* feat: Optimise ui/ux

* feat: If the affinitized channel fails and a retry succeeds on another channel, update the affinity to the successful channel
2026-01-26 19:57:41 +08:00
Seefs
7da04be52b fix: test using the correct path for rerank (#2736)
* fix: test using the correct path for rerank.

* fix: The `input` parameter for testing responses uses an array to accommodate certain channels, such as Codex, which are incompatible with single strings.
2026-01-26 19:57:15 +08:00
Seefs
ac8f17c827 Merge pull request #2735 from seefs001/feature/header-throughpass
feat: header passthrough
2026-01-26 19:56:51 +08:00
Seefs
6b85114148 Merge pull request #2610 from Bliod-Cook/main 2026-01-26 17:30:02 +08:00
Seefs
35c055e1c1 Merge pull request #2749 from wans10/main 2026-01-26 16:50:44 +08:00
Calcium-Ion
3722c63c18 Merge pull request #2742 from seefs001/fix/pr-2540
feat(gemini): 支持 tool_choice 参数转换,优化多个渠道错误处理
2026-01-26 15:12:09 +08:00
Bliod-Cook
d77bb794d0 Merge branch 'main' into pr/Bliod-Cook/2610 2026-01-26 05:23:51 +00:00
wans10
f00a25c7ec fix(topup): 修复用户配额获取逻辑
- 移除用户ID检查条件,始终获取最新用户数据
- 确保余额等统计信息准确性
- 解决用户状态更新不及时的问题
2026-01-26 09:34:09 +08:00
mehunk
d10f9126a4 docs: Update the comment of the functions. 2026-01-25 20:40:51 +09:00
mehunk
94076def9c feat: Support customizing the success and cancel url of Stripe. 2026-01-25 18:23:51 +09:00
Seefs
df43193600 Merge pull request #2738 from Li-Xingyu/main 2026-01-25 16:42:36 +08:00
Seefs
7d64e5908c Revert "feat: xai refusal reason"
This reverts commit fd25b60e7a.
2026-01-25 16:38:58 +08:00
Seefs
fd25b60e7a feat: xai refusal reason 2026-01-25 16:07:37 +08:00
Seefs
48efa1ddb9 fix: reason convert 2026-01-25 15:31:23 +08:00
Seefs
7afa476770 feat: claude refusal reason 2026-01-25 15:21:31 +08:00
Seefs
dda40ef62a feat: logs show reject reason 2026-01-25 15:00:30 +08:00
Seefs
00c5d9ffdf feat: logs show reject reason 2026-01-25 14:52:18 +08:00
Li-Xingyu
ec826e67b5 feat: enhance Authorization header handling with Header Override support 2026-01-25 14:36:37 +08:00
Seefs
68d9a227dd fix: Charge locally even if there's an error 2026-01-25 14:32:51 +08:00
Seefs
d5b3d4b990 Merge branch 'upstream-main' into fix/pr-2540
# Conflicts:
#	relay/channel/gemini/relay-gemini.go
2026-01-25 14:14:05 +08:00
Seefs
c49f820e24 Merge pull request #2693 from daggeryu/main
fix request pass-through aws channels can't test
2026-01-25 12:10:34 +08:00
Li-Xingyu
1e0ba95dc0 feat: enhance Authorization header handling with Header Override support 2026-01-25 04:28:11 +08:00
feitianbubu
9c91b8fb18 feat: task pre consume modelPrice default use setting value 2026-01-24 15:32:06 +08:00
Seefs
51751c9101 Merge pull request #2713 from feitianbubu/pr/0eba660886e20b852bff73ff4ecd52d73a447981 2026-01-22 19:46:17 +08:00
Calcium-Ion
d841481e82 Merge pull request #2717 from xyfacai/feat/qwen-config
feat(qwen): support qwen image sync image model config
2026-01-22 18:50:39 +08:00
Xyfacai
cf745623f8 feat(qwen): support qwen image sync image model config 2026-01-22 17:46:36 +08:00
feitianbubu
151d7bedae feat: requestId time string use UTC 2026-01-22 08:58:35 +08:00
Calcium-Ion
b28ac71722 Merge pull request #2703 from seefs001/feature/log-conversion-info
feat: log shows request conversion
2026-01-21 23:48:08 +08:00
Calcium-Ion
824c7a2cd6 Merge pull request #2702 from seefs001/fix/ali-baseurl
fix: replace Alibaba's Claude-compatible url with new url
2026-01-21 23:47:59 +08:00
Calcium-Ion
702c05c7b1 Merge pull request #2701 from seefs001/fix/gemini-tool-call-index
fix: calls to multiple tools in gemini all return index=0
2026-01-21 23:47:48 +08:00
Calcium-Ion
c08a9348b3 Merge pull request #2663 from seefs001/feature/retry-status-code
feat: customizable automatic retry status codes
2026-01-21 23:47:31 +08:00
Calcium-Ion
1af0269d78 Merge pull request #2684 from seefs001/fix/codex-rm-max-output-tokens
fix: codex Unsupported parameter: max_output_tokens
2026-01-21 23:47:17 +08:00
Calcium-Ion
4a6e423120 Merge pull request #2676 from seefs001/fix/aff-login-method
fix: the login method cannot be displayed under the aff link.
2026-01-21 23:47:05 +08:00
Calcium-Ion
e25478b10b Merge pull request #2668 from seefs001/feature/ignore-tls-config
feat: TLS_INSECURE_SKIP_VERIFY env
2026-01-21 23:46:52 +08:00
Calcium-Ion
089dd8aa45 Merge pull request #2667 from seefs001/fix/gemini-whitelist-field
fix: openAI function to gemini function field adjusted to whitelist mode
2026-01-21 23:45:57 +08:00
Calcium-Ion
9f7ec08106 Merge pull request #2710 from QuantumNous/revert-2691-pr/5f73324da8aebf6a98269c242dda05da3ea6d7bc
Revert "fix: video content api Priority use url field"
2026-01-21 23:39:59 +08:00
Seefs
fdaa573c11 Revert "fix: video content api Priority use url field" 2026-01-21 23:38:47 +08:00
Seefs
46aae7358f fix: codex rm Temperature 2026-01-21 23:22:31 +08:00
Seefs
642aa092f6 Merge pull request #2688 from feitianbubu/pr/c579c7755c6b03e207853d06cc695ca7903388c9 2026-01-21 22:53:21 +08:00
Seefs
c24e68e0a6 Merge pull request #2691 from feitianbubu/pr/5f73324da8aebf6a98269c242dda05da3ea6d7bc 2026-01-21 22:52:36 +08:00
Seefs
bc2569b649 Merge pull request #2696 from Bliod-Cook/email-verification-fix 2026-01-21 22:51:06 +08:00
Seefs
5c01b77357 feat: optimized display 2026-01-21 00:17:20 +08:00
Seefs
3728fbdbf5 feat: optimized display 2026-01-21 00:12:41 +08:00
Seefs
6582020c80 feat: optimized display 2026-01-21 00:01:36 +08:00
Seefs
d4582ede98 feat: log shows request conversion 2026-01-20 23:43:29 +08:00
Seefs
9037d992be fix: Only models with the "qwen" designation can use the Claude-compatible interface; others require conversion. 2026-01-20 22:56:02 +08:00
Seefs
63921912dd fix: replace Alibaba's Claude-compatible interface with the new interface 2026-01-20 22:36:36 +08:00
Seefs
57ed2b3dae Merge pull request #2690 from feitianbubu/pr/0d926e8180210062b85a4ee06a0b324ba9ec91f6 2026-01-20 22:21:21 +08:00
Seefs
809a80815e fix: issue where consecutive calls to multiple tools in gemini all returned an index of 0 2026-01-20 22:03:19 +08:00
Bliod
c149c9cfcf fix: fix email send 2026-01-20 04:29:56 +00:00
daggeryu
f538336f7f fix request pass-through aws channels can't test
common.GetRequestBody(c) read bod is null
2026-01-20 10:08:56 +08:00
CaIon
d2df342f4e fix: update abortWithOpenAiMessage function to use types.ErrorCode 2026-01-19 17:35:28 +08:00
feitianbubu
fac4a5ffdd fix: video content api Priority use url field 2026-01-19 16:16:11 +08:00
feitianbubu
3b01cb3f41 fix: update warning threshold label from '5$' to '2$' 2026-01-19 12:58:10 +08:00
feitianbubu
575574f068 fix: jimeng i2v support multi image by metadata 2026-01-19 11:34:46 +08:00
Seefs
76164e951e fix: codex Unsupported parameter: max_output_tokens 2026-01-17 21:42:28 +08:00
Seefs
f96615110d fix: the login method cannot be displayed under the aff link. 2026-01-15 23:19:51 +08:00
Seefs
8ef99f4728 Merge pull request #2670 from seefs001/fix/chat2claude-log 2026-01-15 15:35:17 +08:00
Seefs
1d8a11b37a fix: for chat-based calls to the Claude model, tagging is required. Using Claude's rendering logs, the two approaches handle input rendering differently. 2026-01-15 15:28:02 +08:00
Seefs
af2d6ad8d2 feat: TLS_INSECURE_SKIP_VERIFY env 2026-01-15 14:43:53 +08:00
Seefs
ea802f2297 fix: openAI function to gemini function field adjusted to whitelist mode 2026-01-15 13:26:42 +08:00
Seefs
e5cb9ac03a feat: codex channel (#2652)
* feat: codex channel

* feat: codex channel

* feat: codex oauth flow

* feat: codex refresh cred

* feat: codex usage

* fix: codex err message detail

* fix: codex setting ui

* feat: codex refresh cred task

* fix: import err

* fix: codex store must be false

* fix: chat -> responses tool call

* fix: chat -> responses tool call
2026-01-14 22:29:43 +08:00
Seefs
ca11fcbabd Merge pull request #2632 from feitianbubu/pr/add-doubao-video-1.5 2026-01-14 16:33:30 +08:00
Seefs
4bffc249d6 feat: customizable automatic retry status codes 2026-01-14 14:34:12 +08:00
Seefs
6169f46cc6 Merge pull request #2627 from seefs001/feature/channel-test-param-override
feat: channel testing supports parameter overriding
2026-01-12 18:49:05 +08:00
Calcium-Ion
d73a0440b9 Merge pull request #2642 from seefs001/fix/gemini-propertyNames
fix: clean propertyNames for gemini function
2026-01-12 18:48:24 +08:00
Seefs
688280b3c3 fix: chat2response setting ui (#2643)
* fix: setting ui

* fix: rm global.chat_completions_to_responses_policy

* fix: rm global.chat_completions_to_responses_policy
2026-01-12 18:48:05 +08:00
Seefs
41da848c56 Merge pull request #2647 from seefs001/feature/status-code-auto-disable
feat: status code auto-disable configuration
2026-01-12 18:47:45 +08:00
Seefs
22b9438588 Merge pull request #2646 from deanxv/fix/gemini-unmarshal 2026-01-12 12:33:01 +08:00
dean
4ed4a7659a fix: support snake_case fields in GeminiChatGenerationConfig 2026-01-12 12:23:24 +08:00
Seefs
138fcd2327 fix: clean propertyNames for gemini function 2026-01-11 23:34:18 +08:00
Seefs
62b796fa6a feat: /v1/chat/completion -> /v1/response (#2629)
* feat: /v1/chat/completion -> /v1/response
2026-01-11 21:38:07 +08:00
feitianbubu
a4f28f0b79 feat: add doubao video 1.5 2026-01-10 22:23:31 +08:00
Seefs
1a5c8f3c35 Merge pull request #2615 from RedwindA/feat/GeminiNativeFetchModels
fix(gemini): fetch model list via native v1beta/models endpoint
2026-01-09 23:54:36 +08:00
Seefs
e1d43a820f Merge pull request #2619 from RedwindA/fix/disableMinimaxFetchModels
fix: remove Minimax from FETCHABLE channels
2026-01-09 21:42:25 +08:00
RedwindA
c975b4cfa8 fix(minimax): 添加 MiniMax-M2 系列模型到 ModelList 2026-01-09 20:46:47 +08:00
RedwindA
b51d1e2f78 fix: remove Minimax from FETCHABLE channels 2026-01-09 20:37:12 +08:00
RedwindA
07e77b3c6f refactor(gemini): 更新 GeminiModelsResponse 以使用 dto.GeminiModel 类型 2026-01-09 18:08:11 +08:00
RedwindA
c2464fc877 fix(gemini): fetch model list via native v1beta/models endpoint
Use the native Gemini Models API (/v1beta/models) instead of the OpenAI-compatible
path when listing models for Gemini channels, improving compatibility with
third-party Gemini-format providers that don't implement OpenAI routes.

- Add paginated model listing with timeout and optional proxy support
- Select an enabled key for multi-key Gemini channels
2026-01-09 18:00:40 +08:00
Bliod-Cook
b6313a1354 Merge branch 'QuantumNous:main' into main 2026-01-08 23:30:45 +08:00
Bliod
d4edec6ac2 feat: select model in visualized modl mapping 2026-01-08 15:26:59 +00:00
CaIon
93012631d1 docs: update readme 2026-01-07 20:52:27 +08:00
Xyfacai
1ab0c35540 Merge pull request #2590 from xyfacai/fix/max-body-limit
fix: 设置默认max req body 为128MB
2026-01-06 21:47:12 +08:00
Xyfacai
022fbcee04 Merge pull request #2588 from PowerfulBart/fix/auto-group-task-logging
fix(task): 修复使用 auto 分组时 Task Relay 不记录日志和不扣费的问题
2026-01-06 11:14:49 +08:00
郑伯涛
aed1900364 fix(task): 修复使用 auto 分组时 Task Relay 不记录日志和不扣费的问题
问题描述:
- 使用 auto 分组的令牌调用 /v1/videos 等 Task 接口时,虽然任务能成功创建,
  但使用日志不显示记录,且不会扣费

根本原因:
- Distribute 中间件在选择渠道后,会将实际选中的分组存储在 ContextKeyAutoGroup 中
- 但 RelayTaskSubmit 函数没有从 context 中读取这个值来更新 info.UsingGroup
- 导致 info.UsingGroup 始终是 "auto" 而不是实际选中的分组(如 "sora2逆")
- 当 auto 分组的倍率配置为 0 时,quota 计算结果为 0
- 日志记录条件 "if quota != 0" 不满足,导致日志不记录、不扣费

修复方案:
- 在 RelayTaskSubmit 函数中计算分组倍率之前,添加从 ContextKeyAutoGroup
  获取实际分组的逻辑
- 使用安全的类型断言,避免潜在的 panic 风险

影响范围:
- 仅影响 Task Relay 流程(/v1/videos, /suno, /kling 等接口)
- 不影响使用具体分组令牌的调用
- 不影响其他 Relay 类型(chat/completions 等已有类似处理逻辑)
2026-01-06 00:16:50 +08:00
Seefs
f8938a8f78 Merge pull request #2587 from xiangsx/main 2026-01-05 23:36:16 +08:00
xiangsx
e13459f3a1 feat: add regex pattern to mask API keys in sensitive information 2026-01-05 22:44:11 +08:00
Your Name
b6a25d9f0f feat(gemini): 支持 tool_choice 参数转换,优化错误处理 2025-12-27 18:33:09 +08:00
203 changed files with 14430 additions and 2353 deletions

View File

@@ -57,6 +57,9 @@
# 流模式无响应超时时间,单位秒,如果出现空补全可以尝试改为更大值
# STREAMING_TIMEOUT=300
# TLS / HTTP 跳过验证设置
# TLS_INSECURE_SKIP_VERIFY=false
# Gemini 识别图片 最大图片数量
# GEMINI_VISION_MAX_IMAGE_NUM=16
@@ -82,3 +85,8 @@ LINUX_DO_USER_ENDPOINT=https://connect.linux.do/api/user
# 节点类型
# 如果是主节点则为master
# NODE_TYPE=master
# 可信任重定向域名列表(逗号分隔,支持子域名匹配)
# 用于验证支付成功/取消回调URL的域名安全性
# 示例: example.com,myapp.io 将允许 example.com, sub.example.com, myapp.io 等
# TRUSTED_REDIRECT_DOMAINS=example.com,myapp.io

83
.github/CODE_OF_CONDUCT.md vendored Normal file
View File

@@ -0,0 +1,83 @@
# Contributor Covenant Code of Conduct
## Our Pledge
We as members, contributors, and leaders pledge to make participation in our community a harassment-free experience for everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual orientation.
We pledge to act and interact in ways that contribute to an open, welcoming, diverse, inclusive, and healthy community.
## Our Standards
Examples of behavior that contributes to a positive environment for our community include:
- Demonstrating empathy and kindness toward other people
- Being respectful of differing opinions, viewpoints, and experiences
- Giving and gracefully accepting constructive feedback
- Accepting responsibility and apologizing to those affected by our mistakes, and learning from the experience
- Focusing on what is best not just for us as individuals, but for the overall community
Examples of unacceptable behavior include:
- The use of sexualized language or imagery, and sexual attention or advances of any kind
- Trolling, insulting or derogatory comments, and personal or political attacks
- Public or private harassment
- Publishing others' private information, such as a physical or email address, without their explicit permission
- Other conduct which could reasonably be considered inappropriate in a professional setting
## Enforcement Responsibilities
Community leaders are responsible for clarifying and enforcing our standards of acceptable behavior and will take appropriate and fair corrective action in response to any behavior that they deem inappropriate, threatening, offensive, or harmful.
Community leaders have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, and will communicate reasons for moderation decisions when appropriate.
## Scope
This Code of Conduct applies within all community spaces, and also applies when an individual is officially representing the community in public spaces. Examples of representing our community include using an official e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event.
## Enforcement
Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at:
**Email:** support@quantumnous.com
All complaints will be reviewed and investigated promptly and fairly.
All community leaders are obligated to respect the privacy and security of the reporter of any incident.
## Enforcement Guidelines
Community leaders will follow these Community Impact Guidelines in determining the consequences for any action they deem in violation of this Code of Conduct:
### 1. Correction
**Community Impact:** Use of inappropriate language or other behavior deemed unprofessional or unwelcome in the community.
**Consequence:** A private, written warning from community leaders, providing clarity around the nature of the violation and an explanation of why the behavior was inappropriate. A public apology may be requested.
### 2. Warning
**Community Impact:** A violation through a single incident or series of actions.
**Consequence:** A warning with consequences for continued behavior. No interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, for a specified period of time. This includes avoiding interactions in community spaces as well as external channels like social media. Violating these terms may lead to a temporary or permanent ban.
### 3. Temporary Ban
**Community Impact:** A serious violation of community standards, including sustained inappropriate behavior.
**Consequence:** A temporary ban from any sort of interaction or public communication with the community for a specified period of time. No public or private interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, is allowed during this period. Violating these terms may lead to a permanent ban.
### 4. Permanent Ban
**Community Impact:** Demonstrating a pattern of violation of community standards, including sustained inappropriate behavior, harassment of an individual, or aggression toward or disparagement of classes of individuals.
**Consequence:** A permanent ban from any sort of public interaction within the community.
## Attribution
This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 2.0, available at https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
Community Impact Guidelines were inspired by [Mozilla's code of conduct enforcement ladder](https://github.com/mozilla/diversity).
For answers to common questions about this code of conduct, see the FAQ at https://www.contributor-covenant.org/faq. Translations are available at https://www.contributor-covenant.org/translations.
[homepage]: https://www.contributor-covenant.org

86
.github/SECURITY.md vendored Normal file
View File

@@ -0,0 +1,86 @@
# Security Policy
## Supported Versions
We provide security updates for the following versions:
| Version | Supported |
| ------- | ------------------ |
| Latest | :white_check_mark: |
| Older | :x: |
We strongly recommend that users always use the latest version for the best security and features.
## Reporting a Vulnerability
We take security vulnerability reports very seriously. If you discover a security issue, please follow the steps below for responsible disclosure.
### How to Report
**Do NOT** report security vulnerabilities in public GitHub Issues.
To report a security issue, please use the GitHub Security Advisories tab to "[Open a draft security advisory](https://github.com/QuantumNous/new-api/security/advisories/new)". This is the preferred method as it provides a built-in private communication channel.
Alternatively, you can report via email:
- **Email:** support@quantumnous.com
- **Subject:** `[SECURITY] Security Vulnerability Report`
### What to Include
To help us understand and resolve the issue more quickly, please include the following information in your report:
1. **Vulnerability Type** - Brief description of the vulnerability (e.g., SQL injection, XSS, authentication bypass, etc.)
2. **Affected Component** - Affected file paths, endpoints, or functional modules
3. **Reproduction Steps** - Detailed steps to reproduce
4. **Impact Assessment** - Potential security impact and severity assessment
5. **Proof of Concept** - If possible, provide proof of concept code or screenshots (do not test in production environments)
6. **Suggested Fix** - If you have a fix suggestion, please provide it
7. **Your Contact Information** - So we can communicate with you
## Response Process
1. **Acknowledgment:** We will acknowledge receipt of your report within **48 hours**.
2. **Initial Assessment:** We will complete an initial assessment and communicate with you within **7 days**.
3. **Fix Development:** Based on the severity of the vulnerability, we will prioritize developing a fix.
4. **Security Advisory:** After the fix is released, we will publish a security advisory (if applicable).
5. **Credit:** If you wish, we will credit your contribution in the security advisory.
## Security Best Practices
When deploying and using New API, we recommend following these security best practices:
### Deployment Security
- **Use HTTPS:** Always serve over HTTPS to ensure transport layer security
- **Firewall Configuration:** Only open necessary ports and restrict access to management interfaces
- **Regular Updates:** Update to the latest version promptly to receive security patches
- **Environment Isolation:** Use separate database and Redis instances in production
### API Key Security
- **Key Protection:** Do not expose API keys in client-side code or public repositories
- **Least Privilege:** Create different API keys for different purposes, following the principle of least privilege
- **Regular Rotation:** Rotate API keys regularly
- **Monitor Usage:** Monitor API key usage and detect anomalies promptly
### Database Security
- **Strong Passwords:** Use strong passwords to protect database access
- **Network Isolation:** Database should not be directly exposed to the public internet
- **Regular Backups:** Regularly backup the database and verify backup integrity
- **Access Control:** Limit database user permissions, following the principle of least privilege
## Security-Related Configuration
Please ensure the following security-related environment variables and settings are properly configured:
- `SESSION_SECRET` - Use a strong random string
- `SQL_DSN` - Ensure database connection uses secure configuration
- `REDIS_CONN_STRING` - If using Redis, ensure secure connection
For detailed configuration instructions, please refer to the project documentation.
## Disclaimer
This project is provided "as is" without any express or implied warranty. Users should assess the security risks of using this software in their environment.

692
LICENSE
View File

@@ -1,103 +1,661 @@
# **New API 许可协议 (Licensing)**
GNU AFFERO GENERAL PUBLIC LICENSE
Version 3, 19 November 2007
本项目采用**基于使用场景的双重许可 (Usage-Based Dual Licensing)** 模式。
Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
**核心原则:**
Preamble
- **默认许可:** 本项目默认在 **GNU Affero 通用公共许可证 v3.0 (AGPLv3)** 下提供。任何用户在遵守 AGPLv3 条款和下述附加限制的前提下,均可免费使用。
- **商业许可:** 在特定商业场景下,或当您希望获得 AGPLv3 之外的权利时,**必须**获取**商业许可证 (Commercial License)**。
The GNU Affero General Public License is a free, copyleft license for
software and other kinds of works, specifically designed to ensure
cooperation with the community in the case of network server software.
---
The licenses for most software and other practical works are designed
to take away your freedom to share and change the works. By contrast,
our General Public Licenses are intended to guarantee your freedom to
share and change all versions of a program--to make sure it remains free
software for all its users.
## **1. 开源许可证 (Open Source License): AGPLv3 - 适用于基础使用**
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
them if you wish), that you receive source code or can get it if you
want it, that you can change the software or use pieces of it in new
free programs, and that you know you can do these things.
- 在遵守 **AGPLv3** 条款的前提下,您可以自由地使用、修改和分发 New API。AGPLv3 的完整文本可以访问 [https://www.gnu.org/licenses/agpl-3.0.html](https://www.gnu.org/licenses/agpl-3.0.html) 获取。
- **核心义务:** AGPLv3 的一个关键要求是,如果您修改了 New API 并通过网络提供服务 (SaaS),或者分发了修改后的版本,您必须以 AGPLv3 许可证向所有用户提供相应的**完整源代码**。
- **附加限制 (重要):** 在仅使用 AGPLv3 开源许可证的情况下,您**必须**完整保留项目代码中原有的品牌标识、LOGO 及版权声明信息。**禁止以任何形式修改、移除或遮盖**这些信息。如需移除,必须获取商业许可证。
- 使用前请务必仔细阅读并理解 AGPLv3 的所有条款及上述附加限制。
Developers that use our General Public Licenses protect your rights
with two steps: (1) assert copyright on the software, and (2) offer
you this License which gives you legal permission to copy, distribute
and/or modify the software.
## **2. 商业许可证 (Commercial License) - 适用于高级场景及闭源需求**
A secondary benefit of defending all users' freedom is that
improvements made in alternate versions of the program, if they
receive widespread use, become available for other developers to
incorporate. Many developers of free software are heartened and
encouraged by the resulting cooperation. However, in the case of
software used on network servers, this result may fail to come about.
The GNU General Public License permits making a modified version and
letting the public access it on a server without ever releasing its
source code to the public.
在以下任一情况下,您**必须**联系我们获取并签署一份商业许可证,才能合法使用 New API
The GNU Affero General Public License is designed specifically to
ensure that, in such cases, the modified source code becomes available
to the community. It requires the operator of a network server to
provide the source code of the modified version running there to the
users of that server. Therefore, public use of a modified version, on
a publicly accessible server, gives the public access to the source
code of the modified version.
- **场景一:移除品牌和版权信息**
您希望在您的产品或服务中移除 New API 的 LOGO、UI界面中的版权声明或其他品牌标识。
An older license, called the Affero General Public License and
published by Affero, was designed to accomplish similar goals. This is
a different license, not a version of the Affero GPL, but Affero has
released a new version of the Affero GPL which permits relicensing under
this license.
- **场景二:规避 AGPLv3 开源义务**
您基于 New API 进行了修改,并希望:
- 通过网络提供服务SaaS但**不希望**向您的服务用户公开您修改后的源代码。
- 分发一个集成了 New API 的软件产品,但**不希望**以 AGPLv3 许可证发布您的产品或公开源代码。
The precise terms and conditions for copying, distribution and
modification follow.
- **场景三:企业政策与集成需求**
- 您所在公司的政策、客户合同或项目要求不允许使用 AGPLv3 许可的软件。
- 您需要进行 OEM 集成,将 New API 作为您闭源商业产品的一部分进行再分发。
TERMS AND CONDITIONS
- **场景四:需要商业支持与保障**
您需要 AGPLv3 未提供的商业保障,如官方技术支持等。
0. Definitions.
**获取商业许可:**
请通过电子邮件 **support@quantumnous.com** 联系 New API 团队洽谈商业授权事宜。
"This License" refers to version 3 of the GNU Affero General Public License.
## **3. 贡献 (Contributions)**
"Copyright" also means copyright-like laws that apply to other kinds of
works, such as semiconductor masks.
- 我们欢迎社区对 New API 的贡献。所有向本项目提交的贡献(例如通过 Pull Request都将被视为在 **AGPLv3** 许可证下提供。
- 通过向本项目提交贡献,即表示您同意您的代码以 AGPLv3 许可证授权给本项目及所有后续使用者(无论这些使用者最终遵循 AGPLv3 还是商业许可)。
- 您也理解并同意,您的贡献可能会被包含在根据商业许可证分发的 New API 版本中。
"The Program" refers to any copyrightable work licensed under this
License. Each licensee is addressed as "you". "Licensees" and
"recipients" may be individuals or organizations.
## **4. 其他条款 (Other Terms)**
To "modify" a work means to copy from or adapt all or part of the work
in a fashion requiring copyright permission, other than the making of an
exact copy. The resulting work is called a "modified version" of the
earlier work or a work "based on" the earlier work.
- 关于商业许可证的具体条款、条件和价格,以双方签署的正式商业许可协议为准。
- 项目维护者保留根据需要更新本许可政策的权利。相关更新将通过项目官方渠道(如代码仓库、官方网站)进行通知。
A "covered work" means either the unmodified Program or a work based
on the Program.
---
To "propagate" a work means to do anything with it that, without
permission, would make you directly or secondarily liable for
infringement under applicable copyright law, except executing it on a
computer or modifying a private copy. Propagation includes copying,
distribution (with or without modification), making available to the
public, and in some countries other activities as well.
# **New API Licensing**
To "convey" a work means any kind of propagation that enables other
parties to make or receive copies. Mere interaction with a user through
a computer network, with no transfer of a copy, is not conveying.
This project uses a **Usage-Based Dual Licensing** model.
An interactive user interface displays "Appropriate Legal Notices"
to the extent that it includes a convenient and prominently visible
feature that (1) displays an appropriate copyright notice, and (2)
tells the user that there is no warranty for the work (except to the
extent that warranties are provided), that licensees may convey the
work under this License, and how to view a copy of this License. If
the interface presents a list of user commands or options, such as a
menu, a prominent item in the list meets this criterion.
**Core Principles:**
1. Source Code.
- **Default License:** This project is available by default under the **GNU Affero General Public License v3.0 (AGPLv3)**. Any user may use it free of charge, provided they comply with both the AGPLv3 terms and the additional restrictions listed below.
- **Commercial License:** For specific commercial scenarios, or if you require rights beyond those granted by AGPLv3, you **must** obtain a **Commercial License**.
The "source code" for a work means the preferred form of the work
for making modifications to it. "Object code" means any non-source
form of a work.
---
A "Standard Interface" means an interface that either is an official
standard defined by a recognized standards body, or, in the case of
interfaces specified for a particular programming language, one that
is widely used among developers working in that language.
## **1. Open Source License: AGPLv3 For Basic Usage**
The "System Libraries" of an executable work include anything, other
than the work as a whole, that (a) is included in the normal form of
packaging a Major Component, but which is not part of that Major
Component, and (b) serves only to enable use of the work with that
Major Component, or to implement a Standard Interface for which an
implementation is available to the public in source code form. A
"Major Component", in this context, means a major essential component
(kernel, window system, and so on) of the specific operating system
(if any) on which the executable work runs, or a compiler used to
produce the work, or an object code interpreter used to run it.
- Under the terms of the **AGPLv3**, you are free to use, modify, and distribute New API. The complete AGPLv3 license text can be viewed at [https://www.gnu.org/licenses/agpl-3.0.html](https://www.gnu.org/licenses/agpl-3.0.html).
- **Core Obligation:** A key AGPLv3 requirement is that if you modify New API and provide it as a network service (SaaS), or distribute a modified version, you must make the **complete corresponding source code** available to all users under the AGPLv3 license.
- **Additional Restriction (Important):** When using only the AGPLv3 open-source license, you **must** retain all original branding, logos, and copyright statements within the projects code. **You are strictly prohibited from modifying, removing, or concealing** any such information. If you wish to remove this, you must obtain a Commercial License.
- Please read and ensure that you fully understand all AGPLv3 terms and the above additional restriction before use.
The "Corresponding Source" for a work in object code form means all
the source code needed to generate, install, and (for an executable
work) run the object code and to modify the work, including scripts to
control those activities. However, it does not include the work's
System Libraries, or general-purpose tools or generally available free
programs which are used unmodified in performing those activities but
which are not part of the work. For example, Corresponding Source
includes interface definition files associated with source files for
the work, and the source code for shared libraries and dynamically
linked subprograms that the work is specifically designed to require,
such as by intimate data communication or control flow between those
subprograms and other parts of the work.
## **2. Commercial License For Advanced Scenarios & Closed Source Needs**
The Corresponding Source need not include anything that users
can regenerate automatically from other parts of the Corresponding
Source.
You **must** contact us to obtain and sign a Commercial License in any of the following scenarios in order to legally use New API:
The Corresponding Source for a work in source code form is that
same work.
- **Scenario 1: Removal of Branding and Copyright**
You wish to remove the New API logo, copyright statement, or other branding elements from your product or service.
2. Basic Permissions.
- **Scenario 2: Avoidance of AGPLv3 Open Source Obligations**
You have modified New API and wish to:
- Offer it as a network service (SaaS) **without** disclosing your modifications' source code to your users.
- Distribute a software product integrated with New API **without** releasing your product under AGPLv3 or open-sourcing the code.
All rights granted under this License are granted for the term of
copyright on the Program, and are irrevocable provided the stated
conditions are met. This License explicitly affirms your unlimited
permission to run the unmodified Program. The output from running a
covered work is covered by this License only if the output, given its
content, constitutes a covered work. This License acknowledges your
rights of fair use or other equivalent, as provided by copyright law.
- **Scenario 3: Enterprise Policy & Integration Needs**
- Your organizations policies, client contracts, or project requirements prohibit the use of AGPLv3-licensed software.
- You require OEM integration and need to redistribute New API as part of your closed-source commercial product.
You may make, run and propagate covered works that you do not
convey, without conditions so long as your license otherwise remains
in force. You may convey covered works to others for the sole purpose
of having them make modifications exclusively for you, or provide you
with facilities for running those works, provided that you comply with
the terms of this License in conveying all material for which you do
not control copyright. Those thus making or running the covered works
for you must do so exclusively on your behalf, under your direction
and control, on terms that prohibit them from making any copies of
your copyrighted material outside their relationship with you.
- **Scenario 4: Commercial Support and Assurances**
You require commercial assurances not provided by AGPLv3, such as official technical support.
Conveying under any other circumstances is permitted solely under
the conditions stated below. Sublicensing is not allowed; section 10
makes it unnecessary.
**Obtaining a Commercial License:**
Please contact the New API team via email at **support@quantumnous.com** to discuss commercial licensing.
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
## **3. Contributions**
No covered work shall be deemed part of an effective technological
measure under any applicable law fulfilling obligations under article
11 of the WIPO copyright treaty adopted on 20 December 1996, or
similar laws prohibiting or restricting circumvention of such
measures.
- We welcome community contributions to New API. All contributions (e.g., via Pull Request) are deemed to be provided under the **AGPLv3** license.
- By submitting a contribution, you agree that your code is licensed to this project and all downstream users under the AGPLv3 license (regardless of whether those users ultimately operate under AGPLv3 or a Commercial License).
- You also acknowledge and agree that your contribution may be included in New API releases distributed under a Commercial License.
When you convey a covered work, you waive any legal power to forbid
circumvention of technological measures to the extent such circumvention
is effected by exercising rights under this License with respect to
the covered work, and you disclaim any intention to limit operation or
modification of the work as a means of enforcing, against the work's
users, your or third parties' legal rights to forbid circumvention of
technological measures.
## **4. Other Terms**
4. Conveying Verbatim Copies.
- The specific terms, conditions, and pricing of the Commercial License are governed by the formal commercial license agreement executed by both parties.
- Project maintainers reserve the right to update this licensing policy as needed. Updates will be communicated via official project channels (e.g., repository, official website).
You may convey verbatim copies of the Program's source code as you
receive it, in any medium, provided that you conspicuously and
appropriately publish on each copy an appropriate copyright notice;
keep intact all notices stating that this License and any
non-permissive terms added in accord with section 7 apply to the code;
keep intact all notices of the absence of any warranty; and give all
recipients a copy of this License along with the Program.
You may charge any price or no price for each copy that you convey,
and you may offer support or warranty protection for a fee.
5. Conveying Modified Source Versions.
You may convey a work based on the Program, or the modifications to
produce it from the Program, in the form of source code under the
terms of section 4, provided that you also meet all of these conditions:
a) The work must carry prominent notices stating that you modified
it, and giving a relevant date.
b) The work must carry prominent notices stating that it is
released under this License and any conditions added under section
7. This requirement modifies the requirement in section 4 to
"keep intact all notices".
c) You must license the entire work, as a whole, under this
License to anyone who comes into possession of a copy. This
License will therefore apply, along with any applicable section 7
additional terms, to the whole of the work, and all its parts,
regardless of how they are packaged. This License gives no
permission to license the work in any other way, but it does not
invalidate such permission if you have separately received it.
d) If the work has interactive user interfaces, each must display
Appropriate Legal Notices; however, if the Program has interactive
interfaces that do not display Appropriate Legal Notices, your
work need not make them do so.
A compilation of a covered work with other separate and independent
works, which are not by their nature extensions of the covered work,
and which are not combined with it such as to form a larger program,
in or on a volume of a storage or distribution medium, is called an
"aggregate" if the compilation and its resulting copyright are not
used to limit the access or legal rights of the compilation's users
beyond what the individual works permit. Inclusion of a covered work
in an aggregate does not cause this License to apply to the other
parts of the aggregate.
6. Conveying Non-Source Forms.
You may convey a covered work in object code form under the terms
of sections 4 and 5, provided that you also convey the
machine-readable Corresponding Source under the terms of this License,
in one of these ways:
a) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by the
Corresponding Source fixed on a durable physical medium
customarily used for software interchange.
b) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by a
written offer, valid for at least three years and valid for as
long as you offer spare parts or customer support for that product
model, to give anyone who possesses the object code either (1) a
copy of the Corresponding Source for all the software in the
product that is covered by this License, on a durable physical
medium customarily used for software interchange, for a price no
more than your reasonable cost of physically performing this
conveying of source, or (2) access to copy the
Corresponding Source from a network server at no charge.
c) Convey individual copies of the object code with a copy of the
written offer to provide the Corresponding Source. This
alternative is allowed only occasionally and noncommercially, and
only if you received the object code with such an offer, in accord
with subsection 6b.
d) Convey the object code by offering access from a designated
place (gratis or for a charge), and offer equivalent access to the
Corresponding Source in the same way through the same place at no
further charge. You need not require recipients to copy the
Corresponding Source along with the object code. If the place to
copy the object code is a network server, the Corresponding Source
may be on a different server (operated by you or a third party)
that supports equivalent copying facilities, provided you maintain
clear directions next to the object code saying where to find the
Corresponding Source. Regardless of what server hosts the
Corresponding Source, you remain obligated to ensure that it is
available for as long as needed to satisfy these requirements.
e) Convey the object code using peer-to-peer transmission, provided
you inform other peers where the object code and Corresponding
Source of the work are being offered to the general public at no
charge under subsection 6d.
A separable portion of the object code, whose source code is excluded
from the Corresponding Source as a System Library, need not be
included in conveying the object code work.
A "User Product" is either (1) a "consumer product", which means any
tangible personal property which is normally used for personal, family,
or household purposes, or (2) anything designed or sold for incorporation
into a dwelling. In determining whether a product is a consumer product,
doubtful cases shall be resolved in favor of coverage. For a particular
product received by a particular user, "normally used" refers to a
typical or common use of that class of product, regardless of the status
of the particular user or of the way in which the particular user
actually uses, or expects or is expected to use, the product. A product
is a consumer product regardless of whether the product has substantial
commercial, industrial or non-consumer uses, unless such uses represent
the only significant mode of use of the product.
"Installation Information" for a User Product means any methods,
procedures, authorization keys, or other information required to install
and execute modified versions of a covered work in that User Product from
a modified version of its Corresponding Source. The information must
suffice to ensure that the continued functioning of the modified object
code is in no case prevented or interfered with solely because
modification has been made.
If you convey an object code work under this section in, or with, or
specifically for use in, a User Product, and the conveying occurs as
part of a transaction in which the right of possession and use of the
User Product is transferred to the recipient in perpetuity or for a
fixed term (regardless of how the transaction is characterized), the
Corresponding Source conveyed under this section must be accompanied
by the Installation Information. But this requirement does not apply
if neither you nor any third party retains the ability to install
modified object code on the User Product (for example, the work has
been installed in ROM).
The requirement to provide Installation Information does not include a
requirement to continue to provide support service, warranty, or updates
for a work that has been modified or installed by the recipient, or for
the User Product in which it has been modified or installed. Access to a
network may be denied when the modification itself materially and
adversely affects the operation of the network or violates the rules and
protocols for communication across the network.
Corresponding Source conveyed, and Installation Information provided,
in accord with this section must be in a format that is publicly
documented (and with an implementation available to the public in
source code form), and must require no special password or key for
unpacking, reading or copying.
7. Additional Terms.
"Additional permissions" are terms that supplement the terms of this
License by making exceptions from one or more of its conditions.
Additional permissions that are applicable to the entire Program shall
be treated as though they were included in this License, to the extent
that they are valid under applicable law. If additional permissions
apply only to part of the Program, that part may be used separately
under those permissions, but the entire Program remains governed by
this License without regard to the additional permissions.
When you convey a copy of a covered work, you may at your option
remove any additional permissions from that copy, or from any part of
it. (Additional permissions may be written to require their own
removal in certain cases when you modify the work.) You may place
additional permissions on material, added by you to a covered work,
for which you have or can give appropriate copyright permission.
Notwithstanding any other provision of this License, for material you
add to a covered work, you may (if authorized by the copyright holders of
that material) supplement the terms of this License with terms:
a) Disclaiming warranty or limiting liability differently from the
terms of sections 15 and 16 of this License; or
b) Requiring preservation of specified reasonable legal notices or
author attributions in that material or in the Appropriate Legal
Notices displayed by works containing it; or
c) Prohibiting misrepresentation of the origin of that material, or
requiring that modified versions of such material be marked in
reasonable ways as different from the original version; or
d) Limiting the use for publicity purposes of names of licensors or
authors of the material; or
e) Declining to grant rights under trademark law for use of some
trade names, trademarks, or service marks; or
f) Requiring indemnification of licensors and authors of that
material by anyone who conveys the material (or modified versions of
it) with contractual assumptions of liability to the recipient, for
any liability that these contractual assumptions directly impose on
those licensors and authors.
All other non-permissive additional terms are considered "further
restrictions" within the meaning of section 10. If the Program as you
received it, or any part of it, contains a notice stating that it is
governed by this License along with a term that is a further
restriction, you may remove that term. If a license document contains
a further restriction but permits relicensing or conveying under this
License, you may add to a covered work material governed by the terms
of that license document, provided that the further restriction does
not survive such relicensing or conveying.
If you add terms to a covered work in accord with this section, you
must place, in the relevant source files, a statement of the
additional terms that apply to those files, or a notice indicating
where to find the applicable terms.
Additional terms, permissive or non-permissive, may be stated in the
form of a separately written license, or stated as exceptions;
the above requirements apply either way.
8. Termination.
You may not propagate or modify a covered work except as expressly
provided under this License. Any attempt otherwise to propagate or
modify it is void, and will automatically terminate your rights under
this License (including any patent licenses granted under the third
paragraph of section 11).
However, if you cease all violation of this License, then your
license from a particular copyright holder is reinstated (a)
provisionally, unless and until the copyright holder explicitly and
finally terminates your license, and (b) permanently, if the copyright
holder fails to notify you of the violation by some reasonable means
prior to 60 days after the cessation.
Moreover, your license from a particular copyright holder is
reinstated permanently if the copyright holder notifies you of the
violation by some reasonable means, this is the first time you have
received notice of violation of this License (for any work) from that
copyright holder, and you cure the violation prior to 30 days after
your receipt of the notice.
Termination of your rights under this section does not terminate the
licenses of parties who have received copies or rights from you under
this License. If your rights have been terminated and not permanently
reinstated, you do not qualify to receive new licenses for the same
material under section 10.
9. Acceptance Not Required for Having Copies.
You are not required to accept this License in order to receive or
run a copy of the Program. Ancillary propagation of a covered work
occurring solely as a consequence of using peer-to-peer transmission
to receive a copy likewise does not require acceptance. However,
nothing other than this License grants you permission to propagate or
modify any covered work. These actions infringe copyright if you do
not accept this License. Therefore, by modifying or propagating a
covered work, you indicate your acceptance of this License to do so.
10. Automatic Licensing of Downstream Recipients.
Each time you convey a covered work, the recipient automatically
receives a license from the original licensors, to run, modify and
propagate that work, subject to this License. You are not responsible
for enforcing compliance by third parties with this License.
An "entity transaction" is a transaction transferring control of an
organization, or substantially all assets of one, or subdividing an
organization, or merging organizations. If propagation of a covered
work results from an entity transaction, each party to that
transaction who receives a copy of the work also receives whatever
licenses to the work the party's predecessor in interest had or could
give under the previous paragraph, plus a right to possession of the
Corresponding Source of the work from the predecessor in interest, if
the predecessor has it or can get it with reasonable efforts.
You may not impose any further restrictions on the exercise of the
rights granted or affirmed under this License. For example, you may
not impose a license fee, royalty, or other charge for exercise of
rights granted under this License, and you may not initiate litigation
(including a cross-claim or counterclaim in a lawsuit) alleging that
any patent claim is infringed by making, using, selling, offering for
sale, or importing the Program or any portion of it.
11. Patents.
A "contributor" is a copyright holder who authorizes use under this
License of the Program or a work on which the Program is based. The
work thus licensed is called the contributor's "contributor version".
A contributor's "essential patent claims" are all patent claims
owned or controlled by the contributor, whether already acquired or
hereafter acquired, that would be infringed by some manner, permitted
by this License, of making, using, or selling its contributor version,
but do not include claims that would be infringed only as a
consequence of further modification of the contributor version. For
purposes of this definition, "control" includes the right to grant
patent sublicenses in a manner consistent with the requirements of
this License.
Each contributor grants you a non-exclusive, worldwide, royalty-free
patent license under the contributor's essential patent claims, to
make, use, sell, offer for sale, import and otherwise run, modify and
propagate the contents of its contributor version.
In the following three paragraphs, a "patent license" is any express
agreement or commitment, however denominated, not to enforce a patent
(such as an express permission to practice a patent or covenant not to
sue for patent infringement). To "grant" such a patent license to a
party means to make such an agreement or commitment not to enforce a
patent against the party.
If you convey a covered work, knowingly relying on a patent license,
and the Corresponding Source of the work is not available for anyone
to copy, free of charge and under the terms of this License, through a
publicly available network server or other readily accessible means,
then you must either (1) cause the Corresponding Source to be so
available, or (2) arrange to deprive yourself of the benefit of the
patent license for this particular work, or (3) arrange, in a manner
consistent with the requirements of this License, to extend the patent
license to downstream recipients. "Knowingly relying" means you have
actual knowledge that, but for the patent license, your conveying the
covered work in a country, or your recipient's use of the covered work
in a country, would infringe one or more identifiable patents in that
country that you have reason to believe are valid.
If, pursuant to or in connection with a single transaction or
arrangement, you convey, or propagate by procuring conveyance of, a
covered work, and grant a patent license to some of the parties
receiving the covered work authorizing them to use, propagate, modify
or convey a specific copy of the covered work, then the patent license
you grant is automatically extended to all recipients of the covered
work and works based on it.
A patent license is "discriminatory" if it does not include within
the scope of its coverage, prohibits the exercise of, or is
conditioned on the non-exercise of one or more of the rights that are
specifically granted under this License. You may not convey a covered
work if you are a party to an arrangement with a third party that is
in the business of distributing software, under which you make payment
to the third party based on the extent of your activity of conveying
the work, and under which the third party grants, to any of the
parties who would receive the covered work from you, a discriminatory
patent license (a) in connection with copies of the covered work
conveyed by you (or copies made from those copies), or (b) primarily
for and in connection with specific products or compilations that
contain the covered work, unless you entered into that arrangement,
or that patent license was granted, prior to 28 March 2007.
Nothing in this License shall be construed as excluding or limiting
any implied license or other defenses to infringement that may
otherwise be available to you under applicable patent law.
12. No Surrender of Others' Freedom.
If conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot convey a
covered work so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you may
not convey it at all. For example, if you agree to terms that obligate you
to collect a royalty for further conveying from those to whom you convey
the Program, the only way you could satisfy both those terms and this
License would be to refrain entirely from conveying the Program.
13. Remote Network Interaction; Use with the GNU General Public License.
Notwithstanding any other provision of this License, if you modify the
Program, your modified version must prominently offer all users
interacting with it remotely through a computer network (if your version
supports such interaction) an opportunity to receive the Corresponding
Source of your version by providing access to the Corresponding Source
from a network server at no charge, through some standard or customary
means of facilitating copying of software. This Corresponding Source
shall include the Corresponding Source for any work covered by version 3
of the GNU General Public License that is incorporated pursuant to the
following paragraph.
Notwithstanding any other provision of this License, you have
permission to link or combine any covered work with a work licensed
under version 3 of the GNU General Public License into a single
combined work, and to convey the resulting work. The terms of this
License will continue to apply to the part which is the covered work,
but the work with which it is combined will remain governed by version
3 of the GNU General Public License.
14. Revised Versions of this License.
The Free Software Foundation may publish revised and/or new versions of
the GNU Affero General Public License from time to time. Such new versions
will be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the
Program specifies that a certain numbered version of the GNU Affero General
Public License "or any later version" applies to it, you have the
option of following the terms and conditions either of that numbered
version or of any later version published by the Free Software
Foundation. If the Program does not specify a version number of the
GNU Affero General Public License, you may choose any version ever published
by the Free Software Foundation.
If the Program specifies that a proxy can decide which future
versions of the GNU Affero General Public License can be used, that proxy's
public statement of acceptance of a version permanently authorizes you
to choose that version for the Program.
Later license versions may give you additional or different
permissions. However, no additional obligations are imposed on any
author or copyright holder as a result of your choosing to follow a
later version.
15. Disclaimer of Warranty.
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
16. Limitation of Liability.
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
SUCH DAMAGES.
17. Interpretation of Sections 15 and 16.
If the disclaimer of warranty and limitation of liability provided
above cannot be given local legal effect according to their terms,
reviewing courts shall apply local law that most closely approximates
an absolute waiver of all civil liability in connection with the
Program, unless a warranty or assumption of liability accompanies a
copy of the Program in return for a fee.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
state the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published
by the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
Also add information on how to contact you by electronic and paper mail.
If your software can interact with users remotely through a computer
network, you should also make sure that it provides a way for users to
get its source. For example, if your program is a web application, its
interface could display a "Source" link that leads users to an archive
of the code. There are many ways you could offer source, and different
solutions will be better for different programs; see section 13 for the
specific requirements.
You should also get your employer (if you work as a programmer) or school,
if any, to sign a "copyright disclaimer" for the program, if necessary.
For more information on this, and how to apply and follow the GNU AGPL, see
<https://www.gnu.org/licenses/>.

View File

@@ -1,457 +0,0 @@
<div align="center">
![new-api](/web/public/logo.png)
# New API
🍥 **Next-Generation Large Model Gateway and AI Asset Management System**
<p align="center">
<a href="./README.md">中文</a> |
<strong>English</strong> |
<a href="./README.fr.md">Français</a> |
<a href="./README.ja.md">日本語</a>
</p>
<p align="center">
<a href="https://raw.githubusercontent.com/Calcium-Ion/new-api/main/LICENSE">
<img src="https://img.shields.io/github/license/Calcium-Ion/new-api?color=brightgreen" alt="license">
</a>
<a href="https://github.com/Calcium-Ion/new-api/releases/latest">
<img src="https://img.shields.io/github/v/release/Calcium-Ion/new-api?color=brightgreen&include_prereleases" alt="release">
</a>
<a href="https://github.com/users/Calcium-Ion/packages/container/package/new-api">
<img src="https://img.shields.io/badge/docker-ghcr.io-blue" alt="docker">
</a>
<a href="https://hub.docker.com/r/CalciumIon/new-api">
<img src="https://img.shields.io/badge/docker-dockerHub-blue" alt="docker">
</a>
<a href="https://goreportcard.com/report/github.com/Calcium-Ion/new-api">
<img src="https://goreportcard.com/badge/github.com/Calcium-Ion/new-api" alt="GoReportCard">
</a>
</p>
<p align="center">
<a href="https://trendshift.io/repositories/8227" target="_blank">
<img src="https://trendshift.io/api/badge/repositories/8227" alt="Calcium-Ion%2Fnew-api | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/>
</a>
</p>
<p align="center">
<a href="#-quick-start">Quick Start</a> •
<a href="#-key-features">Key Features</a> •
<a href="#-deployment">Deployment</a> •
<a href="#-documentation">Documentation</a> •
<a href="#-help-support">Help</a>
</p>
</div>
## 📝 Project Description
> [!NOTE]
> This is an open-source project developed based on [One API](https://github.com/songquanpeng/one-api)
> [!IMPORTANT]
> - This project is for personal learning purposes only, with no guarantee of stability or technical support
> - Users must comply with OpenAI's [Terms of Use](https://openai.com/policies/terms-of-use) and **applicable laws and regulations**, and must not use it for illegal purposes
> - According to the [《Interim Measures for the Management of Generative Artificial Intelligence Services》](http://www.cac.gov.cn/2023-07/13/c_1690898327029107.htm), please do not provide any unregistered generative AI services to the public in China.
---
## 🤝 Trusted Partners
<p align="center">
<em>No particular order</em>
</p>
<p align="center">
<a href="https://www.cherry-ai.com/" target="_blank">
<img src="./docs/images/cherry-studio.png" alt="Cherry Studio" height="80" />
</a>
<a href="https://bda.pku.edu.cn/" target="_blank">
<img src="./docs/images/pku.png" alt="Peking University" height="80" />
</a>
<a href="https://www.compshare.cn/?ytag=GPU_yy_gh_newapi" target="_blank">
<img src="./docs/images/ucloud.png" alt="UCloud" height="80" />
</a>
<a href="https://www.aliyun.com/" target="_blank">
<img src="./docs/images/aliyun.png" alt="Alibaba Cloud" height="80" />
</a>
<a href="https://io.net/" target="_blank">
<img src="./docs/images/io-net.png" alt="IO.NET" height="80" />
</a>
</p>
---
## 🙏 Special Thanks
<p align="center">
<a href="https://www.jetbrains.com/?from=new-api" target="_blank">
<img src="https://resources.jetbrains.com/storage/products/company/brand/logos/jb_beam.png" alt="JetBrains Logo" width="120" />
</a>
</p>
<p align="center">
<strong>Thanks to <a href="https://www.jetbrains.com/?from=new-api">JetBrains</a> for providing free open-source development license for this project</strong>
</p>
---
## 🚀 Quick Start
### Using Docker Compose (Recommended)
```bash
# Clone the project
git clone https://github.com/QuantumNous/new-api.git
cd new-api
# Edit docker-compose.yml configuration
nano docker-compose.yml
# Start the service
docker-compose up -d
```
<details>
<summary><strong>Using Docker Commands</strong></summary>
```bash
# Pull the latest image
docker pull calciumion/new-api:latest
# Using SQLite (default)
docker run --name new-api -d --restart always \
-p 3000:3000 \
-e TZ=Asia/Shanghai \
-v ./data:/data \
calciumion/new-api:latest
# Using MySQL
docker run --name new-api -d --restart always \
-p 3000:3000 \
-e SQL_DSN="root:123456@tcp(localhost:3306)/oneapi" \
-e TZ=Asia/Shanghai \
-v ./data:/data \
calciumion/new-api:latest
```
> **💡 Tip:** `-v ./data:/data` will save data in the `data` folder of the current directory, you can also change it to an absolute path like `-v /your/custom/path:/data`
</details>
---
🎉 After deployment is complete, visit `http://localhost:3000` to start using!
📖 For more deployment methods, please refer to [Deployment Guide](https://docs.newapi.pro/en/docs/installation)
---
## 📚 Documentation
<div align="center">
### 📖 [Official Documentation](https://docs.newapi.pro/en/docs) | [![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/QuantumNous/new-api)
</div>
**Quick Navigation:**
| Category | Link |
|------|------|
| 🚀 Deployment Guide | [Installation Documentation](https://docs.newapi.pro/en/docs/installation) |
| ⚙️ Environment Configuration | [Environment Variables](https://docs.newapi.pro/en/docs/installation/config-maintenance/environment-variables) |
| 📡 API Documentation | [API Documentation](https://docs.newapi.pro/en/docs/api) |
| ❓ FAQ | [FAQ](https://docs.newapi.pro/en/docs/support/faq) |
| 💬 Community Interaction | [Communication Channels](https://docs.newapi.pro/en/docs/support/community-interaction) |
---
## ✨ Key Features
> For detailed features, please refer to [Features Introduction](https://docs.newapi.pro/en/docs/guide/wiki/basic-concepts/features-introduction)
### 🎨 Core Functions
| Feature | Description |
|------|------|
| 🎨 New UI | Modern user interface design |
| 🌍 Multi-language | Supports Chinese, English, French, Japanese |
| 🔄 Data Compatibility | Fully compatible with the original One API database |
| 📈 Data Dashboard | Visual console and statistical analysis |
| 🔒 Permission Management | Token grouping, model restrictions, user management |
### 💰 Payment and Billing
- ✅ Online recharge (EPay, Stripe)
- ✅ Pay-per-use model pricing
- ✅ Cache billing support (OpenAI, Azure, DeepSeek, Claude, Qwen and all supported models)
- ✅ Flexible billing policy configuration
### 🔐 Authorization and Security
- 😈 Discord authorization login
- 🤖 LinuxDO authorization login
- 📱 Telegram authorization login
- 🔑 OIDC unified authentication
### 🚀 Advanced Features
**API Format Support:**
- ⚡ [OpenAI Responses](https://docs.newapi.pro/en/docs/api/ai-model/chat/openai/create-response)
- ⚡ [OpenAI Realtime API](https://docs.newapi.pro/en/docs/api/ai-model/realtime/create-realtime-session) (including Azure)
- ⚡ [Claude Messages](https://docs.newapi.pro/en/docs/api/ai-model/chat/create-message)
- ⚡ [Google Gemini](https://doc.newapi.pro/en/api/google-gemini-chat)
- 🔄 [Rerank Models](https://docs.newapi.pro/en/docs/api/ai-model/rerank/create-rerank) (Cohere, Jina)
**Intelligent Routing:**
- ⚖️ Channel weighted random
- 🔄 Automatic retry on failure
- 🚦 User-level model rate limiting
**Format Conversion:**
- 🔄 OpenAI ⇄ Claude Messages
- 🔄 OpenAI ⇄ Gemini Chat
- 🔄 Thinking-to-content functionality
**Reasoning Effort Support:**
<details>
<summary>View detailed configuration</summary>
**OpenAI series models:**
- `o3-mini-high` - High reasoning effort
- `o3-mini-medium` - Medium reasoning effort
- `o3-mini-low` - Low reasoning effort
- `gpt-5-high` - High reasoning effort
- `gpt-5-medium` - Medium reasoning effort
- `gpt-5-low` - Low reasoning effort
**Claude thinking models:**
- `claude-3-7-sonnet-20250219-thinking` - Enable thinking mode
**Google Gemini series models:**
- `gemini-2.5-flash-thinking` - Enable thinking mode
- `gemini-2.5-flash-nothinking` - Disable thinking mode
- `gemini-2.5-pro-thinking` - Enable thinking mode
- `gemini-2.5-pro-thinking-128` - Enable thinking mode with thinking budget of 128 tokens
- You can also append `-low`, `-medium`, or `-high` to any Gemini model name to request the corresponding reasoning effort (no extra thinking-budget suffix needed).
</details>
---
## 🤖 Model Support
> For details, please refer to [API Documentation - Relay Interface](https://docs.newapi.pro/en/docs/api)
| Model Type | Description | Documentation |
|---------|------|------|
| 🤖 OpenAI GPTs | gpt-4-gizmo-* series | - |
| 🎨 Midjourney-Proxy | [Midjourney-Proxy(Plus)](https://github.com/novicezk/midjourney-proxy) | [Documentation](https://doc.newapi.pro/en/api/midjourney-proxy-image) |
| 🎵 Suno-API | [Suno API](https://github.com/Suno-API/Suno-API) | [Documentation](https://doc.newapi.pro/en/api/suno-music) |
| 🔄 Rerank | Cohere, Jina | [Documentation](https://docs.newapi.pro/en/docs/api/ai-model/rerank/create-rerank) |
| 💬 Claude | Messages format | [Documentation](https://docs.newapi.pro/en/docs/api/ai-model/chat/create-message) |
| 🌐 Gemini | Google Gemini format | [Documentation](https://doc.newapi.pro/en/api/google-gemini-chat) |
| 🔧 Dify | ChatFlow mode | - |
| 🎯 Custom | Supports complete call address | - |
### 📡 Supported Interfaces
<details>
<summary>View complete interface list</summary>
- [Chat Interface (Chat Completions)](https://docs.newapi.pro/en/docs/api/ai-model/chat/openai/create-chat-completion)
- [Response Interface (Responses)](https://docs.newapi.pro/en/docs/api/ai-model/chat/openai/create-response)
- [Image Interface (Image)](https://docs.newapi.pro/en/docs/api/ai-model/images/openai/v1-images-generations--post)
- [Audio Interface (Audio)](https://docs.newapi.pro/en/docs/api/ai-model/audio/openai/create-transcription)
- [Video Interface (Video)](https://docs.newapi.pro/en/docs/api/ai-model/videos/create-video-generation)
- [Embedding Interface (Embeddings)](https://docs.newapi.pro/en/docs/api/ai-model/embeddings/create-embedding)
- [Rerank Interface (Rerank)](https://docs.newapi.pro/en/docs/api/ai-model/rerank/create-rerank)
- [Realtime Conversation (Realtime)](https://docs.newapi.pro/en/docs/api/ai-model/realtime/create-realtime-session)
- [Claude Chat](https://docs.newapi.pro/en/docs/api/ai-model/chat/create-message)
- [Google Gemini Chat](https://doc.newapi.pro/en/api/google-gemini-chat)
</details>
---
## 🚢 Deployment
> [!TIP]
> **Latest Docker image:** `calciumion/new-api:latest`
### 📋 Deployment Requirements
| Component | Requirement |
|------|------|
| **Local database** | SQLite (Docker must mount `/data` directory)|
| **Remote database** | MySQL ≥ 5.7.8 or PostgreSQL ≥ 9.6 |
| **Container engine** | Docker / Docker Compose |
### ⚙️ Environment Variable Configuration
<details>
<summary>Common environment variable configuration</summary>
| Variable Name | Description | Default Value |
|--------|------|--------|
| `SESSION_SECRET` | Session secret (required for multi-machine deployment) | - |
| `CRYPTO_SECRET` | Encryption secret (required for Redis) | - |
| `SQL_DSN` | Database connection string | - |
| `REDIS_CONN_STRING` | Redis connection string | - |
| `STREAMING_TIMEOUT` | Streaming timeout (seconds) | `300` |
| `STREAM_SCANNER_MAX_BUFFER_MB` | Max per-line buffer (MB) for the stream scanner; increase when upstream sends huge image/base64 payloads | `64` |
| `MAX_REQUEST_BODY_MB` | Max request body size (MB, counted **after decompression**; prevents huge requests/zip bombs from exhausting memory). Exceeding it returns `413` | `32` |
| `AZURE_DEFAULT_API_VERSION` | Azure API version | `2025-04-01-preview` |
| `ERROR_LOG_ENABLED` | Error log switch | `false` |
| `PYROSCOPE_URL` | Pyroscope server address | - |
| `PYROSCOPE_APP_NAME` | Pyroscope application name | `new-api` |
| `PYROSCOPE_BASIC_AUTH_USER` | Pyroscope basic auth user | - |
| `PYROSCOPE_BASIC_AUTH_PASSWORD` | Pyroscope basic auth password | - |
| `PYROSCOPE_MUTEX_RATE` | Pyroscope mutex sampling rate | `5` |
| `PYROSCOPE_BLOCK_RATE` | Pyroscope block sampling rate | `5` |
| `HOSTNAME` | Hostname tag for Pyroscope | `new-api` |
📖 **Complete configuration:** [Environment Variables Documentation](https://docs.newapi.pro/en/docs/installation/config-maintenance/environment-variables)
</details>
### 🔧 Deployment Methods
<details>
<summary><strong>Method 1: Docker Compose (Recommended)</strong></summary>
```bash
# Clone the project
git clone https://github.com/QuantumNous/new-api.git
cd new-api
# Edit configuration
nano docker-compose.yml
# Start service
docker-compose up -d
```
</details>
<details>
<summary><strong>Method 2: Docker Commands</strong></summary>
**Using SQLite:**
```bash
docker run --name new-api -d --restart always \
-p 3000:3000 \
-e TZ=Asia/Shanghai \
-v ./data:/data \
calciumion/new-api:latest
```
**Using MySQL:**
```bash
docker run --name new-api -d --restart always \
-p 3000:3000 \
-e SQL_DSN="root:123456@tcp(localhost:3306)/oneapi" \
-e TZ=Asia/Shanghai \
-v ./data:/data \
calciumion/new-api:latest
```
> **💡 Path explanation:**
> - `./data:/data` - Relative path, data saved in the data folder of the current directory
> - You can also use absolute path, e.g.: `/your/custom/path:/data`
</details>
<details>
<summary><strong>Method 3: BaoTa Panel</strong></summary>
1. Install BaoTa Panel (≥ 9.2.0 version)
2. Search for **New-API** in the application store
3. One-click installation
📖 [Tutorial with images](./docs/BT.md)
</details>
### ⚠️ Multi-machine Deployment Considerations
> [!WARNING]
> - **Must set** `SESSION_SECRET` - Otherwise login status inconsistent
> - **Shared Redis must set** `CRYPTO_SECRET` - Otherwise data cannot be decrypted
### 🔄 Channel Retry and Cache
**Retry configuration:** `Settings → Operation Settings → General Settings → Failure Retry Count`
**Cache configuration:**
- `REDIS_CONN_STRING`: Redis cache (recommended)
- `MEMORY_CACHE_ENABLED`: Memory cache
---
## 🔗 Related Projects
### Upstream Projects
| Project | Description |
|------|------|
| [One API](https://github.com/songquanpeng/one-api) | Original project base |
| [Midjourney-Proxy](https://github.com/novicezk/midjourney-proxy) | Midjourney interface support |
### Supporting Tools
| Project | Description |
|------|------|
| [neko-api-key-tool](https://github.com/Calcium-Ion/neko-api-key-tool) | Key quota query tool |
| [new-api-horizon](https://github.com/Calcium-Ion/new-api-horizon) | New API high-performance optimized version |
---
## 💬 Help Support
### 📖 Documentation Resources
| Resource | Link |
|------|------|
| 📘 FAQ | [FAQ](https://docs.newapi.pro/en/docs/support/faq) |
| 💬 Community Interaction | [Communication Channels](https://docs.newapi.pro/en/docs/support/community-interaction) |
| 🐛 Issue Feedback | [Issue Feedback](https://docs.newapi.pro/en/docs/support/feedback-issues) |
| 📚 Complete Documentation | [Official Documentation](https://docs.newapi.pro/en/docs) |
### 🤝 Contribution Guide
Welcome all forms of contribution!
- 🐛 Report Bugs
- 💡 Propose New Features
- 📝 Improve Documentation
- 🔧 Submit Code
---
## 🌟 Star History
<div align="center">
[![Star History Chart](https://api.star-history.com/svg?repos=Calcium-Ion/new-api&type=Date)](https://star-history.com/#Calcium-Ion/new-api&Date)
</div>
---
<div align="center">
### 💖 Thank you for using New API
If this project is helpful to you, welcome to give us a ⭐️ Star
**[Official Documentation](https://docs.newapi.pro/en/docs)** • **[Issue Feedback](https://github.com/Calcium-Ion/new-api/issues)** • **[Latest Release](https://github.com/Calcium-Ion/new-api/releases)**
<sub>Built with ❤️ by QuantumNous</sub>
</div>

View File

@@ -7,8 +7,8 @@
🍥 **Passerelle de modèles étendus de nouvelle génération et système de gestion d'actifs d'IA**
<p align="center">
<a href="./README.md">中文</a> |
<a href="./README.en.md">English</a> |
<a href="./README.zh.md">中文</a> |
<a href="./README.md">English</a> |
<strong>Français</strong> |
<a href="./README.ja.md">日本語</a>
</p>
@@ -35,6 +35,13 @@
<a href="https://trendshift.io/repositories/8227" target="_blank">
<img src="https://trendshift.io/api/badge/repositories/8227" alt="Calcium-Ion%2Fnew-api | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/>
</a>
<br>
<a href="https://hellogithub.com/repository/QuantumNous/new-api" target="_blank">
<img src="https://api.hellogithub.com/v1/widgets/recommend.svg?rid=539ac4217e69431684ad4a0bab768811&claim_uid=tbFPfKIDHpc4TzR" alt="FeaturedHelloGitHub" style="width: 250px; height: 54px;" width="250" height="54" />
</a>
<a href="https://www.producthunt.com/products/new-api/launches/new-api?embed=true&utm_source=badge-featured&utm_medium=badge&utm_campaign=badge-new-api" target="_blank" rel="noopener noreferrer">
<img src="https://api.producthunt.com/widgets/embed-image/v1/featured.svg?post_id=1047693&theme=light&t=1769577875005" alt="New API - All-in-one AI asset management gateway. | Product Hunt" style="width: 250px; height: 54px;" width="250" height="54" />
</a>
</p>
<p align="center">
@@ -193,9 +200,11 @@ docker run --name new-api -d --restart always \
### 🔐 Autorisation et sécurité
- 😈 Connexion par autorisation Discord
- 🤖 Connexion par autorisation LinuxDO
- 📱 Connexion par autorisation Telegram
- 🔑 Authentification unifiée OIDC
- 🔍 Requête de quota d'utilisation de clé (avec [neko-api-key-tool](https://github.com/Calcium-Ion/neko-api-key-tool))
### 🚀 Fonctionnalités avancées
@@ -212,19 +221,24 @@ docker run --name new-api -d --restart always \
- 🚦 Limitation du débit du modèle pour les utilisateurs
**Conversion de format:**
- 🔄 OpenAI ⇄ Claude Messages
- 🔄 OpenAI Gemini Chat
- 🔄 Fonctionnalité de la pensée au contenu
- 🔄 **OpenAI Compatible ⇄ Claude Messages**
- 🔄 **OpenAI Compatible → Google Gemini**
- 🔄 **Google Gemini → OpenAI Compatible** - Texte uniquement, les appels de fonction ne sont pas encore pris en charge
- 🚧 **OpenAI Compatible ⇄ OpenAI Responses** - En développement
- 🔄 **Fonctionnalité de la pensée au contenu**
**Prise en charge de l'effort de raisonnement:**
<details>
<summary>Voir la configuration détaillée</summary>
**Modèles de la série o d'OpenAI:**
**Modèles de la série OpenAI :**
- `o3-mini-high` - Effort de raisonnement élevé
- `o3-mini-medium` - Effort de raisonnement moyen
- `o3-mini-low` - Effort de raisonnement faible
- `gpt-5-high` - Effort de raisonnement élevé
- `gpt-5-medium` - Effort de raisonnement moyen
- `gpt-5-low` - Effort de raisonnement faible
**Modèles de pensée de Claude:**
- `claude-3-7-sonnet-20250219-thinking` - Activer le mode de pensée
@@ -246,12 +260,13 @@ docker run --name new-api -d --restart always \
| Type de modèle | Description | Documentation |
|---------|------|------|
| 🤖 OpenAI GPTs | série gpt-4-gizmo-* | - |
| 🎨 Midjourney-Proxy | [Midjourney-Proxy(Plus)](https://github.com/novicezk/midjourney-proxy) | [Documentation](https://doc.newapi.pro/en/api/midjourney-proxy-image) |
| 🎵 Suno-API | [Suno API](https://github.com/Suno-API/Suno-API) | [Documentation](https://doc.newapi.pro/en/api/suno-music) |
| 🔄 Rerank | Cohere, Jina | [Documentation](https://docs.newapi.pro/en/docs/api/ai-model/rerank/create-rerank) |
| 💬 Claude | Format Messages | [Documentation](https://docs.newapi.pro/en/docs/api/ai-model/chat/create-message) |
| 🌐 Gemini | Format Google Gemini | [Documentation](https://doc.newapi.pro/en/api/google-gemini-chat) |
| 🤖 OpenAI-Compatible | Modèles compatibles OpenAI | [Documentation](https://docs.newapi.pro/en/docs/api/ai-model/chat/openai/createchatcompletion) |
| 🤖 OpenAI Responses | Format OpenAI Responses | [Documentation](https://docs.newapi.pro/en/docs/api/ai-model/chat/openai/createresponse) |
| 🎨 Midjourney-Proxy | [Midjourney-Proxy(Plus)](https://github.com/novicezk/midjourney-proxy) | [Documentation](https://doc.newapi.pro/api/midjourney-proxy-image) |
| 🎵 Suno-API | [Suno API](https://github.com/Suno-API/Suno-API) | [Documentation](https://doc.newapi.pro/api/suno-music) |
| 🔄 Rerank | Cohere, Jina | [Documentation](https://docs.newapi.pro/en/docs/api/ai-model/rerank/creatererank) |
| 💬 Claude | Format Messages | [Documentation](https://docs.newapi.pro/en/docs/api/ai-model/chat/createmessage) |
| 🌐 Gemini | Format Google Gemini | [Documentation](https://docs.newapi.pro/en/docs/api/ai-model/chat/gemini/geminirelayv1beta) |
| 🔧 Dify | Mode ChatFlow | - |
| 🎯 Personnalisé | Prise en charge de l'adresse d'appel complète | - |
@@ -260,16 +275,16 @@ docker run --name new-api -d --restart always \
<details>
<summary>Voir la liste complète des interfaces</summary>
- [Interface de discussion (Chat Completions)](https://docs.newapi.pro/en/docs/api/ai-model/chat/openai/create-chat-completion)
- [Interface de réponse (Responses)](https://docs.newapi.pro/en/docs/api/ai-model/chat/openai/create-response)
- [Interface d'image (Image)](https://docs.newapi.pro/en/docs/api/ai-model/images/openai/v1-images-generations--post)
- [Interface de discussion (Chat Completions)](https://docs.newapi.pro/en/docs/api/ai-model/chat/openai/createchatcompletion)
- [Interface de réponse (Responses)](https://docs.newapi.pro/en/docs/api/ai-model/chat/openai/createresponse)
- [Interface d'image (Image)](https://docs.newapi.pro/en/docs/api/ai-model/images/openai/post-v1-images-generations)
- [Interface audio (Audio)](https://docs.newapi.pro/en/docs/api/ai-model/audio/openai/create-transcription)
- [Interface vidéo (Video)](https://docs.newapi.pro/en/docs/api/ai-model/videos/create-video-generation)
- [Interface d'incorporation (Embeddings)](https://docs.newapi.pro/en/docs/api/ai-model/embeddings/create-embedding)
- [Interface de rerank (Rerank)](https://docs.newapi.pro/en/docs/api/ai-model/rerank/create-rerank)
- [Conversation en temps réel (Realtime)](https://docs.newapi.pro/en/docs/api/ai-model/realtime/create-realtime-session)
- [Discussion Claude](https://docs.newapi.pro/en/docs/api/ai-model/chat/create-message)
- [Discussion Google Gemini](https://doc.newapi.pro/en/api/google-gemini-chat)
- [Interface vidéo (Video)](https://docs.newapi.pro/en/docs/api/ai-model/audio/openai/createspeech)
- [Interface d'incorporation (Embeddings)](https://docs.newapi.pro/en/docs/api/ai-model/embeddings/createembedding)
- [Interface de rerank (Rerank)](https://docs.newapi.pro/en/docs/api/ai-model/rerank/creatererank)
- [Conversation en temps réel (Realtime)](https://docs.newapi.pro/en/docs/api/ai-model/realtime/createrealtimesession)
- [Discussion Claude](https://docs.newapi.pro/en/docs/api/ai-model/chat/createmessage)
- [Discussion Google Gemini](https://docs.newapi.pro/en/docs/api/ai-model/chat/gemini/geminirelayv1beta)
</details>
@@ -366,8 +381,9 @@ docker run --name new-api -d --restart always \
<details>
<summary><strong>Méthode 3: Panneau BaoTa</strong></summary>
1. Installez le panneau BaoTa (version **9.2.0** ou supérieure), recherchez **New-API** dans le magasin d'applications et installez-le.
2. Recherchez **New-API** dans le magasin d'applications et installez-le.
1. Installez le panneau BaoTa (version 9.2.0)
2. Recherchez **New-API** dans le magasin d'applications
3. Installation en un clic
📖 [Tutoriel avec des images](./docs/BT.md)
@@ -403,6 +419,7 @@ docker run --name new-api -d --restart always \
| Projet | Description |
|------|------|
| [neko-api-key-tool](https://github.com/Calcium-Ion/neko-api-key-tool) | Outil de recherche de quota d'utilisation avec une clé |
| [new-api-horizon](https://github.com/Calcium-Ion/new-api-horizon) | Version optimisée haute performance de New API |
---

View File

@@ -7,8 +7,8 @@
🍥 **次世代大規模モデルゲートウェイとAI資産管理システム**
<p align="center">
<a href="./README.md">中文</a> |
<a href="./README.en.md">English</a> |
<a href="./README.zh.md">中文</a> |
<a href="./README.md">English</a> |
<a href="./README.fr.md">Français</a> |
<strong>日本語</strong>
</p>
@@ -35,6 +35,13 @@
<a href="https://trendshift.io/repositories/8227" target="_blank">
<img src="https://trendshift.io/api/badge/repositories/8227" alt="Calcium-Ion%2Fnew-api | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/>
</a>
<br>
<a href="https://hellogithub.com/repository/QuantumNous/new-api" target="_blank">
<img src="https://api.hellogithub.com/v1/widgets/recommend.svg?rid=539ac4217e69431684ad4a0bab768811&claim_uid=tbFPfKIDHpc4TzR" alt="FeaturedHelloGitHub" style="width: 250px; height: 54px;" width="250" height="54" />
</a>
<a href="https://www.producthunt.com/products/new-api/launches/new-api?embed=true&utm_source=badge-featured&utm_medium=badge&utm_campaign=badge-new-api" target="_blank" rel="noopener noreferrer">
<img src="https://api.producthunt.com/widgets/embed-image/v1/featured.svg?post_id=1047693&theme=light&t=1769577875005" alt="New API - All-in-one AI asset management gateway. | Product Hunt" style="width: 250px; height: 54px;" width="250" height="54" />
</a>
</p>
<p align="center">
@@ -193,9 +200,11 @@ docker run --name new-api -d --restart always \
### 🔐 認証とセキュリティ
- 😈 Discord認証ログイン
- 🤖 LinuxDO認証ログイン
- 📱 Telegram認証ログイン
- 🔑 OIDC統一認証
- 🔍 Key使用量クォータ照会[neko-api-key-tool](https://github.com/Calcium-Ion/neko-api-key-tool)と併用)
@@ -206,10 +215,6 @@ docker run --name new-api -d --restart always \
- ⚡ [OpenAI Realtime API](https://docs.newapi.pro/ja/docs/api/ai-model/realtime/create-realtime-session)Azureを含む
- ⚡ [Claude Messages](https://docs.newapi.pro/ja/docs/api/ai-model/chat/create-message)
- ⚡ [Google Gemini](https://doc.newapi.pro/ja/api/google-gemini-chat)
- 🔄 [Rerankモデル](https://docs.newapi.pro/ja/docs/api/ai-model/rerank/create-rerank)
- ⚡ [OpenAI Realtime API](https://docs.newapi.pro/ja/docs/api/ai-model/realtime/create-realtime-session)
- ⚡ [Claude Messages](https://docs.newapi.pro/ja/docs/api/ai-model/chat/create-message)
- ⚡ [Google Gemini](https://doc.newapi.pro/ja/api/google-gemini-chat)
- 🔄 [Rerankモデル](https://docs.newapi.pro/ja/docs/api/ai-model/rerank/create-rerank)Cohere、Jina
**インテリジェントルーティング:**
@@ -218,9 +223,11 @@ docker run --name new-api -d --restart always \
- 🚦 ユーザーレベルモデルレート制限
**フォーマット変換:**
- 🔄 OpenAI ⇄ Claude Messages
- 🔄 OpenAI Gemini Chat
- 🔄 思考からコンテンツへの機能
- 🔄 **OpenAI Compatible ⇄ Claude Messages**
- 🔄 **OpenAI Compatible → Google Gemini**
- 🔄 **Google Gemini → OpenAI Compatible** - テキストのみ、関数呼び出しはまだサポートされていません
- 🚧 **OpenAI Compatible ⇄ OpenAI Responses** - 開発中
- 🔄 **思考からコンテンツへの機能**
**Reasoning Effort サポート:**
@@ -255,12 +262,13 @@ docker run --name new-api -d --restart always \
| モデルタイプ | 説明 | ドキュメント |
|---------|------|------|
| 🤖 OpenAI GPTs | gpt-4-gizmo-* シリーズ | - |
| 🎨 Midjourney-Proxy | [Midjourney-Proxy(Plus)](https://github.com/novicezk/midjourney-proxy) | [ドキュメント](https://doc.newapi.pro/ja/api/midjourney-proxy-image) |
| 🎵 Suno-API | [Suno API](https://github.com/Suno-API/Suno-API) | [ドキュメント](https://doc.newapi.pro/ja/api/suno-music) |
| 🔄 Rerank | Cohere、Jina | [ドキュメント](https://docs.newapi.pro/ja/docs/api/ai-model/rerank/create-rerank) |
| 💬 Claude | Messagesフォーマット | [ドキュメント](https://docs.newapi.pro/ja/docs/api/ai-model/chat/create-message) |
| 🌐 Gemini | Google Geminiフォーマット | [ドキュメント](https://doc.newapi.pro/ja/api/google-gemini-chat) |
| 🤖 OpenAI-Compatible | OpenAI互換モデル | [ドキュメント](https://docs.newapi.pro/ja/docs/api/ai-model/chat/openai/createchatcompletion) |
| 🤖 OpenAI Responses | OpenAI Responsesフォーマット | [ドキュメント](https://docs.newapi.pro/ja/docs/api/ai-model/chat/openai/createresponse) |
| 🎨 Midjourney-Proxy | [Midjourney-Proxy(Plus)](https://github.com/novicezk/midjourney-proxy) | [ドキュメント](https://doc.newapi.pro/api/midjourney-proxy-image) |
| 🎵 Suno-API | [Suno API](https://github.com/Suno-API/Suno-API) | [ドキュメント](https://doc.newapi.pro/api/suno-music) |
| 🔄 Rerank | Cohere、Jina | [ドキュメント](https://docs.newapi.pro/ja/docs/api/ai-model/rerank/creatererank) |
| 💬 Claude | Messagesフォーマット | [ドキュメント](https://docs.newapi.pro/ja/docs/api/ai-model/chat/createmessage) |
| 🌐 Gemini | Google Geminiフォーマット | [ドキュメント](https://docs.newapi.pro/ja/docs/api/ai-model/chat/gemini/geminirelayv1beta) |
| 🔧 Dify | ChatFlowモード | - |
| 🎯 カスタム | 完全な呼び出しアドレスの入力をサポート | - |
@@ -269,16 +277,16 @@ docker run --name new-api -d --restart always \
<details>
<summary>完全なインターフェースリストを表示</summary>
- [チャットインターフェース (Chat Completions)](https://docs.newapi.pro/ja/docs/api/ai-model/chat/openai/create-chat-completion)
- [レスポンスインターフェース (Responses)](https://docs.newapi.pro/ja/docs/api/ai-model/chat/openai/create-response)
- [イメージインターフェース (Image)](https://docs.newapi.pro/ja/docs/api/ai-model/images/openai/v1-images-generations--post)
- [チャットインターフェース (Chat Completions)](https://docs.newapi.pro/ja/docs/api/ai-model/chat/openai/createchatcompletion)
- [レスポンスインターフェース (Responses)](https://docs.newapi.pro/ja/docs/api/ai-model/chat/openai/createresponse)
- [イメージインターフェース (Image)](https://docs.newapi.pro/ja/docs/api/ai-model/images/openai/post-v1-images-generations)
- [オーディオインターフェース (Audio)](https://docs.newapi.pro/ja/docs/api/ai-model/audio/openai/create-transcription)
- [ビデオインターフェース (Video)](https://docs.newapi.pro/ja/docs/api/ai-model/videos/create-video-generation)
- [エンベッドインターフェース (Embeddings)](https://docs.newapi.pro/ja/docs/api/ai-model/embeddings/create-embedding)
- [再ランク付けインターフェース (Rerank)](https://docs.newapi.pro/ja/docs/api/ai-model/rerank/create-rerank)
- [リアルタイム対話インターフェース (Realtime)](https://docs.newapi.pro/ja/docs/api/ai-model/realtime/create-realtime-session)
- [Claudeチャット](https://docs.newapi.pro/ja/docs/api/ai-model/chat/create-message)
- [Google Geminiチャット](https://doc.newapi.pro/ja/api/google-gemini-chat)
- [ビデオインターフェース (Video)](https://docs.newapi.pro/ja/docs/api/ai-model/audio/openai/createspeech)
- [エンベッドインターフェース (Embeddings)](https://docs.newapi.pro/ja/docs/api/ai-model/embeddings/createembedding)
- [再ランク付けインターフェース (Rerank)](https://docs.newapi.pro/ja/docs/api/ai-model/rerank/creatererank)
- [リアルタイム対話インターフェース (Realtime)](https://docs.newapi.pro/ja/docs/api/ai-model/realtime/createrealtimesession)
- [Claudeチャット](https://docs.newapi.pro/ja/docs/api/ai-model/chat/createmessage)
- [Google Geminiチャット](https://docs.newapi.pro/ja/docs/api/ai-model/chat/gemini/geminirelayv1beta)
</details>

384
README.md
View File

@@ -4,11 +4,11 @@
# New API
🍥 **新一代大模型网关与AI资产管理系统**
🍥 **Next-Generation LLM Gateway and AI Asset Management System**
<p align="center">
<strong>中文</strong> |
<a href="./README.en.md">English</a> |
<a href="./README.zh.md">中文</a> |
<strong>English</strong> |
<a href="./README.fr.md">Français</a> |
<a href="./README.ja.md">日本語</a>
</p>
@@ -35,34 +35,41 @@
<a href="https://trendshift.io/repositories/8227" target="_blank">
<img src="https://trendshift.io/api/badge/repositories/8227" alt="Calcium-Ion%2Fnew-api | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/>
</a>
<br>
<a href="https://hellogithub.com/repository/QuantumNous/new-api" target="_blank">
<img src="https://api.hellogithub.com/v1/widgets/recommend.svg?rid=539ac4217e69431684ad4a0bab768811&claim_uid=tbFPfKIDHpc4TzR" alt="FeaturedHelloGitHub" style="width: 250px; height: 54px;" width="250" height="54" />
</a>
<a href="https://www.producthunt.com/products/new-api/launches/new-api?embed=true&utm_source=badge-featured&utm_medium=badge&utm_campaign=badge-new-api" target="_blank" rel="noopener noreferrer">
<img src="https://api.producthunt.com/widgets/embed-image/v1/featured.svg?post_id=1047693&theme=light&t=1769577875005" alt="New API - All-in-one AI asset management gateway. | Product Hunt" style="width: 250px; height: 54px;" width="250" height="54" />
</a>
</p>
<p align="center">
<a href="#-快速开始">快速开始</a> •
<a href="#-主要特性">主要特性</a> •
<a href="#-部署">部署</a> •
<a href="#-文档">文档</a> •
<a href="#-帮助支持">帮助</a>
<a href="#-quick-start">Quick Start</a> •
<a href="#-key-features">Key Features</a> •
<a href="#-deployment">Deployment</a> •
<a href="#-documentation">Documentation</a> •
<a href="#-help-support">Help</a>
</p>
</div>
## 📝 项目说明
## 📝 Project Description
> [!NOTE]
> 本项目为开源项目,在 [One API](https://github.com/songquanpeng/one-api) 的基础上进行二次开发
> This is an open-source project developed based on [One API](https://github.com/songquanpeng/one-api)
> [!IMPORTANT]
> - 本项目仅供个人学习使用,不保证稳定性,且不提供任何技术支持
> - 使用者必须在遵循 OpenAI 的 [使用条款](https://openai.com/policies/terms-of-use) 以及**法律法规**的情况下使用,不得用于非法用途
> - 根据 [《生成式人工智能服务管理暂行办法》](http://www.cac.gov.cn/2023-07/13/c_1690898327029107.htm) 的要求,请勿对中国地区公众提供一切未经备案的生成式人工智能服务
> - This project is for personal learning purposes only, with no guarantee of stability or technical support
> - Users must comply with OpenAI's [Terms of Use](https://openai.com/policies/terms-of-use) and **applicable laws and regulations**, and must not use it for illegal purposes
> - According to the [《Interim Measures for the Management of Generative Artificial Intelligence Services》](http://www.cac.gov.cn/2023-07/13/c_1690898327029107.htm), please do not provide any unregistered generative AI services to the public in China.
---
## 🤝 我们信任的合作伙伴
## 🤝 Trusted Partners
<p align="center">
<em>排名不分先后</em>
<em>No particular order</em>
</p>
<p align="center">
@@ -70,13 +77,13 @@
<img src="./docs/images/cherry-studio.png" alt="Cherry Studio" height="80" />
</a>
<a href="https://bda.pku.edu.cn/" target="_blank">
<img src="./docs/images/pku.png" alt="北京大学" height="80" />
<img src="./docs/images/pku.png" alt="Peking University" height="80" />
</a>
<a href="https://www.compshare.cn/?ytag=GPU_yy_gh_newapi" target="_blank">
<img src="./docs/images/ucloud.png" alt="UCloud 优刻得" height="80" />
<img src="./docs/images/ucloud.png" alt="UCloud" height="80" />
</a>
<a href="https://www.aliyun.com/" target="_blank">
<img src="./docs/images/aliyun.png" alt="阿里云" height="80" />
<img src="./docs/images/aliyun.png" alt="Alibaba Cloud" height="80" />
</a>
<a href="https://io.net/" target="_blank">
<img src="./docs/images/io-net.png" alt="IO.NET" height="80" />
@@ -85,7 +92,7 @@
---
## 🙏 特别鸣谢
## 🙏 Special Thanks
<p align="center">
<a href="https://www.jetbrains.com/?from=new-api" target="_blank">
@@ -94,42 +101,42 @@
</p>
<p align="center">
<strong>感谢 <a href="https://www.jetbrains.com/?from=new-api">JetBrains</a> 为本项目提供免费的开源开发许可证</strong>
<strong>Thanks to <a href="https://www.jetbrains.com/?from=new-api">JetBrains</a> for providing free open-source development license for this project</strong>
</p>
---
## 🚀 快速开始
## 🚀 Quick Start
### 使用 Docker Compose(推荐)
### Using Docker Compose (Recommended)
```bash
# 克隆项目
# Clone the project
git clone https://github.com/QuantumNous/new-api.git
cd new-api
# 编辑 docker-compose.yml 配置
# Edit docker-compose.yml configuration
nano docker-compose.yml
# 启动服务
# Start the service
docker-compose up -d
```
<details>
<summary><strong>使用 Docker 命令</strong></summary>
<summary><strong>Using Docker Commands</strong></summary>
```bash
# 拉取最新镜像
# Pull the latest image
docker pull calciumion/new-api:latest
# 使用 SQLite(默认)
# Using SQLite (default)
docker run --name new-api -d --restart always \
-p 3000:3000 \
-e TZ=Asia/Shanghai \
-v ./data:/data \
calciumion/new-api:latest
# 使用 MySQL
# Using MySQL
docker run --name new-api -d --restart always \
-p 3000:3000 \
-e SQL_DSN="root:123456@tcp(localhost:3306)/oneapi" \
@@ -138,92 +145,94 @@ docker run --name new-api -d --restart always \
calciumion/new-api:latest
```
> **💡 提示:** `-v ./data:/data` 会将数据保存在当前目录的 `data` 文件夹中,你也可以改为绝对路径如 `-v /your/custom/path:/data`
> **💡 Tip:** `-v ./data:/data` will save data in the `data` folder of the current directory, you can also change it to an absolute path like `-v /your/custom/path:/data`
</details>
---
🎉 部署完成后,访问 `http://localhost:3000` 即可使用!
🎉 After deployment is complete, visit `http://localhost:3000` to start using!
📖 更多部署方式请参考 [部署指南](https://docs.newapi.pro/zh/docs/installation)
📖 For more deployment methods, please refer to [Deployment Guide](https://docs.newapi.pro/en/docs/installation)
---
## 📚 文档
## 📚 Documentation
<div align="center">
### 📖 [官方文档](https://docs.newapi.pro/zh/docs) | [![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/QuantumNous/new-api)
### 📖 [Official Documentation](https://docs.newapi.pro/en/docs) | [![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/QuantumNous/new-api)
</div>
**快速导航:**
**Quick Navigation:**
| 分类 | 链接 |
| Category | Link |
|------|------|
| 🚀 部署指南 | [安装文档](https://docs.newapi.pro/zh/docs/installation) |
| ⚙️ 环境配置 | [环境变量](https://docs.newapi.pro/zh/docs/installation/config-maintenance/environment-variables) |
| 📡 接口文档 | [API 文档](https://docs.newapi.pro/zh/docs/api) |
| ❓ 常见问题 | [FAQ](https://docs.newapi.pro/zh/docs/support/faq) |
| 💬 社区交流 | [交流渠道](https://docs.newapi.pro/zh/docs/support/community-interaction) |
| 🚀 Deployment Guide | [Installation Documentation](https://docs.newapi.pro/en/docs/installation) |
| ⚙️ Environment Configuration | [Environment Variables](https://docs.newapi.pro/en/docs/installation/config-maintenance/environment-variables) |
| 📡 API Documentation | [API Documentation](https://docs.newapi.pro/en/docs/api) |
| ❓ FAQ | [FAQ](https://docs.newapi.pro/en/docs/support/faq) |
| 💬 Community Interaction | [Communication Channels](https://docs.newapi.pro/en/docs/support/community-interaction) |
---
## ✨ 主要特性
## ✨ Key Features
> 详细特性请参考 [特性说明](https://docs.newapi.pro/zh/docs/guide/wiki/basic-concepts/features-introduction)
> For detailed features, please refer to [Features Introduction](https://docs.newapi.pro/en/docs/guide/wiki/basic-concepts/features-introduction)
### 🎨 核心功能
### 🎨 Core Functions
| 特性 | 说明 |
| Feature | Description |
|------|------|
| 🎨 全新 UI | 现代化的用户界面设计 |
| 🌍 多语言 | 支持中文、英文、法语、日语 |
| 🔄 数据兼容 | 完全兼容原版 One API 数据库 |
| 📈 数据看板 | 可视化控制台与统计分析 |
| 🔒 权限管理 | 令牌分组、模型限制、用户管理 |
| 🎨 New UI | Modern user interface design |
| 🌍 Multi-language | Supports Chinese, English, French, Japanese |
| 🔄 Data Compatibility | Fully compatible with the original One API database |
| 📈 Data Dashboard | Visual console and statistical analysis |
| 🔒 Permission Management | Token grouping, model restrictions, user management |
### 💰 支付与计费
### 💰 Payment and Billing
-在线充值(易支付、Stripe
-模型按次数收费
-缓存计费支持(OpenAIAzureDeepSeekClaudeQwen等所有支持的模型)
-灵活的计费策略配置
-Online recharge (EPay, Stripe)
-Pay-per-use model pricing
-Cache billing support (OpenAI, Azure, DeepSeek, Claude, Qwen and all supported models)
-Flexible billing policy configuration
### 🔐 授权与安全
### 🔐 Authorization and Security
- 😈 Discord 授权登录
- 🤖 LinuxDO 授权登录
- 📱 Telegram 授权登录
- 🔑 OIDC 统一认证
- 🔍 Key 查询使用额度(配合 [neko-api-key-tool](https://github.com/Calcium-Ion/neko-api-key-tool)
- 😈 Discord authorization login
- 🤖 LinuxDO authorization login
- 📱 Telegram authorization login
- 🔑 OIDC unified authentication
- 🔍 Key quota query usage (with [neko-api-key-tool](https://github.com/Calcium-Ion/neko-api-key-tool))
### 🚀 高级功能
### 🚀 Advanced Features
**API 格式支持:**
- ⚡ [OpenAI Responses](https://docs.newapi.pro/zh/docs/api/ai-model/chat/openai/create-response)
- ⚡ [OpenAI Realtime API](https://docs.newapi.pro/zh/docs/api/ai-model/realtime/create-realtime-session)(含 Azure
- ⚡ [Claude Messages](https://docs.newapi.pro/zh/docs/api/ai-model/chat/create-message)
- ⚡ [Google Gemini](https://doc.newapi.pro/api/google-gemini-chat)
- 🔄 [Rerank 模型](https://docs.newapi.pro/zh/docs/api/ai-model/rerank/create-rerank)CohereJina
**API Format Support:**
- ⚡ [OpenAI Responses](https://docs.newapi.pro/en/docs/api/ai-model/chat/openai/create-response)
- ⚡ [OpenAI Realtime API](https://docs.newapi.pro/en/docs/api/ai-model/realtime/create-realtime-session) (including Azure)
- ⚡ [Claude Messages](https://docs.newapi.pro/en/docs/api/ai-model/chat/create-message)
- ⚡ [Google Gemini](https://doc.newapi.pro/en/api/google-gemini-chat)
- 🔄 [Rerank Models](https://docs.newapi.pro/en/docs/api/ai-model/rerank/create-rerank) (Cohere, Jina)
**智能路由:**
- ⚖️ 渠道加权随机
- 🔄 失败自动重试
- 🚦 用户级别模型限流
**Intelligent Routing:**
- ⚖️ Channel weighted random
- 🔄 Automatic retry on failure
- 🚦 User-level model rate limiting
**格式转换:**
- 🔄 OpenAI ⇄ Claude Messages
- 🔄 OpenAI Gemini Chat
- 🔄 思考转内容功能
**Format Conversion:**
- 🔄 **OpenAI Compatible ⇄ Claude Messages**
- 🔄 **OpenAI Compatible → Google Gemini**
- 🔄 **Google Gemini → OpenAI Compatible** - Text only, function calling not supported yet
- 🚧 **OpenAI Compatible ⇄ OpenAI Responses** - In development
- 🔄 **Thinking-to-content functionality**
**Reasoning Effort 支持:**
**Reasoning Effort Support:**
<details>
<summary>查看详细配置</summary>
<summary>View detailed configuration</summary>
**OpenAI 系列模型:**
**OpenAI series models:**
- `o3-mini-high` - High reasoning effort
- `o3-mini-medium` - Medium reasoning effort
- `o3-mini-low` - Low reasoning effort
@@ -231,119 +240,120 @@ docker run --name new-api -d --restart always \
- `gpt-5-medium` - Medium reasoning effort
- `gpt-5-low` - Low reasoning effort
**Claude 思考模型:**
- `claude-3-7-sonnet-20250219-thinking` - 启用思考模式
**Claude thinking models:**
- `claude-3-7-sonnet-20250219-thinking` - Enable thinking mode
**Google Gemini 系列模型:**
- `gemini-2.5-flash-thinking` - 启用思考模式
- `gemini-2.5-flash-nothinking` - 禁用思考模式
- `gemini-2.5-pro-thinking` - 启用思考模式
- `gemini-2.5-pro-thinking-128` - 启用思考模式,并设置思考预算为128tokens
- 也可以直接在 Gemini 模型名称后追加 `-low` / `-medium` / `-high` 来控制思考力度(无需再设置思考预算后缀)
**Google Gemini series models:**
- `gemini-2.5-flash-thinking` - Enable thinking mode
- `gemini-2.5-flash-nothinking` - Disable thinking mode
- `gemini-2.5-pro-thinking` - Enable thinking mode
- `gemini-2.5-pro-thinking-128` - Enable thinking mode with thinking budget of 128 tokens
- You can also append `-low`, `-medium`, or `-high` to any Gemini model name to request the corresponding reasoning effort (no extra thinking-budget suffix needed).
</details>
---
## 🤖 模型支持
## 🤖 Model Support
> 详情请参考 [接口文档 - 中继接口](https://docs.newapi.pro/zh/docs/api)
> For details, please refer to [API Documentation - Relay Interface](https://docs.newapi.pro/en/docs/api)
| 模型类型 | 说明 | 文档 |
| Model Type | Description | Documentation |
|---------|------|------|
| 🤖 OpenAI GPTs | gpt-4-gizmo-* 系列 | - |
| 🎨 Midjourney-Proxy | [Midjourney-Proxy(Plus)](https://github.com/novicezk/midjourney-proxy) | [文档](https://doc.newapi.pro/api/midjourney-proxy-image) |
| 🎵 Suno-API | [Suno API](https://github.com/Suno-API/Suno-API) | [文档](https://doc.newapi.pro/api/suno-music) |
| 🔄 Rerank | Cohere、Jina | [文档](https://docs.newapi.pro/zh/docs/api/ai-model/rerank/create-rerank) |
| 💬 Claude | Messages 格式 | [文档](https://docs.newapi.pro/zh/docs/api/ai-model/chat/create-message) |
| 🌐 Gemini | Google Gemini 格式 | [文档](https://doc.newapi.pro/api/google-gemini-chat) |
| 🔧 Dify | ChatFlow 模式 | - |
| 🎯 自定义 | 支持完整调用地址 | - |
| 🤖 OpenAI-Compatible | OpenAI compatible models | [Documentation](https://docs.newapi.pro/en/docs/api/ai-model/chat/openai/createchatcompletion) |
| 🤖 OpenAI Responses | OpenAI Responses format | [Documentation](https://docs.newapi.pro/en/docs/api/ai-model/chat/openai/createresponse) |
| 🎨 Midjourney-Proxy | [Midjourney-Proxy(Plus)](https://github.com/novicezk/midjourney-proxy) | [Documentation](https://doc.newapi.pro/api/midjourney-proxy-image) |
| 🎵 Suno-API | [Suno API](https://github.com/Suno-API/Suno-API) | [Documentation](https://doc.newapi.pro/api/suno-music) |
| 🔄 Rerank | Cohere, Jina | [Documentation](https://docs.newapi.pro/en/docs/api/ai-model/rerank/creatererank) |
| 💬 Claude | Messages format | [Documentation](https://docs.newapi.pro/en/docs/api/ai-model/chat/createmessage) |
| 🌐 Gemini | Google Gemini format | [Documentation](https://docs.newapi.pro/en/docs/api/ai-model/chat/gemini/geminirelayv1beta) |
| 🔧 Dify | ChatFlow mode | - |
| 🎯 Custom | Supports complete call address | - |
### 📡 支持的接口
### 📡 Supported Interfaces
<details>
<summary>查看完整接口列表</summary>
<summary>View complete interface list</summary>
- [聊天接口 (Chat Completions)](https://docs.newapi.pro/zh/docs/api/ai-model/chat/openai/create-chat-completion)
- [响应接口 (Responses)](https://docs.newapi.pro/zh/docs/api/ai-model/chat/openai/create-response)
- [图像接口 (Image)](https://docs.newapi.pro/zh/docs/api/ai-model/images/openai/v1-images-generations--post)
- [音频接口 (Audio)](https://docs.newapi.pro/zh/docs/api/ai-model/audio/openai/create-transcription)
- [视频接口 (Video)](https://docs.newapi.pro/zh/docs/api/ai-model/videos/create-video-generation)
- [嵌入接口 (Embeddings)](https://docs.newapi.pro/zh/docs/api/ai-model/embeddings/create-embedding)
- [重排序接口 (Rerank)](https://docs.newapi.pro/zh/docs/api/ai-model/rerank/create-rerank)
- [实时对话 (Realtime)](https://docs.newapi.pro/zh/docs/api/ai-model/realtime/create-realtime-session)
- [Claude 聊天](https://docs.newapi.pro/zh/docs/api/ai-model/chat/create-message)
- [Google Gemini 聊天](https://doc.newapi.pro/api/google-gemini-chat)
- [Chat Interface (Chat Completions)](https://docs.newapi.pro/en/docs/api/ai-model/chat/openai/createchatcompletion)
- [Response Interface (Responses)](https://docs.newapi.pro/en/docs/api/ai-model/chat/openai/createresponse)
- [Image Interface (Image)](https://docs.newapi.pro/en/docs/api/ai-model/images/openai/post-v1-images-generations)
- [Audio Interface (Audio)](https://docs.newapi.pro/en/docs/api/ai-model/audio/openai/create-transcription)
- [Video Interface (Video)](https://docs.newapi.pro/en/docs/api/ai-model/audio/openai/createspeech)
- [Embedding Interface (Embeddings)](https://docs.newapi.pro/en/docs/api/ai-model/embeddings/createembedding)
- [Rerank Interface (Rerank)](https://docs.newapi.pro/en/docs/api/ai-model/rerank/creatererank)
- [Realtime Conversation (Realtime)](https://docs.newapi.pro/en/docs/api/ai-model/realtime/createrealtimesession)
- [Claude Chat](https://docs.newapi.pro/en/docs/api/ai-model/chat/createmessage)
- [Google Gemini Chat](https://docs.newapi.pro/en/docs/api/ai-model/chat/gemini/geminirelayv1beta)
</details>
---
## 🚢 部署
## 🚢 Deployment
> [!TIP]
> **最新版 Docker 镜像:** `calciumion/new-api:latest`
> **Latest Docker image:** `calciumion/new-api:latest`
### 📋 部署要求
### 📋 Deployment Requirements
| 组件 | 要求 |
| Component | Requirement |
|------|------|
| **本地数据库** | SQLiteDocker 需挂载 `/data` 目录)|
| **远程数据库** | MySQL ≥ 5.7.8 PostgreSQL ≥ 9.6 |
| **容器引擎** | Docker / Docker Compose |
| **Local database** | SQLite (Docker must mount `/data` directory)|
| **Remote database** | MySQL ≥ 5.7.8 or PostgreSQL ≥ 9.6 |
| **Container engine** | Docker / Docker Compose |
### ⚙️ 环境变量配置
### ⚙️ Environment Variable Configuration
<details>
<summary>常用环境变量配置</summary>
<summary>Common environment variable configuration</summary>
| 变量名 | 说明 | 默认值 |
|--------|--------------------------------------------------------------|--------|
| `SESSION_SECRET` | 会话密钥(多机部署必须) | - |
| `CRYPTO_SECRET` | 加密密钥Redis 必须) | - |
| `SQL_DSN` | 数据库连接字符串 | - |
| `REDIS_CONN_STRING` | Redis 连接字符串 | - |
| `STREAMING_TIMEOUT` | 流式超时时间(秒) | `300` |
| `STREAM_SCANNER_MAX_BUFFER_MB` | 流式扫描器单行最大缓冲MB图像生成等超大 `data:` 片段(如 4K 图片 base64需适当调大 | `64` |
| `MAX_REQUEST_BODY_MB` | 请求体最大大小MB**解压后**计;防止超大请求/zip bomb 导致内存暴涨),超过将返回 `413` | `32` |
| `AZURE_DEFAULT_API_VERSION` | Azure API 版本 | `2025-04-01-preview` |
| `ERROR_LOG_ENABLED` | 错误日志开关 | `false` |
| `PYROSCOPE_URL` | Pyroscope 服务地址 | - |
| `PYROSCOPE_APP_NAME` | Pyroscope 应用名 | `new-api` |
| `PYROSCOPE_BASIC_AUTH_USER` | Pyroscope Basic Auth 用户名 | - |
| `PYROSCOPE_BASIC_AUTH_PASSWORD` | Pyroscope Basic Auth 密码 | - |
| `PYROSCOPE_MUTEX_RATE` | Pyroscope mutex 采样率 | `5` |
| `PYROSCOPE_BLOCK_RATE` | Pyroscope block 采样率 | `5` |
| `HOSTNAME` | Pyroscope 标签里的主机名 | `new-api` |
| Variable Name | Description | Default Value |
|--------|------|--------|
| `SESSION_SECRET` | Session secret (required for multi-machine deployment) | - |
| `CRYPTO_SECRET` | Encryption secret (required for Redis) | - |
| `SQL_DSN` | Database connection string | - |
| `REDIS_CONN_STRING` | Redis connection string | - |
| `STREAMING_TIMEOUT` | Streaming timeout (seconds) | `300` |
| `STREAM_SCANNER_MAX_BUFFER_MB` | Max per-line buffer (MB) for the stream scanner; increase when upstream sends huge image/base64 payloads | `64` |
| `MAX_REQUEST_BODY_MB` | Max request body size (MB, counted **after decompression**; prevents huge requests/zip bombs from exhausting memory). Exceeding it returns `413` | `32` |
| `AZURE_DEFAULT_API_VERSION` | Azure API version | `2025-04-01-preview` |
| `ERROR_LOG_ENABLED` | Error log switch | `false` |
| `PYROSCOPE_URL` | Pyroscope server address | - |
| `PYROSCOPE_APP_NAME` | Pyroscope application name | `new-api` |
| `PYROSCOPE_BASIC_AUTH_USER` | Pyroscope basic auth user | - |
| `PYROSCOPE_BASIC_AUTH_PASSWORD` | Pyroscope basic auth password | - |
| `PYROSCOPE_MUTEX_RATE` | Pyroscope mutex sampling rate | `5` |
| `PYROSCOPE_BLOCK_RATE` | Pyroscope block sampling rate | `5` |
| `HOSTNAME` | Hostname tag for Pyroscope | `new-api` |
📖 **完整配置:** [环境变量文档](https://docs.newapi.pro/zh/docs/installation/config-maintenance/environment-variables)
📖 **Complete configuration:** [Environment Variables Documentation](https://docs.newapi.pro/en/docs/installation/config-maintenance/environment-variables)
</details>
### 🔧 部署方式
### 🔧 Deployment Methods
<details>
<summary><strong>方式 1Docker Compose(推荐)</strong></summary>
<summary><strong>Method 1: Docker Compose (Recommended)</strong></summary>
```bash
# 克隆项目
# Clone the project
git clone https://github.com/QuantumNous/new-api.git
cd new-api
# 编辑配置
# Edit configuration
nano docker-compose.yml
# 启动服务
# Start service
docker-compose up -d
```
</details>
<details>
<summary><strong>方式 2Docker 命令</strong></summary>
<summary><strong>Method 2: Docker Commands</strong></summary>
**使用 SQLite**
**Using SQLite:**
```bash
docker run --name new-api -d --restart always \
-p 3000:3000 \
@@ -352,7 +362,7 @@ docker run --name new-api -d --restart always \
calciumion/new-api:latest
```
**使用 MySQL**
**Using MySQL:**
```bash
docker run --name new-api -d --restart always \
-p 3000:3000 \
@@ -362,76 +372,76 @@ docker run --name new-api -d --restart always \
calciumion/new-api:latest
```
> **💡 路径说明:**
> - `./data:/data` - 相对路径,数据保存在当前目录的 data 文件夹
> - 也可使用绝对路径,如:`/your/custom/path:/data`
> **💡 Path explanation:**
> - `./data:/data` - Relative path, data saved in the data folder of the current directory
> - You can also use absolute path, e.g.: `/your/custom/path:/data`
</details>
<details>
<summary><strong>方式 3宝塔面板</strong></summary>
<summary><strong>Method 3: BaoTa Panel</strong></summary>
1. 安装宝塔面板(≥ 9.2.0 版本)
2. 在应用商店搜索 **New-API**
3. 一键安装
1. Install BaoTa Panel (≥ 9.2.0 version)
2. Search for **New-API** in the application store
3. One-click installation
📖 [图文教程](./docs/BT.md)
📖 [Tutorial with images](./docs/BT.md)
</details>
### ⚠️ 多机部署注意事项
### ⚠️ Multi-machine Deployment Considerations
> [!WARNING]
> - **必须设置** `SESSION_SECRET` - 否则登录状态不一致
> - **公用 Redis 必须设置** `CRYPTO_SECRET` - 否则数据无法解密
> - **Must set** `SESSION_SECRET` - Otherwise login status inconsistent
> - **Shared Redis must set** `CRYPTO_SECRET` - Otherwise data cannot be decrypted
### 🔄 渠道重试与缓存
### 🔄 Channel Retry and Cache
**重试配置:** `设置 → 运营设置 → 通用设置 → 失败重试次数`
**Retry configuration:** `Settings → Operation Settings → General Settings → Failure Retry Count`
**缓存配置:**
- `REDIS_CONN_STRING`Redis 缓存(推荐)
- `MEMORY_CACHE_ENABLED`:内存缓存
**Cache configuration:**
- `REDIS_CONN_STRING`: Redis cache (recommended)
- `MEMORY_CACHE_ENABLED`: Memory cache
---
## 🔗 相关项目
## 🔗 Related Projects
### 上游项目
### Upstream Projects
| 项目 | 说明 |
| Project | Description |
|------|------|
| [One API](https://github.com/songquanpeng/one-api) | 原版项目基础 |
| [Midjourney-Proxy](https://github.com/novicezk/midjourney-proxy) | Midjourney 接口支持 |
| [One API](https://github.com/songquanpeng/one-api) | Original project base |
| [Midjourney-Proxy](https://github.com/novicezk/midjourney-proxy) | Midjourney interface support |
### 配套工具
### Supporting Tools
| 项目 | 说明 |
| Project | Description |
|------|------|
| [neko-api-key-tool](https://github.com/Calcium-Ion/neko-api-key-tool) | Key 额度查询工具 |
| [new-api-horizon](https://github.com/Calcium-Ion/new-api-horizon) | New API 高性能优化版 |
| [neko-api-key-tool](https://github.com/Calcium-Ion/neko-api-key-tool) | Key quota query tool |
| [new-api-horizon](https://github.com/Calcium-Ion/new-api-horizon) | New API high-performance optimized version |
---
## 💬 帮助支持
## 💬 Help Support
### 📖 文档资源
### 📖 Documentation Resources
| 资源 | 链接 |
| Resource | Link |
|------|------|
| 📘 常见问题 | [FAQ](https://docs.newapi.pro/zh/docs/support/faq) |
| 💬 社区交流 | [交流渠道](https://docs.newapi.pro/zh/docs/support/community-interaction) |
| 🐛 反馈问题 | [问题反馈](https://docs.newapi.pro/zh/docs/support/feedback-issues) |
| 📚 完整文档 | [官方文档](https://docs.newapi.pro/zh/docs) |
| 📘 FAQ | [FAQ](https://docs.newapi.pro/en/docs/support/faq) |
| 💬 Community Interaction | [Communication Channels](https://docs.newapi.pro/en/docs/support/community-interaction) |
| 🐛 Issue Feedback | [Issue Feedback](https://docs.newapi.pro/en/docs/support/feedback-issues) |
| 📚 Complete Documentation | [Official Documentation](https://docs.newapi.pro/en/docs) |
### 🤝 贡献指南
### 🤝 Contribution Guide
欢迎各种形式的贡献!
Welcome all forms of contribution!
- 🐛 报告 Bug
- 💡 提出新功能
- 📝 改进文档
- 🔧 提交代码
- 🐛 Report Bugs
- 💡 Propose New Features
- 📝 Improve Documentation
- 🔧 Submit Code
---
@@ -447,11 +457,11 @@ docker run --name new-api -d --restart always \
<div align="center">
### 💖 感谢使用 New API
### 💖 Thank you for using New API
如果这个项目对你有帮助,欢迎给我们一个 ⭐️ Star
If this project is helpful to you, welcome to give us a ⭐️ Star
**[官方文档](https://docs.newapi.pro/zh/docs)** • **[问题反馈](https://github.com/Calcium-Ion/new-api/issues)** • **[最新发布](https://github.com/Calcium-Ion/new-api/releases)**
**[Official Documentation](https://docs.newapi.pro/en/docs)** • **[Issue Feedback](https://github.com/Calcium-Ion/new-api/issues)** • **[Latest Release](https://github.com/Calcium-Ion/new-api/releases)**
<sub>Built with ❤️ by QuantumNous</sub>

468
README.zh.md Normal file
View File

@@ -0,0 +1,468 @@
<div align="center">
![new-api](/web/public/logo.png)
# New API
🍥 **新一代大模型网关与AI资产管理系统**
<p align="center">
<strong>中文</strong> |
<a href="./README.md">English</a> |
<a href="./README.fr.md">Français</a> |
<a href="./README.ja.md">日本語</a>
</p>
<p align="center">
<a href="https://raw.githubusercontent.com/Calcium-Ion/new-api/main/LICENSE">
<img src="https://img.shields.io/github/license/Calcium-Ion/new-api?color=brightgreen" alt="license">
</a>
<a href="https://github.com/Calcium-Ion/new-api/releases/latest">
<img src="https://img.shields.io/github/v/release/Calcium-Ion/new-api?color=brightgreen&include_prereleases" alt="release">
</a>
<a href="https://github.com/users/Calcium-Ion/packages/container/package/new-api">
<img src="https://img.shields.io/badge/docker-ghcr.io-blue" alt="docker">
</a>
<a href="https://hub.docker.com/r/CalciumIon/new-api">
<img src="https://img.shields.io/badge/docker-dockerHub-blue" alt="docker">
</a>
<a href="https://goreportcard.com/report/github.com/Calcium-Ion/new-api">
<img src="https://goreportcard.com/badge/github.com/Calcium-Ion/new-api" alt="GoReportCard">
</a>
</p>
<p align="center">
<a href="https://trendshift.io/repositories/8227" target="_blank">
<img src="https://trendshift.io/api/badge/repositories/8227" alt="Calcium-Ion%2Fnew-api | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/>
</a>
<br>
<a href="https://hellogithub.com/repository/QuantumNous/new-api" target="_blank">
<img src="https://api.hellogithub.com/v1/widgets/recommend.svg?rid=539ac4217e69431684ad4a0bab768811&claim_uid=tbFPfKIDHpc4TzR" alt="FeaturedHelloGitHub" style="width: 250px; height: 54px;" width="250" height="54" />
</a>
<a href="https://www.producthunt.com/products/new-api/launches/new-api?embed=true&utm_source=badge-featured&utm_medium=badge&utm_campaign=badge-new-api" target="_blank" rel="noopener noreferrer">
<img src="https://api.producthunt.com/widgets/embed-image/v1/featured.svg?post_id=1047693&theme=light&t=1769577875005" alt="New API - All-in-one AI asset management gateway. | Product Hunt" style="width: 250px; height: 54px;" width="250" height="54" />
</a>
</p>
<p align="center">
<a href="#-快速开始">快速开始</a> •
<a href="#-主要特性">主要特性</a> •
<a href="#-部署">部署</a> •
<a href="#-文档">文档</a> •
<a href="#-帮助支持">帮助</a>
</p>
</div>
## 📝 项目说明
> [!NOTE]
> 本项目为开源项目,在 [One API](https://github.com/songquanpeng/one-api) 的基础上进行二次开发
> [!IMPORTANT]
> - 本项目仅供个人学习使用,不保证稳定性,且不提供任何技术支持
> - 使用者必须在遵循 OpenAI 的 [使用条款](https://openai.com/policies/terms-of-use) 以及**法律法规**的情况下使用,不得用于非法用途
> - 根据 [《生成式人工智能服务管理暂行办法》](http://www.cac.gov.cn/2023-07/13/c_1690898327029107.htm) 的要求,请勿对中国地区公众提供一切未经备案的生成式人工智能服务
---
## 🤝 我们信任的合作伙伴
<p align="center">
<em>排名不分先后</em>
</p>
<p align="center">
<a href="https://www.cherry-ai.com/" target="_blank">
<img src="./docs/images/cherry-studio.png" alt="Cherry Studio" height="80" />
</a>
<a href="https://bda.pku.edu.cn/" target="_blank">
<img src="./docs/images/pku.png" alt="北京大学" height="80" />
</a>
<a href="https://www.compshare.cn/?ytag=GPU_yy_gh_newapi" target="_blank">
<img src="./docs/images/ucloud.png" alt="UCloud 优刻得" height="80" />
</a>
<a href="https://www.aliyun.com/" target="_blank">
<img src="./docs/images/aliyun.png" alt="阿里云" height="80" />
</a>
<a href="https://io.net/" target="_blank">
<img src="./docs/images/io-net.png" alt="IO.NET" height="80" />
</a>
</p>
---
## 🙏 特别鸣谢
<p align="center">
<a href="https://www.jetbrains.com/?from=new-api" target="_blank">
<img src="https://resources.jetbrains.com/storage/products/company/brand/logos/jb_beam.png" alt="JetBrains Logo" width="120" />
</a>
</p>
<p align="center">
<strong>感谢 <a href="https://www.jetbrains.com/?from=new-api">JetBrains</a> 为本项目提供免费的开源开发许可证</strong>
</p>
---
## 🚀 快速开始
### 使用 Docker Compose推荐
```bash
# 克隆项目
git clone https://github.com/QuantumNous/new-api.git
cd new-api
# 编辑 docker-compose.yml 配置
nano docker-compose.yml
# 启动服务
docker-compose up -d
```
<details>
<summary><strong>使用 Docker 命令</strong></summary>
```bash
# 拉取最新镜像
docker pull calciumion/new-api:latest
# 使用 SQLite默认
docker run --name new-api -d --restart always \
-p 3000:3000 \
-e TZ=Asia/Shanghai \
-v ./data:/data \
calciumion/new-api:latest
# 使用 MySQL
docker run --name new-api -d --restart always \
-p 3000:3000 \
-e SQL_DSN="root:123456@tcp(localhost:3306)/oneapi" \
-e TZ=Asia/Shanghai \
-v ./data:/data \
calciumion/new-api:latest
```
> **💡 提示:** `-v ./data:/data` 会将数据保存在当前目录的 `data` 文件夹中,你也可以改为绝对路径如 `-v /your/custom/path:/data`
</details>
---
🎉 部署完成后,访问 `http://localhost:3000` 即可使用!
📖 更多部署方式请参考 [部署指南](https://docs.newapi.pro/zh/docs/installation)
---
## 📚 文档
<div align="center">
### 📖 [官方文档](https://docs.newapi.pro/zh/docs) | [![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/QuantumNous/new-api)
</div>
**快速导航:**
| 分类 | 链接 |
|------|------|
| 🚀 部署指南 | [安装文档](https://docs.newapi.pro/zh/docs/installation) |
| ⚙️ 环境配置 | [环境变量](https://docs.newapi.pro/zh/docs/installation/config-maintenance/environment-variables) |
| 📡 接口文档 | [API 文档](https://docs.newapi.pro/zh/docs/api) |
| ❓ 常见问题 | [FAQ](https://docs.newapi.pro/zh/docs/support/faq) |
| 💬 社区交流 | [交流渠道](https://docs.newapi.pro/zh/docs/support/community-interaction) |
---
## ✨ 主要特性
> 详细特性请参考 [特性说明](https://docs.newapi.pro/zh/docs/guide/wiki/basic-concepts/features-introduction)
### 🎨 核心功能
| 特性 | 说明 |
|------|------|
| 🎨 全新 UI | 现代化的用户界面设计 |
| 🌍 多语言 | 支持中文、英文、法语、日语 |
| 🔄 数据兼容 | 完全兼容原版 One API 数据库 |
| 📈 数据看板 | 可视化控制台与统计分析 |
| 🔒 权限管理 | 令牌分组、模型限制、用户管理 |
### 💰 支付与计费
- ✅ 在线充值易支付、Stripe
- ✅ 模型按次数收费
- ✅ 缓存计费支持OpenAI、Azure、DeepSeek、Claude、Qwen等所有支持的模型
- ✅ 灵活的计费策略配置
### 🔐 授权与安全
- 😈 Discord 授权登录
- 🤖 LinuxDO 授权登录
- 📱 Telegram 授权登录
- 🔑 OIDC 统一认证
- 🔍 Key 查询使用额度(配合 [neko-api-key-tool](https://github.com/Calcium-Ion/neko-api-key-tool)
### 🚀 高级功能
**API 格式支持:**
- ⚡ [OpenAI Responses](https://docs.newapi.pro/zh/docs/api/ai-model/chat/openai/create-response)
- ⚡ [OpenAI Realtime API](https://docs.newapi.pro/zh/docs/api/ai-model/realtime/create-realtime-session)(含 Azure
- ⚡ [Claude Messages](https://docs.newapi.pro/zh/docs/api/ai-model/chat/create-message)
- ⚡ [Google Gemini](https://doc.newapi.pro/api/google-gemini-chat)
- 🔄 [Rerank 模型](https://docs.newapi.pro/zh/docs/api/ai-model/rerank/create-rerank)Cohere、Jina
**智能路由:**
- ⚖️ 渠道加权随机
- 🔄 失败自动重试
- 🚦 用户级别模型限流
**格式转换:**
- 🔄 **OpenAI Compatible ⇄ Claude Messages**
- 🔄 **OpenAI Compatible → Google Gemini**
- 🔄 **Google Gemini → OpenAI Compatible** - 仅支持文本,暂不支持函数调用
- 🚧 **OpenAI Compatible ⇄ OpenAI Responses** - 开发中
- 🔄 **思考转内容功能**
**Reasoning Effort 支持:**
<details>
<summary>查看详细配置</summary>
**OpenAI 系列模型:**
- `o3-mini-high` - High reasoning effort
- `o3-mini-medium` - Medium reasoning effort
- `o3-mini-low` - Low reasoning effort
- `gpt-5-high` - High reasoning effort
- `gpt-5-medium` - Medium reasoning effort
- `gpt-5-low` - Low reasoning effort
**Claude 思考模型:**
- `claude-3-7-sonnet-20250219-thinking` - 启用思考模式
**Google Gemini 系列模型:**
- `gemini-2.5-flash-thinking` - 启用思考模式
- `gemini-2.5-flash-nothinking` - 禁用思考模式
- `gemini-2.5-pro-thinking` - 启用思考模式
- `gemini-2.5-pro-thinking-128` - 启用思考模式并设置思考预算为128tokens
- 也可以直接在 Gemini 模型名称后追加 `-low` / `-medium` / `-high` 来控制思考力度(无需再设置思考预算后缀)
</details>
---
## 🤖 模型支持
> 详情请参考 [接口文档 - 中继接口](https://docs.newapi.pro/zh/docs/api)
| 模型类型 | 说明 | 文档 |
|---------|------|------|
| 🤖 OpenAI-Compatible | OpenAI 兼容模型 | [文档](https://docs.newapi.pro/zh/docs/api/ai-model/chat/openai/createchatcompletion) |
| 🤖 OpenAI Responses | OpenAI Responses 格式 | [文档](https://docs.newapi.pro/zh/docs/api/ai-model/chat/openai/createresponse) |
| 🎨 Midjourney-Proxy | [Midjourney-Proxy(Plus)](https://github.com/novicezk/midjourney-proxy) | [文档](https://doc.newapi.pro/api/midjourney-proxy-image) |
| 🎵 Suno-API | [Suno API](https://github.com/Suno-API/Suno-API) | [文档](https://doc.newapi.pro/api/suno-music) |
| 🔄 Rerank | Cohere、Jina | [文档](https://docs.newapi.pro/zh/docs/api/ai-model/rerank/create-rerank) |
| 💬 Claude | Messages 格式 | [文档](https://docs.newapi.pro/zh/docs/api/ai-model/chat/createmessage) |
| 🌐 Gemini | Google Gemini 格式 | [文档](https://docs.newapi.pro/zh/docs/api/ai-model/chat/gemini/geminirelayv1beta) |
| 🔧 Dify | ChatFlow 模式 | - |
| 🎯 自定义 | 支持完整调用地址 | - |
### 📡 支持的接口
<details>
<summary>查看完整接口列表</summary>
- [聊天接口 (Chat Completions)](https://docs.newapi.pro/zh/docs/api/ai-model/chat/openai/createchatcompletion)
- [响应接口 (Responses)](https://docs.newapi.pro/zh/docs/api/ai-model/chat/openai/createresponse)
- [图像接口 (Image)](https://docs.newapi.pro/zh/docs/api/ai-model/images/openai/post-v1-images-generations)
- [音频接口 (Audio)](https://docs.newapi.pro/zh/docs/api/ai-model/audio/openai/create-transcription)
- [视频接口 (Video)](https://docs.newapi.pro/zh/docs/api/ai-model/audio/openai/createspeech)
- [嵌入接口 (Embeddings)](https://docs.newapi.pro/zh/docs/api/ai-model/embeddings/createembedding)
- [重排序接口 (Rerank)](https://docs.newapi.pro/zh/docs/api/ai-model/rerank/creatererank)
- [实时对话 (Realtime)](https://docs.newapi.pro/zh/docs/api/ai-model/realtime/createrealtimesession)
- [Claude 聊天](https://docs.newapi.pro/zh/docs/api/ai-model/chat/createmessage)
- [Google Gemini 聊天](https://docs.newapi.pro/zh/docs/api/ai-model/chat/gemini/geminirelayv1beta)
</details>
---
## 🚢 部署
> [!TIP]
> **最新版 Docker 镜像:** `calciumion/new-api:latest`
### 📋 部署要求
| 组件 | 要求 |
|------|------|
| **本地数据库** | SQLiteDocker 需挂载 `/data` 目录)|
| **远程数据库** | MySQL ≥ 5.7.8 或 PostgreSQL ≥ 9.6 |
| **容器引擎** | Docker / Docker Compose |
### ⚙️ 环境变量配置
<details>
<summary>常用环境变量配置</summary>
| 变量名 | 说明 | 默认值 |
|--------|--------------------------------------------------------------|--------|
| `SESSION_SECRET` | 会话密钥(多机部署必须) | - |
| `CRYPTO_SECRET` | 加密密钥Redis 必须) | - |
| `SQL_DSN` | 数据库连接字符串 | - |
| `REDIS_CONN_STRING` | Redis 连接字符串 | - |
| `STREAMING_TIMEOUT` | 流式超时时间(秒) | `300` |
| `STREAM_SCANNER_MAX_BUFFER_MB` | 流式扫描器单行最大缓冲MB图像生成等超大 `data:` 片段(如 4K 图片 base64需适当调大 | `64` |
| `MAX_REQUEST_BODY_MB` | 请求体最大大小MB**解压后**计;防止超大请求/zip bomb 导致内存暴涨),超过将返回 `413` | `32` |
| `AZURE_DEFAULT_API_VERSION` | Azure API 版本 | `2025-04-01-preview` |
| `ERROR_LOG_ENABLED` | 错误日志开关 | `false` |
| `PYROSCOPE_URL` | Pyroscope 服务地址 | - |
| `PYROSCOPE_APP_NAME` | Pyroscope 应用名 | `new-api` |
| `PYROSCOPE_BASIC_AUTH_USER` | Pyroscope Basic Auth 用户名 | - |
| `PYROSCOPE_BASIC_AUTH_PASSWORD` | Pyroscope Basic Auth 密码 | - |
| `PYROSCOPE_MUTEX_RATE` | Pyroscope mutex 采样率 | `5` |
| `PYROSCOPE_BLOCK_RATE` | Pyroscope block 采样率 | `5` |
| `HOSTNAME` | Pyroscope 标签里的主机名 | `new-api` |
📖 **完整配置:** [环境变量文档](https://docs.newapi.pro/zh/docs/installation/config-maintenance/environment-variables)
</details>
### 🔧 部署方式
<details>
<summary><strong>方式 1Docker Compose推荐</strong></summary>
```bash
# 克隆项目
git clone https://github.com/QuantumNous/new-api.git
cd new-api
# 编辑配置
nano docker-compose.yml
# 启动服务
docker-compose up -d
```
</details>
<details>
<summary><strong>方式 2Docker 命令</strong></summary>
**使用 SQLite**
```bash
docker run --name new-api -d --restart always \
-p 3000:3000 \
-e TZ=Asia/Shanghai \
-v ./data:/data \
calciumion/new-api:latest
```
**使用 MySQL**
```bash
docker run --name new-api -d --restart always \
-p 3000:3000 \
-e SQL_DSN="root:123456@tcp(localhost:3306)/oneapi" \
-e TZ=Asia/Shanghai \
-v ./data:/data \
calciumion/new-api:latest
```
> **💡 路径说明:**
> - `./data:/data` - 相对路径,数据保存在当前目录的 data 文件夹
> - 也可使用绝对路径,如:`/your/custom/path:/data`
</details>
<details>
<summary><strong>方式 3宝塔面板</strong></summary>
1. 安装宝塔面板(≥ 9.2.0 版本)
2. 在应用商店搜索 **New-API**
3. 一键安装
📖 [图文教程](./docs/BT.md)
</details>
### ⚠️ 多机部署注意事项
> [!WARNING]
> - **必须设置** `SESSION_SECRET` - 否则登录状态不一致
> - **公用 Redis 必须设置** `CRYPTO_SECRET` - 否则数据无法解密
### 🔄 渠道重试与缓存
**重试配置:** `设置 → 运营设置 → 通用设置 → 失败重试次数`
**缓存配置:**
- `REDIS_CONN_STRING`Redis 缓存(推荐)
- `MEMORY_CACHE_ENABLED`:内存缓存
---
## 🔗 相关项目
### 上游项目
| 项目 | 说明 |
|------|------|
| [One API](https://github.com/songquanpeng/one-api) | 原版项目基础 |
| [Midjourney-Proxy](https://github.com/novicezk/midjourney-proxy) | Midjourney 接口支持 |
### 配套工具
| 项目 | 说明 |
|------|------|
| [neko-api-key-tool](https://github.com/Calcium-Ion/neko-api-key-tool) | Key 额度查询工具 |
| [new-api-horizon](https://github.com/Calcium-Ion/new-api-horizon) | New API 高性能优化版 |
---
## 💬 帮助支持
### 📖 文档资源
| 资源 | 链接 |
|------|------|
| 📘 常见问题 | [FAQ](https://docs.newapi.pro/zh/docs/support/faq) |
| 💬 社区交流 | [交流渠道](https://docs.newapi.pro/zh/docs/support/community-interaction) |
| 🐛 反馈问题 | [问题反馈](https://docs.newapi.pro/zh/docs/support/feedback-issues) |
| 📚 完整文档 | [官方文档](https://docs.newapi.pro/zh/docs) |
### 🤝 贡献指南
欢迎各种形式的贡献!
- 🐛 报告 Bug
- 💡 提出新功能
- 📝 改进文档
- 🔧 提交代码
---
## 🌟 Star History
<div align="center">
[![Star History Chart](https://api.star-history.com/svg?repos=Calcium-Ion/new-api&type=Date)](https://star-history.com/#Calcium-Ion/new-api&Date)
</div>
---
<div align="center">
### 💖 感谢使用 New API
如果这个项目对你有帮助,欢迎给我们一个 ⭐️ Star
**[官方文档](https://docs.newapi.pro/zh/docs)** • **[问题反馈](https://github.com/Calcium-Ion/new-api/issues)** • **[最新发布](https://github.com/Calcium-Ion/new-api/releases)**
<sub>Built with ❤️ by QuantumNous</sub>
</div>

View File

@@ -73,6 +73,8 @@ func ChannelType2APIType(channelType int) (int, bool) {
apiType = constant.APITypeMiniMax
case constant.ChannelTypeReplicate:
apiType = constant.APITypeReplicate
case constant.ChannelTypeCodex:
apiType = constant.APITypeCodex
}
if apiType == -1 {
return constant.APITypeOpenAI, false

365
common/body_storage.go Normal file
View File

@@ -0,0 +1,365 @@
package common
import (
"bytes"
"fmt"
"io"
"os"
"path/filepath"
"sync"
"sync/atomic"
"time"
"github.com/google/uuid"
)
// BodyStorage 请求体存储接口
type BodyStorage interface {
io.ReadSeeker
io.Closer
// Bytes 获取全部内容
Bytes() ([]byte, error)
// Size 获取数据大小
Size() int64
// IsDisk 是否是磁盘存储
IsDisk() bool
}
// ErrStorageClosed 存储已关闭错误
var ErrStorageClosed = fmt.Errorf("body storage is closed")
// memoryStorage 内存存储实现
type memoryStorage struct {
data []byte
reader *bytes.Reader
size int64
closed int32
mu sync.Mutex
}
func newMemoryStorage(data []byte) *memoryStorage {
size := int64(len(data))
IncrementMemoryBuffers(size)
return &memoryStorage{
data: data,
reader: bytes.NewReader(data),
size: size,
}
}
func (m *memoryStorage) Read(p []byte) (n int, err error) {
m.mu.Lock()
defer m.mu.Unlock()
if atomic.LoadInt32(&m.closed) == 1 {
return 0, ErrStorageClosed
}
return m.reader.Read(p)
}
func (m *memoryStorage) Seek(offset int64, whence int) (int64, error) {
m.mu.Lock()
defer m.mu.Unlock()
if atomic.LoadInt32(&m.closed) == 1 {
return 0, ErrStorageClosed
}
return m.reader.Seek(offset, whence)
}
func (m *memoryStorage) Close() error {
m.mu.Lock()
defer m.mu.Unlock()
if atomic.CompareAndSwapInt32(&m.closed, 0, 1) {
DecrementMemoryBuffers(m.size)
}
return nil
}
func (m *memoryStorage) Bytes() ([]byte, error) {
m.mu.Lock()
defer m.mu.Unlock()
if atomic.LoadInt32(&m.closed) == 1 {
return nil, ErrStorageClosed
}
return m.data, nil
}
func (m *memoryStorage) Size() int64 {
return m.size
}
func (m *memoryStorage) IsDisk() bool {
return false
}
// diskStorage 磁盘存储实现
type diskStorage struct {
file *os.File
filePath string
size int64
closed int32
mu sync.Mutex
}
func newDiskStorage(data []byte, cachePath string) (*diskStorage, error) {
// 确定缓存目录
dir := cachePath
if dir == "" {
dir = os.TempDir()
}
dir = filepath.Join(dir, "new-api-body-cache")
// 确保目录存在
if err := os.MkdirAll(dir, 0755); err != nil {
return nil, fmt.Errorf("failed to create cache directory: %w", err)
}
// 创建临时文件
filename := fmt.Sprintf("body-%s-%d.tmp", uuid.New().String()[:8], time.Now().UnixNano())
filePath := filepath.Join(dir, filename)
file, err := os.OpenFile(filePath, os.O_CREATE|os.O_RDWR|os.O_EXCL, 0600)
if err != nil {
return nil, fmt.Errorf("failed to create temp file: %w", err)
}
// 写入数据
n, err := file.Write(data)
if err != nil {
file.Close()
os.Remove(filePath)
return nil, fmt.Errorf("failed to write to temp file: %w", err)
}
// 重置文件指针
if _, err := file.Seek(0, io.SeekStart); err != nil {
file.Close()
os.Remove(filePath)
return nil, fmt.Errorf("failed to seek temp file: %w", err)
}
size := int64(n)
IncrementDiskFiles(size)
return &diskStorage{
file: file,
filePath: filePath,
size: size,
}, nil
}
func newDiskStorageFromReader(reader io.Reader, maxBytes int64, cachePath string) (*diskStorage, error) {
// 确定缓存目录
dir := cachePath
if dir == "" {
dir = os.TempDir()
}
dir = filepath.Join(dir, "new-api-body-cache")
// 确保目录存在
if err := os.MkdirAll(dir, 0755); err != nil {
return nil, fmt.Errorf("failed to create cache directory: %w", err)
}
// 创建临时文件
filename := fmt.Sprintf("body-%s-%d.tmp", uuid.New().String()[:8], time.Now().UnixNano())
filePath := filepath.Join(dir, filename)
file, err := os.OpenFile(filePath, os.O_CREATE|os.O_RDWR|os.O_EXCL, 0600)
if err != nil {
return nil, fmt.Errorf("failed to create temp file: %w", err)
}
// 从 reader 读取并写入文件
written, err := io.Copy(file, io.LimitReader(reader, maxBytes+1))
if err != nil {
file.Close()
os.Remove(filePath)
return nil, fmt.Errorf("failed to write to temp file: %w", err)
}
if written > maxBytes {
file.Close()
os.Remove(filePath)
return nil, ErrRequestBodyTooLarge
}
// 重置文件指针
if _, err := file.Seek(0, io.SeekStart); err != nil {
file.Close()
os.Remove(filePath)
return nil, fmt.Errorf("failed to seek temp file: %w", err)
}
IncrementDiskFiles(written)
return &diskStorage{
file: file,
filePath: filePath,
size: written,
}, nil
}
func (d *diskStorage) Read(p []byte) (n int, err error) {
d.mu.Lock()
defer d.mu.Unlock()
if atomic.LoadInt32(&d.closed) == 1 {
return 0, ErrStorageClosed
}
return d.file.Read(p)
}
func (d *diskStorage) Seek(offset int64, whence int) (int64, error) {
d.mu.Lock()
defer d.mu.Unlock()
if atomic.LoadInt32(&d.closed) == 1 {
return 0, ErrStorageClosed
}
return d.file.Seek(offset, whence)
}
func (d *diskStorage) Close() error {
d.mu.Lock()
defer d.mu.Unlock()
if atomic.CompareAndSwapInt32(&d.closed, 0, 1) {
d.file.Close()
os.Remove(d.filePath)
DecrementDiskFiles(d.size)
}
return nil
}
func (d *diskStorage) Bytes() ([]byte, error) {
d.mu.Lock()
defer d.mu.Unlock()
if atomic.LoadInt32(&d.closed) == 1 {
return nil, ErrStorageClosed
}
// 保存当前位置
currentPos, err := d.file.Seek(0, io.SeekCurrent)
if err != nil {
return nil, err
}
// 移动到开头
if _, err := d.file.Seek(0, io.SeekStart); err != nil {
return nil, err
}
// 读取全部内容
data := make([]byte, d.size)
_, err = io.ReadFull(d.file, data)
if err != nil {
return nil, err
}
// 恢复位置
if _, err := d.file.Seek(currentPos, io.SeekStart); err != nil {
return nil, err
}
return data, nil
}
func (d *diskStorage) Size() int64 {
return d.size
}
func (d *diskStorage) IsDisk() bool {
return true
}
// CreateBodyStorage 根据数据大小创建合适的存储
func CreateBodyStorage(data []byte) (BodyStorage, error) {
size := int64(len(data))
threshold := GetDiskCacheThresholdBytes()
// 检查是否应该使用磁盘缓存
if IsDiskCacheEnabled() &&
size >= threshold &&
IsDiskCacheAvailable(size) {
storage, err := newDiskStorage(data, GetDiskCachePath())
if err != nil {
// 如果磁盘存储失败,回退到内存存储
SysError(fmt.Sprintf("failed to create disk storage, falling back to memory: %v", err))
return newMemoryStorage(data), nil
}
return storage, nil
}
return newMemoryStorage(data), nil
}
// CreateBodyStorageFromReader 从 Reader 创建存储(用于大请求的流式处理)
func CreateBodyStorageFromReader(reader io.Reader, contentLength int64, maxBytes int64) (BodyStorage, error) {
threshold := GetDiskCacheThresholdBytes()
// 如果启用了磁盘缓存且内容长度超过阈值,直接使用磁盘存储
if IsDiskCacheEnabled() &&
contentLength > 0 &&
contentLength >= threshold &&
IsDiskCacheAvailable(contentLength) {
storage, err := newDiskStorageFromReader(reader, maxBytes, GetDiskCachePath())
if err != nil {
if IsRequestBodyTooLargeError(err) {
return nil, err
}
// 磁盘存储失败reader 已被消费,无法安全回退
// 直接返回错误而非尝试回退(因为 reader 数据已丢失)
return nil, fmt.Errorf("disk storage creation failed: %w", err)
}
IncrementDiskCacheHits()
return storage, nil
}
// 使用内存读取
data, err := io.ReadAll(io.LimitReader(reader, maxBytes+1))
if err != nil {
return nil, err
}
if int64(len(data)) > maxBytes {
return nil, ErrRequestBodyTooLarge
}
storage, err := CreateBodyStorage(data)
if err != nil {
return nil, err
}
// 如果最终使用内存存储,记录内存缓存命中
if !storage.IsDisk() {
IncrementMemoryCacheHits()
} else {
IncrementDiskCacheHits()
}
return storage, nil
}
// CleanupOldCacheFiles 清理旧的缓存文件(用于启动时清理残留)
func CleanupOldCacheFiles() {
cachePath := GetDiskCachePath()
if cachePath == "" {
cachePath = os.TempDir()
}
dir := filepath.Join(cachePath, "new-api-body-cache")
entries, err := os.ReadDir(dir)
if err != nil {
return // 目录不存在或无法读取
}
now := time.Now()
for _, entry := range entries {
if entry.IsDir() {
continue
}
info, err := entry.Info()
if err != nil {
continue
}
// 删除超过 5 分钟的旧文件
if now.Sub(info.ModTime()) > 5*time.Minute {
os.Remove(filepath.Join(dir, entry.Name()))
}
}
}

View File

@@ -1,6 +1,7 @@
package common
import (
"crypto/tls"
//"os"
//"strconv"
"sync"
@@ -73,6 +74,9 @@ var MemoryCacheEnabled bool
var LogConsumeEnabled = true
var TLSInsecureSkipVerify bool
var InsecureTLSConfig = &tls.Config{InsecureSkipVerify: true}
var SMTPServer = ""
var SMTPPort = 587
var SMTPSSLEnabled = false

156
common/disk_cache_config.go Normal file
View File

@@ -0,0 +1,156 @@
package common
import (
"sync"
"sync/atomic"
)
// DiskCacheConfig 磁盘缓存配置(由 performance_setting 包更新)
type DiskCacheConfig struct {
// Enabled 是否启用磁盘缓存
Enabled bool
// ThresholdMB 触发磁盘缓存的请求体大小阈值MB
ThresholdMB int
// MaxSizeMB 磁盘缓存最大总大小MB
MaxSizeMB int
// Path 磁盘缓存目录
Path string
}
// 全局磁盘缓存配置
var diskCacheConfig = DiskCacheConfig{
Enabled: false,
ThresholdMB: 10,
MaxSizeMB: 1024,
Path: "",
}
var diskCacheConfigMu sync.RWMutex
// GetDiskCacheConfig 获取磁盘缓存配置
func GetDiskCacheConfig() DiskCacheConfig {
diskCacheConfigMu.RLock()
defer diskCacheConfigMu.RUnlock()
return diskCacheConfig
}
// SetDiskCacheConfig 设置磁盘缓存配置
func SetDiskCacheConfig(config DiskCacheConfig) {
diskCacheConfigMu.Lock()
defer diskCacheConfigMu.Unlock()
diskCacheConfig = config
}
// IsDiskCacheEnabled 是否启用磁盘缓存
func IsDiskCacheEnabled() bool {
diskCacheConfigMu.RLock()
defer diskCacheConfigMu.RUnlock()
return diskCacheConfig.Enabled
}
// GetDiskCacheThresholdBytes 获取磁盘缓存阈值(字节)
func GetDiskCacheThresholdBytes() int64 {
diskCacheConfigMu.RLock()
defer diskCacheConfigMu.RUnlock()
return int64(diskCacheConfig.ThresholdMB) << 20
}
// GetDiskCacheMaxSizeBytes 获取磁盘缓存最大大小(字节)
func GetDiskCacheMaxSizeBytes() int64 {
diskCacheConfigMu.RLock()
defer diskCacheConfigMu.RUnlock()
return int64(diskCacheConfig.MaxSizeMB) << 20
}
// GetDiskCachePath 获取磁盘缓存目录
func GetDiskCachePath() string {
diskCacheConfigMu.RLock()
defer diskCacheConfigMu.RUnlock()
return diskCacheConfig.Path
}
// DiskCacheStats 磁盘缓存统计信息
type DiskCacheStats struct {
// 当前活跃的磁盘缓存文件数
ActiveDiskFiles int64 `json:"active_disk_files"`
// 当前磁盘缓存总大小(字节)
CurrentDiskUsageBytes int64 `json:"current_disk_usage_bytes"`
// 当前内存缓存数量
ActiveMemoryBuffers int64 `json:"active_memory_buffers"`
// 当前内存缓存总大小(字节)
CurrentMemoryUsageBytes int64 `json:"current_memory_usage_bytes"`
// 磁盘缓存命中次数
DiskCacheHits int64 `json:"disk_cache_hits"`
// 内存缓存命中次数
MemoryCacheHits int64 `json:"memory_cache_hits"`
// 磁盘缓存最大限制(字节)
DiskCacheMaxBytes int64 `json:"disk_cache_max_bytes"`
// 磁盘缓存阈值(字节)
DiskCacheThresholdBytes int64 `json:"disk_cache_threshold_bytes"`
}
var diskCacheStats DiskCacheStats
// GetDiskCacheStats 获取缓存统计信息
func GetDiskCacheStats() DiskCacheStats {
stats := DiskCacheStats{
ActiveDiskFiles: atomic.LoadInt64(&diskCacheStats.ActiveDiskFiles),
CurrentDiskUsageBytes: atomic.LoadInt64(&diskCacheStats.CurrentDiskUsageBytes),
ActiveMemoryBuffers: atomic.LoadInt64(&diskCacheStats.ActiveMemoryBuffers),
CurrentMemoryUsageBytes: atomic.LoadInt64(&diskCacheStats.CurrentMemoryUsageBytes),
DiskCacheHits: atomic.LoadInt64(&diskCacheStats.DiskCacheHits),
MemoryCacheHits: atomic.LoadInt64(&diskCacheStats.MemoryCacheHits),
DiskCacheMaxBytes: GetDiskCacheMaxSizeBytes(),
DiskCacheThresholdBytes: GetDiskCacheThresholdBytes(),
}
return stats
}
// IncrementDiskFiles 增加磁盘文件计数
func IncrementDiskFiles(size int64) {
atomic.AddInt64(&diskCacheStats.ActiveDiskFiles, 1)
atomic.AddInt64(&diskCacheStats.CurrentDiskUsageBytes, size)
}
// DecrementDiskFiles 减少磁盘文件计数
func DecrementDiskFiles(size int64) {
atomic.AddInt64(&diskCacheStats.ActiveDiskFiles, -1)
atomic.AddInt64(&diskCacheStats.CurrentDiskUsageBytes, -size)
}
// IncrementMemoryBuffers 增加内存缓存计数
func IncrementMemoryBuffers(size int64) {
atomic.AddInt64(&diskCacheStats.ActiveMemoryBuffers, 1)
atomic.AddInt64(&diskCacheStats.CurrentMemoryUsageBytes, size)
}
// DecrementMemoryBuffers 减少内存缓存计数
func DecrementMemoryBuffers(size int64) {
atomic.AddInt64(&diskCacheStats.ActiveMemoryBuffers, -1)
atomic.AddInt64(&diskCacheStats.CurrentMemoryUsageBytes, -size)
}
// IncrementDiskCacheHits 增加磁盘缓存命中次数
func IncrementDiskCacheHits() {
atomic.AddInt64(&diskCacheStats.DiskCacheHits, 1)
}
// IncrementMemoryCacheHits 增加内存缓存命中次数
func IncrementMemoryCacheHits() {
atomic.AddInt64(&diskCacheStats.MemoryCacheHits, 1)
}
// ResetDiskCacheStats 重置统计信息(不重置当前使用量)
func ResetDiskCacheStats() {
atomic.StoreInt64(&diskCacheStats.DiskCacheHits, 0)
atomic.StoreInt64(&diskCacheStats.MemoryCacheHits, 0)
}
// IsDiskCacheAvailable 检查是否可以创建新的磁盘缓存
func IsDiskCacheAvailable(requestSize int64) bool {
if !IsDiskCacheEnabled() {
return false
}
maxBytes := GetDiskCacheMaxSizeBytes()
currentUsage := atomic.LoadInt64(&diskCacheStats.CurrentDiskUsageBytes)
return currentUsage+requestSize <= maxBytes
}

View File

@@ -17,13 +17,14 @@ type EndpointInfo struct {
// defaultEndpointInfoMap 保存内置端点的默认 Path 与 Method
var defaultEndpointInfoMap = map[constant.EndpointType]EndpointInfo{
constant.EndpointTypeOpenAI: {Path: "/v1/chat/completions", Method: "POST"},
constant.EndpointTypeOpenAIResponse: {Path: "/v1/responses", Method: "POST"},
constant.EndpointTypeAnthropic: {Path: "/v1/messages", Method: "POST"},
constant.EndpointTypeGemini: {Path: "/v1beta/models/{model}:generateContent", Method: "POST"},
constant.EndpointTypeJinaRerank: {Path: "/rerank", Method: "POST"},
constant.EndpointTypeImageGeneration: {Path: "/v1/images/generations", Method: "POST"},
constant.EndpointTypeEmbeddings: {Path: "/v1/embeddings", Method: "POST"},
constant.EndpointTypeOpenAI: {Path: "/v1/chat/completions", Method: "POST"},
constant.EndpointTypeOpenAIResponse: {Path: "/v1/responses", Method: "POST"},
constant.EndpointTypeOpenAIResponseCompact: {Path: "/v1/responses/compact", Method: "POST"},
constant.EndpointTypeAnthropic: {Path: "/v1/messages", Method: "POST"},
constant.EndpointTypeGemini: {Path: "/v1beta/models/{model}:generateContent", Method: "POST"},
constant.EndpointTypeJinaRerank: {Path: "/v1/rerank", Method: "POST"},
constant.EndpointTypeImageGeneration: {Path: "/v1/images/generations", Method: "POST"},
constant.EndpointTypeEmbeddings: {Path: "/v1/embeddings", Method: "POST"},
}
// GetDefaultEndpointInfo 返回指定端点类型的默认信息以及是否存在

View File

@@ -18,6 +18,7 @@ import (
)
const KeyRequestBody = "key_request_body"
const KeyBodyStorage = "key_body_storage"
var ErrRequestBodyTooLarge = errors.New("request body too large")
@@ -33,42 +34,99 @@ func IsRequestBodyTooLargeError(err error) bool {
}
func GetRequestBody(c *gin.Context) ([]byte, error) {
// 首先检查是否有 BodyStorage 缓存
if storage, exists := c.Get(KeyBodyStorage); exists && storage != nil {
if bs, ok := storage.(BodyStorage); ok {
if _, err := bs.Seek(0, io.SeekStart); err != nil {
return nil, fmt.Errorf("failed to seek body storage: %w", err)
}
return bs.Bytes()
}
}
// 检查旧的缓存方式
cached, exists := c.Get(KeyRequestBody)
if exists && cached != nil {
if b, ok := cached.([]byte); ok {
return b, nil
}
}
maxMB := constant.MaxRequestBodyMB
if maxMB <= 0 {
// no limit
body, err := io.ReadAll(c.Request.Body)
_ = c.Request.Body.Close()
if err != nil {
return nil, err
}
c.Set(KeyRequestBody, body)
return body, nil
maxMB = 128 // 默认 128MB
}
maxBytes := int64(maxMB) << 20
limited := io.LimitReader(c.Request.Body, maxBytes+1)
body, err := io.ReadAll(limited)
contentLength := c.Request.ContentLength
// 使用新的存储系统
storage, err := CreateBodyStorageFromReader(c.Request.Body, contentLength, maxBytes)
_ = c.Request.Body.Close()
if err != nil {
_ = c.Request.Body.Close()
if IsRequestBodyTooLargeError(err) {
return nil, errors.Wrap(ErrRequestBodyTooLarge, fmt.Sprintf("request body exceeds %d MB", maxMB))
}
return nil, err
}
_ = c.Request.Body.Close()
if int64(len(body)) > maxBytes {
return nil, errors.Wrap(ErrRequestBodyTooLarge, fmt.Sprintf("request body exceeds %d MB", maxMB))
// 缓存存储对象
c.Set(KeyBodyStorage, storage)
// 获取字节数据
body, err := storage.Bytes()
if err != nil {
return nil, err
}
// 同时设置旧的缓存键以保持兼容性
c.Set(KeyRequestBody, body)
return body, nil
}
// GetBodyStorage 获取请求体存储对象(用于需要多次读取的场景)
func GetBodyStorage(c *gin.Context) (BodyStorage, error) {
// 检查是否已有存储
if storage, exists := c.Get(KeyBodyStorage); exists && storage != nil {
if bs, ok := storage.(BodyStorage); ok {
if _, err := bs.Seek(0, io.SeekStart); err != nil {
return nil, fmt.Errorf("failed to seek body storage: %w", err)
}
return bs, nil
}
}
// 如果没有,调用 GetRequestBody 创建存储
_, err := GetRequestBody(c)
if err != nil {
return nil, err
}
// 再次获取存储
if storage, exists := c.Get(KeyBodyStorage); exists && storage != nil {
if bs, ok := storage.(BodyStorage); ok {
if _, err := bs.Seek(0, io.SeekStart); err != nil {
return nil, fmt.Errorf("failed to seek body storage: %w", err)
}
return bs, nil
}
}
return nil, errors.New("failed to get body storage")
}
// CleanupBodyStorage 清理请求体存储(应在请求结束时调用)
func CleanupBodyStorage(c *gin.Context) {
if storage, exists := c.Get(KeyBodyStorage); exists && storage != nil {
if bs, ok := storage.(BodyStorage); ok {
bs.Close()
}
c.Set(KeyBodyStorage, nil)
}
}
func UnmarshalBodyReusable(c *gin.Context, v any) error {
requestBody, err := GetRequestBody(c)
if err != nil {

View File

@@ -4,6 +4,7 @@ import (
"flag"
"fmt"
"log"
"net/http"
"os"
"path/filepath"
"strconv"
@@ -81,6 +82,16 @@ func InitEnv() {
DebugEnabled = os.Getenv("DEBUG") == "true"
MemoryCacheEnabled = os.Getenv("MEMORY_CACHE_ENABLED") == "true"
IsMasterNode = os.Getenv("NODE_TYPE") != "slave"
TLSInsecureSkipVerify = GetEnvOrDefaultBool("TLS_INSECURE_SKIP_VERIFY", false)
if TLSInsecureSkipVerify {
if tr, ok := http.DefaultTransport.(*http.Transport); ok && tr != nil {
if tr.TLSClientConfig != nil {
tr.TLSClientConfig.InsecureSkipVerify = true
} else {
tr.TLSClientConfig = InsecureTLSConfig
}
}
}
// Parse requestInterval and set RequestInterval
requestInterval, _ = strconv.Atoi(os.Getenv("POLLING_INTERVAL"))
@@ -118,7 +129,7 @@ func initConstantEnv() {
constant.MaxFileDownloadMB = GetEnvOrDefault("MAX_FILE_DOWNLOAD_MB", 64)
constant.StreamScannerMaxBufferMB = GetEnvOrDefault("STREAM_SCANNER_MAX_BUFFER_MB", 64)
// MaxRequestBodyMB 请求体最大大小(解压后),用于防止超大请求/zip bomb导致内存暴涨
constant.MaxRequestBodyMB = GetEnvOrDefault("MAX_REQUEST_BODY_MB", 64)
constant.MaxRequestBodyMB = GetEnvOrDefault("MAX_REQUEST_BODY_MB", 128)
// ForceStreamOption 覆盖请求参数强制返回usage信息
constant.ForceStreamOption = GetEnvOrDefaultBool("FORCE_STREAM_OPTION", true)
constant.CountToken = GetEnvOrDefaultBool("CountToken", true)
@@ -148,4 +159,17 @@ func initConstantEnv() {
}
constant.TaskPricePatches = taskPricePatches
}
// Initialize trusted redirect domains for URL validation
trustedDomainsStr := GetEnvOrDefaultString("TRUSTED_REDIRECT_DOMAINS", "")
var trustedDomains []string
domains := strings.Split(trustedDomainsStr, ",")
for _, domain := range domains {
trimmedDomain := strings.TrimSpace(domain)
if trimmedDomain != "" {
// Normalize domain to lowercase
trustedDomains = append(trustedDomains, strings.ToLower(trimmedDomain))
}
}
constant.TrustedRedirectDomains = trustedDomains
}

View File

@@ -16,6 +16,8 @@ var (
maskURLPattern = regexp.MustCompile(`(http|https)://[^\s/$.?#].[^\s]*`)
maskDomainPattern = regexp.MustCompile(`\b(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,}\b`)
maskIPPattern = regexp.MustCompile(`\b(?:\d{1,3}\.){3}\d{1,3}\b`)
// maskApiKeyPattern matches patterns like 'api_key:xxx' or "api_key:xxx" to mask the API key value
maskApiKeyPattern = regexp.MustCompile(`(['"]?)api_key:([^\s'"]+)(['"]?)`)
)
func GetStringIfEmpty(str string, defaultValue string) string {
@@ -235,5 +237,8 @@ func MaskSensitiveInfo(str string) string {
// Mask IP addresses
str = maskIPPattern.ReplaceAllString(str, "***.***.***.***")
// Mask API keys (e.g., "api_key:AIzaSyAAAaUooTUni8AdaOkSRMda30n_Q4vrV70" -> "api_key:***")
str = maskApiKeyPattern.ReplaceAllString(str, "${1}api_key:***${3}")
return str
}

39
common/url_validator.go Normal file
View File

@@ -0,0 +1,39 @@
package common
import (
"fmt"
"net/url"
"strings"
"github.com/QuantumNous/new-api/constant"
)
// ValidateRedirectURL validates that a redirect URL is safe to use.
// It checks that:
// - The URL is properly formatted
// - The scheme is either http or https
// - The domain is in the trusted domains list (exact match or subdomain)
//
// Returns nil if the URL is valid and trusted, otherwise returns an error
// describing why the validation failed.
func ValidateRedirectURL(rawURL string) error {
// Parse the URL
parsedURL, err := url.Parse(rawURL)
if err != nil {
return fmt.Errorf("invalid URL format: %s", err.Error())
}
if parsedURL.Scheme != "http" && parsedURL.Scheme != "https" {
return fmt.Errorf("invalid URL scheme: only http and https are allowed")
}
domain := strings.ToLower(parsedURL.Hostname())
for _, trustedDomain := range constant.TrustedRedirectDomains {
if domain == trustedDomain || strings.HasSuffix(domain, "."+trustedDomain) {
return nil
}
}
return fmt.Errorf("domain %s is not in the trusted domains list", domain)
}

View File

@@ -0,0 +1,134 @@
package common
import (
"testing"
"github.com/QuantumNous/new-api/constant"
)
func TestValidateRedirectURL(t *testing.T) {
// Save original trusted domains and restore after test
originalDomains := constant.TrustedRedirectDomains
defer func() {
constant.TrustedRedirectDomains = originalDomains
}()
tests := []struct {
name string
url string
trustedDomains []string
wantErr bool
errContains string
}{
// Valid cases
{
name: "exact domain match with https",
url: "https://example.com/success",
trustedDomains: []string{"example.com"},
wantErr: false,
},
{
name: "exact domain match with http",
url: "http://example.com/callback",
trustedDomains: []string{"example.com"},
wantErr: false,
},
{
name: "subdomain match",
url: "https://sub.example.com/success",
trustedDomains: []string{"example.com"},
wantErr: false,
},
{
name: "case insensitive domain",
url: "https://EXAMPLE.COM/success",
trustedDomains: []string{"example.com"},
wantErr: false,
},
// Invalid cases - untrusted domain
{
name: "untrusted domain",
url: "https://evil.com/phishing",
trustedDomains: []string{"example.com"},
wantErr: true,
errContains: "not in the trusted domains list",
},
{
name: "suffix attack - fakeexample.com",
url: "https://fakeexample.com/success",
trustedDomains: []string{"example.com"},
wantErr: true,
errContains: "not in the trusted domains list",
},
{
name: "empty trusted domains list",
url: "https://example.com/success",
trustedDomains: []string{},
wantErr: true,
errContains: "not in the trusted domains list",
},
// Invalid cases - scheme
{
name: "javascript scheme",
url: "javascript:alert('xss')",
trustedDomains: []string{"example.com"},
wantErr: true,
errContains: "invalid URL scheme",
},
{
name: "data scheme",
url: "data:text/html,<script>alert('xss')</script>",
trustedDomains: []string{"example.com"},
wantErr: true,
errContains: "invalid URL scheme",
},
// Edge cases
{
name: "empty URL",
url: "",
trustedDomains: []string{"example.com"},
wantErr: true,
errContains: "invalid URL scheme",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Set up trusted domains for this test case
constant.TrustedRedirectDomains = tt.trustedDomains
err := ValidateRedirectURL(tt.url)
if tt.wantErr {
if err == nil {
t.Errorf("ValidateRedirectURL(%q) expected error containing %q, got nil", tt.url, tt.errContains)
return
}
if tt.errContains != "" && !contains(err.Error(), tt.errContains) {
t.Errorf("ValidateRedirectURL(%q) error = %q, want error containing %q", tt.url, err.Error(), tt.errContains)
}
} else {
if err != nil {
t.Errorf("ValidateRedirectURL(%q) unexpected error: %v", tt.url, err)
}
}
})
}
}
func contains(s, substr string) bool {
return len(s) >= len(substr) && (s == substr || len(substr) == 0 ||
(len(s) > 0 && len(substr) > 0 && findSubstring(s, substr)))
}
func findSubstring(s, substr string) bool {
for i := 0; i <= len(s)-len(substr); i++ {
if s[i:i+len(substr)] == substr {
return true
}
}
return false
}

View File

@@ -263,7 +263,7 @@ func GetTimestamp() int64 {
}
func GetTimeString() string {
now := time.Now()
now := time.Now().UTC()
return fmt.Sprintf("%s%d", now.Format("20060102150405"), now.UnixNano()%1e9)
}

View File

@@ -35,5 +35,6 @@ const (
APITypeSubmodel
APITypeMiniMax
APITypeReplicate
APITypeCodex
APITypeDummy // this one is only for count, do not add any channel after this
)

View File

@@ -54,6 +54,7 @@ const (
ChannelTypeDoubaoVideo = 54
ChannelTypeSora = 55
ChannelTypeReplicate = 56
ChannelTypeCodex = 57
ChannelTypeDummy // this one is only for count, do not add any channel after this
)
@@ -116,6 +117,7 @@ var ChannelBaseURLs = []string{
"https://ark.cn-beijing.volces.com", //54
"https://api.openai.com", //55
"https://api.replicate.com", //56
"https://chatgpt.com", //57
}
var ChannelTypeNames = map[int]string{
@@ -172,6 +174,7 @@ var ChannelTypeNames = map[int]string{
ChannelTypeDoubaoVideo: "DoubaoVideo",
ChannelTypeSora: "Sora",
ChannelTypeReplicate: "Replicate",
ChannelTypeCodex: "Codex",
}
func GetChannelTypeName(channelType int) string {

View File

@@ -55,4 +55,8 @@ const (
ContextKeyLocalCountTokens ContextKey = "local_count_tokens"
ContextKeySystemPromptOverride ContextKey = "system_prompt_override"
// ContextKeyAdminRejectReason stores an admin-only reject/block reason extracted from upstream responses.
// It is not returned to end users, but can be persisted into consume/error logs for debugging.
ContextKeyAdminRejectReason ContextKey = "admin_reject_reason"
)

View File

@@ -3,14 +3,15 @@ package constant
type EndpointType string
const (
EndpointTypeOpenAI EndpointType = "openai"
EndpointTypeOpenAIResponse EndpointType = "openai-response"
EndpointTypeAnthropic EndpointType = "anthropic"
EndpointTypeGemini EndpointType = "gemini"
EndpointTypeJinaRerank EndpointType = "jina-rerank"
EndpointTypeImageGeneration EndpointType = "image-generation"
EndpointTypeEmbeddings EndpointType = "embeddings"
EndpointTypeOpenAIVideo EndpointType = "openai-video"
EndpointTypeOpenAI EndpointType = "openai"
EndpointTypeOpenAIResponse EndpointType = "openai-response"
EndpointTypeOpenAIResponseCompact EndpointType = "openai-response-compact"
EndpointTypeAnthropic EndpointType = "anthropic"
EndpointTypeGemini EndpointType = "gemini"
EndpointTypeJinaRerank EndpointType = "jina-rerank"
EndpointTypeImageGeneration EndpointType = "image-generation"
EndpointTypeEmbeddings EndpointType = "embeddings"
EndpointTypeOpenAIVideo EndpointType = "openai-video"
//EndpointTypeMidjourney EndpointType = "midjourney-proxy"
//EndpointTypeSuno EndpointType = "suno-proxy"
//EndpointTypeKling EndpointType = "kling"

View File

@@ -20,3 +20,7 @@ var TaskQueryLimit int
// temporary variable for sora patch, will be removed in future
var TaskPricePatches []string
// TrustedRedirectDomains is a list of trusted domains for redirect URL validation.
// Domains support subdomain matching (e.g., "example.com" matches "sub.example.com").
var TrustedRedirectDomains []string

View File

@@ -26,6 +26,7 @@ import (
"github.com/QuantumNous/new-api/relay/helper"
"github.com/QuantumNous/new-api/service"
"github.com/QuantumNous/new-api/setting/operation_setting"
"github.com/QuantumNous/new-api/setting/ratio_setting"
"github.com/QuantumNous/new-api/types"
"github.com/bytedance/gopkg/util/gopool"
@@ -84,6 +85,11 @@ func testChannel(channel *model.Channel, testModel string, endpointType string)
}
} else {
// 如果没有指定端点类型,使用原有的自动检测逻辑
if strings.Contains(strings.ToLower(testModel), "rerank") {
requestPath = "/v1/rerank"
}
// 先判断是否为 Embedding 模型
if strings.Contains(strings.ToLower(testModel), "embedding") ||
strings.HasPrefix(testModel, "m3e") || // m3e 系列模型
@@ -102,6 +108,14 @@ func testChannel(channel *model.Channel, testModel string, endpointType string)
if strings.Contains(strings.ToLower(testModel), "codex") {
requestPath = "/v1/responses"
}
// responses compaction models (must use /v1/responses/compact)
if strings.HasSuffix(testModel, ratio_setting.CompactModelSuffix) {
requestPath = "/v1/responses/compact"
}
}
if strings.HasPrefix(requestPath, "/v1/responses/compact") {
testModel = ratio_setting.WithCompactModelSuffix(testModel)
}
c.Request = &http.Request{
@@ -145,6 +159,8 @@ func testChannel(channel *model.Channel, testModel string, endpointType string)
relayFormat = types.RelayFormatOpenAI
case constant.EndpointTypeOpenAIResponse:
relayFormat = types.RelayFormatOpenAIResponses
case constant.EndpointTypeOpenAIResponseCompact:
relayFormat = types.RelayFormatOpenAIResponsesCompaction
case constant.EndpointTypeAnthropic:
relayFormat = types.RelayFormatClaude
case constant.EndpointTypeGemini:
@@ -179,6 +195,9 @@ func testChannel(channel *model.Channel, testModel string, endpointType string)
if c.Request.URL.Path == "/v1/responses" {
relayFormat = types.RelayFormatOpenAIResponses
}
if strings.HasPrefix(c.Request.URL.Path, "/v1/responses/compact") {
relayFormat = types.RelayFormatOpenAIResponsesCompaction
}
}
request := buildTestRequest(testModel, endpointType, channel)
@@ -193,6 +212,7 @@ func testChannel(channel *model.Channel, testModel string, endpointType string)
}
}
info.IsChannelTest = true
info.InitChannelMeta(c)
err = helper.ModelMappedHelper(c, info, request)
@@ -209,6 +229,15 @@ func testChannel(channel *model.Channel, testModel string, endpointType string)
request.SetModelName(testModel)
apiType, _ := common.ChannelType2APIType(channel.Type)
if info.RelayMode == relayconstant.RelayModeResponsesCompact &&
apiType != constant.APITypeOpenAI &&
apiType != constant.APITypeCodex {
return testResult{
context: c,
localErr: fmt.Errorf("responses compaction test only supports openai/codex channels, got api type %d", apiType),
newAPIError: types.NewError(fmt.Errorf("unsupported api type: %d", apiType), types.ErrorCodeInvalidApiType),
}
}
adaptor := relay.GetAdaptor(apiType)
if adaptor == nil {
return testResult{
@@ -281,6 +310,25 @@ func testChannel(channel *model.Channel, testModel string, endpointType string)
newAPIError: types.NewError(errors.New("invalid response request type"), types.ErrorCodeConvertRequestFailed),
}
}
case relayconstant.RelayModeResponsesCompact:
// Response compaction request - convert to OpenAIResponsesRequest before adapting
switch req := request.(type) {
case *dto.OpenAIResponsesCompactionRequest:
convertedRequest, err = adaptor.ConvertOpenAIResponsesRequest(c, info, dto.OpenAIResponsesRequest{
Model: req.Model,
Input: req.Input,
Instructions: req.Instructions,
PreviousResponseID: req.PreviousResponseID,
})
case *dto.OpenAIResponsesRequest:
convertedRequest, err = adaptor.ConvertOpenAIResponsesRequest(c, info, *req)
default:
return testResult{
context: c,
localErr: errors.New("invalid response compaction request type"),
newAPIError: types.NewError(errors.New("invalid response compaction request type"), types.ErrorCodeConvertRequestFailed),
}
}
default:
// Chat/Completion 等其他请求类型
if generalReq, ok := request.(*dto.GeneralOpenAIRequest); ok {
@@ -309,8 +357,29 @@ func testChannel(channel *model.Channel, testModel string, endpointType string)
newAPIError: types.NewError(err, types.ErrorCodeJsonMarshalFailed),
}
}
//jsonData, err = relaycommon.RemoveDisabledFields(jsonData, info.ChannelOtherSettings)
//if err != nil {
// return testResult{
// context: c,
// localErr: err,
// newAPIError: types.NewError(err, types.ErrorCodeConvertRequestFailed),
// }
//}
if len(info.ParamOverride) > 0 {
jsonData, err = relaycommon.ApplyParamOverride(jsonData, info.ParamOverride, relaycommon.BuildParamOverrideContext(info))
if err != nil {
return testResult{
context: c,
localErr: err,
newAPIError: types.NewError(err, types.ErrorCodeChannelParamOverrideInvalid),
}
}
}
requestBody := bytes.NewBuffer(jsonData)
c.Request.Body = io.NopCloser(requestBody)
c.Request.Body = io.NopCloser(bytes.NewBuffer(jsonData))
resp, err := adaptor.DoRequest(c, info, requestBody)
if err != nil {
return testResult{
@@ -405,6 +474,8 @@ func testChannel(channel *model.Channel, testModel string, endpointType string)
}
func buildTestRequest(model string, endpointType string, channel *model.Channel) dto.Request {
testResponsesInput := json.RawMessage(`[{"role":"user","content":"hi"}]`)
// 根据端点类型构建不同的测试请求
if endpointType != "" {
switch constant.EndpointType(endpointType) {
@@ -434,7 +505,13 @@ func buildTestRequest(model string, endpointType string, channel *model.Channel)
// 返回 OpenAIResponsesRequest
return &dto.OpenAIResponsesRequest{
Model: model,
Input: json.RawMessage("\"hi\""),
Input: json.RawMessage(`[{"role":"user","content":"hi"}]`),
}
case constant.EndpointTypeOpenAIResponseCompact:
// 返回 OpenAIResponsesCompactionRequest
return &dto.OpenAIResponsesCompactionRequest{
Model: model,
Input: testResponsesInput,
}
case constant.EndpointTypeAnthropic, constant.EndpointTypeGemini, constant.EndpointTypeOpenAI:
// 返回 GeneralOpenAIRequest
@@ -457,6 +534,15 @@ func buildTestRequest(model string, endpointType string, channel *model.Channel)
}
// 自动检测逻辑(保持原有行为)
if strings.Contains(strings.ToLower(model), "rerank") {
return &dto.RerankRequest{
Model: model,
Query: "What is Deep Learning?",
Documents: []any{"Deep Learning is a subset of machine learning.", "Machine learning is a field of artificial intelligence."},
TopN: 2,
}
}
// 先判断是否为 Embedding 模型
if strings.Contains(strings.ToLower(model), "embedding") ||
strings.HasPrefix(model, "m3e") ||
@@ -468,11 +554,19 @@ func buildTestRequest(model string, endpointType string, channel *model.Channel)
}
}
// Responses compaction models (must use /v1/responses/compact)
if strings.HasSuffix(model, ratio_setting.CompactModelSuffix) {
return &dto.OpenAIResponsesCompactionRequest{
Model: model,
Input: testResponsesInput,
}
}
// Responses-only models (e.g. codex series)
if strings.Contains(strings.ToLower(model), "codex") {
return &dto.OpenAIResponsesRequest{
Model: model,
Input: json.RawMessage("\"hi\""),
Input: json.RawMessage(`[{"role":"user","content":"hi"}]`),
}
}

View File

@@ -1,16 +1,19 @@
package controller
import (
"context"
"encoding/json"
"fmt"
"net/http"
"strconv"
"strings"
"time"
"github.com/QuantumNous/new-api/common"
"github.com/QuantumNous/new-api/constant"
"github.com/QuantumNous/new-api/dto"
"github.com/QuantumNous/new-api/model"
"github.com/QuantumNous/new-api/relay/channel/gemini"
"github.com/QuantumNous/new-api/relay/channel/ollama"
"github.com/QuantumNous/new-api/service"
@@ -260,11 +263,37 @@ func FetchUpstreamModels(c *gin.Context) {
return
}
// 对于 Gemini 渠道,使用特殊处理
if channel.Type == constant.ChannelTypeGemini {
// 获取用于请求的可用密钥(多密钥渠道优先使用启用状态的密钥)
key, _, apiErr := channel.GetNextEnabledKey()
if apiErr != nil {
c.JSON(http.StatusOK, gin.H{
"success": false,
"message": fmt.Sprintf("获取渠道密钥失败: %s", apiErr.Error()),
})
return
}
key = strings.TrimSpace(key)
models, err := gemini.FetchGeminiModels(baseURL, key, channel.GetSetting().Proxy)
if err != nil {
c.JSON(http.StatusOK, gin.H{
"success": false,
"message": fmt.Sprintf("获取Gemini模型失败: %s", err.Error()),
})
return
}
c.JSON(http.StatusOK, gin.H{
"success": true,
"message": "",
"data": models,
})
return
}
var url string
switch channel.Type {
case constant.ChannelTypeGemini:
// curl https://example.com/v1beta/models?key=$GEMINI_API_KEY
url = fmt.Sprintf("%s/v1beta/openai/models", baseURL) // Remove key in url since we need to use AuthHeader
case constant.ChannelTypeAli:
url = fmt.Sprintf("%s/compatible-mode/v1/models", baseURL)
case constant.ChannelTypeZhipu_v4:
@@ -577,9 +606,60 @@ func validateChannel(channel *model.Channel, isAdd bool) error {
}
}
// Codex OAuth key validation (optional, only when JSON object is provided)
if channel.Type == constant.ChannelTypeCodex {
trimmedKey := strings.TrimSpace(channel.Key)
if isAdd || trimmedKey != "" {
if !strings.HasPrefix(trimmedKey, "{") {
return fmt.Errorf("Codex key must be a valid JSON object")
}
var keyMap map[string]any
if err := common.Unmarshal([]byte(trimmedKey), &keyMap); err != nil {
return fmt.Errorf("Codex key must be a valid JSON object")
}
if v, ok := keyMap["access_token"]; !ok || v == nil || strings.TrimSpace(fmt.Sprintf("%v", v)) == "" {
return fmt.Errorf("Codex key JSON must include access_token")
}
if v, ok := keyMap["account_id"]; !ok || v == nil || strings.TrimSpace(fmt.Sprintf("%v", v)) == "" {
return fmt.Errorf("Codex key JSON must include account_id")
}
}
}
return nil
}
func RefreshCodexChannelCredential(c *gin.Context) {
channelId, err := strconv.Atoi(c.Param("id"))
if err != nil {
common.ApiError(c, fmt.Errorf("invalid channel id: %w", err))
return
}
ctx, cancel := context.WithTimeout(c.Request.Context(), 10*time.Second)
defer cancel()
oauthKey, ch, err := service.RefreshCodexChannelCredential(ctx, channelId, service.CodexCredentialRefreshOptions{ResetCaches: true})
if err != nil {
c.JSON(http.StatusOK, gin.H{"success": false, "message": err.Error()})
return
}
c.JSON(http.StatusOK, gin.H{
"success": true,
"message": "refreshed",
"data": gin.H{
"expires_at": oauthKey.Expired,
"last_refresh": oauthKey.LastRefresh,
"account_id": oauthKey.AccountID,
"email": oauthKey.Email,
"channel_id": ch.Id,
"channel_type": ch.Type,
"channel_name": ch.Name,
},
})
}
type AddChannelRequest struct {
Mode string `json:"mode"`
MultiKeyMode constant.MultiKeyMode `json:"multi_key_mode"`
@@ -1072,6 +1152,23 @@ func FetchModels(c *gin.Context) {
return
}
if req.Type == constant.ChannelTypeGemini {
models, err := gemini.FetchGeminiModels(baseURL, key, "")
if err != nil {
c.JSON(http.StatusOK, gin.H{
"success": false,
"message": fmt.Sprintf("获取Gemini模型失败: %s", err.Error()),
})
return
}
c.JSON(http.StatusOK, gin.H{
"success": true,
"data": models,
})
return
}
client := &http.Client{}
url := fmt.Sprintf("%s/v1/models", baseURL)

View File

@@ -0,0 +1,88 @@
package controller
import (
"net/http"
"strings"
"github.com/QuantumNous/new-api/service"
"github.com/gin-gonic/gin"
)
func GetChannelAffinityCacheStats(c *gin.Context) {
stats := service.GetChannelAffinityCacheStats()
c.JSON(http.StatusOK, gin.H{
"success": true,
"message": "",
"data": stats,
})
}
func ClearChannelAffinityCache(c *gin.Context) {
all := strings.TrimSpace(c.Query("all"))
ruleName := strings.TrimSpace(c.Query("rule_name"))
if all == "true" {
deleted := service.ClearChannelAffinityCacheAll()
c.JSON(http.StatusOK, gin.H{
"success": true,
"message": "",
"data": gin.H{
"deleted": deleted,
},
})
return
}
if ruleName == "" {
c.JSON(http.StatusBadRequest, gin.H{
"success": false,
"message": "缺少参数rule_name或使用 all=true 清空全部",
})
return
}
deleted, err := service.ClearChannelAffinityCacheByRuleName(ruleName)
if err != nil {
c.JSON(http.StatusBadRequest, gin.H{
"success": false,
"message": err.Error(),
})
return
}
c.JSON(http.StatusOK, gin.H{
"success": true,
"message": "",
"data": gin.H{
"deleted": deleted,
},
})
}
func GetChannelAffinityUsageCacheStats(c *gin.Context) {
ruleName := strings.TrimSpace(c.Query("rule_name"))
usingGroup := strings.TrimSpace(c.Query("using_group"))
keyFp := strings.TrimSpace(c.Query("key_fp"))
if ruleName == "" {
c.JSON(http.StatusBadRequest, gin.H{
"success": false,
"message": "missing param: rule_name",
})
return
}
if keyFp == "" {
c.JSON(http.StatusBadRequest, gin.H{
"success": false,
"message": "missing param: key_fp",
})
return
}
stats := service.GetChannelAffinityUsageCacheStats(ruleName, usingGroup, keyFp)
c.JSON(http.StatusOK, gin.H{
"success": true,
"message": "",
"data": stats,
})
}

243
controller/codex_oauth.go Normal file
View File

@@ -0,0 +1,243 @@
package controller
import (
"context"
"errors"
"fmt"
"net/http"
"net/url"
"strconv"
"strings"
"time"
"github.com/QuantumNous/new-api/common"
"github.com/QuantumNous/new-api/constant"
"github.com/QuantumNous/new-api/model"
"github.com/QuantumNous/new-api/relay/channel/codex"
"github.com/QuantumNous/new-api/service"
"github.com/gin-contrib/sessions"
"github.com/gin-gonic/gin"
)
type codexOAuthCompleteRequest struct {
Input string `json:"input"`
}
func codexOAuthSessionKey(channelID int, field string) string {
return fmt.Sprintf("codex_oauth_%s_%d", field, channelID)
}
func parseCodexAuthorizationInput(input string) (code string, state string, err error) {
v := strings.TrimSpace(input)
if v == "" {
return "", "", errors.New("empty input")
}
if strings.Contains(v, "#") {
parts := strings.SplitN(v, "#", 2)
code = strings.TrimSpace(parts[0])
state = strings.TrimSpace(parts[1])
return code, state, nil
}
if strings.Contains(v, "code=") {
u, parseErr := url.Parse(v)
if parseErr == nil {
q := u.Query()
code = strings.TrimSpace(q.Get("code"))
state = strings.TrimSpace(q.Get("state"))
return code, state, nil
}
q, parseErr := url.ParseQuery(v)
if parseErr == nil {
code = strings.TrimSpace(q.Get("code"))
state = strings.TrimSpace(q.Get("state"))
return code, state, nil
}
}
code = v
return code, "", nil
}
func StartCodexOAuth(c *gin.Context) {
startCodexOAuthWithChannelID(c, 0)
}
func StartCodexOAuthForChannel(c *gin.Context) {
channelID, err := strconv.Atoi(c.Param("id"))
if err != nil {
common.ApiError(c, fmt.Errorf("invalid channel id: %w", err))
return
}
startCodexOAuthWithChannelID(c, channelID)
}
func startCodexOAuthWithChannelID(c *gin.Context, channelID int) {
if channelID > 0 {
ch, err := model.GetChannelById(channelID, false)
if err != nil {
common.ApiError(c, err)
return
}
if ch == nil {
c.JSON(http.StatusOK, gin.H{"success": false, "message": "channel not found"})
return
}
if ch.Type != constant.ChannelTypeCodex {
c.JSON(http.StatusOK, gin.H{"success": false, "message": "channel type is not Codex"})
return
}
}
flow, err := service.CreateCodexOAuthAuthorizationFlow()
if err != nil {
common.ApiError(c, err)
return
}
session := sessions.Default(c)
session.Set(codexOAuthSessionKey(channelID, "state"), flow.State)
session.Set(codexOAuthSessionKey(channelID, "verifier"), flow.Verifier)
session.Set(codexOAuthSessionKey(channelID, "created_at"), time.Now().Unix())
_ = session.Save()
c.JSON(http.StatusOK, gin.H{
"success": true,
"message": "",
"data": gin.H{
"authorize_url": flow.AuthorizeURL,
},
})
}
func CompleteCodexOAuth(c *gin.Context) {
completeCodexOAuthWithChannelID(c, 0)
}
func CompleteCodexOAuthForChannel(c *gin.Context) {
channelID, err := strconv.Atoi(c.Param("id"))
if err != nil {
common.ApiError(c, fmt.Errorf("invalid channel id: %w", err))
return
}
completeCodexOAuthWithChannelID(c, channelID)
}
func completeCodexOAuthWithChannelID(c *gin.Context, channelID int) {
req := codexOAuthCompleteRequest{}
if err := c.ShouldBindJSON(&req); err != nil {
common.ApiError(c, err)
return
}
code, state, err := parseCodexAuthorizationInput(req.Input)
if err != nil {
c.JSON(http.StatusOK, gin.H{"success": false, "message": err.Error()})
return
}
if strings.TrimSpace(code) == "" {
c.JSON(http.StatusOK, gin.H{"success": false, "message": "missing authorization code"})
return
}
if strings.TrimSpace(state) == "" {
c.JSON(http.StatusOK, gin.H{"success": false, "message": "missing state in input"})
return
}
if channelID > 0 {
ch, err := model.GetChannelById(channelID, false)
if err != nil {
common.ApiError(c, err)
return
}
if ch == nil {
c.JSON(http.StatusOK, gin.H{"success": false, "message": "channel not found"})
return
}
if ch.Type != constant.ChannelTypeCodex {
c.JSON(http.StatusOK, gin.H{"success": false, "message": "channel type is not Codex"})
return
}
}
session := sessions.Default(c)
expectedState, _ := session.Get(codexOAuthSessionKey(channelID, "state")).(string)
verifier, _ := session.Get(codexOAuthSessionKey(channelID, "verifier")).(string)
if strings.TrimSpace(expectedState) == "" || strings.TrimSpace(verifier) == "" {
c.JSON(http.StatusOK, gin.H{"success": false, "message": "oauth flow not started or session expired"})
return
}
if state != expectedState {
c.JSON(http.StatusOK, gin.H{"success": false, "message": "state mismatch"})
return
}
ctx, cancel := context.WithTimeout(c.Request.Context(), 15*time.Second)
defer cancel()
tokenRes, err := service.ExchangeCodexAuthorizationCode(ctx, code, verifier)
if err != nil {
c.JSON(http.StatusOK, gin.H{"success": false, "message": err.Error()})
return
}
accountID, ok := service.ExtractCodexAccountIDFromJWT(tokenRes.AccessToken)
if !ok {
c.JSON(http.StatusOK, gin.H{"success": false, "message": "failed to extract account_id from access_token"})
return
}
email, _ := service.ExtractEmailFromJWT(tokenRes.AccessToken)
key := codex.OAuthKey{
AccessToken: tokenRes.AccessToken,
RefreshToken: tokenRes.RefreshToken,
AccountID: accountID,
LastRefresh: time.Now().Format(time.RFC3339),
Expired: tokenRes.ExpiresAt.Format(time.RFC3339),
Email: email,
Type: "codex",
}
encoded, err := common.Marshal(key)
if err != nil {
common.ApiError(c, err)
return
}
session.Delete(codexOAuthSessionKey(channelID, "state"))
session.Delete(codexOAuthSessionKey(channelID, "verifier"))
session.Delete(codexOAuthSessionKey(channelID, "created_at"))
_ = session.Save()
if channelID > 0 {
if err := model.DB.Model(&model.Channel{}).Where("id = ?", channelID).Update("key", string(encoded)).Error; err != nil {
common.ApiError(c, err)
return
}
model.InitChannelCache()
service.ResetProxyClientCache()
c.JSON(http.StatusOK, gin.H{
"success": true,
"message": "saved",
"data": gin.H{
"channel_id": channelID,
"account_id": accountID,
"email": email,
"expires_at": key.Expired,
"last_refresh": key.LastRefresh,
},
})
return
}
c.JSON(http.StatusOK, gin.H{
"success": true,
"message": "generated",
"data": gin.H{
"key": string(encoded),
"account_id": accountID,
"email": email,
"expires_at": key.Expired,
"last_refresh": key.LastRefresh,
},
})
}

124
controller/codex_usage.go Normal file
View File

@@ -0,0 +1,124 @@
package controller
import (
"context"
"encoding/json"
"fmt"
"net/http"
"strconv"
"strings"
"time"
"github.com/QuantumNous/new-api/common"
"github.com/QuantumNous/new-api/constant"
"github.com/QuantumNous/new-api/model"
"github.com/QuantumNous/new-api/relay/channel/codex"
"github.com/QuantumNous/new-api/service"
"github.com/gin-gonic/gin"
)
func GetCodexChannelUsage(c *gin.Context) {
channelId, err := strconv.Atoi(c.Param("id"))
if err != nil {
common.ApiError(c, fmt.Errorf("invalid channel id: %w", err))
return
}
ch, err := model.GetChannelById(channelId, true)
if err != nil {
common.ApiError(c, err)
return
}
if ch == nil {
c.JSON(http.StatusOK, gin.H{"success": false, "message": "channel not found"})
return
}
if ch.Type != constant.ChannelTypeCodex {
c.JSON(http.StatusOK, gin.H{"success": false, "message": "channel type is not Codex"})
return
}
if ch.ChannelInfo.IsMultiKey {
c.JSON(http.StatusOK, gin.H{"success": false, "message": "multi-key channel is not supported"})
return
}
oauthKey, err := codex.ParseOAuthKey(strings.TrimSpace(ch.Key))
if err != nil {
c.JSON(http.StatusOK, gin.H{"success": false, "message": err.Error()})
return
}
accessToken := strings.TrimSpace(oauthKey.AccessToken)
accountID := strings.TrimSpace(oauthKey.AccountID)
if accessToken == "" {
c.JSON(http.StatusOK, gin.H{"success": false, "message": "codex channel: access_token is required"})
return
}
if accountID == "" {
c.JSON(http.StatusOK, gin.H{"success": false, "message": "codex channel: account_id is required"})
return
}
client, err := service.NewProxyHttpClient(ch.GetSetting().Proxy)
if err != nil {
common.ApiError(c, err)
return
}
ctx, cancel := context.WithTimeout(c.Request.Context(), 15*time.Second)
defer cancel()
statusCode, body, err := service.FetchCodexWhamUsage(ctx, client, ch.GetBaseURL(), accessToken, accountID)
if err != nil {
c.JSON(http.StatusOK, gin.H{"success": false, "message": err.Error()})
return
}
if (statusCode == http.StatusUnauthorized || statusCode == http.StatusForbidden) && strings.TrimSpace(oauthKey.RefreshToken) != "" {
refreshCtx, refreshCancel := context.WithTimeout(c.Request.Context(), 10*time.Second)
defer refreshCancel()
res, refreshErr := service.RefreshCodexOAuthToken(refreshCtx, oauthKey.RefreshToken)
if refreshErr == nil {
oauthKey.AccessToken = res.AccessToken
oauthKey.RefreshToken = res.RefreshToken
oauthKey.LastRefresh = time.Now().Format(time.RFC3339)
oauthKey.Expired = res.ExpiresAt.Format(time.RFC3339)
if strings.TrimSpace(oauthKey.Type) == "" {
oauthKey.Type = "codex"
}
encoded, encErr := common.Marshal(oauthKey)
if encErr == nil {
_ = model.DB.Model(&model.Channel{}).Where("id = ?", ch.Id).Update("key", string(encoded)).Error
model.InitChannelCache()
service.ResetProxyClientCache()
}
ctx2, cancel2 := context.WithTimeout(c.Request.Context(), 15*time.Second)
defer cancel2()
statusCode, body, err = service.FetchCodexWhamUsage(ctx2, client, ch.GetBaseURL(), oauthKey.AccessToken, accountID)
if err != nil {
c.JSON(http.StatusOK, gin.H{"success": false, "message": err.Error()})
return
}
}
}
var payload any
if json.Unmarshal(body, &payload) != nil {
payload = string(body)
}
ok := statusCode >= 200 && statusCode < 300
resp := gin.H{
"success": ok,
"message": "",
"upstream_status": statusCode,
"data": payload,
}
if !ok {
resp["message"] = fmt.Sprintf("upstream status: %d", statusCode)
}
c.JSON(http.StatusOK, resp)
}

View File

@@ -115,6 +115,7 @@ func GetStatus(c *gin.Context) {
"user_agreement_enabled": legalSetting.UserAgreement != "",
"privacy_policy_enabled": legalSetting.PrivacyPolicy != "",
"checkin_enabled": operation_setting.GetCheckinSetting().Enabled,
"_qn": "new-api",
}
// 根据启用状态注入可选内容

View File

@@ -99,6 +99,9 @@ func newHTTPClient() *http.Client {
ExpectContinueTimeout: 1 * time.Second,
ResponseHeaderTimeout: time.Duration(timeoutSec) * time.Second,
}
if common.TLSInsecureSkipVerify {
transport.TLSClientConfig = common.InsecureTLSConfig
}
transport.DialContext = func(ctx context.Context, network, addr string) (net.Conn, error) {
host, _, err := net.SplitHostPort(addr)
if err != nil {
@@ -115,7 +118,17 @@ func newHTTPClient() *http.Client {
return &http.Client{Transport: transport}
}
var httpClient = newHTTPClient()
var (
httpClientOnce sync.Once
httpClient *http.Client
)
func getHTTPClient() *http.Client {
httpClientOnce.Do(func() {
httpClient = newHTTPClient()
})
return httpClient
}
func fetchJSON[T any](ctx context.Context, url string, out *upstreamEnvelope[T]) error {
var lastErr error
@@ -138,7 +151,7 @@ func fetchJSON[T any](ctx context.Context, url string, out *upstreamEnvelope[T])
}
cacheMutex.RUnlock()
resp, err := httpClient.Do(req)
resp, err := getHTTPClient().Do(req)
if err != nil {
lastErr = err
// backoff with jitter

View File

@@ -10,6 +10,7 @@ import (
"github.com/QuantumNous/new-api/model"
"github.com/QuantumNous/new-api/setting"
"github.com/QuantumNous/new-api/setting/console_setting"
"github.com/QuantumNous/new-api/setting/operation_setting"
"github.com/QuantumNous/new-api/setting/ratio_setting"
"github.com/QuantumNous/new-api/setting/system_setting"
@@ -177,6 +178,24 @@ func UpdateOption(c *gin.Context) {
})
return
}
case "AutomaticDisableStatusCodes":
_, err = operation_setting.ParseHTTPStatusCodeRanges(option.Value.(string))
if err != nil {
c.JSON(http.StatusOK, gin.H{
"success": false,
"message": err.Error(),
})
return
}
case "AutomaticRetryStatusCodes":
_, err = operation_setting.ParseHTTPStatusCodeRanges(option.Value.(string))
if err != nil {
c.JSON(http.StatusOK, gin.H{
"success": false,
"message": err.Error(),
})
return
}
case "console_setting.api_info":
err = console_setting.ValidateConsoleSettings(option.Value.(string), "ApiInfo")
if err != nil {

201
controller/performance.go Normal file
View File

@@ -0,0 +1,201 @@
package controller
import (
"net/http"
"os"
"path/filepath"
"runtime"
"github.com/QuantumNous/new-api/common"
"github.com/gin-gonic/gin"
)
// PerformanceStats 性能统计信息
type PerformanceStats struct {
// 缓存统计
CacheStats common.DiskCacheStats `json:"cache_stats"`
// 系统内存统计
MemoryStats MemoryStats `json:"memory_stats"`
// 磁盘缓存目录信息
DiskCacheInfo DiskCacheInfo `json:"disk_cache_info"`
// 磁盘空间信息
DiskSpaceInfo DiskSpaceInfo `json:"disk_space_info"`
// 配置信息
Config PerformanceConfig `json:"config"`
}
// MemoryStats 内存统计
type MemoryStats struct {
// 已分配内存(字节)
Alloc uint64 `json:"alloc"`
// 总分配内存(字节)
TotalAlloc uint64 `json:"total_alloc"`
// 系统内存(字节)
Sys uint64 `json:"sys"`
// GC 次数
NumGC uint32 `json:"num_gc"`
// Goroutine 数量
NumGoroutine int `json:"num_goroutine"`
}
// DiskCacheInfo 磁盘缓存目录信息
type DiskCacheInfo struct {
// 缓存目录路径
Path string `json:"path"`
// 目录是否存在
Exists bool `json:"exists"`
// 文件数量
FileCount int `json:"file_count"`
// 总大小(字节)
TotalSize int64 `json:"total_size"`
}
// DiskSpaceInfo 磁盘空间信息
type DiskSpaceInfo struct {
// 总空间(字节)
Total uint64 `json:"total"`
// 可用空间(字节)
Free uint64 `json:"free"`
// 已用空间(字节)
Used uint64 `json:"used"`
// 使用百分比
UsedPercent float64 `json:"used_percent"`
}
// PerformanceConfig 性能配置
type PerformanceConfig struct {
// 是否启用磁盘缓存
DiskCacheEnabled bool `json:"disk_cache_enabled"`
// 磁盘缓存阈值MB
DiskCacheThresholdMB int `json:"disk_cache_threshold_mb"`
// 磁盘缓存最大大小MB
DiskCacheMaxSizeMB int `json:"disk_cache_max_size_mb"`
// 磁盘缓存路径
DiskCachePath string `json:"disk_cache_path"`
// 是否在容器中运行
IsRunningInContainer bool `json:"is_running_in_container"`
}
// GetPerformanceStats 获取性能统计信息
func GetPerformanceStats(c *gin.Context) {
// 获取缓存统计
cacheStats := common.GetDiskCacheStats()
// 获取内存统计
var memStats runtime.MemStats
runtime.ReadMemStats(&memStats)
// 获取磁盘缓存目录信息
diskCacheInfo := getDiskCacheInfo()
// 获取配置信息
diskConfig := common.GetDiskCacheConfig()
config := PerformanceConfig{
DiskCacheEnabled: diskConfig.Enabled,
DiskCacheThresholdMB: diskConfig.ThresholdMB,
DiskCacheMaxSizeMB: diskConfig.MaxSizeMB,
DiskCachePath: diskConfig.Path,
IsRunningInContainer: common.IsRunningInContainer(),
}
// 获取磁盘空间信息
diskSpaceInfo := getDiskSpaceInfo()
stats := PerformanceStats{
CacheStats: cacheStats,
MemoryStats: MemoryStats{
Alloc: memStats.Alloc,
TotalAlloc: memStats.TotalAlloc,
Sys: memStats.Sys,
NumGC: memStats.NumGC,
NumGoroutine: runtime.NumGoroutine(),
},
DiskCacheInfo: diskCacheInfo,
DiskSpaceInfo: diskSpaceInfo,
Config: config,
}
c.JSON(http.StatusOK, gin.H{
"success": true,
"data": stats,
})
}
// ClearDiskCache 清理磁盘缓存
func ClearDiskCache(c *gin.Context) {
cachePath := common.GetDiskCachePath()
if cachePath == "" {
cachePath = os.TempDir()
}
dir := filepath.Join(cachePath, "new-api-body-cache")
// 删除缓存目录
err := os.RemoveAll(dir)
if err != nil && !os.IsNotExist(err) {
common.ApiError(c, err)
return
}
// 重置统计
common.ResetDiskCacheStats()
c.JSON(http.StatusOK, gin.H{
"success": true,
"message": "磁盘缓存已清理",
})
}
// ResetPerformanceStats 重置性能统计
func ResetPerformanceStats(c *gin.Context) {
common.ResetDiskCacheStats()
c.JSON(http.StatusOK, gin.H{
"success": true,
"message": "统计信息已重置",
})
}
// ForceGC 强制执行 GC
func ForceGC(c *gin.Context) {
runtime.GC()
c.JSON(http.StatusOK, gin.H{
"success": true,
"message": "GC 已执行",
})
}
// getDiskCacheInfo 获取磁盘缓存目录信息
func getDiskCacheInfo() DiskCacheInfo {
cachePath := common.GetDiskCachePath()
if cachePath == "" {
cachePath = os.TempDir()
}
dir := filepath.Join(cachePath, "new-api-body-cache")
info := DiskCacheInfo{
Path: dir,
Exists: false,
}
entries, err := os.ReadDir(dir)
if err != nil {
return info
}
info.Exists = true
info.FileCount = 0
info.TotalSize = 0
for _, entry := range entries {
if entry.IsDir() {
continue
}
info.FileCount++
if fileInfo, err := entry.Info(); err == nil {
info.TotalSize += fileInfo.Size()
}
}
return info
}

View File

@@ -0,0 +1,38 @@
//go:build !windows
package controller
import (
"os"
"github.com/QuantumNous/new-api/common"
"golang.org/x/sys/unix"
)
// getDiskSpaceInfo 获取缓存目录所在磁盘的空间信息 (Unix/Linux/macOS)
func getDiskSpaceInfo() DiskSpaceInfo {
cachePath := common.GetDiskCachePath()
if cachePath == "" {
cachePath = os.TempDir()
}
info := DiskSpaceInfo{}
var stat unix.Statfs_t
err := unix.Statfs(cachePath, &stat)
if err != nil {
return info
}
// 计算磁盘空间 (显式转换以兼容 FreeBSD其字段类型为 int64)
bsize := uint64(stat.Bsize)
info.Total = uint64(stat.Blocks) * bsize
info.Free = uint64(stat.Bavail) * bsize
info.Used = info.Total - uint64(stat.Bfree)*bsize
if info.Total > 0 {
info.UsedPercent = float64(info.Used) / float64(info.Total) * 100
}
return info
}

View File

@@ -0,0 +1,52 @@
//go:build windows
package controller
import (
"os"
"syscall"
"unsafe"
"github.com/QuantumNous/new-api/common"
)
// getDiskSpaceInfo 获取缓存目录所在磁盘的空间信息 (Windows)
func getDiskSpaceInfo() DiskSpaceInfo {
cachePath := common.GetDiskCachePath()
if cachePath == "" {
cachePath = os.TempDir()
}
info := DiskSpaceInfo{}
kernel32 := syscall.NewLazyDLL("kernel32.dll")
getDiskFreeSpaceEx := kernel32.NewProc("GetDiskFreeSpaceExW")
var freeBytesAvailable, totalBytes, totalFreeBytes uint64
pathPtr, err := syscall.UTF16PtrFromString(cachePath)
if err != nil {
return info
}
ret, _, _ := getDiskFreeSpaceEx.Call(
uintptr(unsafe.Pointer(pathPtr)),
uintptr(unsafe.Pointer(&freeBytesAvailable)),
uintptr(unsafe.Pointer(&totalBytes)),
uintptr(unsafe.Pointer(&totalFreeBytes)),
)
if ret == 0 {
return info
}
info.Total = totalBytes
info.Free = freeBytesAvailable
info.Used = totalBytes - totalFreeBytes
if info.Total > 0 {
info.UsedPercent = float64(info.Used) / float64(info.Total) * 100
}
return info
}

View File

@@ -11,6 +11,7 @@ import (
"sync"
"time"
"github.com/QuantumNous/new-api/common"
"github.com/QuantumNous/new-api/logger"
"github.com/QuantumNous/new-api/dto"
@@ -110,6 +111,9 @@ func FetchUpstreamRatios(c *gin.Context) {
dialer := &net.Dialer{Timeout: 10 * time.Second}
transport := &http.Transport{MaxIdleConns: 100, IdleConnTimeout: 90 * time.Second, TLSHandshakeTimeout: 10 * time.Second, ExpectContinueTimeout: 1 * time.Second, ResponseHeaderTimeout: 10 * time.Second}
if common.TLSInsecureSkipVerify {
transport.TLSClientConfig = common.InsecureTLSConfig
}
transport.DialContext = func(ctx context.Context, network, addr string) (net.Conn, error) {
host, _, err := net.SplitHostPort(addr)
if err != nil {

View File

@@ -21,6 +21,7 @@ import (
"github.com/QuantumNous/new-api/relay/helper"
"github.com/QuantumNous/new-api/service"
"github.com/QuantumNous/new-api/setting"
"github.com/QuantumNous/new-api/setting/operation_setting"
"github.com/QuantumNous/new-api/types"
"github.com/bytedance/gopkg/util/gopool"
@@ -44,7 +45,7 @@ func relayHandler(c *gin.Context, info *relaycommon.RelayInfo) *types.NewAPIErro
err = relay.RerankHelper(c, info)
case relayconstant.RelayModeEmbeddings:
err = relay.EmbeddingHelper(c, info)
case relayconstant.RelayModeResponses:
case relayconstant.RelayModeResponses, relayconstant.RelayModeResponsesCompact:
err = relay.ResponsesHelper(c, info)
default:
err = relay.TextHelper(c, info)
@@ -166,8 +167,12 @@ func Relay(c *gin.Context, relayFormat types.RelayFormat) {
defer func() {
// Only return quota if downstream failed and quota was actually pre-consumed
if newAPIError != nil && relayInfo.FinalPreConsumedQuota != 0 {
service.ReturnPreConsumedQuota(c, relayInfo)
if newAPIError != nil {
newAPIError = service.NormalizeViolationFeeError(newAPIError)
if relayInfo.FinalPreConsumedQuota != 0 {
service.ReturnPreConsumedQuota(c, relayInfo)
}
service.ChargeViolationFeeIfNeeded(c, relayInfo, newAPIError)
}
}()
@@ -214,6 +219,8 @@ func Relay(c *gin.Context, relayFormat types.RelayFormat) {
return
}
newAPIError = service.NormalizeViolationFeeError(newAPIError)
processChannelError(c, *types.NewChannelError(channel.Id, channel.Type, channel.Name, channel.ChannelInfo.IsMultiKey, common.GetContextKeyString(c, constant.ContextKeyChannelKey), channel.GetAutoBan()), newAPIError)
if !shouldRetry(c, newAPIError, common.RetryTimes-retryParam.GetRetry()) {
@@ -304,6 +311,9 @@ func shouldRetry(c *gin.Context, openaiErr *types.NewAPIError, retryTimes int) b
if openaiErr == nil {
return false
}
if service.ShouldSkipRetryAfterChannelAffinityFailure(c) {
return false
}
if types.IsChannelError(openaiErr) {
return true
}
@@ -316,30 +326,14 @@ func shouldRetry(c *gin.Context, openaiErr *types.NewAPIError, retryTimes int) b
if _, ok := c.Get("specific_channel_id"); ok {
return false
}
if openaiErr.StatusCode == http.StatusTooManyRequests {
return true
}
if openaiErr.StatusCode == 307 {
return true
}
if openaiErr.StatusCode/100 == 5 {
// 超时不重试
if openaiErr.StatusCode == 504 || openaiErr.StatusCode == 524 {
return false
}
return true
}
if openaiErr.StatusCode == http.StatusBadRequest {
code := openaiErr.StatusCode
if code >= 200 && code < 300 {
return false
}
if openaiErr.StatusCode == 408 {
// azure处理超时不重试
return false
if code < 100 || code > 599 {
return true
}
if openaiErr.StatusCode/100 == 2 {
return false
}
return true
return operation_setting.ShouldRetryByStatusCode(code)
}
func processChannelError(c *gin.Context, channelError types.ChannelError, err *types.NewAPIError) {
@@ -348,7 +342,7 @@ func processChannelError(c *gin.Context, channelError types.ChannelError, err *t
// do not use context to get channel info, there may be inconsistent channel info when processing asynchronously
if service.ShouldDisableChannel(channelError.ChannelType, err) && channelError.AutoBan {
gopool.Go(func() {
service.DisableChannel(channelError, err.Error())
service.DisableChannel(channelError, err.ErrorWithStatusCode())
})
}
@@ -377,8 +371,9 @@ func processChannelError(c *gin.Context, channelError types.ChannelError, err *t
adminInfo["is_multi_key"] = true
adminInfo["multi_key_index"] = common.GetContextKeyInt(c, constant.ContextKeyChannelMultiKeyIndex)
}
service.AppendChannelAffinityAdminInfo(c, adminInfo)
other["admin_info"] = adminInfo
model.RecordErrorLog(c, userId, channelId, modelName, tokenName, err.MaskSensitiveError(), tokenId, 0, false, userGroup, other)
model.RecordErrorLog(c, userId, channelId, modelName, tokenName, err.MaskSensitiveErrorWithStatusCode(), tokenId, 0, false, userGroup, other)
}
}
@@ -522,6 +517,9 @@ func shouldRetryTaskRelay(c *gin.Context, channelId int, taskErr *dto.TaskError,
if taskErr == nil {
return false
}
if service.ShouldSkipRetryAfterChannelAffinityFailure(c) {
return false
}
if retryTimes <= 0 {
return false
}

View File

@@ -28,9 +28,18 @@ const (
var stripeAdaptor = &StripeAdaptor{}
// StripePayRequest represents a payment request for Stripe checkout.
type StripePayRequest struct {
Amount int64 `json:"amount"`
// Amount is the quantity of units to purchase.
Amount int64 `json:"amount"`
// PaymentMethod specifies the payment method (e.g., "stripe").
PaymentMethod string `json:"payment_method"`
// SuccessURL is the optional custom URL to redirect after successful payment.
// If empty, defaults to the server's console log page.
SuccessURL string `json:"success_url,omitempty"`
// CancelURL is the optional custom URL to redirect when payment is canceled.
// If empty, defaults to the server's console topup page.
CancelURL string `json:"cancel_url,omitempty"`
}
type StripeAdaptor struct {
@@ -69,6 +78,16 @@ func (*StripeAdaptor) RequestPay(c *gin.Context, req *StripePayRequest) {
return
}
if req.SuccessURL != "" && common.ValidateRedirectURL(req.SuccessURL) != nil {
c.JSON(http.StatusBadRequest, gin.H{"message": "支付成功重定向URL不在可信任域名列表中", "data": ""})
return
}
if req.CancelURL != "" && common.ValidateRedirectURL(req.CancelURL) != nil {
c.JSON(http.StatusBadRequest, gin.H{"message": "支付取消重定向URL不在可信任域名列表中", "data": ""})
return
}
id := c.GetInt("id")
user, _ := model.GetUserById(id, false)
chargedMoney := GetChargedAmount(float64(req.Amount), *user)
@@ -76,7 +95,7 @@ func (*StripeAdaptor) RequestPay(c *gin.Context, req *StripePayRequest) {
reference := fmt.Sprintf("new-api-ref-%d-%d-%s", user.Id, time.Now().UnixMilli(), randstr.String(4))
referenceId := "ref_" + common.Sha1([]byte(reference))
payLink, err := genStripeLink(referenceId, user.StripeCustomer, user.Email, req.Amount)
payLink, err := genStripeLink(referenceId, user.StripeCustomer, user.Email, req.Amount, req.SuccessURL, req.CancelURL)
if err != nil {
log.Println("获取Stripe Checkout支付链接失败", err)
c.JSON(200, gin.H{"message": "error", "data": "拉起支付失败"})
@@ -210,17 +229,37 @@ func sessionExpired(event stripe.Event) {
log.Println("充值订单已过期", referenceId)
}
func genStripeLink(referenceId string, customerId string, email string, amount int64) (string, error) {
// genStripeLink generates a Stripe Checkout session URL for payment.
// It creates a new checkout session with the specified parameters and returns the payment URL.
//
// Parameters:
// - referenceId: unique reference identifier for the transaction
// - customerId: existing Stripe customer ID (empty string if new customer)
// - email: customer email address for new customer creation
// - amount: quantity of units to purchase
// - successURL: custom URL to redirect after successful payment (empty for default)
// - cancelURL: custom URL to redirect when payment is canceled (empty for default)
//
// Returns the checkout session URL or an error if the session creation fails.
func genStripeLink(referenceId string, customerId string, email string, amount int64, successURL string, cancelURL string) (string, error) {
if !strings.HasPrefix(setting.StripeApiSecret, "sk_") && !strings.HasPrefix(setting.StripeApiSecret, "rk_") {
return "", fmt.Errorf("无效的Stripe API密钥")
}
stripe.Key = setting.StripeApiSecret
// Use custom URLs if provided, otherwise use defaults
if successURL == "" {
successURL = system_setting.ServerAddress + "/console/log"
}
if cancelURL == "" {
cancelURL = system_setting.ServerAddress + "/console/topup"
}
params := &stripe.CheckoutSessionParams{
ClientReferenceID: stripe.String(referenceId),
SuccessURL: stripe.String(system_setting.ServerAddress + "/console/log"),
CancelURL: stripe.String(system_setting.ServerAddress + "/console/topup"),
SuccessURL: stripe.String(successURL),
CancelURL: stripe.String(cancelURL),
LineItems: []*stripe.CheckoutSessionLineItemParams{
{
Price: stripe.String(setting.StripePriceId),

View File

@@ -284,6 +284,46 @@
}
]
}
},
"/v1/responses/compact": {
"post": {
"summary": "压缩对话 (OpenAI Responses API)",
"deprecated": false,
"description": "OpenAI Responses API用于对长对话进行 compaction。",
"operationId": "compactResponse",
"tags": [
"OpenAI格式(Responses)"
],
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ResponsesCompactionRequest"
}
}
},
"required": true
},
"responses": {
"200": {
"description": "成功压缩对话",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ResponsesCompactionResponse"
}
}
},
"headers": {}
}
},
"security": [
{
"BearerAuth": []
}
]
}
},
"/v1/images/generations": {
"post": {
@@ -3130,10 +3170,71 @@
}
}
},
"ResponsesStreamResponse": {
"type": "object",
"properties": {
"type": {
"ResponsesCompactionResponse": {
"type": "object",
"properties": {
"id": {
"type": "string"
},
"object": {
"type": "string",
"example": "response.compaction"
},
"created_at": {
"type": "integer"
},
"output": {
"type": "array",
"items": {
"type": "object",
"properties": {}
}
},
"usage": {
"$ref": "#/components/schemas/Usage"
},
"error": {
"type": "object",
"properties": {}
}
}
},
"ResponsesCompactionRequest": {
"type": "object",
"required": [
"model"
],
"properties": {
"model": {
"type": "string"
},
"input": {
"description": "输入内容,可以是字符串或消息数组",
"oneOf": [
{
"type": "string"
},
{
"type": "array",
"items": {
"type": "object",
"properties": {}
}
}
]
},
"instructions": {
"type": "string"
},
"previous_response_id": {
"type": "string"
}
}
},
"ResponsesStreamResponse": {
"type": "object",
"properties": {
"type": {
"type": "string"
},
"response": {
@@ -7138,4 +7239,4 @@
"BearerAuth": []
}
]
}
}

View File

@@ -26,7 +26,8 @@ type GeneralErrorResponse struct {
Msg string `json:"msg"`
Err string `json:"err"`
ErrorMsg string `json:"error_msg"`
Metadata json.RawMessage `json:"metadata,omitempty"`
Metadata json.RawMessage `json:"metadata,omitempty"`
Detail string `json:"detail,omitempty"`
Header struct {
Message string `json:"message"`
} `json:"header"`
@@ -79,6 +80,9 @@ func (e GeneralErrorResponse) ToMessage() string {
if e.ErrorMsg != "" {
return e.ErrorMsg
}
if e.Detail != "" {
return e.Detail
}
if e.Header.Message != "" {
return e.Header.Message
}

View File

@@ -341,6 +341,88 @@ type GeminiChatGenerationConfig struct {
ImageConfig json.RawMessage `json:"imageConfig,omitempty"` // RawMessage to allow flexible image config
}
// UnmarshalJSON allows GeminiChatGenerationConfig to accept both snake_case and camelCase fields.
func (c *GeminiChatGenerationConfig) UnmarshalJSON(data []byte) error {
type Alias GeminiChatGenerationConfig
var aux struct {
Alias
TopPSnake float64 `json:"top_p,omitempty"`
TopKSnake float64 `json:"top_k,omitempty"`
MaxOutputTokensSnake uint `json:"max_output_tokens,omitempty"`
CandidateCountSnake int `json:"candidate_count,omitempty"`
StopSequencesSnake []string `json:"stop_sequences,omitempty"`
ResponseMimeTypeSnake string `json:"response_mime_type,omitempty"`
ResponseSchemaSnake any `json:"response_schema,omitempty"`
ResponseJsonSchemaSnake json.RawMessage `json:"response_json_schema,omitempty"`
PresencePenaltySnake *float32 `json:"presence_penalty,omitempty"`
FrequencyPenaltySnake *float32 `json:"frequency_penalty,omitempty"`
ResponseLogprobsSnake bool `json:"response_logprobs,omitempty"`
MediaResolutionSnake MediaResolution `json:"media_resolution,omitempty"`
ResponseModalitiesSnake []string `json:"response_modalities,omitempty"`
ThinkingConfigSnake *GeminiThinkingConfig `json:"thinking_config,omitempty"`
SpeechConfigSnake json.RawMessage `json:"speech_config,omitempty"`
ImageConfigSnake json.RawMessage `json:"image_config,omitempty"`
}
if err := common.Unmarshal(data, &aux); err != nil {
return err
}
*c = GeminiChatGenerationConfig(aux.Alias)
// Prioritize snake_case if present
if aux.TopPSnake != 0 {
c.TopP = aux.TopPSnake
}
if aux.TopKSnake != 0 {
c.TopK = aux.TopKSnake
}
if aux.MaxOutputTokensSnake != 0 {
c.MaxOutputTokens = aux.MaxOutputTokensSnake
}
if aux.CandidateCountSnake != 0 {
c.CandidateCount = aux.CandidateCountSnake
}
if len(aux.StopSequencesSnake) > 0 {
c.StopSequences = aux.StopSequencesSnake
}
if aux.ResponseMimeTypeSnake != "" {
c.ResponseMimeType = aux.ResponseMimeTypeSnake
}
if aux.ResponseSchemaSnake != nil {
c.ResponseSchema = aux.ResponseSchemaSnake
}
if len(aux.ResponseJsonSchemaSnake) > 0 {
c.ResponseJsonSchema = aux.ResponseJsonSchemaSnake
}
if aux.PresencePenaltySnake != nil {
c.PresencePenalty = aux.PresencePenaltySnake
}
if aux.FrequencyPenaltySnake != nil {
c.FrequencyPenalty = aux.FrequencyPenaltySnake
}
if aux.ResponseLogprobsSnake {
c.ResponseLogprobs = aux.ResponseLogprobsSnake
}
if aux.MediaResolutionSnake != "" {
c.MediaResolution = aux.MediaResolutionSnake
}
if len(aux.ResponseModalitiesSnake) > 0 {
c.ResponseModalities = aux.ResponseModalitiesSnake
}
if aux.ThinkingConfigSnake != nil {
c.ThinkingConfig = aux.ThinkingConfigSnake
}
if len(aux.SpeechConfigSnake) > 0 {
c.SpeechConfig = aux.SpeechConfigSnake
}
if len(aux.ImageConfigSnake) > 0 {
c.ImageConfig = aux.ImageConfigSnake
}
return nil
}
type MediaResolution string
type GeminiChatCandidate struct {
@@ -367,11 +449,12 @@ type GeminiChatResponse struct {
}
type GeminiUsageMetadata struct {
PromptTokenCount int `json:"promptTokenCount"`
CandidatesTokenCount int `json:"candidatesTokenCount"`
TotalTokenCount int `json:"totalTokenCount"`
ThoughtsTokenCount int `json:"thoughtsTokenCount"`
PromptTokensDetails []GeminiPromptTokensDetails `json:"promptTokensDetails"`
PromptTokenCount int `json:"promptTokenCount"`
CandidatesTokenCount int `json:"candidatesTokenCount"`
TotalTokenCount int `json:"totalTokenCount"`
ThoughtsTokenCount int `json:"thoughtsTokenCount"`
CachedContentTokenCount int `json:"cachedContentTokenCount"`
PromptTokensDetails []GeminiPromptTokensDetails `json:"promptTokensDetails"`
}
type GeminiPromptTokensDetails struct {

20
dto/openai_compaction.go Normal file
View File

@@ -0,0 +1,20 @@
package dto
import (
"encoding/json"
"github.com/QuantumNous/new-api/types"
)
type OpenAIResponsesCompactionResponse struct {
ID string `json:"id"`
Object string `json:"object"`
CreatedAt int `json:"created_at"`
Output json.RawMessage `json:"output"`
Usage *Usage `json:"usage"`
Error any `json:"error,omitempty"`
}
func (o *OpenAIResponsesCompactionResponse) GetOpenAIError() *types.OpenAIError {
return GetOpenAIError(o.Error)
}

View File

@@ -808,15 +808,19 @@ type OpenAIResponsesRequest struct {
PromptCacheKey json.RawMessage `json:"prompt_cache_key,omitempty"`
PromptCacheRetention json.RawMessage `json:"prompt_cache_retention,omitempty"`
Stream bool `json:"stream,omitempty"`
Temperature float64 `json:"temperature,omitempty"`
Temperature *float64 `json:"temperature,omitempty"`
Text json.RawMessage `json:"text,omitempty"`
ToolChoice json.RawMessage `json:"tool_choice,omitempty"`
Tools json.RawMessage `json:"tools,omitempty"` // 需要处理的参数很少MCP 参数太多不确定,所以用 map
TopP float64 `json:"top_p,omitempty"`
TopP *float64 `json:"top_p,omitempty"`
Truncation string `json:"truncation,omitempty"`
User string `json:"user,omitempty"`
MaxToolCalls uint `json:"max_tool_calls,omitempty"`
Prompt json.RawMessage `json:"prompt,omitempty"`
// qwen
EnableThinking json.RawMessage `json:"enable_thinking,omitempty"`
// perplexity
Preset json.RawMessage `json:"preset,omitempty"`
}
func (r *OpenAIResponsesRequest) GetTokenCountMeta() *types.TokenCountMeta {

View File

@@ -334,13 +334,16 @@ type IncompleteDetails struct {
}
type ResponsesOutput struct {
Type string `json:"type"`
ID string `json:"id"`
Status string `json:"status"`
Role string `json:"role"`
Content []ResponsesOutputContent `json:"content"`
Quality string `json:"quality"`
Size string `json:"size"`
Type string `json:"type"`
ID string `json:"id"`
Status string `json:"status"`
Role string `json:"role"`
Content []ResponsesOutputContent `json:"content"`
Quality string `json:"quality"`
Size string `json:"size"`
CallId string `json:"call_id,omitempty"`
Name string `json:"name,omitempty"`
Arguments string `json:"arguments,omitempty"`
}
type ResponsesOutputContent struct {
@@ -369,6 +372,10 @@ type ResponsesStreamResponse struct {
Response *OpenAIResponsesResponse `json:"response,omitempty"`
Delta string `json:"delta,omitempty"`
Item *ResponsesOutput `json:"item,omitempty"`
// - response.function_call_arguments.delta
// - response.function_call_arguments.done
OutputIndex *int `json:"output_index,omitempty"`
ItemID string `json:"item_id,omitempty"`
}
// GetOpenAIError 从动态错误类型中提取OpenAIError结构

View File

@@ -0,0 +1,40 @@
package dto
import (
"encoding/json"
"strings"
"github.com/QuantumNous/new-api/types"
"github.com/gin-gonic/gin"
)
type OpenAIResponsesCompactionRequest struct {
Model string `json:"model"`
Input json.RawMessage `json:"input,omitempty"`
Instructions json.RawMessage `json:"instructions,omitempty"`
PreviousResponseID string `json:"previous_response_id,omitempty"`
}
func (r *OpenAIResponsesCompactionRequest) GetTokenCountMeta() *types.TokenCountMeta {
var parts []string
if len(r.Instructions) > 0 {
parts = append(parts, string(r.Instructions))
}
if len(r.Input) > 0 {
parts = append(parts, string(r.Input))
}
return &types.TokenCountMeta{
CombineText: strings.Join(parts, "\n"),
}
}
func (r *OpenAIResponsesCompactionRequest) IsStream(c *gin.Context) bool {
return false
}
func (r *OpenAIResponsesCompactionRequest) SetModelName(modelName string) {
if modelName != "" {
r.Model = modelName
}
}

55
dto/values.go Normal file
View File

@@ -0,0 +1,55 @@
package dto
import (
"encoding/json"
"strconv"
)
type IntValue int
func (i *IntValue) UnmarshalJSON(b []byte) error {
var n int
if err := json.Unmarshal(b, &n); err == nil {
*i = IntValue(n)
return nil
}
var s string
if err := json.Unmarshal(b, &s); err != nil {
return err
}
v, err := strconv.Atoi(s)
if err != nil {
return err
}
*i = IntValue(v)
return nil
}
func (i IntValue) MarshalJSON() ([]byte, error) {
return json.Marshal(int(i))
}
type BoolValue bool
func (b *BoolValue) UnmarshalJSON(data []byte) error {
var boolean bool
if err := json.Unmarshal(data, &boolean); err == nil {
*b = BoolValue(boolean)
return nil
}
var str string
if err := json.Unmarshal(data, &str); err != nil {
return err
}
if str == "true" {
*b = BoolValue(true)
} else if str == "false" {
*b = BoolValue(false)
} else {
return json.Unmarshal(data, &boolean)
}
return nil
}
func (b BoolValue) MarshalJSON() ([]byte, error) {
return json.Marshal(bool(b))
}

17
go.mod
View File

@@ -55,16 +55,18 @@ require (
)
require (
github.com/DmitriyVTitov/size v1.5.0 // indirect
github.com/anknown/darts v0.0.0-20151216065714-83ff685239e6 // indirect
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.0 // indirect
github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.2 // indirect
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.2 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/boombuler/barcode v1.1.0 // indirect
github.com/bytedance/sonic v1.14.1 // indirect
github.com/bytedance/sonic/loader v0.3.0 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/cloudwego/base64x v0.1.6 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect
github.com/dlclark/regexp2 v1.11.5 // indirect
github.com/dustin/go-humanize v1.0.1 // indirect
@@ -94,7 +96,7 @@ require (
github.com/jinzhu/inflection v1.0.0 // indirect
github.com/jinzhu/now v1.1.5 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/klauspost/compress v1.17.8 // indirect
github.com/klauspost/compress v1.18.0 // indirect
github.com/klauspost/cpuid/v2 v2.3.0 // indirect
github.com/leodido/go-urn v1.4.0 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
@@ -103,10 +105,17 @@ require (
github.com/mitchellh/mapstructure v1.5.0 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/ncruces/go-strftime v0.1.9 // indirect
github.com/pelletier/go-toml/v2 v2.2.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
github.com/prometheus/client_golang v1.22.0 // indirect
github.com/prometheus/client_model v0.6.1 // indirect
github.com/prometheus/common v0.62.0 // indirect
github.com/prometheus/procfs v0.15.1 // indirect
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
github.com/samber/go-singleflightx v0.3.2 // indirect
github.com/samber/hot v0.11.0 // indirect
github.com/stretchr/objx v0.5.2 // indirect
github.com/tidwall/match v1.1.1 // indirect
github.com/tidwall/pretty v1.2.0 // indirect
@@ -120,7 +129,7 @@ require (
golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b // indirect
golang.org/x/sys v0.38.0 // indirect
golang.org/x/text v0.31.0 // indirect
google.golang.org/protobuf v1.34.2 // indirect
google.golang.org/protobuf v1.36.5 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
modernc.org/libc v1.66.10 // indirect
modernc.org/mathutil v1.7.1 // indirect

27
go.sum
View File

@@ -1,5 +1,7 @@
github.com/Calcium-Ion/go-epay v0.0.4 h1:C96M7WfRLadcIVscWzwLiYs8etI1wrDmtFMuK2zP22A=
github.com/Calcium-Ion/go-epay v0.0.4/go.mod h1:cxo/ZOg8ClvE3VAnCmEzbuyAZINSq7kFEN9oHj5WQ2U=
github.com/DmitriyVTitov/size v1.5.0 h1:/PzqxYrOyOUX1BXj6J9OuVRVGe+66VL4D9FlUaW515g=
github.com/DmitriyVTitov/size v1.5.0/go.mod h1:le6rNI4CoLQV1b9gzp1+3d7hMAD/uu2QcJ+aYbNgiU0=
github.com/abema/go-mp4 v1.4.1 h1:YoS4VRqd+pAmddRPLFf8vMk74kuGl6ULSjzhsIqwr6M=
github.com/abema/go-mp4 v1.4.1/go.mod h1:vPl9t5ZK7K0x68jh12/+ECWBCXoWuIDtNgPtU2f04ws=
github.com/andybalholm/brotli v1.1.1 h1:PR2pgnyFznKEugtsUo0xLdDop5SKXd5Qf5ysW+7XdTA=
@@ -22,6 +24,8 @@ github.com/aws/aws-sdk-go-v2/service/bedrockruntime v1.33.0 h1:JzidOz4Hcn2RbP5fv
github.com/aws/aws-sdk-go-v2/service/bedrockruntime v1.33.0/go.mod h1:9A4/PJYlWjvjEzzoOLGQjkLt4bYK9fRWi7uz1GSsAcA=
github.com/aws/smithy-go v1.22.5 h1:P9ATCXPMb2mPjYBgueqJNCA5S9UfktsW0tTxi+a7eqw=
github.com/aws/smithy-go v1.22.5/go.mod h1:t1ufH5HMublsJYulve2RKmHDC15xu1f26kHCp/HgceI=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/boombuler/barcode v1.0.1-0.20190219062509-6c824513bacc/go.mod h1:paBWMcWSl3LHKBqUq+rly7CNSldXjb2rDl3JlRe0mD8=
github.com/boombuler/barcode v1.1.0 h1:ChaYjBR63fr4LFyGn8E8nt7dBSt3MiU3zMOZqFvVkHo=
github.com/boombuler/barcode v1.1.0/go.mod h1:paBWMcWSl3LHKBqUq+rly7CNSldXjb2rDl3JlRe0mD8=
@@ -40,6 +44,8 @@ github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ3
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78=
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc=
github.com/dlclark/regexp2 v1.11.5 h1:Q/sSnsKerHeCkc/jSTNq1oCm7KiVgUMZRDUoRu0JQZQ=
@@ -110,6 +116,7 @@ github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
github.com/golang-jwt/jwt/v5 v5.3.0 h1:pv4AsKCKKZuqlgs5sUmn4x8UlGa0kEVt/puTpKx9vvo=
github.com/golang-jwt/jwt/v5 v5.3.0/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE=
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw=
github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
@@ -165,6 +172,8 @@ github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnr
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
github.com/klauspost/compress v1.17.8 h1:YcnTYrq7MikUT7k0Yb5eceMmALQPYBW/Xltxn0NAMnU=
github.com/klauspost/compress v1.17.8/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw=
github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ=
github.com/klauspost/cpuid/v2 v2.3.0 h1:S4CRMLnYUhGeDFDqkGriYKdfoFlDnMtqTiI/sFzhA9Y=
github.com/klauspost/cpuid/v2 v2.3.0/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
@@ -200,6 +209,8 @@ github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJ
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/ncruces/go-strftime v0.1.9 h1:bY0MQC28UADQmHmaF5dgpLmImcShSi2kHU9XLdhx/f4=
github.com/ncruces/go-strftime v0.1.9/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls=
github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE=
@@ -218,13 +229,27 @@ github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/pquerna/otp v1.5.0 h1:NMMR+WrmaqXU4EzdGJEE1aUUI0AMRzsp96fFFWNPwxs=
github.com/pquerna/otp v1.5.0/go.mod h1:dkJfzwRKNiegxyNb54X/3fLwhCynbMspSyWKnvi1AEg=
github.com/prometheus/client_golang v1.22.0 h1:rb93p9lokFEsctTys46VnV1kLCDpVZ0a/Y92Vm0Zc6Q=
github.com/prometheus/client_golang v1.22.0/go.mod h1:R7ljNsLXhuQXYZYtw6GAE9AZg8Y7vEW5scdCXrWRXC0=
github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E=
github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY=
github.com/prometheus/common v0.62.0 h1:xasJaQlnWAeyHdUBeGjXmutelfJHWMRr+Fg4QszZ2Io=
github.com/prometheus/common v0.62.0/go.mod h1:vyBcEuLSvWos9B1+CyL7JZ2up+uFzXhkqml0W5zIY1I=
github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc=
github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk=
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc=
github.com/rogpeppe/go-internal v1.8.0 h1:FCbCCtXNOY3UtUuHUYaghJg4y7Fd14rXifAYUAtL9R8=
github.com/rogpeppe/go-internal v1.8.0/go.mod h1:WmiCO8CzOY8rg0OYDC4/i/2WRWAB6poM+XZ2dLUbcbE=
github.com/samber/go-singleflightx v0.3.2 h1:jXbUU0fvis8Fdv4HGONboX5WdEZcYLoBEcKiE+ITCyQ=
github.com/samber/go-singleflightx v0.3.2/go.mod h1:X2BR+oheHIYc73PvxRMlcASg6KYYTQyUYpdVU7t/ux4=
github.com/samber/hot v0.11.0 h1:JhV9hk8SmZIqB0To8OyCzPubvszkuoSXWx/7FCEGO+Q=
github.com/samber/hot v0.11.0/go.mod h1:NB9v5U4NfDx7jmlrP+zHuqCuLUsywgAtCH7XOAkOxAg=
github.com/samber/lo v1.52.0 h1:Rvi+3BFHES3A8meP33VPAxiBZX/Aws5RxrschYGjomw=
github.com/samber/lo v1.52.0/go.mod h1:4+MXEGsJzbKGaUEQFKBq2xtfuznW9oz/WrgyzMzRoM0=
github.com/shirou/gopsutil v3.21.11+incompatible h1:+1+c1VGhc88SSonWP6foOcLhvnKlUeu/erjjvaPEYiI=
@@ -332,6 +357,8 @@ google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp0
google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg=
google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw=
google.golang.org/protobuf v1.36.5 h1:tPhr+woSbjfYvY6/GPufUoYizxw1cF/yFoxJ2fmpwlM=
google.golang.org/protobuf v1.36.5/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=

View File

@@ -19,6 +19,7 @@ import (
"github.com/QuantumNous/new-api/model"
"github.com/QuantumNous/new-api/router"
"github.com/QuantumNous/new-api/service"
_ "github.com/QuantumNous/new-api/setting/performance_setting"
"github.com/QuantumNous/new-api/setting/ratio_setting"
"github.com/bytedance/gopkg/util/gopool"
@@ -102,6 +103,9 @@ func main() {
go controller.AutomaticallyTestChannels()
// Codex credential auto-refresh check every 10 minutes, refresh when expires within 1 day
service.StartCodexCredentialAutoRefreshTask()
if common.IsMasterNode && constant.UpdateTask {
gopool.Go(func() {
controller.UpdateMidjourneyTaskBulk()
@@ -143,6 +147,7 @@ func main() {
// This will cause SSE not to work!!!
//server.Use(gzip.Gzip(gzip.DefaultCompression))
server.Use(middleware.RequestId())
server.Use(middleware.PoweredBy())
middleware.SetUpLogger(server)
// Initialize session store
store := cookie.NewStore([]byte(common.SessionSecret))
@@ -249,6 +254,9 @@ func InitResources() error {
// Initialize options, should after model.InitDB()
model.InitOptionMap()
// 清理旧的磁盘缓存文件
common.CleanupOldCacheFiles()
// 初始化模型
model.GetPricing()

View File

@@ -13,6 +13,7 @@ import (
"github.com/QuantumNous/new-api/model"
"github.com/QuantumNous/new-api/service"
"github.com/QuantumNous/new-api/setting/ratio_setting"
"github.com/QuantumNous/new-api/types"
"github.com/gin-contrib/sessions"
"github.com/gin-gonic/gin"
@@ -195,7 +196,7 @@ func TokenAuth() func(c *gin.Context) {
}
c.Request.Header.Set("Authorization", "Bearer "+key)
}
// 检查path包含/v1/messages 或 /v1/models
// 检查path包含/v1/messages 或 /v1/models
if strings.Contains(c.Request.URL.Path, "/v1/messages") || strings.Contains(c.Request.URL.Path, "/v1/models") {
anthropicKey := c.Request.Header.Get("x-api-key")
if anthropicKey != "" {
@@ -256,7 +257,7 @@ func TokenAuth() func(c *gin.Context) {
return
}
if common.IsIpInCIDRList(ip, allowIps) == false {
abortWithOpenAiMessage(c, http.StatusForbidden, "您的 IP 不在令牌允许访问的列表中")
abortWithOpenAiMessage(c, http.StatusForbidden, "您的 IP 不在令牌允许访问的列表中", types.ErrorCodeAccessDenied)
return
}
logger.LogDebug(c, "Client IP %s passed the token IP restrictions check", clientIp)

View File

@@ -0,0 +1,18 @@
package middleware
import (
"github.com/QuantumNous/new-api/common"
"github.com/gin-gonic/gin"
)
// BodyStorageCleanup 请求体存储清理中间件
// 在请求处理完成后自动清理磁盘/内存缓存
func BodyStorageCleanup() gin.HandlerFunc {
return func(c *gin.Context) {
// 处理请求
c.Next()
// 请求结束后清理存储
common.CleanupBodyStorage(c)
}
}

View File

@@ -1,6 +1,7 @@
package middleware
import (
"github.com/QuantumNous/new-api/common"
"github.com/gin-contrib/cors"
"github.com/gin-gonic/gin"
)
@@ -13,3 +14,10 @@ func CORS() gin.HandlerFunc {
config.AllowHeaders = []string{"*"}
return cors.New(config)
}
func PoweredBy() gin.HandlerFunc {
return func(c *gin.Context) {
c.Header("X-New-Api-Version", common.Version)
c.Next()
}
}

View File

@@ -97,35 +97,64 @@ func Distribute() func(c *gin.Context) {
common.SetContextKey(c, constant.ContextKeyUsingGroup, usingGroup)
}
}
channel, selectGroup, err = service.CacheGetRandomSatisfiedChannel(&service.RetryParam{
Ctx: c,
ModelName: modelRequest.Model,
TokenGroup: usingGroup,
Retry: common.GetPointer(0),
})
if err != nil {
showGroup := usingGroup
if usingGroup == "auto" {
showGroup = fmt.Sprintf("auto(%s)", selectGroup)
if preferredChannelID, found := service.GetPreferredChannelByAffinity(c, modelRequest.Model, usingGroup); found {
preferred, err := model.CacheGetChannel(preferredChannelID)
if err == nil && preferred != nil && preferred.Status == common.ChannelStatusEnabled {
if usingGroup == "auto" {
userGroup := common.GetContextKeyString(c, constant.ContextKeyUserGroup)
autoGroups := service.GetUserAutoGroup(userGroup)
for _, g := range autoGroups {
if model.IsChannelEnabledForGroupModel(g, modelRequest.Model, preferred.Id) {
selectGroup = g
common.SetContextKey(c, constant.ContextKeyAutoGroup, g)
channel = preferred
service.MarkChannelAffinityUsed(c, g, preferred.Id)
break
}
}
} else if model.IsChannelEnabledForGroupModel(usingGroup, modelRequest.Model, preferred.Id) {
channel = preferred
selectGroup = usingGroup
service.MarkChannelAffinityUsed(c, usingGroup, preferred.Id)
}
}
message := fmt.Sprintf("获取分组 %s 下模型 %s 的可用渠道失败distributor: %s", showGroup, modelRequest.Model, err.Error())
// 如果错误,但是渠道不为空,说明是数据库一致性问题
//if channel != nil {
// common.SysError(fmt.Sprintf("渠道不存在:%d", channel.Id))
// message = "数据库一致性已被破坏,请联系管理员"
//}
abortWithOpenAiMessage(c, http.StatusServiceUnavailable, message, string(types.ErrorCodeModelNotFound))
return
}
if channel == nil {
abortWithOpenAiMessage(c, http.StatusServiceUnavailable, fmt.Sprintf("分组 %s 下模型 %s 无可用渠道distributor", usingGroup, modelRequest.Model), string(types.ErrorCodeModelNotFound))
return
channel, selectGroup, err = service.CacheGetRandomSatisfiedChannel(&service.RetryParam{
Ctx: c,
ModelName: modelRequest.Model,
TokenGroup: usingGroup,
Retry: common.GetPointer(0),
})
if err != nil {
showGroup := usingGroup
if usingGroup == "auto" {
showGroup = fmt.Sprintf("auto(%s)", selectGroup)
}
message := fmt.Sprintf("获取分组 %s 下模型 %s 的可用渠道失败distributor: %s", showGroup, modelRequest.Model, err.Error())
// 如果错误,但是渠道不为空,说明是数据库一致性问题
//if channel != nil {
// common.SysError(fmt.Sprintf("渠道不存在:%d", channel.Id))
// message = "数据库一致性已被破坏,请联系管理员"
//}
abortWithOpenAiMessage(c, http.StatusServiceUnavailable, message, types.ErrorCodeModelNotFound)
return
}
if channel == nil {
abortWithOpenAiMessage(c, http.StatusServiceUnavailable, fmt.Sprintf("分组 %s 下模型 %s 无可用渠道distributor", usingGroup, modelRequest.Model), types.ErrorCodeModelNotFound)
return
}
}
}
}
common.SetContextKey(c, constant.ContextKeyRequestStartTime, time.Now())
SetupContextForSelectedChannel(c, channel, modelRequest.Model)
c.Next()
if channel != nil && c.Writer != nil && c.Writer.Status() < http.StatusBadRequest {
service.RecordChannelAffinity(c, channel.Id)
}
}
}
@@ -300,6 +329,10 @@ func getModelRequest(c *gin.Context) (*ModelRequest, bool, error) {
modelRequest.Group = req.Group
common.SetContextKey(c, constant.ContextKeyTokenGroup, modelRequest.Group)
}
if strings.HasPrefix(c.Request.URL.Path, "/v1/responses/compact") && modelRequest.Model != "" {
modelRequest.Model = ratio_setting.WithCompactModelSuffix(modelRequest.Model)
}
return &modelRequest, shouldSelectChannel, nil
}

View File

@@ -5,13 +5,14 @@ import (
"github.com/QuantumNous/new-api/common"
"github.com/QuantumNous/new-api/logger"
"github.com/QuantumNous/new-api/types"
"github.com/gin-gonic/gin"
)
func abortWithOpenAiMessage(c *gin.Context, statusCode int, message string, code ...string) {
func abortWithOpenAiMessage(c *gin.Context, statusCode int, message string, code ...types.ErrorCode) {
codeStr := ""
if len(code) > 0 {
codeStr = code[0]
codeStr = string(code[0])
}
userId := c.GetInt("id")
c.JSON(statusCode, gin.H{

71
model/channel_satisfy.go Normal file
View File

@@ -0,0 +1,71 @@
package model
import (
"github.com/QuantumNous/new-api/common"
"github.com/QuantumNous/new-api/setting/ratio_setting"
)
func IsChannelEnabledForGroupModel(group string, modelName string, channelID int) bool {
if group == "" || modelName == "" || channelID <= 0 {
return false
}
if !common.MemoryCacheEnabled {
return isChannelEnabledForGroupModelDB(group, modelName, channelID)
}
channelSyncLock.RLock()
defer channelSyncLock.RUnlock()
if group2model2channels == nil {
return false
}
if isChannelIDInList(group2model2channels[group][modelName], channelID) {
return true
}
normalized := ratio_setting.FormatMatchingModelName(modelName)
if normalized != "" && normalized != modelName {
return isChannelIDInList(group2model2channels[group][normalized], channelID)
}
return false
}
func IsChannelEnabledForAnyGroupModel(groups []string, modelName string, channelID int) bool {
if len(groups) == 0 {
return false
}
for _, g := range groups {
if IsChannelEnabledForGroupModel(g, modelName, channelID) {
return true
}
}
return false
}
func isChannelEnabledForGroupModelDB(group string, modelName string, channelID int) bool {
var count int64
err := DB.Model(&Ability{}).
Where(commonGroupCol+" = ? and model = ? and channel_id = ? and enabled = ?", group, modelName, channelID, true).
Count(&count).Error
if err == nil && count > 0 {
return true
}
normalized := ratio_setting.FormatMatchingModelName(modelName)
if normalized == "" || normalized == modelName {
return false
}
count = 0
err = DB.Model(&Ability{}).
Where(commonGroupCol+" = ? and model = ? and channel_id = ? and enabled = ?", group, normalized, channelID, true).
Count(&count).Error
return err == nil && count > 0
}
func isChannelIDInList(list []int, channelID int) bool {
for _, id := range list {
if id == channelID {
return true
}
}
return false
}

View File

@@ -56,8 +56,10 @@ func formatUserLogs(logs []*Log) {
var otherMap map[string]interface{}
otherMap, _ = common.StrToMap(logs[i].Other)
if otherMap != nil {
// delete admin
// Remove admin-only debug fields.
delete(otherMap, "admin_info")
delete(otherMap, "request_conversion")
delete(otherMap, "reject_reason")
}
logs[i].Other = common.MapToJsonStr(otherMap)
logs[i].Id = logs[i].Id % 1024

View File

@@ -9,6 +9,7 @@ import (
"github.com/QuantumNous/new-api/setting"
"github.com/QuantumNous/new-api/setting/config"
"github.com/QuantumNous/new-api/setting/operation_setting"
"github.com/QuantumNous/new-api/setting/performance_setting"
"github.com/QuantumNous/new-api/setting/ratio_setting"
"github.com/QuantumNous/new-api/setting/system_setting"
)
@@ -143,6 +144,8 @@ func InitOptionMap() {
common.OptionMap["SensitiveWords"] = setting.SensitiveWordsToString()
common.OptionMap["StreamCacheQueueLength"] = strconv.Itoa(setting.StreamCacheQueueLength)
common.OptionMap["AutomaticDisableKeywords"] = operation_setting.AutomaticDisableKeywordsToString()
common.OptionMap["AutomaticDisableStatusCodes"] = operation_setting.AutomaticDisableStatusCodesToString()
common.OptionMap["AutomaticRetryStatusCodes"] = operation_setting.AutomaticRetryStatusCodesToString()
common.OptionMap["ExposeRatioEnabled"] = strconv.FormatBool(ratio_setting.IsExposeRatioEnabled())
// 自动添加所有注册的模型配置
@@ -444,6 +447,10 @@ func updateOptionMap(key string, value string) (err error) {
setting.SensitiveWordsFromString(value)
case "AutomaticDisableKeywords":
operation_setting.AutomaticDisableKeywordsFromString(value)
case "AutomaticDisableStatusCodes":
err = operation_setting.AutomaticDisableStatusCodesFromString(value)
case "AutomaticRetryStatusCodes":
err = operation_setting.AutomaticRetryStatusCodesFromString(value)
case "StreamCacheQueueLength":
setting.StreamCacheQueueLength, _ = strconv.Atoi(value)
case "PayMethods":
@@ -474,5 +481,11 @@ func handleConfigUpdate(key, value string) bool {
}
config.UpdateConfigFromMap(cfg, configMap)
// 特定配置的后处理
if configName == "performance_setting" {
// 同步磁盘缓存配置到 common 包
performance_setting.UpdateAndSync()
}
return true // 已处理
}

53
pkg/cachex/codec.go Normal file
View File

@@ -0,0 +1,53 @@
package cachex
import (
"encoding/json"
"fmt"
"strconv"
"strings"
)
type ValueCodec[V any] interface {
Encode(v V) (string, error)
Decode(s string) (V, error)
}
type IntCodec struct{}
func (c IntCodec) Encode(v int) (string, error) {
return strconv.Itoa(v), nil
}
func (c IntCodec) Decode(s string) (int, error) {
s = strings.TrimSpace(s)
if s == "" {
return 0, fmt.Errorf("empty int value")
}
return strconv.Atoi(s)
}
type StringCodec struct{}
func (c StringCodec) Encode(v string) (string, error) { return v, nil }
func (c StringCodec) Decode(s string) (string, error) { return s, nil }
type JSONCodec[V any] struct{}
func (c JSONCodec[V]) Encode(v V) (string, error) {
b, err := json.Marshal(v)
if err != nil {
return "", err
}
return string(b), nil
}
func (c JSONCodec[V]) Decode(s string) (V, error) {
var v V
if strings.TrimSpace(s) == "" {
return v, fmt.Errorf("empty json value")
}
if err := json.Unmarshal([]byte(s), &v); err != nil {
return v, err
}
return v, nil
}

285
pkg/cachex/hybrid_cache.go Normal file
View File

@@ -0,0 +1,285 @@
package cachex
import (
"context"
"errors"
"strings"
"sync"
"time"
"github.com/go-redis/redis/v8"
"github.com/samber/hot"
)
const (
defaultRedisOpTimeout = 2 * time.Second
defaultRedisScanTimeout = 30 * time.Second
defaultRedisDelTimeout = 10 * time.Second
)
type HybridCacheConfig[V any] struct {
Namespace Namespace
// Redis is used when RedisEnabled returns true (or RedisEnabled is nil) and Redis is not nil.
Redis *redis.Client
RedisCodec ValueCodec[V]
RedisEnabled func() bool
// Memory builds a hot cache used when Redis is disabled. Keys stored in memory are fully namespaced.
Memory func() *hot.HotCache[string, V]
}
// HybridCache is a small helper that uses Redis when enabled, otherwise falls back to in-memory hot cache.
type HybridCache[V any] struct {
ns Namespace
redis *redis.Client
redisCodec ValueCodec[V]
redisEnabled func() bool
memOnce sync.Once
memInit func() *hot.HotCache[string, V]
mem *hot.HotCache[string, V]
}
func NewHybridCache[V any](cfg HybridCacheConfig[V]) *HybridCache[V] {
return &HybridCache[V]{
ns: cfg.Namespace,
redis: cfg.Redis,
redisCodec: cfg.RedisCodec,
redisEnabled: cfg.RedisEnabled,
memInit: cfg.Memory,
}
}
func (c *HybridCache[V]) FullKey(key string) string {
return c.ns.FullKey(key)
}
func (c *HybridCache[V]) redisOn() bool {
if c.redis == nil || c.redisCodec == nil {
return false
}
if c.redisEnabled == nil {
return true
}
return c.redisEnabled()
}
func (c *HybridCache[V]) memCache() *hot.HotCache[string, V] {
c.memOnce.Do(func() {
if c.memInit == nil {
c.mem = hot.NewHotCache[string, V](hot.LRU, 1).Build()
return
}
c.mem = c.memInit()
})
return c.mem
}
func (c *HybridCache[V]) Get(key string) (value V, found bool, err error) {
full := c.ns.FullKey(key)
if full == "" {
var zero V
return zero, false, nil
}
if c.redisOn() {
ctx, cancel := context.WithTimeout(context.Background(), defaultRedisOpTimeout)
defer cancel()
raw, e := c.redis.Get(ctx, full).Result()
if e == nil {
v, decErr := c.redisCodec.Decode(raw)
if decErr != nil {
var zero V
return zero, false, decErr
}
return v, true, nil
}
if errors.Is(e, redis.Nil) {
var zero V
return zero, false, nil
}
var zero V
return zero, false, e
}
return c.memCache().Get(full)
}
func (c *HybridCache[V]) SetWithTTL(key string, v V, ttl time.Duration) error {
full := c.ns.FullKey(key)
if full == "" {
return nil
}
if c.redisOn() {
raw, err := c.redisCodec.Encode(v)
if err != nil {
return err
}
ctx, cancel := context.WithTimeout(context.Background(), defaultRedisOpTimeout)
defer cancel()
return c.redis.Set(ctx, full, raw, ttl).Err()
}
c.memCache().SetWithTTL(full, v, ttl)
return nil
}
// Keys returns keys with valid values. In Redis, it returns all matching keys.
func (c *HybridCache[V]) Keys() ([]string, error) {
if c.redisOn() {
return c.scanKeys(c.ns.MatchPattern())
}
return c.memCache().Keys(), nil
}
func (c *HybridCache[V]) scanKeys(match string) ([]string, error) {
ctx, cancel := context.WithTimeout(context.Background(), defaultRedisScanTimeout)
defer cancel()
var cursor uint64
keys := make([]string, 0, 1024)
for {
k, next, err := c.redis.Scan(ctx, cursor, match, 1000).Result()
if err != nil {
return keys, err
}
keys = append(keys, k...)
cursor = next
if cursor == 0 {
break
}
}
return keys, nil
}
func (c *HybridCache[V]) Purge() error {
if c.redisOn() {
keys, err := c.scanKeys(c.ns.MatchPattern())
if err != nil {
return err
}
if len(keys) == 0 {
return nil
}
_, err = c.DeleteMany(keys)
return err
}
c.memCache().Purge()
return nil
}
func (c *HybridCache[V]) DeleteByPrefix(prefix string) (int, error) {
fullPrefix := c.ns.FullKey(prefix)
if fullPrefix == "" {
return 0, nil
}
if !strings.HasSuffix(fullPrefix, ":") {
fullPrefix += ":"
}
if c.redisOn() {
match := fullPrefix + "*"
keys, err := c.scanKeys(match)
if err != nil {
return 0, err
}
if len(keys) == 0 {
return 0, nil
}
res, err := c.DeleteMany(keys)
if err != nil {
return 0, err
}
deleted := 0
for _, ok := range res {
if ok {
deleted++
}
}
return deleted, nil
}
// In memory, we filter keys and bulk delete.
allKeys := c.memCache().Keys()
keys := make([]string, 0, 128)
for _, k := range allKeys {
if strings.HasPrefix(k, fullPrefix) {
keys = append(keys, k)
}
}
if len(keys) == 0 {
return 0, nil
}
res, _ := c.DeleteMany(keys)
deleted := 0
for _, ok := range res {
if ok {
deleted++
}
}
return deleted, nil
}
// DeleteMany accepts either fully namespaced keys or raw keys and deletes them.
// It returns a map keyed by fully namespaced keys.
func (c *HybridCache[V]) DeleteMany(keys []string) (map[string]bool, error) {
res := make(map[string]bool, len(keys))
if len(keys) == 0 {
return res, nil
}
fullKeys := make([]string, 0, len(keys))
for _, k := range keys {
k = c.ns.FullKey(k)
if k == "" {
continue
}
fullKeys = append(fullKeys, k)
}
if len(fullKeys) == 0 {
return res, nil
}
if c.redisOn() {
ctx, cancel := context.WithTimeout(context.Background(), defaultRedisDelTimeout)
defer cancel()
pipe := c.redis.Pipeline()
cmds := make([]*redis.IntCmd, 0, len(fullKeys))
for _, k := range fullKeys {
// UNLINK is non-blocking vs DEL for large key batches.
cmds = append(cmds, pipe.Unlink(ctx, k))
}
_, err := pipe.Exec(ctx)
if err != nil && !errors.Is(err, redis.Nil) {
return res, err
}
for i, cmd := range cmds {
deleted := cmd != nil && cmd.Err() == nil && cmd.Val() > 0
res[fullKeys[i]] = deleted
}
return res, nil
}
return c.memCache().DeleteMany(fullKeys), nil
}
func (c *HybridCache[V]) Capacity() (mainCacheCapacity int, missingCacheCapacity int) {
if c.redisOn() {
return 0, 0
}
return c.memCache().Capacity()
}
func (c *HybridCache[V]) Algorithm() (mainCacheAlgorithm string, missingCacheAlgorithm string) {
if c.redisOn() {
return "redis", ""
}
return c.memCache().Algorithm()
}

38
pkg/cachex/namespace.go Normal file
View File

@@ -0,0 +1,38 @@
package cachex
import "strings"
// Namespace isolates keys between different cache use-cases. (e.g. "channel_affinity:v1").
type Namespace string
func (n Namespace) prefix() string {
ns := strings.TrimSpace(string(n))
ns = strings.TrimRight(ns, ":")
if ns == "" {
return ""
}
return ns + ":"
}
func (n Namespace) FullKey(key string) string {
key = strings.TrimSpace(key)
if key == "" {
return ""
}
p := n.prefix()
if p == "" {
return strings.TrimLeft(key, ":")
}
if strings.HasPrefix(key, p) {
return key
}
return p + strings.TrimLeft(key, ":")
}
func (n Namespace) MatchPattern() string {
p := n.prefix()
if p == "" {
return "*"
}
return p + "*"
}

View File

@@ -13,6 +13,8 @@ import (
"github.com/QuantumNous/new-api/relay/channel/openai"
relaycommon "github.com/QuantumNous/new-api/relay/common"
"github.com/QuantumNous/new-api/relay/constant"
"github.com/QuantumNous/new-api/service"
"github.com/QuantumNous/new-api/setting/model_setting"
"github.com/QuantumNous/new-api/types"
"github.com/gin-gonic/gin"
@@ -22,6 +24,18 @@ type Adaptor struct {
IsSyncImageModel bool
}
/*
var syncModels = []string{
"z-image",
"qwen-image",
"wan2.6",
}
*/
func supportsAliAnthropicMessages(modelName string) bool {
// Only models with the "qwen" designation can use the Claude-compatible interface; others require conversion.
return strings.Contains(strings.ToLower(modelName), "qwen")
}
var syncModels = []string{
"z-image",
"qwen-image",
@@ -29,12 +43,7 @@ var syncModels = []string{
}
func isSyncImageModel(modelName string) bool {
for _, m := range syncModels {
if strings.Contains(modelName, m) {
return true
}
}
return false
return model_setting.IsSyncImageModel(modelName)
}
func (a *Adaptor) ConvertGeminiRequest(*gin.Context, *relaycommon.RelayInfo, *dto.GeminiChatRequest) (any, error) {
@@ -43,7 +52,18 @@ func (a *Adaptor) ConvertGeminiRequest(*gin.Context, *relaycommon.RelayInfo, *dt
}
func (a *Adaptor) ConvertClaudeRequest(c *gin.Context, info *relaycommon.RelayInfo, req *dto.ClaudeRequest) (any, error) {
return req, nil
if supportsAliAnthropicMessages(info.UpstreamModelName) {
return req, nil
}
oaiReq, err := service.ClaudeToOpenAIRequest(*req, info)
if err != nil {
return nil, err
}
if info.SupportStreamOptions && info.IsStream {
oaiReq.StreamOptions = &dto.StreamOptions{IncludeUsage: true}
}
return a.ConvertOpenAIRequest(c, info, oaiReq)
}
func (a *Adaptor) Init(info *relaycommon.RelayInfo) {
@@ -53,13 +73,19 @@ func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
var fullRequestURL string
switch info.RelayFormat {
case types.RelayFormatClaude:
fullRequestURL = fmt.Sprintf("%s/api/v2/apps/claude-code-proxy/v1/messages", info.ChannelBaseUrl)
if supportsAliAnthropicMessages(info.UpstreamModelName) {
fullRequestURL = fmt.Sprintf("%s/apps/anthropic/v1/messages", info.ChannelBaseUrl)
} else {
fullRequestURL = fmt.Sprintf("%s/compatible-mode/v1/chat/completions", info.ChannelBaseUrl)
}
default:
switch info.RelayMode {
case constant.RelayModeEmbeddings:
fullRequestURL = fmt.Sprintf("%s/compatible-mode/v1/embeddings", info.ChannelBaseUrl)
case constant.RelayModeRerank:
fullRequestURL = fmt.Sprintf("%s/api/v1/services/rerank/text-rerank/text-rerank", info.ChannelBaseUrl)
case constant.RelayModeResponses:
fullRequestURL = fmt.Sprintf("%s/api/v2/apps/protocols/compatible-mode/v1/responses", info.ChannelBaseUrl)
case constant.RelayModeImagesGenerations:
if isSyncImageModel(info.OriginModelName) {
fullRequestURL = fmt.Sprintf("%s/api/v1/services/aigc/multimodal-generation/generation", info.ChannelBaseUrl)
@@ -186,8 +212,7 @@ func (a *Adaptor) ConvertAudioRequest(c *gin.Context, info *relaycommon.RelayInf
}
func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
//TODO implement me
return nil, errors.New("not implemented")
return request, nil
}
func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error) {
@@ -197,11 +222,16 @@ func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, request
func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (usage any, err *types.NewAPIError) {
switch info.RelayFormat {
case types.RelayFormatClaude:
if info.IsStream {
return claude.ClaudeStreamHandler(c, resp, info, claude.RequestModeMessage)
} else {
if supportsAliAnthropicMessages(info.UpstreamModelName) {
if info.IsStream {
return claude.ClaudeStreamHandler(c, resp, info, claude.RequestModeMessage)
}
return claude.ClaudeHandler(c, resp, info, claude.RequestModeMessage)
}
adaptor := openai.Adaptor{}
return adaptor.DoResponse(c, resp, info)
default:
switch info.RelayMode {
case constant.RelayModeImagesGenerations:

View File

@@ -38,9 +38,46 @@ func SetupApiRequestHeader(info *common.RelayInfo, c *gin.Context, req *http.Hea
}
}
// processHeaderOverride 处理请求头覆盖,支持变量替换
// 支持的变量:{api_key}
func processHeaderOverride(info *common.RelayInfo) (map[string]string, error) {
const clientHeaderPlaceholderPrefix = "{client_header:"
func applyHeaderOverridePlaceholders(template string, c *gin.Context, apiKey string) (string, bool, error) {
trimmed := strings.TrimSpace(template)
if strings.HasPrefix(trimmed, clientHeaderPlaceholderPrefix) {
afterPrefix := trimmed[len(clientHeaderPlaceholderPrefix):]
end := strings.Index(afterPrefix, "}")
if end < 0 || end != len(afterPrefix)-1 {
return "", false, fmt.Errorf("client_header placeholder must be the full value: %q", template)
}
name := strings.TrimSpace(afterPrefix[:end])
if name == "" {
return "", false, fmt.Errorf("client_header placeholder name is empty: %q", template)
}
if c == nil || c.Request == nil {
return "", false, fmt.Errorf("missing request context for client_header placeholder")
}
clientHeaderValue := c.Request.Header.Get(name)
if strings.TrimSpace(clientHeaderValue) == "" {
return "", false, nil
}
// Do not interpolate {api_key} inside client-supplied content.
return clientHeaderValue, true, nil
}
if strings.Contains(template, "{api_key}") {
template = strings.ReplaceAll(template, "{api_key}", apiKey)
}
if strings.TrimSpace(template) == "" {
return "", false, nil
}
return template, true, nil
}
// processHeaderOverride applies channel header overrides, with placeholder substitution.
// Supported placeholders:
// - {api_key}: resolved to the channel API key
// - {client_header:<name>}: resolved to the incoming request header value
func processHeaderOverride(info *common.RelayInfo, c *gin.Context) (map[string]string, error) {
headerOverride := make(map[string]string)
for k, v := range info.HeadersOverride {
str, ok := v.(string)
@@ -48,16 +85,32 @@ func processHeaderOverride(info *common.RelayInfo) (map[string]string, error) {
return nil, types.NewError(nil, types.ErrorCodeChannelHeaderOverrideInvalid)
}
// 替换支持的变量
if strings.Contains(str, "{api_key}") {
str = strings.ReplaceAll(str, "{api_key}", info.ApiKey)
value, include, err := applyHeaderOverridePlaceholders(str, c, info.ApiKey)
if err != nil {
return nil, types.NewError(err, types.ErrorCodeChannelHeaderOverrideInvalid)
}
if !include {
continue
}
headerOverride[k] = str
headerOverride[k] = value
}
return headerOverride, nil
}
func applyHeaderOverrideToRequest(req *http.Request, headerOverride map[string]string) {
if req == nil {
return
}
for key, value := range headerOverride {
req.Header.Set(key, value)
// set Host in req
if strings.EqualFold(key, "Host") {
req.Host = value
}
}
}
func DoApiRequest(a Adaptor, c *gin.Context, info *common.RelayInfo, requestBody io.Reader) (*http.Response, error) {
fullRequestURL, err := a.GetRequestURL(info)
if err != nil {
@@ -71,17 +124,17 @@ func DoApiRequest(a Adaptor, c *gin.Context, info *common.RelayInfo, requestBody
return nil, fmt.Errorf("new request failed: %w", err)
}
headers := req.Header
headerOverride, err := processHeaderOverride(info)
if err != nil {
return nil, err
}
for key, value := range headerOverride {
headers.Set(key, value)
}
err = a.SetupRequestHeader(c, &headers, info)
if err != nil {
return nil, fmt.Errorf("setup request header failed: %w", err)
}
// 在 SetupRequestHeader 之后应用 Header Override确保用户设置优先级最高
// 这样可以覆盖默认的 Authorization header 设置
headerOverride, err := processHeaderOverride(info, c)
if err != nil {
return nil, err
}
applyHeaderOverrideToRequest(req, headerOverride)
resp, err := doRequest(c, req, info)
if err != nil {
return nil, fmt.Errorf("do request failed: %w", err)
@@ -104,17 +157,17 @@ func DoFormRequest(a Adaptor, c *gin.Context, info *common.RelayInfo, requestBod
// set form data
req.Header.Set("Content-Type", c.Request.Header.Get("Content-Type"))
headers := req.Header
headerOverride, err := processHeaderOverride(info)
if err != nil {
return nil, err
}
for key, value := range headerOverride {
headers.Set(key, value)
}
err = a.SetupRequestHeader(c, &headers, info)
if err != nil {
return nil, fmt.Errorf("setup request header failed: %w", err)
}
// 在 SetupRequestHeader 之后应用 Header Override确保用户设置优先级最高
// 这样可以覆盖默认的 Authorization header 设置
headerOverride, err := processHeaderOverride(info, c)
if err != nil {
return nil, err
}
applyHeaderOverrideToRequest(req, headerOverride)
resp, err := doRequest(c, req, info)
if err != nil {
return nil, fmt.Errorf("do request failed: %w", err)
@@ -128,17 +181,19 @@ func DoWssRequest(a Adaptor, c *gin.Context, info *common.RelayInfo, requestBody
return nil, fmt.Errorf("get request url failed: %w", err)
}
targetHeader := http.Header{}
headerOverride, err := processHeaderOverride(info)
err = a.SetupRequestHeader(c, &targetHeader, info)
if err != nil {
return nil, fmt.Errorf("setup request header failed: %w", err)
}
// 在 SetupRequestHeader 之后应用 Header Override确保用户设置优先级最高
// 这样可以覆盖默认的 Authorization header 设置
headerOverride, err := processHeaderOverride(info, c)
if err != nil {
return nil, err
}
for key, value := range headerOverride {
targetHeader.Set(key, value)
}
err = a.SetupRequestHeader(c, &targetHeader, info)
if err != nil {
return nil, fmt.Errorf("setup request header failed: %w", err)
}
targetHeader.Set("Content-Type", c.Request.Header.Get("Content-Type"))
targetConn, _, err := websocket.DefaultDialer.Dial(fullRequestURL, targetHeader)
if err != nil {

View File

@@ -8,11 +8,13 @@ import (
"strings"
"github.com/QuantumNous/new-api/common"
"github.com/QuantumNous/new-api/constant"
"github.com/QuantumNous/new-api/dto"
"github.com/QuantumNous/new-api/logger"
"github.com/QuantumNous/new-api/relay/channel/openrouter"
relaycommon "github.com/QuantumNous/new-api/relay/common"
"github.com/QuantumNous/new-api/relay/helper"
"github.com/QuantumNous/new-api/relay/reasonmap"
"github.com/QuantumNous/new-api/service"
"github.com/QuantumNous/new-api/setting/model_setting"
"github.com/QuantumNous/new-api/types"
@@ -27,17 +29,15 @@ const (
)
func stopReasonClaude2OpenAI(reason string) string {
switch reason {
case "stop_sequence":
return "stop"
case "end_turn":
return "stop"
case "max_tokens":
return "length"
case "tool_use":
return "tool_calls"
default:
return reason
return reasonmap.ClaudeStopReasonToOpenAIFinishReason(reason)
}
func maybeMarkClaudeRefusal(c *gin.Context, stopReason string) {
if c == nil {
return
}
if strings.EqualFold(stopReason, "refusal") {
common.SetContextKey(c, constant.ContextKeyAdminRejectReason, "claude_stop_reason=refusal")
}
}
@@ -437,8 +437,10 @@ func StreamResponseClaude2OpenAI(reqMode int, claudeResponse *dto.ClaudeResponse
}
} else {
if claudeResponse.Type == "message_start" {
response.Id = claudeResponse.Message.Id
response.Model = claudeResponse.Message.Model
if claudeResponse.Message != nil {
response.Id = claudeResponse.Message.Id
response.Model = claudeResponse.Message.Model
}
//claudeUsage = &claudeResponse.Message.Usage
choice.Delta.SetContentString("")
choice.Delta.Role = "assistant"
@@ -589,35 +591,63 @@ type ClaudeResponseInfo struct {
}
func FormatClaudeResponseInfo(requestMode int, claudeResponse *dto.ClaudeResponse, oaiResponse *dto.ChatCompletionsStreamResponse, claudeInfo *ClaudeResponseInfo) bool {
if claudeInfo == nil {
return false
}
if claudeInfo.Usage == nil {
claudeInfo.Usage = &dto.Usage{}
}
if requestMode == RequestModeCompletion {
claudeInfo.ResponseText.WriteString(claudeResponse.Completion)
} else {
if claudeResponse.Type == "message_start" {
claudeInfo.ResponseId = claudeResponse.Message.Id
claudeInfo.Model = claudeResponse.Message.Model
if claudeResponse.Message != nil {
claudeInfo.ResponseId = claudeResponse.Message.Id
claudeInfo.Model = claudeResponse.Message.Model
}
// message_start, 获取usage
claudeInfo.Usage.PromptTokens = claudeResponse.Message.Usage.InputTokens
claudeInfo.Usage.PromptTokensDetails.CachedTokens = claudeResponse.Message.Usage.CacheReadInputTokens
claudeInfo.Usage.PromptTokensDetails.CachedCreationTokens = claudeResponse.Message.Usage.CacheCreationInputTokens
claudeInfo.Usage.ClaudeCacheCreation5mTokens = claudeResponse.Message.Usage.GetCacheCreation5mTokens()
claudeInfo.Usage.ClaudeCacheCreation1hTokens = claudeResponse.Message.Usage.GetCacheCreation1hTokens()
claudeInfo.Usage.CompletionTokens = claudeResponse.Message.Usage.OutputTokens
} else if claudeResponse.Type == "content_block_delta" {
if claudeResponse.Delta.Text != nil {
claudeInfo.ResponseText.WriteString(*claudeResponse.Delta.Text)
if claudeResponse.Message != nil && claudeResponse.Message.Usage != nil {
claudeInfo.Usage.PromptTokens = claudeResponse.Message.Usage.InputTokens
claudeInfo.Usage.PromptTokensDetails.CachedTokens = claudeResponse.Message.Usage.CacheReadInputTokens
claudeInfo.Usage.PromptTokensDetails.CachedCreationTokens = claudeResponse.Message.Usage.CacheCreationInputTokens
claudeInfo.Usage.ClaudeCacheCreation5mTokens = claudeResponse.Message.Usage.GetCacheCreation5mTokens()
claudeInfo.Usage.ClaudeCacheCreation1hTokens = claudeResponse.Message.Usage.GetCacheCreation1hTokens()
claudeInfo.Usage.CompletionTokens = claudeResponse.Message.Usage.OutputTokens
}
if claudeResponse.Delta.Thinking != nil {
claudeInfo.ResponseText.WriteString(*claudeResponse.Delta.Thinking)
} else if claudeResponse.Type == "content_block_delta" {
if claudeResponse.Delta != nil {
if claudeResponse.Delta.Text != nil {
claudeInfo.ResponseText.WriteString(*claudeResponse.Delta.Text)
}
if claudeResponse.Delta.Thinking != nil {
claudeInfo.ResponseText.WriteString(*claudeResponse.Delta.Thinking)
}
}
} else if claudeResponse.Type == "message_delta" {
// 最终的usage获取
if claudeResponse.Usage.InputTokens > 0 {
// 不叠加,只取最新的
claudeInfo.Usage.PromptTokens = claudeResponse.Usage.InputTokens
if claudeResponse.Usage != nil {
if claudeResponse.Usage.InputTokens > 0 {
// 不叠加,只取最新的
claudeInfo.Usage.PromptTokens = claudeResponse.Usage.InputTokens
}
if claudeResponse.Usage.CacheReadInputTokens > 0 {
claudeInfo.Usage.PromptTokensDetails.CachedTokens = claudeResponse.Usage.CacheReadInputTokens
}
if claudeResponse.Usage.CacheCreationInputTokens > 0 {
claudeInfo.Usage.PromptTokensDetails.CachedCreationTokens = claudeResponse.Usage.CacheCreationInputTokens
}
if cacheCreation5m := claudeResponse.Usage.GetCacheCreation5mTokens(); cacheCreation5m > 0 {
claudeInfo.Usage.ClaudeCacheCreation5mTokens = cacheCreation5m
}
if cacheCreation1h := claudeResponse.Usage.GetCacheCreation1hTokens(); cacheCreation1h > 0 {
claudeInfo.Usage.ClaudeCacheCreation1hTokens = cacheCreation1h
}
if claudeResponse.Usage.OutputTokens > 0 {
claudeInfo.Usage.CompletionTokens = claudeResponse.Usage.OutputTokens
}
claudeInfo.Usage.TotalTokens = claudeInfo.Usage.PromptTokens + claudeInfo.Usage.CompletionTokens
}
claudeInfo.Usage.CompletionTokens = claudeResponse.Usage.OutputTokens
claudeInfo.Usage.TotalTokens = claudeInfo.Usage.PromptTokens + claudeInfo.Usage.CompletionTokens
// 判断是否完整
claudeInfo.Done = true
@@ -644,6 +674,12 @@ func HandleStreamResponseData(c *gin.Context, info *relaycommon.RelayInfo, claud
if claudeError := claudeResponse.GetClaudeError(); claudeError != nil && claudeError.Type != "" {
return types.WithClaudeError(*claudeError, http.StatusInternalServerError)
}
if claudeResponse.StopReason != "" {
maybeMarkClaudeRefusal(c, claudeResponse.StopReason)
}
if claudeResponse.Delta != nil && claudeResponse.Delta.StopReason != nil {
maybeMarkClaudeRefusal(c, *claudeResponse.Delta.StopReason)
}
if info.RelayFormat == types.RelayFormatClaude {
FormatClaudeResponseInfo(requestMode, &claudeResponse, nil, claudeInfo)
@@ -651,7 +687,9 @@ func HandleStreamResponseData(c *gin.Context, info *relaycommon.RelayInfo, claud
} else {
if claudeResponse.Type == "message_start" {
// message_start, 获取usage
info.UpstreamModelName = claudeResponse.Message.Model
if claudeResponse.Message != nil {
info.UpstreamModelName = claudeResponse.Message.Model
}
} else if claudeResponse.Type == "content_block_delta" {
} else if claudeResponse.Type == "message_delta" {
}
@@ -735,16 +773,22 @@ func HandleClaudeResponseData(c *gin.Context, info *relaycommon.RelayInfo, claud
if claudeError := claudeResponse.GetClaudeError(); claudeError != nil && claudeError.Type != "" {
return types.WithClaudeError(*claudeError, http.StatusInternalServerError)
}
maybeMarkClaudeRefusal(c, claudeResponse.StopReason)
if requestMode == RequestModeCompletion {
claudeInfo.Usage = service.ResponseText2Usage(c, claudeResponse.Completion, info.UpstreamModelName, info.GetEstimatePromptTokens())
} else {
claudeInfo.Usage.PromptTokens = claudeResponse.Usage.InputTokens
claudeInfo.Usage.CompletionTokens = claudeResponse.Usage.OutputTokens
claudeInfo.Usage.TotalTokens = claudeResponse.Usage.InputTokens + claudeResponse.Usage.OutputTokens
claudeInfo.Usage.PromptTokensDetails.CachedTokens = claudeResponse.Usage.CacheReadInputTokens
claudeInfo.Usage.PromptTokensDetails.CachedCreationTokens = claudeResponse.Usage.CacheCreationInputTokens
claudeInfo.Usage.ClaudeCacheCreation5mTokens = claudeResponse.Usage.GetCacheCreation5mTokens()
claudeInfo.Usage.ClaudeCacheCreation1hTokens = claudeResponse.Usage.GetCacheCreation1hTokens()
if claudeInfo.Usage == nil {
claudeInfo.Usage = &dto.Usage{}
}
if claudeResponse.Usage != nil {
claudeInfo.Usage.PromptTokens = claudeResponse.Usage.InputTokens
claudeInfo.Usage.CompletionTokens = claudeResponse.Usage.OutputTokens
claudeInfo.Usage.TotalTokens = claudeResponse.Usage.InputTokens + claudeResponse.Usage.OutputTokens
claudeInfo.Usage.PromptTokensDetails.CachedTokens = claudeResponse.Usage.CacheReadInputTokens
claudeInfo.Usage.PromptTokensDetails.CachedCreationTokens = claudeResponse.Usage.CacheCreationInputTokens
claudeInfo.Usage.ClaudeCacheCreation5mTokens = claudeResponse.Usage.GetCacheCreation5mTokens()
claudeInfo.Usage.ClaudeCacheCreation1hTokens = claudeResponse.Usage.GetCacheCreation1hTokens()
}
}
var responseData []byte
switch info.RelayFormat {
@@ -759,7 +803,7 @@ func HandleClaudeResponseData(c *gin.Context, info *relaycommon.RelayInfo, claud
responseData = data
}
if claudeResponse.Usage.ServerToolUse != nil && claudeResponse.Usage.ServerToolUse.WebSearchRequests > 0 {
if claudeResponse.Usage != nil && claudeResponse.Usage.ServerToolUse != nil && claudeResponse.Usage.ServerToolUse.WebSearchRequests > 0 {
c.Set("claude_web_search_requests", claudeResponse.Usage.ServerToolUse.WebSearchRequests)
}
@@ -831,9 +875,12 @@ func mapToolChoice(toolChoice any, parallelToolCalls *bool) *dto.ClaudeToolChoic
}
}
// 设置 disable_parallel_tool_use
// 如果 parallel_tool_calls 为 true disable_parallel_tool_use 为 false
claudeToolChoice.DisableParallelToolUse = !*parallelToolCalls
// Anthropic schema: tool_choice.type=none does not accept extra fields.
// When tools are disabled, parallel_tool_calls is irrelevant, so we drop it.
if claudeToolChoice.Type != "none" {
// 如果 parallel_tool_calls 为 true则 disable_parallel_tool_use 为 false
claudeToolChoice.DisableParallelToolUse = !*parallelToolCalls
}
}
return claudeToolChoice

View File

@@ -0,0 +1,176 @@
package codex
import (
"encoding/json"
"errors"
"io"
"net/http"
"strings"
"github.com/QuantumNous/new-api/common"
"github.com/QuantumNous/new-api/dto"
"github.com/QuantumNous/new-api/relay/channel"
"github.com/QuantumNous/new-api/relay/channel/openai"
relaycommon "github.com/QuantumNous/new-api/relay/common"
relayconstant "github.com/QuantumNous/new-api/relay/constant"
"github.com/QuantumNous/new-api/types"
"github.com/gin-gonic/gin"
)
type Adaptor struct {
}
func (a *Adaptor) ConvertGeminiRequest(c *gin.Context, info *relaycommon.RelayInfo, request *dto.GeminiChatRequest) (any, error) {
return nil, errors.New("codex channel: endpoint not supported")
}
func (a *Adaptor) ConvertClaudeRequest(*gin.Context, *relaycommon.RelayInfo, *dto.ClaudeRequest) (any, error) {
return nil, errors.New("codex channel: endpoint not supported")
}
func (a *Adaptor) ConvertAudioRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.AudioRequest) (io.Reader, error) {
return nil, errors.New("codex channel: endpoint not supported")
}
func (a *Adaptor) ConvertImageRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.ImageRequest) (any, error) {
return nil, errors.New("codex channel: endpoint not supported")
}
func (a *Adaptor) Init(info *relaycommon.RelayInfo) {
}
func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayInfo, request *dto.GeneralOpenAIRequest) (any, error) {
return nil, errors.New("codex channel: endpoint not supported")
}
func (a *Adaptor) ConvertRerankRequest(c *gin.Context, relayMode int, request dto.RerankRequest) (any, error) {
return nil, errors.New("codex channel: endpoint not supported")
}
func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.EmbeddingRequest) (any, error) {
return nil, errors.New("codex channel: endpoint not supported")
}
func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
isCompact := info != nil && info.RelayMode == relayconstant.RelayModeResponsesCompact
if info != nil && info.ChannelSetting.SystemPrompt != "" {
systemPrompt := info.ChannelSetting.SystemPrompt
if len(request.Instructions) == 0 {
if b, err := common.Marshal(systemPrompt); err == nil {
request.Instructions = b
} else {
return nil, err
}
} else if info.ChannelSetting.SystemPromptOverride {
var existing string
if err := common.Unmarshal(request.Instructions, &existing); err == nil {
existing = strings.TrimSpace(existing)
if existing == "" {
if b, err := common.Marshal(systemPrompt); err == nil {
request.Instructions = b
} else {
return nil, err
}
} else {
if b, err := common.Marshal(systemPrompt + "\n" + existing); err == nil {
request.Instructions = b
} else {
return nil, err
}
}
} else {
if b, err := common.Marshal(systemPrompt); err == nil {
request.Instructions = b
} else {
return nil, err
}
}
}
}
if isCompact {
return request, nil
}
// codex: store must be false
request.Store = json.RawMessage("false")
// rm max_output_tokens
request.MaxOutputTokens = 0
request.Temperature = nil
return request, nil
}
func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error) {
return channel.DoApiRequest(a, c, info, requestBody)
}
func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (usage any, err *types.NewAPIError) {
if info.RelayMode != relayconstant.RelayModeResponses && info.RelayMode != relayconstant.RelayModeResponsesCompact {
return nil, types.NewError(errors.New("codex channel: endpoint not supported"), types.ErrorCodeInvalidRequest)
}
if info.RelayMode == relayconstant.RelayModeResponsesCompact {
return openai.OaiResponsesCompactionHandler(c, resp)
}
if info.IsStream {
return openai.OaiResponsesStreamHandler(c, info, resp)
}
return openai.OaiResponsesHandler(c, info, resp)
}
func (a *Adaptor) GetModelList() []string {
return ModelList
}
func (a *Adaptor) GetChannelName() string {
return ChannelName
}
func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
if info.RelayMode != relayconstant.RelayModeResponses && info.RelayMode != relayconstant.RelayModeResponsesCompact {
return "", errors.New("codex channel: only /v1/responses and /v1/responses/compact are supported")
}
path := "/backend-api/codex/responses"
if info.RelayMode == relayconstant.RelayModeResponsesCompact {
path = "/backend-api/codex/responses/compact"
}
return relaycommon.GetFullRequestURL(info.ChannelBaseUrl, path, info.ChannelType), nil
}
func (a *Adaptor) SetupRequestHeader(c *gin.Context, req *http.Header, info *relaycommon.RelayInfo) error {
channel.SetupApiRequestHeader(info, c, req)
key := strings.TrimSpace(info.ApiKey)
if !strings.HasPrefix(key, "{") {
return errors.New("codex channel: key must be a JSON object")
}
oauthKey, err := ParseOAuthKey(key)
if err != nil {
return err
}
accessToken := strings.TrimSpace(oauthKey.AccessToken)
accountID := strings.TrimSpace(oauthKey.AccountID)
if accessToken == "" {
return errors.New("codex channel: access_token is required")
}
if accountID == "" {
return errors.New("codex channel: account_id is required")
}
req.Set("Authorization", "Bearer "+accessToken)
req.Set("chatgpt-account-id", accountID)
if req.Get("OpenAI-Beta") == "" {
req.Set("OpenAI-Beta", "responses=experimental")
}
if req.Get("originator") == "" {
req.Set("originator", "codex_cli_rs")
}
return nil
}

View File

@@ -0,0 +1,25 @@
package codex
import (
"github.com/QuantumNous/new-api/setting/ratio_setting"
"github.com/samber/lo"
)
var baseModelList = []string{
"gpt-5", "gpt-5-codex", "gpt-5-codex-mini",
"gpt-5.1", "gpt-5.1-codex", "gpt-5.1-codex-max", "gpt-5.1-codex-mini",
"gpt-5.2", "gpt-5.2-codex",
}
var ModelList = withCompactModelSuffix(baseModelList)
const ChannelName = "codex"
func withCompactModelSuffix(models []string) []string {
out := make([]string, 0, len(models)*2)
out = append(out, models...)
out = append(out, lo.Map(models, func(model string, _ int) string {
return ratio_setting.WithCompactModelSuffix(model)
})...)
return lo.Uniq(out)
}

View File

@@ -0,0 +1,30 @@
package codex
import (
"errors"
"github.com/QuantumNous/new-api/common"
)
type OAuthKey struct {
IDToken string `json:"id_token,omitempty"`
AccessToken string `json:"access_token,omitempty"`
RefreshToken string `json:"refresh_token,omitempty"`
AccountID string `json:"account_id,omitempty"`
LastRefresh string `json:"last_refresh,omitempty"`
Email string `json:"email,omitempty"`
Type string `json:"type,omitempty"`
Expired string `json:"expired,omitempty"`
}
func ParseOAuthKey(raw string) (*OAuthKey, error) {
if raw == "" {
return nil, errors.New("codex channel: empty oauth key")
}
var key OAuthKey
if err := common.Unmarshal([]byte(raw), &key); err != nil {
return nil, errors.New("codex channel: invalid oauth key json")
}
return &key, nil
}

View File

@@ -1,10 +1,12 @@
package gemini
import (
"fmt"
"io"
"net/http"
"github.com/QuantumNous/new-api/common"
"github.com/QuantumNous/new-api/constant"
"github.com/QuantumNous/new-api/dto"
"github.com/QuantumNous/new-api/logger"
relaycommon "github.com/QuantumNous/new-api/relay/common"
@@ -35,6 +37,10 @@ func GeminiTextGenerationHandler(c *gin.Context, info *relaycommon.RelayInfo, re
return nil, types.NewOpenAIError(err, types.ErrorCodeBadResponseBody, http.StatusInternalServerError)
}
if len(geminiResponse.Candidates) == 0 && geminiResponse.PromptFeedback != nil && geminiResponse.PromptFeedback.BlockReason != nil {
common.SetContextKey(c, constant.ContextKeyAdminRejectReason, fmt.Sprintf("gemini_block_reason=%s", *geminiResponse.PromptFeedback.BlockReason))
}
// 计算使用量(基于 UsageMetadata
usage := dto.Usage{
PromptTokens: geminiResponse.UsageMetadata.PromptTokenCount,
@@ -43,6 +49,7 @@ func GeminiTextGenerationHandler(c *gin.Context, info *relaycommon.RelayInfo, re
}
usage.CompletionTokenDetails.ReasoningTokens = geminiResponse.UsageMetadata.ThoughtsTokenCount
usage.PromptTokensDetails.CachedTokens = geminiResponse.UsageMetadata.CachedContentTokenCount
for _, detail := range geminiResponse.UsageMetadata.PromptTokensDetails {
if detail.Modality == "AUDIO" {

View File

@@ -1,6 +1,7 @@
package gemini
import (
"context"
"encoding/json"
"errors"
"fmt"
@@ -8,6 +9,7 @@ import (
"net/http"
"strconv"
"strings"
"time"
"unicode/utf8"
"github.com/QuantumNous/new-api/common"
@@ -215,6 +217,13 @@ func CovertOpenAI2Gemini(c *gin.Context, textRequest dto.GeneralOpenAIRequest, i
"IMAGE",
}
}
if stopSequences := parseStopSequences(textRequest.Stop); len(stopSequences) > 0 {
// Gemini supports up to 5 stop sequences
if len(stopSequences) > 5 {
stopSequences = stopSequences[:5]
}
geminiRequest.GenerationConfig.StopSequences = stopSequences
}
adaptorWithExtraBody := false
@@ -357,6 +366,13 @@ func CovertOpenAI2Gemini(c *gin.Context, textRequest dto.GeneralOpenAIRequest, i
})
}
geminiRequest.SetTools(geminiTools)
// [NEW] Convert OpenAI tool_choice to Gemini toolConfig.functionCallingConfig
// Mapping: "auto" -> "AUTO", "none" -> "NONE", "required" -> "ANY"
// Object format: {"type": "function", "function": {"name": "xxx"}} -> "ANY" + allowedFunctionNames
if textRequest.ToolChoice != nil {
geminiRequest.ToolConfig = convertToolChoiceToGeminiConfig(textRequest.ToolChoice)
}
}
if textRequest.ResponseFormat != nil && (textRequest.ResponseFormat.Type == "json_schema" || textRequest.ResponseFormat.Type == "json_object") {
@@ -622,6 +638,31 @@ func CovertOpenAI2Gemini(c *gin.Context, textRequest dto.GeneralOpenAIRequest, i
return &geminiRequest, nil
}
// parseStopSequences 解析停止序列,支持字符串或字符串数组
func parseStopSequences(stop any) []string {
if stop == nil {
return nil
}
switch v := stop.(type) {
case string:
if v != "" {
return []string{v}
}
case []string:
return v
case []interface{}:
sequences := make([]string, 0, len(v))
for _, item := range v {
if str, ok := item.(string); ok && str != "" {
sequences = append(sequences, str)
}
}
return sequences
}
return nil
}
func hasFunctionCallContent(call *dto.FunctionCall) bool {
if call == nil {
return false
@@ -653,101 +694,84 @@ func getSupportedMimeTypesList() []string {
return keys
}
var geminiOpenAPISchemaAllowedFields = map[string]struct{}{
"anyOf": {},
"default": {},
"description": {},
"enum": {},
"example": {},
"format": {},
"items": {},
"maxItems": {},
"maxLength": {},
"maxProperties": {},
"maximum": {},
"minItems": {},
"minLength": {},
"minProperties": {},
"minimum": {},
"nullable": {},
"pattern": {},
"properties": {},
"propertyOrdering": {},
"required": {},
"title": {},
"type": {},
}
const geminiFunctionSchemaMaxDepth = 64
// cleanFunctionParameters recursively removes unsupported fields from Gemini function parameters.
func cleanFunctionParameters(params interface{}) interface{} {
return cleanFunctionParametersWithDepth(params, 0)
}
func cleanFunctionParametersWithDepth(params interface{}, depth int) interface{} {
if params == nil {
return nil
}
if depth >= geminiFunctionSchemaMaxDepth {
return cleanFunctionParametersShallow(params)
}
switch v := params.(type) {
case map[string]interface{}:
// Create a copy to avoid modifying the original
cleanedMap := make(map[string]interface{})
// Keep only Gemini-supported OpenAPI schema subset fields (per official SDK Schema).
cleanedMap := make(map[string]interface{}, len(v))
for k, val := range v {
cleanedMap[k] = val
}
// Remove unsupported root-level fields
delete(cleanedMap, "default")
delete(cleanedMap, "exclusiveMaximum")
delete(cleanedMap, "exclusiveMinimum")
delete(cleanedMap, "$schema")
delete(cleanedMap, "additionalProperties")
// Check and clean 'format' for string types
if propType, typeExists := cleanedMap["type"].(string); typeExists && propType == "string" {
if formatValue, formatExists := cleanedMap["format"].(string); formatExists {
if formatValue != "enum" && formatValue != "date-time" {
delete(cleanedMap, "format")
}
if _, ok := geminiOpenAPISchemaAllowedFields[k]; ok {
cleanedMap[k] = val
}
}
normalizeGeminiSchemaTypeAndNullable(cleanedMap)
// Clean properties
if props, ok := cleanedMap["properties"].(map[string]interface{}); ok && props != nil {
cleanedProps := make(map[string]interface{})
for propName, propValue := range props {
cleanedProps[propName] = cleanFunctionParameters(propValue)
cleanedProps[propName] = cleanFunctionParametersWithDepth(propValue, depth+1)
}
cleanedMap["properties"] = cleanedProps
}
// Recursively clean items in arrays
if items, ok := cleanedMap["items"].(map[string]interface{}); ok && items != nil {
cleanedMap["items"] = cleanFunctionParameters(items)
cleanedMap["items"] = cleanFunctionParametersWithDepth(items, depth+1)
}
// Also handle items if it's an array of schemas
if itemsArray, ok := cleanedMap["items"].([]interface{}); ok {
cleanedItemsArray := make([]interface{}, len(itemsArray))
for i, item := range itemsArray {
cleanedItemsArray[i] = cleanFunctionParameters(item)
}
cleanedMap["items"] = cleanedItemsArray
// OpenAPI tuple-style items is not supported by Gemini SDK Schema; keep first to avoid API rejection.
if itemsArray, ok := cleanedMap["items"].([]interface{}); ok && len(itemsArray) > 0 {
cleanedMap["items"] = cleanFunctionParametersWithDepth(itemsArray[0], depth+1)
}
// Recursively clean other schema composition keywords
for _, field := range []string{"allOf", "anyOf", "oneOf"} {
if nested, ok := cleanedMap[field].([]interface{}); ok {
cleanedNested := make([]interface{}, len(nested))
for i, item := range nested {
cleanedNested[i] = cleanFunctionParameters(item)
}
cleanedMap[field] = cleanedNested
}
}
// Recursively clean patternProperties
if patternProps, ok := cleanedMap["patternProperties"].(map[string]interface{}); ok {
cleanedPatternProps := make(map[string]interface{})
for pattern, schema := range patternProps {
cleanedPatternProps[pattern] = cleanFunctionParameters(schema)
}
cleanedMap["patternProperties"] = cleanedPatternProps
}
// Recursively clean definitions
if definitions, ok := cleanedMap["definitions"].(map[string]interface{}); ok {
cleanedDefinitions := make(map[string]interface{})
for defName, defSchema := range definitions {
cleanedDefinitions[defName] = cleanFunctionParameters(defSchema)
}
cleanedMap["definitions"] = cleanedDefinitions
}
// Recursively clean $defs (newer JSON Schema draft)
if defs, ok := cleanedMap["$defs"].(map[string]interface{}); ok {
cleanedDefs := make(map[string]interface{})
for defName, defSchema := range defs {
cleanedDefs[defName] = cleanFunctionParameters(defSchema)
}
cleanedMap["$defs"] = cleanedDefs
}
// Clean conditional keywords
for _, field := range []string{"if", "then", "else", "not"} {
if nested, ok := cleanedMap[field]; ok {
cleanedMap[field] = cleanFunctionParameters(nested)
// Recursively clean anyOf
if nested, ok := cleanedMap["anyOf"].([]interface{}); ok && nested != nil {
cleanedNested := make([]interface{}, len(nested))
for i, item := range nested {
cleanedNested[i] = cleanFunctionParametersWithDepth(item, depth+1)
}
cleanedMap["anyOf"] = cleanedNested
}
return cleanedMap
@@ -756,7 +780,7 @@ func cleanFunctionParameters(params interface{}) interface{} {
// Handle arrays of schemas
cleanedArray := make([]interface{}, len(v))
for i, item := range v {
cleanedArray[i] = cleanFunctionParameters(item)
cleanedArray[i] = cleanFunctionParametersWithDepth(item, depth+1)
}
return cleanedArray
@@ -766,6 +790,91 @@ func cleanFunctionParameters(params interface{}) interface{} {
}
}
func cleanFunctionParametersShallow(params interface{}) interface{} {
switch v := params.(type) {
case map[string]interface{}:
cleanedMap := make(map[string]interface{}, len(v))
for k, val := range v {
if _, ok := geminiOpenAPISchemaAllowedFields[k]; ok {
cleanedMap[k] = val
}
}
normalizeGeminiSchemaTypeAndNullable(cleanedMap)
// Stop recursion and avoid retaining huge nested structures.
delete(cleanedMap, "properties")
delete(cleanedMap, "items")
delete(cleanedMap, "anyOf")
return cleanedMap
case []interface{}:
// Prefer an empty list over deep recursion on attacker-controlled inputs.
return []interface{}{}
default:
return params
}
}
func normalizeGeminiSchemaTypeAndNullable(schema map[string]interface{}) {
rawType, ok := schema["type"]
if !ok || rawType == nil {
return
}
normalize := func(t string) (string, bool) {
switch strings.ToLower(strings.TrimSpace(t)) {
case "object":
return "OBJECT", false
case "array":
return "ARRAY", false
case "string":
return "STRING", false
case "integer":
return "INTEGER", false
case "number":
return "NUMBER", false
case "boolean":
return "BOOLEAN", false
case "null":
return "", true
default:
return t, false
}
}
switch t := rawType.(type) {
case string:
normalized, isNull := normalize(t)
if isNull {
schema["nullable"] = true
delete(schema, "type")
return
}
schema["type"] = normalized
case []interface{}:
nullable := false
var chosen string
for _, item := range t {
if s, ok := item.(string); ok {
normalized, isNull := normalize(s)
if isNull {
nullable = true
continue
}
if chosen == "" {
chosen = normalized
}
}
}
if nullable {
schema["nullable"] = true
}
if chosen != "" {
schema["type"] = chosen
} else {
delete(schema, "type")
}
}
}
func removeAdditionalPropertiesWithDepth(schema interface{}, depth int) interface{} {
if depth >= 5 {
return schema
@@ -879,11 +988,9 @@ func unescapeMapOrSlice(data interface{}) interface{} {
func getResponseToolCall(item *dto.GeminiPart) *dto.ToolCallResponse {
var argsBytes []byte
var err error
if result, ok := item.FunctionCall.Arguments.(map[string]interface{}); ok {
argsBytes, err = json.Marshal(unescapeMapOrSlice(result))
} else {
argsBytes, err = json.Marshal(item.FunctionCall.Arguments)
}
// 移除 unescapeMapOrSlice 调用,直接使用 json.Marshal
// JSON 序列化/反序列化已经正确处理了转义字符
argsBytes, err = json.Marshal(item.FunctionCall.Arguments)
if err != nil {
return nil
@@ -961,6 +1068,24 @@ func responseGeminiChat2OpenAI(c *gin.Context, response *dto.GeminiChatResponse)
choice.FinishReason = constant.FinishReasonStop
case "MAX_TOKENS":
choice.FinishReason = constant.FinishReasonLength
case "SAFETY":
// Safety filter triggered
choice.FinishReason = constant.FinishReasonContentFilter
case "RECITATION":
// Recitation (citation) detected
choice.FinishReason = constant.FinishReasonContentFilter
case "BLOCKLIST":
// Blocklist triggered
choice.FinishReason = constant.FinishReasonContentFilter
case "PROHIBITED_CONTENT":
// Prohibited content detected
choice.FinishReason = constant.FinishReasonContentFilter
case "SPII":
// Sensitive personally identifiable information
choice.FinishReason = constant.FinishReasonContentFilter
case "OTHER":
// Other reasons
choice.FinishReason = constant.FinishReasonContentFilter
default:
choice.FinishReason = constant.FinishReasonContentFilter
}
@@ -992,13 +1117,34 @@ func streamResponseGeminiChat2OpenAI(geminiResponse *dto.GeminiChatResponse) (*d
isTools := false
isThought := false
if candidate.FinishReason != nil {
// p := GeminiConvertFinishReason(*candidate.FinishReason)
// Map Gemini FinishReason to OpenAI finish_reason
switch *candidate.FinishReason {
case "STOP":
// Normal completion
choice.FinishReason = &constant.FinishReasonStop
case "MAX_TOKENS":
// Reached maximum token limit
choice.FinishReason = &constant.FinishReasonLength
case "SAFETY":
// Safety filter triggered
choice.FinishReason = &constant.FinishReasonContentFilter
case "RECITATION":
// Recitation (citation) detected
choice.FinishReason = &constant.FinishReasonContentFilter
case "BLOCKLIST":
// Blocklist triggered
choice.FinishReason = &constant.FinishReasonContentFilter
case "PROHIBITED_CONTENT":
// Prohibited content detected
choice.FinishReason = &constant.FinishReasonContentFilter
case "SPII":
// Sensitive personally identifiable information
choice.FinishReason = &constant.FinishReasonContentFilter
case "OTHER":
// Other reasons
choice.FinishReason = &constant.FinishReasonContentFilter
default:
// Unknown reason, treat as content filter
choice.FinishReason = &constant.FinishReasonContentFilter
}
}
@@ -1081,6 +1227,10 @@ func geminiStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http
return false
}
if len(geminiResponse.Candidates) == 0 && geminiResponse.PromptFeedback != nil && geminiResponse.PromptFeedback.BlockReason != nil {
common.SetContextKey(c, constant.ContextKeyAdminRejectReason, fmt.Sprintf("gemini_block_reason=%s", *geminiResponse.PromptFeedback.BlockReason))
}
// 统计图片数量
for _, candidate := range geminiResponse.Candidates {
for _, part := range candidate.Content.Parts {
@@ -1099,6 +1249,7 @@ func geminiStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http
usage.CompletionTokens = geminiResponse.UsageMetadata.CandidatesTokenCount + geminiResponse.UsageMetadata.ThoughtsTokenCount
usage.CompletionTokenDetails.ReasoningTokens = geminiResponse.UsageMetadata.ThoughtsTokenCount
usage.TotalTokens = geminiResponse.UsageMetadata.TotalTokenCount
usage.PromptTokensDetails.CachedTokens = geminiResponse.UsageMetadata.CachedContentTokenCount
for _, detail := range geminiResponse.UsageMetadata.PromptTokensDetails {
if detail.Modality == "AUDIO" {
usage.PromptTokensDetails.AudioTokens = detail.TokenCount
@@ -1138,6 +1289,8 @@ func GeminiChatStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *
id := helper.GetResponseID(c)
createAt := common.GetTimestamp()
finishReason := constant.FinishReasonStop
toolCallIndexByChoice := make(map[int]map[string]int)
nextToolCallIndexByChoice := make(map[int]int)
usage, err := geminiStreamHandler(c, info, resp, func(data string, geminiResponse *dto.GeminiChatResponse) bool {
response, isStop := streamResponseGeminiChat2OpenAI(geminiResponse)
@@ -1145,6 +1298,28 @@ func GeminiChatStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *
response.Id = id
response.Created = createAt
response.Model = info.UpstreamModelName
for choiceIdx := range response.Choices {
choiceKey := response.Choices[choiceIdx].Index
for toolIdx := range response.Choices[choiceIdx].Delta.ToolCalls {
tool := &response.Choices[choiceIdx].Delta.ToolCalls[toolIdx]
if tool.ID == "" {
continue
}
m := toolCallIndexByChoice[choiceKey]
if m == nil {
m = make(map[string]int)
toolCallIndexByChoice[choiceKey] = m
}
if idx, ok := m[tool.ID]; ok {
tool.SetIndex(idx)
continue
}
idx := nextToolCallIndexByChoice[choiceKey]
nextToolCallIndexByChoice[choiceKey] = idx + 1
m[tool.ID] = idx
tool.SetIndex(idx)
}
}
logger.LogDebug(c, fmt.Sprintf("info.SendResponseCount = %d", info.SendResponseCount))
if info.SendResponseCount == 0 {
@@ -1215,12 +1390,53 @@ func GeminiChatHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.R
return nil, types.NewOpenAIError(err, types.ErrorCodeBadResponseBody, http.StatusInternalServerError)
}
if len(geminiResponse.Candidates) == 0 {
//return nil, types.NewOpenAIError(errors.New("no candidates returned"), types.ErrorCodeBadResponseBody, http.StatusInternalServerError)
//if geminiResponse.PromptFeedback != nil && geminiResponse.PromptFeedback.BlockReason != nil {
// return nil, types.NewOpenAIError(errors.New("request blocked by Gemini API: "+*geminiResponse.PromptFeedback.BlockReason), types.ErrorCodePromptBlocked, http.StatusBadRequest)
//} else {
// return nil, types.NewOpenAIError(errors.New("empty response from Gemini API"), types.ErrorCodeEmptyResponse, http.StatusInternalServerError)
//}
usage := dto.Usage{
PromptTokens: geminiResponse.UsageMetadata.PromptTokenCount,
}
usage.CompletionTokenDetails.ReasoningTokens = geminiResponse.UsageMetadata.ThoughtsTokenCount
usage.PromptTokensDetails.CachedTokens = geminiResponse.UsageMetadata.CachedContentTokenCount
for _, detail := range geminiResponse.UsageMetadata.PromptTokensDetails {
if detail.Modality == "AUDIO" {
usage.PromptTokensDetails.AudioTokens = detail.TokenCount
} else if detail.Modality == "TEXT" {
usage.PromptTokensDetails.TextTokens = detail.TokenCount
}
}
if usage.PromptTokens <= 0 {
usage.PromptTokens = info.GetEstimatePromptTokens()
}
var newAPIError *types.NewAPIError
if geminiResponse.PromptFeedback != nil && geminiResponse.PromptFeedback.BlockReason != nil {
common.SetContextKey(c, constant.ContextKeyAdminRejectReason, fmt.Sprintf("gemini_block_reason=%s", *geminiResponse.PromptFeedback.BlockReason))
newAPIError = types.NewOpenAIError(
errors.New("request blocked by Gemini API: "+*geminiResponse.PromptFeedback.BlockReason),
types.ErrorCodePromptBlocked,
http.StatusBadRequest,
)
} else {
common.SetContextKey(c, constant.ContextKeyAdminRejectReason, "gemini_empty_candidates")
newAPIError = types.NewOpenAIError(
errors.New("empty response from Gemini API"),
types.ErrorCodeEmptyResponse,
http.StatusInternalServerError,
)
}
service.ResetStatusCode(newAPIError, c.GetString("status_code_mapping"))
switch info.RelayFormat {
case types.RelayFormatClaude:
c.JSON(newAPIError.StatusCode, gin.H{
"type": "error",
"error": newAPIError.ToClaudeError(),
})
default:
c.JSON(newAPIError.StatusCode, gin.H{
"error": newAPIError.ToOpenAIError(),
})
}
return &usage, nil
}
fullTextResponse := responseGeminiChat2OpenAI(c, &geminiResponse)
fullTextResponse.Model = info.UpstreamModelName
@@ -1231,6 +1447,7 @@ func GeminiChatHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.R
}
usage.CompletionTokenDetails.ReasoningTokens = geminiResponse.UsageMetadata.ThoughtsTokenCount
usage.PromptTokensDetails.CachedTokens = geminiResponse.UsageMetadata.CachedContentTokenCount
usage.CompletionTokens = usage.TotalTokens - usage.PromptTokens
for _, detail := range geminiResponse.UsageMetadata.PromptTokensDetails {
@@ -1363,3 +1580,135 @@ func GeminiImageHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.
return usage, nil
}
type GeminiModelsResponse struct {
Models []dto.GeminiModel `json:"models"`
NextPageToken string `json:"nextPageToken"`
}
func FetchGeminiModels(baseURL, apiKey, proxyURL string) ([]string, error) {
client, err := service.GetHttpClientWithProxy(proxyURL)
if err != nil {
return nil, fmt.Errorf("创建HTTP客户端失败: %v", err)
}
allModels := make([]string, 0)
nextPageToken := ""
maxPages := 100 // Safety limit to prevent infinite loops
for page := 0; page < maxPages; page++ {
url := fmt.Sprintf("%s/v1beta/models", baseURL)
if nextPageToken != "" {
url = fmt.Sprintf("%s?pageToken=%s", url, nextPageToken)
}
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
request, err := http.NewRequestWithContext(ctx, "GET", url, nil)
if err != nil {
cancel()
return nil, fmt.Errorf("创建请求失败: %v", err)
}
request.Header.Set("x-goog-api-key", apiKey)
response, err := client.Do(request)
if err != nil {
cancel()
return nil, fmt.Errorf("请求失败: %v", err)
}
if response.StatusCode != http.StatusOK {
body, _ := io.ReadAll(response.Body)
response.Body.Close()
cancel()
return nil, fmt.Errorf("服务器返回错误 %d: %s", response.StatusCode, string(body))
}
body, err := io.ReadAll(response.Body)
response.Body.Close()
cancel()
if err != nil {
return nil, fmt.Errorf("读取响应失败: %v", err)
}
var modelsResponse GeminiModelsResponse
if err = common.Unmarshal(body, &modelsResponse); err != nil {
return nil, fmt.Errorf("解析响应失败: %v", err)
}
for _, model := range modelsResponse.Models {
modelNameValue, ok := model.Name.(string)
if !ok {
continue
}
modelName := strings.TrimPrefix(modelNameValue, "models/")
allModels = append(allModels, modelName)
}
nextPageToken = modelsResponse.NextPageToken
if nextPageToken == "" {
break
}
}
return allModels, nil
}
// convertToolChoiceToGeminiConfig converts OpenAI tool_choice to Gemini toolConfig
// OpenAI tool_choice values:
// - "auto": Let the model decide (default)
// - "none": Don't call any tools
// - "required": Must call at least one tool
// - {"type": "function", "function": {"name": "xxx"}}: Call specific function
//
// Gemini functionCallingConfig.mode values:
// - "AUTO": Model decides whether to call functions
// - "NONE": Model won't call functions
// - "ANY": Model must call at least one function
func convertToolChoiceToGeminiConfig(toolChoice any) *dto.ToolConfig {
if toolChoice == nil {
return nil
}
// Handle string values: "auto", "none", "required"
if toolChoiceStr, ok := toolChoice.(string); ok {
config := &dto.ToolConfig{
FunctionCallingConfig: &dto.FunctionCallingConfig{},
}
switch toolChoiceStr {
case "auto":
config.FunctionCallingConfig.Mode = "AUTO"
case "none":
config.FunctionCallingConfig.Mode = "NONE"
case "required":
config.FunctionCallingConfig.Mode = "ANY"
default:
// Unknown string value, default to AUTO
config.FunctionCallingConfig.Mode = "AUTO"
}
return config
}
// Handle object value: {"type": "function", "function": {"name": "xxx"}}
if toolChoiceMap, ok := toolChoice.(map[string]interface{}); ok {
if toolChoiceMap["type"] == "function" {
config := &dto.ToolConfig{
FunctionCallingConfig: &dto.FunctionCallingConfig{
Mode: "ANY",
},
}
// Extract function name if specified
if function, ok := toolChoiceMap["function"].(map[string]interface{}); ok {
if name, ok := function["name"].(string); ok && name != "" {
config.FunctionCallingConfig.AllowedFunctionNames = []string{name}
}
}
return config
}
// Unsupported map structure (type is not "function"), return nil
return nil
}
// Unsupported type, return nil
return nil
}

View File

@@ -14,6 +14,9 @@ var ModelList = []string{
"speech-02-turbo",
"speech-01-hd",
"speech-01-turbo",
"MiniMax-M2.1",
"MiniMax-M2.1-lightning",
"MiniMax-M2",
}
var ChannelName = "minimax"

View File

@@ -187,6 +187,17 @@ func (a *Adaptor) SetupRequestHeader(c *gin.Context, header *http.Header, info *
if info.ChannelType == constant.ChannelTypeOpenAI && "" != info.Organization {
header.Set("OpenAI-Organization", info.Organization)
}
// 检查 Header Override 是否已设置 Authorization如果已设置则跳过默认设置
// 这样可以避免在 Header Override 应用时被覆盖(虽然 Header Override 会在之后应用,但这里作为额外保护)
hasAuthOverride := false
if len(info.HeadersOverride) > 0 {
for k := range info.HeadersOverride {
if strings.EqualFold(k, "Authorization") {
hasAuthOverride = true
break
}
}
}
if info.RelayMode == relayconstant.RelayModeRealtime {
swp := c.Request.Header.Get("Sec-WebSocket-Protocol")
if swp != "" {
@@ -201,10 +212,14 @@ func (a *Adaptor) SetupRequestHeader(c *gin.Context, header *http.Header, info *
//req.Header.Set("Sec-Websocket-Version", c.Request.Header.Get("Sec-Websocket-Version"))
} else {
header.Set("openai-beta", "realtime=v1")
header.Set("Authorization", "Bearer "+info.ApiKey)
if !hasAuthOverride {
header.Set("Authorization", "Bearer "+info.ApiKey)
}
}
} else {
header.Set("Authorization", "Bearer "+info.ApiKey)
if !hasAuthOverride {
header.Set("Authorization", "Bearer "+info.ApiKey)
}
}
if info.ChannelType == constant.ChannelTypeOpenRouter {
header.Set("HTTP-Referer", "https://www.newapi.ai")
@@ -605,6 +620,8 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycom
} else {
usage, err = OaiResponsesHandler(c, info, resp)
}
case relayconstant.RelayModeResponsesCompact:
usage, err = OaiResponsesCompactionHandler(c, resp)
default:
if info.IsStream {
usage, err = OaiStreamHandler(c, info, resp)

View File

@@ -0,0 +1,369 @@
package openai
import (
"fmt"
"io"
"net/http"
"strings"
"time"
"github.com/QuantumNous/new-api/common"
"github.com/QuantumNous/new-api/dto"
"github.com/QuantumNous/new-api/logger"
relaycommon "github.com/QuantumNous/new-api/relay/common"
"github.com/QuantumNous/new-api/relay/helper"
"github.com/QuantumNous/new-api/service"
"github.com/QuantumNous/new-api/types"
"github.com/gin-gonic/gin"
)
func OaiResponsesToChatHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Response) (*dto.Usage, *types.NewAPIError) {
if resp == nil || resp.Body == nil {
return nil, types.NewOpenAIError(fmt.Errorf("invalid response"), types.ErrorCodeBadResponse, http.StatusInternalServerError)
}
defer service.CloseResponseBodyGracefully(resp)
var responsesResp dto.OpenAIResponsesResponse
body, err := io.ReadAll(resp.Body)
if err != nil {
return nil, types.NewOpenAIError(err, types.ErrorCodeReadResponseBodyFailed, http.StatusInternalServerError)
}
if err := common.Unmarshal(body, &responsesResp); err != nil {
return nil, types.NewOpenAIError(err, types.ErrorCodeBadResponseBody, http.StatusInternalServerError)
}
if oaiError := responsesResp.GetOpenAIError(); oaiError != nil && oaiError.Type != "" {
return nil, types.WithOpenAIError(*oaiError, resp.StatusCode)
}
chatId := helper.GetResponseID(c)
chatResp, usage, err := service.ResponsesResponseToChatCompletionsResponse(&responsesResp, chatId)
if err != nil {
return nil, types.NewOpenAIError(err, types.ErrorCodeBadResponseBody, http.StatusInternalServerError)
}
if usage == nil || usage.TotalTokens == 0 {
text := service.ExtractOutputTextFromResponses(&responsesResp)
usage = service.ResponseText2Usage(c, text, info.UpstreamModelName, info.GetEstimatePromptTokens())
chatResp.Usage = *usage
}
chatBody, err := common.Marshal(chatResp)
if err != nil {
return nil, types.NewOpenAIError(err, types.ErrorCodeJsonMarshalFailed, http.StatusInternalServerError)
}
service.IOCopyBytesGracefully(c, resp, chatBody)
return usage, nil
}
func OaiResponsesToChatStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Response) (*dto.Usage, *types.NewAPIError) {
if resp == nil || resp.Body == nil {
return nil, types.NewOpenAIError(fmt.Errorf("invalid response"), types.ErrorCodeBadResponse, http.StatusInternalServerError)
}
defer service.CloseResponseBodyGracefully(resp)
responseId := helper.GetResponseID(c)
createAt := time.Now().Unix()
model := info.UpstreamModelName
var (
usage = &dto.Usage{}
outputText strings.Builder
usageText strings.Builder
sentStart bool
sentStop bool
sawToolCall bool
streamErr *types.NewAPIError
)
toolCallIndexByID := make(map[string]int)
toolCallNameByID := make(map[string]string)
toolCallArgsByID := make(map[string]string)
toolCallNameSent := make(map[string]bool)
toolCallCanonicalIDByItemID := make(map[string]string)
sendStartIfNeeded := func() bool {
if sentStart {
return true
}
if err := helper.ObjectData(c, helper.GenerateStartEmptyResponse(responseId, createAt, model, nil)); err != nil {
streamErr = types.NewOpenAIError(err, types.ErrorCodeBadResponse, http.StatusInternalServerError)
return false
}
sentStart = true
return true
}
sendToolCallDelta := func(callID string, name string, argsDelta string) bool {
if callID == "" {
return true
}
if outputText.Len() > 0 {
// Prefer streaming assistant text over tool calls to match non-stream behavior.
return true
}
if !sendStartIfNeeded() {
return false
}
idx, ok := toolCallIndexByID[callID]
if !ok {
idx = len(toolCallIndexByID)
toolCallIndexByID[callID] = idx
}
if name != "" {
toolCallNameByID[callID] = name
}
if toolCallNameByID[callID] != "" {
name = toolCallNameByID[callID]
}
tool := dto.ToolCallResponse{
ID: callID,
Type: "function",
Function: dto.FunctionResponse{
Arguments: argsDelta,
},
}
tool.SetIndex(idx)
if name != "" && !toolCallNameSent[callID] {
tool.Function.Name = name
toolCallNameSent[callID] = true
}
chunk := &dto.ChatCompletionsStreamResponse{
Id: responseId,
Object: "chat.completion.chunk",
Created: createAt,
Model: model,
Choices: []dto.ChatCompletionsStreamResponseChoice{
{
Index: 0,
Delta: dto.ChatCompletionsStreamResponseChoiceDelta{
ToolCalls: []dto.ToolCallResponse{tool},
},
},
},
}
if err := helper.ObjectData(c, chunk); err != nil {
streamErr = types.NewOpenAIError(err, types.ErrorCodeBadResponse, http.StatusInternalServerError)
return false
}
sawToolCall = true
// Include tool call data in the local builder for fallback token estimation.
if tool.Function.Name != "" {
usageText.WriteString(tool.Function.Name)
}
if argsDelta != "" {
usageText.WriteString(argsDelta)
}
return true
}
helper.StreamScannerHandler(c, resp, info, func(data string) bool {
if streamErr != nil {
return false
}
var streamResp dto.ResponsesStreamResponse
if err := common.UnmarshalJsonStr(data, &streamResp); err != nil {
logger.LogError(c, "failed to unmarshal responses stream event: "+err.Error())
return true
}
switch streamResp.Type {
case "response.created":
if streamResp.Response != nil {
if streamResp.Response.Model != "" {
model = streamResp.Response.Model
}
if streamResp.Response.CreatedAt != 0 {
createAt = int64(streamResp.Response.CreatedAt)
}
}
case "response.output_text.delta":
if !sendStartIfNeeded() {
return false
}
if streamResp.Delta != "" {
outputText.WriteString(streamResp.Delta)
usageText.WriteString(streamResp.Delta)
delta := streamResp.Delta
chunk := &dto.ChatCompletionsStreamResponse{
Id: responseId,
Object: "chat.completion.chunk",
Created: createAt,
Model: model,
Choices: []dto.ChatCompletionsStreamResponseChoice{
{
Index: 0,
Delta: dto.ChatCompletionsStreamResponseChoiceDelta{
Content: &delta,
},
},
},
}
if err := helper.ObjectData(c, chunk); err != nil {
streamErr = types.NewOpenAIError(err, types.ErrorCodeBadResponse, http.StatusInternalServerError)
return false
}
}
case "response.output_item.added", "response.output_item.done":
if streamResp.Item == nil {
break
}
if streamResp.Item.Type != "function_call" {
break
}
itemID := strings.TrimSpace(streamResp.Item.ID)
callID := strings.TrimSpace(streamResp.Item.CallId)
if callID == "" {
callID = itemID
}
if itemID != "" && callID != "" {
toolCallCanonicalIDByItemID[itemID] = callID
}
name := strings.TrimSpace(streamResp.Item.Name)
if name != "" {
toolCallNameByID[callID] = name
}
newArgs := streamResp.Item.Arguments
prevArgs := toolCallArgsByID[callID]
argsDelta := ""
if newArgs != "" {
if strings.HasPrefix(newArgs, prevArgs) {
argsDelta = newArgs[len(prevArgs):]
} else {
argsDelta = newArgs
}
toolCallArgsByID[callID] = newArgs
}
if !sendToolCallDelta(callID, name, argsDelta) {
return false
}
case "response.function_call_arguments.delta":
itemID := strings.TrimSpace(streamResp.ItemID)
callID := toolCallCanonicalIDByItemID[itemID]
if callID == "" {
callID = itemID
}
if callID == "" {
break
}
toolCallArgsByID[callID] += streamResp.Delta
if !sendToolCallDelta(callID, "", streamResp.Delta) {
return false
}
case "response.function_call_arguments.done":
case "response.completed":
if streamResp.Response != nil {
if streamResp.Response.Model != "" {
model = streamResp.Response.Model
}
if streamResp.Response.CreatedAt != 0 {
createAt = int64(streamResp.Response.CreatedAt)
}
if streamResp.Response.Usage != nil {
if streamResp.Response.Usage.InputTokens != 0 {
usage.PromptTokens = streamResp.Response.Usage.InputTokens
usage.InputTokens = streamResp.Response.Usage.InputTokens
}
if streamResp.Response.Usage.OutputTokens != 0 {
usage.CompletionTokens = streamResp.Response.Usage.OutputTokens
usage.OutputTokens = streamResp.Response.Usage.OutputTokens
}
if streamResp.Response.Usage.TotalTokens != 0 {
usage.TotalTokens = streamResp.Response.Usage.TotalTokens
} else {
usage.TotalTokens = usage.PromptTokens + usage.CompletionTokens
}
if streamResp.Response.Usage.InputTokensDetails != nil {
usage.PromptTokensDetails.CachedTokens = streamResp.Response.Usage.InputTokensDetails.CachedTokens
usage.PromptTokensDetails.ImageTokens = streamResp.Response.Usage.InputTokensDetails.ImageTokens
usage.PromptTokensDetails.AudioTokens = streamResp.Response.Usage.InputTokensDetails.AudioTokens
}
if streamResp.Response.Usage.CompletionTokenDetails.ReasoningTokens != 0 {
usage.CompletionTokenDetails.ReasoningTokens = streamResp.Response.Usage.CompletionTokenDetails.ReasoningTokens
}
}
}
if !sendStartIfNeeded() {
return false
}
if !sentStop {
finishReason := "stop"
if sawToolCall && outputText.Len() == 0 {
finishReason = "tool_calls"
}
stop := helper.GenerateStopResponse(responseId, createAt, model, finishReason)
if err := helper.ObjectData(c, stop); err != nil {
streamErr = types.NewOpenAIError(err, types.ErrorCodeBadResponse, http.StatusInternalServerError)
return false
}
sentStop = true
}
case "response.error", "response.failed":
if streamResp.Response != nil {
if oaiErr := streamResp.Response.GetOpenAIError(); oaiErr != nil && oaiErr.Type != "" {
streamErr = types.WithOpenAIError(*oaiErr, http.StatusInternalServerError)
return false
}
}
streamErr = types.NewOpenAIError(fmt.Errorf("responses stream error: %s", streamResp.Type), types.ErrorCodeBadResponse, http.StatusInternalServerError)
return false
default:
}
return true
})
if streamErr != nil {
return nil, streamErr
}
if usage.TotalTokens == 0 {
usage = service.ResponseText2Usage(c, usageText.String(), info.UpstreamModelName, info.GetEstimatePromptTokens())
}
if !sentStart {
if err := helper.ObjectData(c, helper.GenerateStartEmptyResponse(responseId, createAt, model, nil)); err != nil {
return nil, types.NewOpenAIError(err, types.ErrorCodeBadResponse, http.StatusInternalServerError)
}
}
if !sentStop {
finishReason := "stop"
if sawToolCall && outputText.Len() == 0 {
finishReason = "tool_calls"
}
stop := helper.GenerateStopResponse(responseId, createAt, model, finishReason)
if err := helper.ObjectData(c, stop); err != nil {
return nil, types.NewOpenAIError(err, types.ErrorCodeBadResponse, http.StatusInternalServerError)
}
}
if info.ShouldIncludeUsage && usage != nil {
if err := helper.ObjectData(c, helper.GenerateFinalUsageResponse(responseId, createAt, model, *usage)); err != nil {
return nil, types.NewOpenAIError(err, types.ErrorCodeBadResponse, http.StatusInternalServerError)
}
}
helper.Done(c)
return usage, nil
}

View File

@@ -229,6 +229,13 @@ func OpenaiHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Respo
return nil, types.WithOpenAIError(*oaiError, resp.StatusCode)
}
for _, choice := range simpleResponse.Choices {
if choice.FinishReason == constant.FinishReasonContentFilter {
common.SetContextKey(c, constant.ContextKeyAdminRejectReason, "openai_finish_reason=content_filter")
break
}
}
forceFormat := false
if info.ChannelSetting.ForceFormat {
forceFormat = true

View File

@@ -0,0 +1,44 @@
package openai
import (
"io"
"net/http"
"github.com/QuantumNous/new-api/common"
"github.com/QuantumNous/new-api/dto"
"github.com/QuantumNous/new-api/service"
"github.com/QuantumNous/new-api/types"
"github.com/gin-gonic/gin"
)
func OaiResponsesCompactionHandler(c *gin.Context, resp *http.Response) (*dto.Usage, *types.NewAPIError) {
defer service.CloseResponseBodyGracefully(resp)
responseBody, err := io.ReadAll(resp.Body)
if err != nil {
return nil, types.NewOpenAIError(err, types.ErrorCodeReadResponseBodyFailed, http.StatusInternalServerError)
}
var compactResp dto.OpenAIResponsesCompactionResponse
if err := common.Unmarshal(responseBody, &compactResp); err != nil {
return nil, types.NewOpenAIError(err, types.ErrorCodeBadResponseBody, http.StatusInternalServerError)
}
if oaiError := compactResp.GetOpenAIError(); oaiError != nil && oaiError.Type != "" {
return nil, types.WithOpenAIError(*oaiError, resp.StatusCode)
}
service.IOCopyBytesGracefully(c, resp, responseBody)
usage := dto.Usage{}
if compactResp.Usage != nil {
usage.PromptTokens = compactResp.Usage.InputTokens
usage.CompletionTokens = compactResp.Usage.OutputTokens
usage.TotalTokens = compactResp.Usage.TotalTokens
if compactResp.Usage.InputTokensDetails != nil {
usage.PromptTokensDetails.CachedTokens = compactResp.Usage.InputTokensDetails.CachedTokens
}
}
return &usage, nil
}

View File

@@ -10,6 +10,7 @@ import (
"github.com/QuantumNous/new-api/relay/channel"
"github.com/QuantumNous/new-api/relay/channel/openai"
relaycommon "github.com/QuantumNous/new-api/relay/common"
relayconstant "github.com/QuantumNous/new-api/relay/constant"
"github.com/QuantumNous/new-api/types"
"github.com/gin-gonic/gin"
@@ -42,6 +43,9 @@ func (a *Adaptor) Init(info *relaycommon.RelayInfo) {
}
func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
if info.RelayMode == relayconstant.RelayModeResponses {
return fmt.Sprintf("%s/v1/responses", info.ChannelBaseUrl), nil
}
return fmt.Sprintf("%s/chat/completions", info.ChannelBaseUrl), nil
}
@@ -71,8 +75,7 @@ func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.Rela
}
func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
// TODO implement me
return nil, errors.New("not implemented")
return request, nil
}
func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error) {

View File

@@ -6,6 +6,9 @@ import (
"fmt"
"io"
"net/http"
"time"
"github.com/QuantumNous/new-api/common"
"github.com/QuantumNous/new-api/constant"
"github.com/QuantumNous/new-api/dto"
@@ -23,18 +26,37 @@ import (
// ============================
type ContentItem struct {
Type string `json:"type"` // "text" or "image_url"
Text string `json:"text,omitempty"` // for text type
ImageURL *ImageURL `json:"image_url,omitempty"` // for image_url type
Type string `json:"type"` // "text", "image_url" or "video"
Text string `json:"text,omitempty"` // for text type
ImageURL *ImageURL `json:"image_url,omitempty"` // for image_url type
Video *VideoReference `json:"video,omitempty"` // for video (sample) type
Role string `json:"role,omitempty"` // reference_image / first_frame / last_frame
}
type ImageURL struct {
URL string `json:"url"`
}
type VideoReference struct {
URL string `json:"url"` // Draft video URL
}
type requestPayload struct {
Model string `json:"model"`
Content []ContentItem `json:"content"`
Model string `json:"model"`
Content []ContentItem `json:"content"`
CallbackURL string `json:"callback_url,omitempty"`
ReturnLastFrame *dto.BoolValue `json:"return_last_frame,omitempty"`
ServiceTier string `json:"service_tier,omitempty"`
ExecutionExpiresAfter dto.IntValue `json:"execution_expires_after,omitempty"`
GenerateAudio *dto.BoolValue `json:"generate_audio,omitempty"`
Draft *dto.BoolValue `json:"draft,omitempty"`
Resolution string `json:"resolution,omitempty"`
Ratio string `json:"ratio,omitempty"`
Duration dto.IntValue `json:"duration,omitempty"`
Frames dto.IntValue `json:"frames,omitempty"`
Seed dto.IntValue `json:"seed,omitempty"`
CameraFixed *dto.BoolValue `json:"camera_fixed,omitempty"`
Watermark *dto.BoolValue `json:"watermark,omitempty"`
}
type responsePayload struct {
@@ -53,6 +75,7 @@ type responseTask struct {
Duration int `json:"duration"`
Ratio string `json:"ratio"`
FramesPerSecond int `json:"framespersecond"`
ServiceTier string `json:"service_tier"`
Usage struct {
CompletionTokens int `json:"completion_tokens"`
TotalTokens int `json:"total_tokens"`
@@ -98,16 +121,16 @@ func (a *TaskAdaptor) BuildRequestHeader(c *gin.Context, req *http.Request, info
// BuildRequestBody converts request into Doubao specific format.
func (a *TaskAdaptor) BuildRequestBody(c *gin.Context, info *relaycommon.RelayInfo) (io.Reader, error) {
v, exists := c.Get("task_request")
if !exists {
return nil, fmt.Errorf("request not found in context")
req, err := relaycommon.GetTaskRequest(c)
if err != nil {
return nil, err
}
req := v.(relaycommon.TaskSubmitReq)
body, err := a.convertToRequestPayload(&req)
if err != nil {
return nil, errors.Wrap(err, "convert request payload failed")
}
info.UpstreamModelName = body.Model
data, err := json.Marshal(body)
if err != nil {
return nil, err
@@ -141,7 +164,13 @@ func (a *TaskAdaptor) DoResponse(c *gin.Context, resp *http.Response, info *rela
return
}
c.JSON(http.StatusOK, gin.H{"task_id": dResp.ID})
ov := dto.NewOpenAIVideo()
ov.ID = dResp.ID
ov.TaskID = dResp.ID
ov.CreatedAt = time.Now().Unix()
ov.Model = info.OriginModelName
c.JSON(http.StatusOK, ov)
return dResp.ID, responseBody, nil
}
@@ -204,12 +233,15 @@ func (a *TaskAdaptor) convertToRequestPayload(req *relaycommon.TaskSubmitReq) (*
}
}
// TODO: Add support for additional parameters from metadata
// such as ratio, duration, seed, etc.
// metadata := req.Metadata
// if metadata != nil {
// // Parse and apply metadata parameters
// }
metadata := req.Metadata
medaBytes, err := json.Marshal(metadata)
if err != nil {
return nil, errors.Wrap(err, "metadata marshal metadata failed")
}
err = json.Unmarshal(medaBytes, &r)
if err != nil {
return nil, errors.Wrap(err, "unmarshal metadata failed")
}
return &r, nil
}
@@ -229,7 +261,7 @@ func (a *TaskAdaptor) ParseTaskResult(respBody []byte) (*relaycommon.TaskInfo, e
case "pending", "queued":
taskResult.Status = model.TaskStatusQueued
taskResult.Progress = "10%"
case "processing":
case "processing", "running":
taskResult.Status = model.TaskStatusInProgress
taskResult.Progress = "50%"
case "succeeded":
@@ -251,3 +283,30 @@ func (a *TaskAdaptor) ParseTaskResult(respBody []byte) (*relaycommon.TaskInfo, e
return &taskResult, nil
}
func (a *TaskAdaptor) ConvertToOpenAIVideo(originTask *model.Task) ([]byte, error) {
var dResp responseTask
if err := json.Unmarshal(originTask.Data, &dResp); err != nil {
return nil, errors.Wrap(err, "unmarshal doubao task data failed")
}
openAIVideo := dto.NewOpenAIVideo()
openAIVideo.ID = originTask.TaskID
openAIVideo.TaskID = originTask.TaskID
openAIVideo.Status = originTask.Status.ToVideoStatus()
openAIVideo.SetProgressStr(originTask.Progress)
openAIVideo.SetMetadata("url", dResp.Content.VideoURL)
openAIVideo.CreatedAt = originTask.CreatedAt
openAIVideo.CompletedAt = originTask.UpdatedAt
openAIVideo.Model = originTask.Properties.OriginModelName
if dResp.Status == "failed" {
openAIVideo.Error = &dto.OpenAIVideoError{
Message: "task failed",
Code: "failed",
}
}
jsonData, _ := common.Marshal(openAIVideo)
return jsonData, nil
}

View File

@@ -4,6 +4,7 @@ var ModelList = []string{
"doubao-seedance-1-0-pro-250528",
"doubao-seedance-1-0-lite-t2v",
"doubao-seedance-1-0-lite-i2v",
"doubao-seedance-1-5-pro-251215",
}
var ChannelName = "doubao-video"

View File

@@ -17,6 +17,7 @@ import (
"github.com/QuantumNous/new-api/common"
"github.com/QuantumNous/new-api/model"
"github.com/samber/lo"
"github.com/gin-gonic/gin"
"github.com/pkg/errors"
@@ -409,14 +410,15 @@ func (a *TaskAdaptor) convertToRequestPayload(req *relaycommon.TaskSubmitReq) (*
// 即梦视频3.0 ReqKey转换
// https://www.volcengine.com/docs/85621/1792707
imageLen := lo.Max([]int{len(req.Images), len(r.BinaryDataBase64), len(r.ImageUrls)})
if strings.Contains(r.ReqKey, "jimeng_v30") {
if r.ReqKey == "jimeng_v30_pro" {
// 3.0 pro只有固定的jimeng_ti2v_v30_pro
r.ReqKey = "jimeng_ti2v_v30_pro"
} else if len(req.Images) > 1 {
} else if imageLen > 1 {
// 多张图片:首尾帧生成
r.ReqKey = strings.TrimSuffix(strings.Replace(r.ReqKey, "jimeng_v30", "jimeng_i2v_first_tail_v30", 1), "p")
} else if len(req.Images) == 1 {
} else if imageLen == 1 {
// 单张图片:图生视频
r.ReqKey = strings.TrimSuffix(strings.Replace(r.ReqKey, "jimeng_v30", "jimeng_i2v_first_v30", 1), "p")
} else {

View File

@@ -24,9 +24,9 @@ import (
)
const (
RequestModeClaude = 1
RequestModeGemini = 2
RequestModeLlama = 3
RequestModeClaude = 1
RequestModeGemini = 2
RequestModeOpenSource = 3
)
var claudeModelMap = map[string]string{
@@ -115,7 +115,7 @@ func (a *Adaptor) Init(info *relaycommon.RelayInfo) {
} else if strings.Contains(info.UpstreamModelName, "llama") ||
// open source models
strings.Contains(info.UpstreamModelName, "-maas") {
a.RequestMode = RequestModeLlama
a.RequestMode = RequestModeOpenSource
} else {
a.RequestMode = RequestModeGemini
}
@@ -166,10 +166,9 @@ func (a *Adaptor) getRequestUrl(info *relaycommon.RelayInfo, modelName, suffix s
suffix,
), nil
}
} else if a.RequestMode == RequestModeLlama {
} else if a.RequestMode == RequestModeOpenSource {
return fmt.Sprintf(
"https://%s-aiplatform.googleapis.com/v1beta1/projects/%s/locations/%s/endpoints/openapi/chat/completions",
region,
"https://aiplatform.googleapis.com/v1beta1/projects/%s/locations/%s/endpoints/openapi/chat/completions",
adc.ProjectID,
region,
), nil
@@ -242,7 +241,7 @@ func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
model = v
}
return a.getRequestUrl(info, model, suffix)
} else if a.RequestMode == RequestModeLlama {
} else if a.RequestMode == RequestModeOpenSource {
return a.getRequestUrl(info, "", "")
}
return "", errors.New("unsupported request mode")
@@ -340,7 +339,7 @@ func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayIn
}
c.Set("request_model", request.Model)
return geminiRequest, nil
} else if a.RequestMode == RequestModeLlama {
} else if a.RequestMode == RequestModeOpenSource {
return request, nil
}
return nil, errors.New("unsupported request mode")
@@ -375,7 +374,7 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycom
} else {
return gemini.GeminiChatStreamHandler(c, info, resp)
}
case RequestModeLlama:
case RequestModeOpenSource:
return openai.OaiStreamHandler(c, info, resp)
}
} else {
@@ -391,7 +390,7 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycom
}
return gemini.GeminiChatHandler(c, info, resp)
}
case RequestModeLlama:
case RequestModeOpenSource:
return openai.OpenaiHandler(c, info, resp)
}
}

View File

@@ -0,0 +1,162 @@
package relay
import (
"bytes"
"net/http"
"strings"
"github.com/QuantumNous/new-api/common"
"github.com/QuantumNous/new-api/constant"
"github.com/QuantumNous/new-api/dto"
"github.com/QuantumNous/new-api/relay/channel"
openaichannel "github.com/QuantumNous/new-api/relay/channel/openai"
relaycommon "github.com/QuantumNous/new-api/relay/common"
relayconstant "github.com/QuantumNous/new-api/relay/constant"
"github.com/QuantumNous/new-api/service"
"github.com/QuantumNous/new-api/types"
"github.com/gin-gonic/gin"
)
func applySystemPromptIfNeeded(c *gin.Context, info *relaycommon.RelayInfo, request *dto.GeneralOpenAIRequest) {
if info == nil || request == nil {
return
}
if info.ChannelSetting.SystemPrompt == "" {
return
}
systemRole := request.GetSystemRoleName()
containSystemPrompt := false
for _, message := range request.Messages {
if message.Role == systemRole {
containSystemPrompt = true
break
}
}
if !containSystemPrompt {
systemMessage := dto.Message{
Role: systemRole,
Content: info.ChannelSetting.SystemPrompt,
}
request.Messages = append([]dto.Message{systemMessage}, request.Messages...)
return
}
if !info.ChannelSetting.SystemPromptOverride {
return
}
common.SetContextKey(c, constant.ContextKeySystemPromptOverride, true)
for i, message := range request.Messages {
if message.Role != systemRole {
continue
}
if message.IsStringContent() {
request.Messages[i].SetStringContent(info.ChannelSetting.SystemPrompt + "\n" + message.StringContent())
return
}
contents := message.ParseContent()
contents = append([]dto.MediaContent{
{
Type: dto.ContentTypeText,
Text: info.ChannelSetting.SystemPrompt,
},
}, contents...)
request.Messages[i].Content = contents
return
}
}
func chatCompletionsViaResponses(c *gin.Context, info *relaycommon.RelayInfo, adaptor channel.Adaptor, request *dto.GeneralOpenAIRequest) (*dto.Usage, *types.NewAPIError) {
overrideCtx := relaycommon.BuildParamOverrideContext(info)
chatJSON, err := common.Marshal(request)
if err != nil {
return nil, types.NewError(err, types.ErrorCodeConvertRequestFailed, types.ErrOptionWithSkipRetry())
}
chatJSON, err = relaycommon.RemoveDisabledFields(chatJSON, info.ChannelOtherSettings)
if err != nil {
return nil, types.NewError(err, types.ErrorCodeConvertRequestFailed, types.ErrOptionWithSkipRetry())
}
if len(info.ParamOverride) > 0 {
chatJSON, err = relaycommon.ApplyParamOverride(chatJSON, info.ParamOverride, overrideCtx)
if err != nil {
return nil, types.NewError(err, types.ErrorCodeChannelParamOverrideInvalid, types.ErrOptionWithSkipRetry())
}
}
var overriddenChatReq dto.GeneralOpenAIRequest
if err := common.Unmarshal(chatJSON, &overriddenChatReq); err != nil {
return nil, types.NewError(err, types.ErrorCodeChannelParamOverrideInvalid, types.ErrOptionWithSkipRetry())
}
responsesReq, err := service.ChatCompletionsRequestToResponsesRequest(&overriddenChatReq)
if err != nil {
return nil, types.NewErrorWithStatusCode(err, types.ErrorCodeInvalidRequest, http.StatusBadRequest, types.ErrOptionWithSkipRetry())
}
info.AppendRequestConversion(types.RelayFormatOpenAIResponses)
savedRelayMode := info.RelayMode
savedRequestURLPath := info.RequestURLPath
defer func() {
info.RelayMode = savedRelayMode
info.RequestURLPath = savedRequestURLPath
}()
info.RelayMode = relayconstant.RelayModeResponses
info.RequestURLPath = "/v1/responses"
convertedRequest, err := adaptor.ConvertOpenAIResponsesRequest(c, info, *responsesReq)
if err != nil {
return nil, types.NewError(err, types.ErrorCodeConvertRequestFailed, types.ErrOptionWithSkipRetry())
}
relaycommon.AppendRequestConversionFromRequest(info, convertedRequest)
jsonData, err := common.Marshal(convertedRequest)
if err != nil {
return nil, types.NewError(err, types.ErrorCodeConvertRequestFailed, types.ErrOptionWithSkipRetry())
}
jsonData, err = relaycommon.RemoveDisabledFields(jsonData, info.ChannelOtherSettings)
if err != nil {
return nil, types.NewError(err, types.ErrorCodeConvertRequestFailed, types.ErrOptionWithSkipRetry())
}
var httpResp *http.Response
resp, err := adaptor.DoRequest(c, info, bytes.NewBuffer(jsonData))
if err != nil {
return nil, types.NewOpenAIError(err, types.ErrorCodeDoRequestFailed, http.StatusInternalServerError)
}
if resp == nil {
return nil, types.NewOpenAIError(nil, types.ErrorCodeBadResponse, http.StatusInternalServerError)
}
statusCodeMappingStr := c.GetString("status_code_mapping")
httpResp = resp.(*http.Response)
info.IsStream = info.IsStream || strings.HasPrefix(httpResp.Header.Get("Content-Type"), "text/event-stream")
if httpResp.StatusCode != http.StatusOK {
newApiErr := service.RelayErrorHandler(c.Request.Context(), httpResp, false)
service.ResetStatusCode(newApiErr, statusCodeMappingStr)
return nil, newApiErr
}
if info.IsStream {
usage, newApiErr := openaichannel.OaiResponsesToChatStreamHandler(c, info, httpResp)
if newApiErr != nil {
service.ResetStatusCode(newApiErr, statusCodeMappingStr)
return nil, newApiErr
}
return usage, nil
}
usage, newApiErr := openaichannel.OaiResponsesToChatHandler(c, info, httpResp)
if newApiErr != nil {
service.ResetStatusCode(newApiErr, statusCodeMappingStr)
return nil, newApiErr
}
return usage, nil
}

View File

@@ -110,6 +110,7 @@ func ClaudeHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *typ
if err != nil {
return types.NewError(err, types.ErrorCodeConvertRequestFailed, types.ErrOptionWithSkipRetry())
}
relaycommon.AppendRequestConversionFromRequest(info, convertedRequest)
jsonData, err := common.Marshal(convertedRequest)
if err != nil {
return types.NewError(err, types.ErrorCodeConvertRequestFailed, types.ErrOptionWithSkipRetry())

View File

@@ -570,18 +570,19 @@ func mergeObjects(jsonStr, path string, value interface{}, keepOrigin bool) (str
// BuildParamOverrideContext 提供 ApplyParamOverride 可用的上下文信息。
// 目前内置以下字段:
// - model优先使用上游模型名UpstreamModelName若不存在则回落到原始模型名OriginModelName
// - upstream_model始终为通道映射后的上游模型名。
// - upstream_model/model始终为通道映射后的上游模型名
// - original_model请求最初指定的模型名。
// - request_path请求路径
// - is_channel_test是否为渠道测试请求同 is_test
func BuildParamOverrideContext(info *RelayInfo) map[string]interface{} {
if info == nil || info.ChannelMeta == nil {
if info == nil {
return nil
}
ctx := make(map[string]interface{})
if info.UpstreamModelName != "" {
ctx["model"] = info.UpstreamModelName
ctx["upstream_model"] = info.UpstreamModelName
if info.ChannelMeta != nil && info.ChannelMeta.UpstreamModelName != "" {
ctx["model"] = info.ChannelMeta.UpstreamModelName
ctx["upstream_model"] = info.ChannelMeta.UpstreamModelName
}
if info.OriginModelName != "" {
ctx["original_model"] = info.OriginModelName
@@ -590,8 +591,13 @@ func BuildParamOverrideContext(info *RelayInfo) map[string]interface{} {
}
}
if len(ctx) == 0 {
return nil
if info.RequestURLPath != "" {
requestPath := info.RequestURLPath
if requestPath != "" {
ctx["request_path"] = requestPath
}
}
ctx["is_channel_test"] = info.IsChannelTest
return ctx
}

View File

@@ -115,11 +115,16 @@ type RelayInfo struct {
SendResponseCount int
FinalPreConsumedQuota int // 最终预消耗的配额
IsClaudeBetaQuery bool // /v1/messages?beta=true
IsChannelTest bool // channel test request
PriceData types.PriceData
Request dto.Request
// RequestConversionChain records request format conversions in order, e.g.
// ["openai", "openai_responses"] or ["openai", "claude"].
RequestConversionChain []types.RelayFormat
ThinkingContentInfo
TokenCountMeta
*ClaudeConvertInfo
@@ -273,6 +278,7 @@ var streamSupportedChannels = map[int]bool{
constant.ChannelTypeZhipu_v4: true,
constant.ChannelTypeAli: true,
constant.ChannelTypeSubmodel: true,
constant.ChannelTypeCodex: true,
}
func GenRelayInfoWs(c *gin.Context, ws *websocket.Conn) *RelayInfo {
@@ -446,38 +452,97 @@ func genBaseRelayInfo(c *gin.Context, request dto.Request) *RelayInfo {
}
func GenRelayInfo(c *gin.Context, relayFormat types.RelayFormat, request dto.Request, ws *websocket.Conn) (*RelayInfo, error) {
var info *RelayInfo
var err error
switch relayFormat {
case types.RelayFormatOpenAI:
return GenRelayInfoOpenAI(c, request), nil
info = GenRelayInfoOpenAI(c, request)
case types.RelayFormatOpenAIAudio:
return GenRelayInfoOpenAIAudio(c, request), nil
info = GenRelayInfoOpenAIAudio(c, request)
case types.RelayFormatOpenAIImage:
return GenRelayInfoImage(c, request), nil
info = GenRelayInfoImage(c, request)
case types.RelayFormatOpenAIRealtime:
return GenRelayInfoWs(c, ws), nil
info = GenRelayInfoWs(c, ws)
case types.RelayFormatClaude:
return GenRelayInfoClaude(c, request), nil
info = GenRelayInfoClaude(c, request)
case types.RelayFormatRerank:
if request, ok := request.(*dto.RerankRequest); ok {
return GenRelayInfoRerank(c, request), nil
info = GenRelayInfoRerank(c, request)
break
}
return nil, errors.New("request is not a RerankRequest")
err = errors.New("request is not a RerankRequest")
case types.RelayFormatGemini:
return GenRelayInfoGemini(c, request), nil
info = GenRelayInfoGemini(c, request)
case types.RelayFormatEmbedding:
return GenRelayInfoEmbedding(c, request), nil
info = GenRelayInfoEmbedding(c, request)
case types.RelayFormatOpenAIResponses:
if request, ok := request.(*dto.OpenAIResponsesRequest); ok {
return GenRelayInfoResponses(c, request), nil
info = GenRelayInfoResponses(c, request)
break
}
return nil, errors.New("request is not a OpenAIResponsesRequest")
err = errors.New("request is not a OpenAIResponsesRequest")
case types.RelayFormatOpenAIResponsesCompaction:
if request, ok := request.(*dto.OpenAIResponsesCompactionRequest); ok {
return GenRelayInfoResponsesCompaction(c, request), nil
}
return nil, errors.New("request is not a OpenAIResponsesCompactionRequest")
case types.RelayFormatTask:
return genBaseRelayInfo(c, nil), nil
info = genBaseRelayInfo(c, nil)
case types.RelayFormatMjProxy:
return genBaseRelayInfo(c, nil), nil
info = genBaseRelayInfo(c, nil)
default:
return nil, errors.New("invalid relay format")
err = errors.New("invalid relay format")
}
if err != nil {
return nil, err
}
if info == nil {
return nil, errors.New("failed to build relay info")
}
info.InitRequestConversionChain()
return info, nil
}
func (info *RelayInfo) InitRequestConversionChain() {
if info == nil {
return
}
if len(info.RequestConversionChain) > 0 {
return
}
if info.RelayFormat == "" {
return
}
info.RequestConversionChain = []types.RelayFormat{info.RelayFormat}
}
func (info *RelayInfo) AppendRequestConversion(format types.RelayFormat) {
if info == nil {
return
}
if format == "" {
return
}
if len(info.RequestConversionChain) == 0 {
info.RequestConversionChain = []types.RelayFormat{format}
return
}
last := info.RequestConversionChain[len(info.RequestConversionChain)-1]
if last == format {
return
}
info.RequestConversionChain = append(info.RequestConversionChain, format)
}
func GenRelayInfoResponsesCompaction(c *gin.Context, request *dto.OpenAIResponsesCompactionRequest) *RelayInfo {
info := genBaseRelayInfo(c, request)
if info.RelayMode == relayconstant.RelayModeUnknown {
info.RelayMode = relayconstant.RelayModeResponsesCompact
}
info.RelayFormat = types.RelayFormatOpenAIResponsesCompaction
return info
}
//func (info *RelayInfo) SetPromptTokens(promptTokens int) {

View File

@@ -0,0 +1,40 @@
package common
import (
"github.com/QuantumNous/new-api/dto"
"github.com/QuantumNous/new-api/types"
)
func GuessRelayFormatFromRequest(req any) (types.RelayFormat, bool) {
switch req.(type) {
case *dto.GeneralOpenAIRequest, dto.GeneralOpenAIRequest:
return types.RelayFormatOpenAI, true
case *dto.OpenAIResponsesRequest, dto.OpenAIResponsesRequest:
return types.RelayFormatOpenAIResponses, true
case *dto.ClaudeRequest, dto.ClaudeRequest:
return types.RelayFormatClaude, true
case *dto.GeminiChatRequest, dto.GeminiChatRequest:
return types.RelayFormatGemini, true
case *dto.EmbeddingRequest, dto.EmbeddingRequest:
return types.RelayFormatEmbedding, true
case *dto.RerankRequest, dto.RerankRequest:
return types.RelayFormatRerank, true
case *dto.ImageRequest, dto.ImageRequest:
return types.RelayFormatOpenAIImage, true
case *dto.AudioRequest, dto.AudioRequest:
return types.RelayFormatOpenAIAudio, true
default:
return "", false
}
}
func AppendRequestConversionFromRequest(info *RelayInfo, req any) {
if info == nil {
return
}
format, ok := GuessRelayFormatFromRequest(req)
if !ok {
return
}
info.AppendRequestConversion(format)
}

View File

@@ -14,6 +14,7 @@ import (
"github.com/QuantumNous/new-api/logger"
"github.com/QuantumNous/new-api/model"
relaycommon "github.com/QuantumNous/new-api/relay/common"
relayconstant "github.com/QuantumNous/new-api/relay/constant"
"github.com/QuantumNous/new-api/relay/helper"
"github.com/QuantumNous/new-api/service"
"github.com/QuantumNous/new-api/setting/model_setting"
@@ -73,9 +74,32 @@ func TextHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *types
return types.NewError(fmt.Errorf("invalid api type: %d", info.ApiType), types.ErrorCodeInvalidApiType, types.ErrOptionWithSkipRetry())
}
adaptor.Init(info)
passThroughGlobal := model_setting.GetGlobalSettings().PassThroughRequestEnabled
if info.RelayMode == relayconstant.RelayModeChatCompletions &&
!passThroughGlobal &&
!info.ChannelSetting.PassThroughBodyEnabled &&
service.ShouldChatCompletionsUseResponsesGlobal(info.ChannelId, info.ChannelType, info.OriginModelName) {
applySystemPromptIfNeeded(c, info, request)
usage, newApiErr := chatCompletionsViaResponses(c, info, adaptor, request)
if newApiErr != nil {
return newApiErr
}
var containAudioTokens = usage.CompletionTokenDetails.AudioTokens > 0 || usage.PromptTokensDetails.AudioTokens > 0
var containsAudioRatios = ratio_setting.ContainsAudioRatio(info.OriginModelName) || ratio_setting.ContainsAudioCompletionRatio(info.OriginModelName)
if containAudioTokens && containsAudioRatios {
service.PostAudioConsumeQuota(c, info, usage, "")
} else {
postConsumeQuota(c, info, usage)
}
return nil
}
var requestBody io.Reader
if model_setting.GetGlobalSettings().PassThroughRequestEnabled || info.ChannelSetting.PassThroughBodyEnabled {
if passThroughGlobal || info.ChannelSetting.PassThroughBodyEnabled {
body, err := common.GetRequestBody(c)
if err != nil {
return types.NewErrorWithStatusCode(err, types.ErrorCodeReadRequestBodyFailed, http.StatusBadRequest, types.ErrOptionWithSkipRetry())
@@ -89,6 +113,7 @@ func TextHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *types
if err != nil {
return types.NewError(err, types.ErrorCodeConvertRequestFailed, types.ErrOptionWithSkipRetry())
}
relaycommon.AppendRequestConversionFromRequest(info, convertedRequest)
if info.ChannelSetting.SystemPrompt != "" {
// 如果有系统提示,则将其添加到请求中
@@ -194,6 +219,7 @@ func TextHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *types
}
func postConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.Usage, extraContent ...string) {
originUsage := usage
if usage == nil {
usage = &dto.Usage{
PromptTokens: relayInfo.GetEstimatePromptTokens(),
@@ -202,6 +228,13 @@ func postConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage
}
extraContent = append(extraContent, "上游无计费信息")
}
if originUsage != nil {
service.ObserveChannelAffinityUsageCacheFromContext(ctx, usage)
}
adminRejectReason := common.GetContextKeyString(ctx, constant.ContextKeyAdminRejectReason)
useTimeSeconds := time.Now().Unix() - relayInfo.StartTime.Unix()
promptTokens := usage.PromptTokens
cacheTokens := usage.PromptTokensDetails.CachedTokens
@@ -301,6 +334,7 @@ func postConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage
var audioInputQuota decimal.Decimal
var audioInputPrice float64
isClaudeUsageSemantic := relayInfo.ChannelType == constant.ChannelTypeAnthropic
if !relayInfo.PriceData.UsePrice {
baseTokens := dPromptTokens
// 减去 cached tokens
@@ -308,14 +342,14 @@ func postConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage
// OpenAI/OpenRouter 等 API 的 prompt_tokens 包含缓存 tokens需要减去
var cachedTokensWithRatio decimal.Decimal
if !dCacheTokens.IsZero() {
if relayInfo.ChannelType != constant.ChannelTypeAnthropic {
if !isClaudeUsageSemantic {
baseTokens = baseTokens.Sub(dCacheTokens)
}
cachedTokensWithRatio = dCacheTokens.Mul(dCacheRatio)
}
var dCachedCreationTokensWithRatio decimal.Decimal
if !dCachedCreationTokens.IsZero() {
if relayInfo.ChannelType != constant.ChannelTypeAnthropic {
if !isClaudeUsageSemantic {
baseTokens = baseTokens.Sub(dCachedCreationTokens)
}
dCachedCreationTokensWithRatio = dCachedCreationTokens.Mul(dCachedCreationRatio)
@@ -425,6 +459,14 @@ func postConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage
}
logContent := strings.Join(extraContent, ", ")
other := service.GenerateTextOtherInfo(ctx, relayInfo, modelRatio, groupRatio, completionRatio, cacheTokens, cacheRatio, modelPrice, relayInfo.PriceData.GroupRatioInfo.GroupSpecialRatio)
if adminRejectReason != "" {
other["reject_reason"] = adminRejectReason
}
// For chat-based calls to the Claude model, tagging is required. Using Claude's rendering logs, the two approaches handle input rendering differently.
if isClaudeUsageSemantic {
other["claude"] = true
other["usage_semantic"] = "anthropic"
}
if imageTokens != 0 {
other["image"] = true
other["image_ratio"] = imageRatio

View File

@@ -50,6 +50,8 @@ const (
RelayModeRealtime
RelayModeGemini
RelayModeResponsesCompact
)
func Path2RelayMode(path string) int {
@@ -70,6 +72,8 @@ func Path2RelayMode(path string) int {
relayMode = RelayModeImagesEdits
} else if strings.HasPrefix(path, "/v1/edits") {
relayMode = RelayModeEdits
} else if strings.HasPrefix(path, "/v1/responses/compact") {
relayMode = RelayModeResponsesCompact
} else if strings.HasPrefix(path, "/v1/responses") {
relayMode = RelayModeResponses
} else if strings.HasPrefix(path, "/v1/audio/speech") {

View File

@@ -45,6 +45,7 @@ func EmbeddingHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *
if err != nil {
return types.NewError(err, types.ErrorCodeConvertRequestFailed, types.ErrOptionWithSkipRetry())
}
relaycommon.AppendRequestConversionFromRequest(info, convertedRequest)
jsonData, err := json.Marshal(convertedRequest)
if err != nil {
return types.NewError(err, types.ErrorCodeConvertRequestFailed, types.ErrOptionWithSkipRetry())

View File

@@ -149,6 +149,7 @@ func GeminiHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *typ
if err != nil {
return types.NewError(err, types.ErrorCodeConvertRequestFailed, types.ErrOptionWithSkipRetry())
}
relaycommon.AppendRequestConversionFromRequest(info, convertedRequest)
jsonData, err := common.Marshal(convertedRequest)
if err != nil {
return types.NewError(err, types.ErrorCodeConvertRequestFailed, types.ErrOptionWithSkipRetry())

View File

@@ -4,13 +4,27 @@ import (
"encoding/json"
"errors"
"fmt"
"strings"
"github.com/QuantumNous/new-api/dto"
"github.com/QuantumNous/new-api/relay/common"
relayconstant "github.com/QuantumNous/new-api/relay/constant"
"github.com/QuantumNous/new-api/setting/ratio_setting"
"github.com/gin-gonic/gin"
)
func ModelMappedHelper(c *gin.Context, info *common.RelayInfo, request dto.Request) error {
if info.ChannelMeta == nil {
info.ChannelMeta = &common.ChannelMeta{}
}
isResponsesCompact := info.RelayMode == relayconstant.RelayModeResponsesCompact
originModelName := info.OriginModelName
mappingModelName := originModelName
if isResponsesCompact && strings.HasSuffix(originModelName, ratio_setting.CompactModelSuffix) {
mappingModelName = strings.TrimSuffix(originModelName, ratio_setting.CompactModelSuffix)
}
// map model name
modelMapping := c.GetString("model_mapping")
if modelMapping != "" && modelMapping != "{}" {
@@ -21,7 +35,7 @@ func ModelMappedHelper(c *gin.Context, info *common.RelayInfo, request dto.Reque
}
// 支持链式模型重定向,最终使用链尾的模型
currentModel := info.OriginModelName
currentModel := mappingModelName
visitedModels := map[string]bool{
currentModel: true,
}
@@ -51,6 +65,15 @@ func ModelMappedHelper(c *gin.Context, info *common.RelayInfo, request dto.Reque
info.UpstreamModelName = currentModel
}
}
if isResponsesCompact {
finalUpstreamModelName := mappingModelName
if info.IsModelMapped && info.UpstreamModelName != "" {
finalUpstreamModelName = info.UpstreamModelName
}
info.UpstreamModelName = finalUpstreamModelName
info.OriginModelName = ratio_setting.WithCompactModelSuffix(finalUpstreamModelName)
}
if request != nil {
request.SetModelName(info.UpstreamModelName)
}

View File

@@ -34,6 +34,8 @@ func GetAndValidateRequest(c *gin.Context, format types.RelayFormat) (request dt
request, err = GetAndValidateClaudeRequest(c)
case types.RelayFormatOpenAIResponses:
request, err = GetAndValidateResponsesRequest(c)
case types.RelayFormatOpenAIResponsesCompaction:
request, err = GetAndValidateResponsesCompactionRequest(c)
case types.RelayFormatOpenAIImage:
request, err = GetAndValidOpenAIImageRequest(c, relayMode)
@@ -125,6 +127,17 @@ func GetAndValidateResponsesRequest(c *gin.Context) (*dto.OpenAIResponsesRequest
return request, nil
}
func GetAndValidateResponsesCompactionRequest(c *gin.Context) (*dto.OpenAIResponsesCompactionRequest, error) {
request := &dto.OpenAIResponsesCompactionRequest{}
if err := common.UnmarshalBodyReusable(c, request); err != nil {
return nil, err
}
if request.Model == "" {
return nil, errors.New("model is required")
}
return request, nil
}
func GetAndValidOpenAIImageRequest(c *gin.Context, relayMode int) (*dto.ImageRequest, error) {
imageRequest := &dto.ImageRequest{}

View File

@@ -57,6 +57,7 @@ func ImageHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *type
if err != nil {
return types.NewError(err, types.ErrorCodeConvertRequestFailed)
}
relaycommon.AppendRequestConversionFromRequest(info, convertedRequest)
switch convertedRequest.(type) {
case *bytes.Buffer:

View File

@@ -0,0 +1,41 @@
package reasonmap
import (
"strings"
"github.com/QuantumNous/new-api/constant"
)
func ClaudeStopReasonToOpenAIFinishReason(stopReason string) string {
switch strings.ToLower(stopReason) {
case "stop_sequence":
return "stop"
case "end_turn":
return "stop"
case "max_tokens":
return "length"
case "tool_use":
return "tool_calls"
case "refusal":
return constant.FinishReasonContentFilter
default:
return stopReason
}
}
func OpenAIFinishReasonToClaudeStopReason(finishReason string) string {
switch strings.ToLower(finishReason) {
case "stop":
return "end_turn"
case "stop_sequence":
return "stop_sequence"
case "length", "max_tokens":
return "max_tokens"
case constant.FinishReasonContentFilter:
return "refusal"
case "tool_calls":
return "tool_use"
default:
return finishReason
}
}

View File

@@ -11,6 +11,7 @@ import (
"github.com/QuantumNous/new-api/relay/channel/baidu_v2"
"github.com/QuantumNous/new-api/relay/channel/claude"
"github.com/QuantumNous/new-api/relay/channel/cloudflare"
"github.com/QuantumNous/new-api/relay/channel/codex"
"github.com/QuantumNous/new-api/relay/channel/cohere"
"github.com/QuantumNous/new-api/relay/channel/coze"
"github.com/QuantumNous/new-api/relay/channel/deepseek"
@@ -117,6 +118,8 @@ func GetAdaptor(apiType int) channel.Adaptor {
return &minimax.Adaptor{}
case constant.APITypeReplicate:
return &replicate.Adaptor{}
case constant.APITypeCodex:
return &codex.Adaptor{}
}
return nil
}
@@ -148,7 +151,7 @@ func GetTaskAdaptor(platform constant.TaskPlatform) channel.TaskAdaptor {
return &taskvertex.TaskAdaptor{}
case constant.ChannelTypeVidu:
return &taskVidu.TaskAdaptor{}
case constant.ChannelTypeDoubaoVideo:
case constant.ChannelTypeDoubaoVideo, constant.ChannelTypeVolcEngine:
return &taskdoubao.TaskAdaptor{}
case constant.ChannelTypeSora, constant.ChannelTypeOpenAI:
return &tasksora.TaskAdaptor{}

View File

@@ -144,12 +144,20 @@ func RelayTaskSubmit(c *gin.Context, info *relaycommon.RelayInfo) (taskErr *dto.
if !success {
defaultPrice, ok := ratio_setting.GetDefaultModelPriceMap()[modelName]
if !ok {
modelPrice = 0.1
modelPrice = float64(common.PreConsumedQuota) / common.QuotaPerUnit
} else {
modelPrice = defaultPrice
}
}
// 处理 auto 分组:从 context 获取实际选中的分组
// 当使用 auto 分组时Distribute 中间件会将实际选中的分组存储在 ContextKeyAutoGroup 中
if autoGroup, exists := common.GetContextKey(c, constant.ContextKeyAutoGroup); exists {
if groupStr, ok := autoGroup.(string); ok && groupStr != "" {
info.UsingGroup = groupStr
}
}
// 预扣
groupRatio := ratio_setting.GetGroupRatio(info.UsingGroup)
var ratio float64

View File

@@ -53,6 +53,7 @@ func RerankHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *typ
if err != nil {
return types.NewError(err, types.ErrorCodeConvertRequestFailed, types.ErrOptionWithSkipRetry())
}
relaycommon.AppendRequestConversionFromRequest(info, convertedRequest)
jsonData, err := common.Marshal(convertedRequest)
if err != nil {
return types.NewError(err, types.ErrorCodeConvertRequestFailed, types.ErrOptionWithSkipRetry())

View File

@@ -8,8 +8,10 @@ import (
"strings"
"github.com/QuantumNous/new-api/common"
appconstant "github.com/QuantumNous/new-api/constant"
"github.com/QuantumNous/new-api/dto"
relaycommon "github.com/QuantumNous/new-api/relay/common"
relayconstant "github.com/QuantumNous/new-api/relay/constant"
"github.com/QuantumNous/new-api/relay/helper"
"github.com/QuantumNous/new-api/service"
"github.com/QuantumNous/new-api/setting/model_setting"
@@ -20,10 +22,37 @@ import (
func ResponsesHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *types.NewAPIError) {
info.InitChannelMeta(c)
if info.RelayMode == relayconstant.RelayModeResponsesCompact {
switch info.ApiType {
case appconstant.APITypeOpenAI, appconstant.APITypeCodex:
default:
return types.NewErrorWithStatusCode(
fmt.Errorf("unsupported endpoint %q for api type %d", "/v1/responses/compact", info.ApiType),
types.ErrorCodeInvalidRequest,
http.StatusBadRequest,
types.ErrOptionWithSkipRetry(),
)
}
}
responsesReq, ok := info.Request.(*dto.OpenAIResponsesRequest)
if !ok {
return types.NewErrorWithStatusCode(fmt.Errorf("invalid request type, expected dto.OpenAIResponsesRequest, got %T", info.Request), types.ErrorCodeInvalidRequest, http.StatusBadRequest, types.ErrOptionWithSkipRetry())
var responsesReq *dto.OpenAIResponsesRequest
switch req := info.Request.(type) {
case *dto.OpenAIResponsesRequest:
responsesReq = req
case *dto.OpenAIResponsesCompactionRequest:
responsesReq = &dto.OpenAIResponsesRequest{
Model: req.Model,
Input: req.Input,
Instructions: req.Instructions,
PreviousResponseID: req.PreviousResponseID,
}
default:
return types.NewErrorWithStatusCode(
fmt.Errorf("invalid request type, expected dto.OpenAIResponsesRequest or dto.OpenAIResponsesCompactionRequest, got %T", info.Request),
types.ErrorCodeInvalidRequest,
http.StatusBadRequest,
types.ErrOptionWithSkipRetry(),
)
}
request, err := common.DeepCopy(responsesReq)
@@ -53,6 +82,7 @@ func ResponsesHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *
if err != nil {
return types.NewError(err, types.ErrorCodeConvertRequestFailed, types.ErrOptionWithSkipRetry())
}
relaycommon.AppendRequestConversionFromRequest(info, convertedRequest)
jsonData, err := common.Marshal(convertedRequest)
if err != nil {
return types.NewError(err, types.ErrorCodeConvertRequestFailed, types.ErrOptionWithSkipRetry())
@@ -104,10 +134,28 @@ func ResponsesHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *
return newAPIError
}
usageDto := usage.(*dto.Usage)
if info.RelayMode == relayconstant.RelayModeResponsesCompact {
originModelName := info.OriginModelName
originPriceData := info.PriceData
_, err := helper.ModelPriceHelper(c, info, info.GetEstimatePromptTokens(), &types.TokenCountMeta{})
if err != nil {
info.OriginModelName = originModelName
info.PriceData = originPriceData
return types.NewError(err, types.ErrorCodeModelPriceError, types.ErrOptionWithSkipRetry())
}
postConsumeQuota(c, info, usageDto)
info.OriginModelName = originModelName
info.PriceData = originPriceData
return nil
}
if strings.HasPrefix(info.OriginModelName, "gpt-4o-audio") {
service.PostAudioConsumeQuota(c, info, usage.(*dto.Usage), "")
service.PostAudioConsumeQuota(c, info, usageDto, "")
} else {
postConsumeQuota(c, info, usage.(*dto.Usage))
postConsumeQuota(c, info, usageDto)
}
return nil
}

View File

@@ -11,6 +11,7 @@ import (
func SetApiRouter(router *gin.Engine) {
apiRouter := router.Group("/api")
apiRouter.Use(gzip.Gzip(gzip.DefaultCompression))
apiRouter.Use(middleware.BodyStorageCleanup()) // 清理请求体存储
apiRouter.Use(middleware.GlobalAPIRateLimit())
{
apiRouter.GET("/setup", controller.GetSetup)
@@ -123,9 +124,19 @@ func SetApiRouter(router *gin.Engine) {
{
optionRoute.GET("/", controller.GetOptions)
optionRoute.PUT("/", controller.UpdateOption)
optionRoute.GET("/channel_affinity_cache", controller.GetChannelAffinityCacheStats)
optionRoute.DELETE("/channel_affinity_cache", controller.ClearChannelAffinityCache)
optionRoute.POST("/rest_model_ratio", controller.ResetModelRatio)
optionRoute.POST("/migrate_console_setting", controller.MigrateConsoleSetting) // 用于迁移检测的旧键,下个版本会删除
}
performanceRoute := apiRouter.Group("/performance")
performanceRoute.Use(middleware.RootAuth())
{
performanceRoute.GET("/stats", controller.GetPerformanceStats)
performanceRoute.DELETE("/disk_cache", controller.ClearDiskCache)
performanceRoute.POST("/reset_stats", controller.ResetPerformanceStats)
performanceRoute.POST("/gc", controller.ForceGC)
}
ratioSyncRoute := apiRouter.Group("/ratio_sync")
ratioSyncRoute.Use(middleware.RootAuth())
{
@@ -156,6 +167,12 @@ func SetApiRouter(router *gin.Engine) {
channelRoute.POST("/fix", controller.FixChannelsAbilities)
channelRoute.GET("/fetch_models/:id", controller.FetchUpstreamModels)
channelRoute.POST("/fetch_models", controller.FetchModels)
channelRoute.POST("/codex/oauth/start", controller.StartCodexOAuth)
channelRoute.POST("/codex/oauth/complete", controller.CompleteCodexOAuth)
channelRoute.POST("/:id/codex/oauth/start", controller.StartCodexOAuthForChannel)
channelRoute.POST("/:id/codex/oauth/complete", controller.CompleteCodexOAuthForChannel)
channelRoute.POST("/:id/codex/refresh", controller.RefreshCodexChannelCredential)
channelRoute.GET("/:id/codex/usage", controller.GetCodexChannelUsage)
channelRoute.POST("/ollama/pull", controller.OllamaPullModel)
channelRoute.POST("/ollama/pull/stream", controller.OllamaPullModelStream)
channelRoute.DELETE("/ollama/delete", controller.OllamaDeleteModel)
@@ -203,6 +220,7 @@ func SetApiRouter(router *gin.Engine) {
logRoute.DELETE("/", middleware.AdminAuth(), controller.DeleteHistoryLogs)
logRoute.GET("/stat", middleware.AdminAuth(), controller.GetLogsStat)
logRoute.GET("/self/stat", middleware.UserAuth(), controller.GetLogsSelfStat)
logRoute.GET("/channel_affinity_usage_cache", middleware.AdminAuth(), controller.GetChannelAffinityUsageCacheStats)
logRoute.GET("/search", middleware.AdminAuth(), controller.SearchAllLogs)
logRoute.GET("/self", middleware.UserAuth(), controller.GetUserLogs)
logRoute.GET("/self/search", middleware.UserAuth(), controller.SearchUserLogs)

View File

@@ -13,6 +13,7 @@ import (
func SetRelayRouter(router *gin.Engine) {
router.Use(middleware.CORS())
router.Use(middleware.DecompressRequestMiddleware())
router.Use(middleware.BodyStorageCleanup()) // 清理请求体存储
router.Use(middleware.StatsMiddleware())
// https://platform.openai.com/docs/api-reference/introduction
modelsRouter := router.Group("/v1/models")
@@ -93,6 +94,9 @@ func SetRelayRouter(router *gin.Engine) {
httpRouter.POST("/responses", func(c *gin.Context) {
controller.Relay(c, types.RelayFormatOpenAIResponses)
})
httpRouter.POST("/responses/compact", func(c *gin.Context) {
controller.Relay(c, types.RelayFormatOpenAIResponsesCompaction)
})
// image related routes
httpRouter.POST("/edits", func(c *gin.Context) {

View File

@@ -57,9 +57,12 @@ func ShouldDisableChannel(channelType int, err *types.NewAPIError) bool {
if types.IsSkipRetryError(err) {
return false
}
if err.StatusCode == http.StatusUnauthorized {
if operation_setting.ShouldDisableByStatusCode(err.StatusCode) {
return true
}
//if err.StatusCode == http.StatusUnauthorized {
// return true
//}
if err.StatusCode == http.StatusForbidden {
switch channelType {
case constant.ChannelTypeGemini:

783
service/channel_affinity.go Normal file
View File

@@ -0,0 +1,783 @@
package service
import (
"fmt"
"hash/fnv"
"regexp"
"strconv"
"strings"
"sync"
"time"
"github.com/QuantumNous/new-api/common"
"github.com/QuantumNous/new-api/dto"
"github.com/QuantumNous/new-api/pkg/cachex"
"github.com/QuantumNous/new-api/setting/operation_setting"
"github.com/gin-gonic/gin"
"github.com/samber/hot"
"github.com/tidwall/gjson"
)
const (
ginKeyChannelAffinityCacheKey = "channel_affinity_cache_key"
ginKeyChannelAffinityTTLSeconds = "channel_affinity_ttl_seconds"
ginKeyChannelAffinityMeta = "channel_affinity_meta"
ginKeyChannelAffinityLogInfo = "channel_affinity_log_info"
ginKeyChannelAffinitySkipRetry = "channel_affinity_skip_retry_on_failure"
channelAffinityCacheNamespace = "new-api:channel_affinity:v1"
channelAffinityUsageCacheStatsNamespace = "new-api:channel_affinity_usage_cache_stats:v1"
)
var (
channelAffinityCacheOnce sync.Once
channelAffinityCache *cachex.HybridCache[int]
channelAffinityUsageCacheStatsOnce sync.Once
channelAffinityUsageCacheStatsCache *cachex.HybridCache[ChannelAffinityUsageCacheCounters]
channelAffinityRegexCache sync.Map // map[string]*regexp.Regexp
)
type channelAffinityMeta struct {
CacheKey string
TTLSeconds int
RuleName string
SkipRetry bool
KeySourceType string
KeySourceKey string
KeySourcePath string
KeyHint string
KeyFingerprint string
UsingGroup string
ModelName string
RequestPath string
}
type ChannelAffinityStatsContext struct {
RuleName string
UsingGroup string
KeyFingerprint string
TTLSeconds int64
}
type ChannelAffinityCacheStats struct {
Enabled bool `json:"enabled"`
Total int `json:"total"`
Unknown int `json:"unknown"`
ByRuleName map[string]int `json:"by_rule_name"`
CacheCapacity int `json:"cache_capacity"`
CacheAlgo string `json:"cache_algo"`
}
func getChannelAffinityCache() *cachex.HybridCache[int] {
channelAffinityCacheOnce.Do(func() {
setting := operation_setting.GetChannelAffinitySetting()
capacity := setting.MaxEntries
if capacity <= 0 {
capacity = 100_000
}
defaultTTLSeconds := setting.DefaultTTLSeconds
if defaultTTLSeconds <= 0 {
defaultTTLSeconds = 3600
}
channelAffinityCache = cachex.NewHybridCache[int](cachex.HybridCacheConfig[int]{
Namespace: cachex.Namespace(channelAffinityCacheNamespace),
Redis: common.RDB,
RedisEnabled: func() bool {
return common.RedisEnabled && common.RDB != nil
},
RedisCodec: cachex.IntCodec{},
Memory: func() *hot.HotCache[string, int] {
return hot.NewHotCache[string, int](hot.LRU, capacity).
WithTTL(time.Duration(defaultTTLSeconds) * time.Second).
WithJanitor().
Build()
},
})
})
return channelAffinityCache
}
func GetChannelAffinityCacheStats() ChannelAffinityCacheStats {
setting := operation_setting.GetChannelAffinitySetting()
if setting == nil {
return ChannelAffinityCacheStats{
Enabled: false,
Total: 0,
Unknown: 0,
ByRuleName: map[string]int{},
}
}
cache := getChannelAffinityCache()
mainCap, _ := cache.Capacity()
mainAlgo, _ := cache.Algorithm()
rules := setting.Rules
ruleByName := make(map[string]operation_setting.ChannelAffinityRule, len(rules))
for _, r := range rules {
name := strings.TrimSpace(r.Name)
if name == "" {
continue
}
if !r.IncludeRuleName {
continue
}
ruleByName[name] = r
}
byRuleName := make(map[string]int, len(ruleByName))
for name := range ruleByName {
byRuleName[name] = 0
}
keys, err := cache.Keys()
if err != nil {
common.SysError(fmt.Sprintf("channel affinity cache list keys failed: err=%v", err))
keys = nil
}
total := len(keys)
unknown := 0
for _, k := range keys {
prefix := channelAffinityCacheNamespace + ":"
if !strings.HasPrefix(k, prefix) {
unknown++
continue
}
rest := strings.TrimPrefix(k, prefix)
parts := strings.Split(rest, ":")
if len(parts) < 2 {
unknown++
continue
}
ruleName := parts[0]
rule, ok := ruleByName[ruleName]
if !ok {
unknown++
continue
}
if rule.IncludeUsingGroup {
if len(parts) < 3 {
unknown++
continue
}
}
byRuleName[ruleName]++
}
return ChannelAffinityCacheStats{
Enabled: setting.Enabled,
Total: total,
Unknown: unknown,
ByRuleName: byRuleName,
CacheCapacity: mainCap,
CacheAlgo: mainAlgo,
}
}
func ClearChannelAffinityCacheAll() int {
cache := getChannelAffinityCache()
keys, err := cache.Keys()
if err != nil {
common.SysError(fmt.Sprintf("channel affinity cache list keys failed: err=%v", err))
keys = nil
}
if len(keys) > 0 {
if _, err := cache.DeleteMany(keys); err != nil {
common.SysError(fmt.Sprintf("channel affinity cache delete many failed: err=%v", err))
}
}
return len(keys)
}
func ClearChannelAffinityCacheByRuleName(ruleName string) (int, error) {
ruleName = strings.TrimSpace(ruleName)
if ruleName == "" {
return 0, fmt.Errorf("rule_name 不能为空")
}
setting := operation_setting.GetChannelAffinitySetting()
if setting == nil {
return 0, fmt.Errorf("channel_affinity_setting 未初始化")
}
var matchedRule *operation_setting.ChannelAffinityRule
for i := range setting.Rules {
r := &setting.Rules[i]
if strings.TrimSpace(r.Name) != ruleName {
continue
}
matchedRule = r
break
}
if matchedRule == nil {
return 0, fmt.Errorf("未知规则名称")
}
if !matchedRule.IncludeRuleName {
return 0, fmt.Errorf("该规则未启用 include_rule_name无法按规则清空缓存")
}
cache := getChannelAffinityCache()
deleted, err := cache.DeleteByPrefix(ruleName)
if err != nil {
return 0, err
}
return deleted, nil
}
func matchAnyRegexCached(patterns []string, s string) bool {
if len(patterns) == 0 || s == "" {
return false
}
for _, pattern := range patterns {
if pattern == "" {
continue
}
re, ok := channelAffinityRegexCache.Load(pattern)
if !ok {
compiled, err := regexp.Compile(pattern)
if err != nil {
continue
}
re = compiled
channelAffinityRegexCache.Store(pattern, re)
}
if re.(*regexp.Regexp).MatchString(s) {
return true
}
}
return false
}
func matchAnyIncludeFold(patterns []string, s string) bool {
if len(patterns) == 0 || s == "" {
return false
}
sLower := strings.ToLower(s)
for _, p := range patterns {
p = strings.TrimSpace(p)
if p == "" {
continue
}
if strings.Contains(sLower, strings.ToLower(p)) {
return true
}
}
return false
}
func extractChannelAffinityValue(c *gin.Context, src operation_setting.ChannelAffinityKeySource) string {
switch src.Type {
case "context_int":
if src.Key == "" {
return ""
}
v := c.GetInt(src.Key)
if v <= 0 {
return ""
}
return strconv.Itoa(v)
case "context_string":
if src.Key == "" {
return ""
}
return strings.TrimSpace(c.GetString(src.Key))
case "gjson":
if src.Path == "" {
return ""
}
body, err := common.GetRequestBody(c)
if err != nil || len(body) == 0 {
return ""
}
res := gjson.GetBytes(body, src.Path)
if !res.Exists() {
return ""
}
switch res.Type {
case gjson.String, gjson.Number, gjson.True, gjson.False:
return strings.TrimSpace(res.String())
default:
return strings.TrimSpace(res.Raw)
}
default:
return ""
}
}
func buildChannelAffinityCacheKeySuffix(rule operation_setting.ChannelAffinityRule, usingGroup string, affinityValue string) string {
parts := make([]string, 0, 3)
if rule.IncludeRuleName && rule.Name != "" {
parts = append(parts, rule.Name)
}
if rule.IncludeUsingGroup && usingGroup != "" {
parts = append(parts, usingGroup)
}
parts = append(parts, affinityValue)
return strings.Join(parts, ":")
}
func setChannelAffinityContext(c *gin.Context, meta channelAffinityMeta) {
c.Set(ginKeyChannelAffinityCacheKey, meta.CacheKey)
c.Set(ginKeyChannelAffinityTTLSeconds, meta.TTLSeconds)
c.Set(ginKeyChannelAffinityMeta, meta)
}
func getChannelAffinityContext(c *gin.Context) (string, int, bool) {
keyAny, ok := c.Get(ginKeyChannelAffinityCacheKey)
if !ok {
return "", 0, false
}
key, ok := keyAny.(string)
if !ok || key == "" {
return "", 0, false
}
ttlAny, ok := c.Get(ginKeyChannelAffinityTTLSeconds)
if !ok {
return key, 0, true
}
ttlSeconds, _ := ttlAny.(int)
return key, ttlSeconds, true
}
func getChannelAffinityMeta(c *gin.Context) (channelAffinityMeta, bool) {
anyMeta, ok := c.Get(ginKeyChannelAffinityMeta)
if !ok {
return channelAffinityMeta{}, false
}
meta, ok := anyMeta.(channelAffinityMeta)
if !ok {
return channelAffinityMeta{}, false
}
return meta, true
}
func GetChannelAffinityStatsContext(c *gin.Context) (ChannelAffinityStatsContext, bool) {
if c == nil {
return ChannelAffinityStatsContext{}, false
}
meta, ok := getChannelAffinityMeta(c)
if !ok {
return ChannelAffinityStatsContext{}, false
}
ruleName := strings.TrimSpace(meta.RuleName)
keyFp := strings.TrimSpace(meta.KeyFingerprint)
usingGroup := strings.TrimSpace(meta.UsingGroup)
if ruleName == "" || keyFp == "" {
return ChannelAffinityStatsContext{}, false
}
ttlSeconds := int64(meta.TTLSeconds)
if ttlSeconds <= 0 {
return ChannelAffinityStatsContext{}, false
}
return ChannelAffinityStatsContext{
RuleName: ruleName,
UsingGroup: usingGroup,
KeyFingerprint: keyFp,
TTLSeconds: ttlSeconds,
}, true
}
func affinityFingerprint(s string) string {
if s == "" {
return ""
}
hex := common.Sha1([]byte(s))
if len(hex) >= 8 {
return hex[:8]
}
return hex
}
func buildChannelAffinityKeyHint(s string) string {
s = strings.TrimSpace(s)
if s == "" {
return ""
}
s = strings.ReplaceAll(s, "\n", " ")
s = strings.ReplaceAll(s, "\r", " ")
if len(s) <= 12 {
return s
}
return s[:4] + "..." + s[len(s)-4:]
}
func GetPreferredChannelByAffinity(c *gin.Context, modelName string, usingGroup string) (int, bool) {
setting := operation_setting.GetChannelAffinitySetting()
if setting == nil || !setting.Enabled {
return 0, false
}
path := ""
if c != nil && c.Request != nil && c.Request.URL != nil {
path = c.Request.URL.Path
}
userAgent := ""
if c != nil && c.Request != nil {
userAgent = c.Request.UserAgent()
}
for _, rule := range setting.Rules {
if !matchAnyRegexCached(rule.ModelRegex, modelName) {
continue
}
if len(rule.PathRegex) > 0 && !matchAnyRegexCached(rule.PathRegex, path) {
continue
}
if len(rule.UserAgentInclude) > 0 && !matchAnyIncludeFold(rule.UserAgentInclude, userAgent) {
continue
}
var affinityValue string
var usedSource operation_setting.ChannelAffinityKeySource
for _, src := range rule.KeySources {
affinityValue = extractChannelAffinityValue(c, src)
if affinityValue != "" {
usedSource = src
break
}
}
if affinityValue == "" {
continue
}
if rule.ValueRegex != "" && !matchAnyRegexCached([]string{rule.ValueRegex}, affinityValue) {
continue
}
ttlSeconds := rule.TTLSeconds
if ttlSeconds <= 0 {
ttlSeconds = setting.DefaultTTLSeconds
}
cacheKeySuffix := buildChannelAffinityCacheKeySuffix(rule, usingGroup, affinityValue)
cacheKeyFull := channelAffinityCacheNamespace + ":" + cacheKeySuffix
setChannelAffinityContext(c, channelAffinityMeta{
CacheKey: cacheKeyFull,
TTLSeconds: ttlSeconds,
RuleName: rule.Name,
SkipRetry: rule.SkipRetryOnFailure,
KeySourceType: strings.TrimSpace(usedSource.Type),
KeySourceKey: strings.TrimSpace(usedSource.Key),
KeySourcePath: strings.TrimSpace(usedSource.Path),
KeyHint: buildChannelAffinityKeyHint(affinityValue),
KeyFingerprint: affinityFingerprint(affinityValue),
UsingGroup: usingGroup,
ModelName: modelName,
RequestPath: path,
})
cache := getChannelAffinityCache()
channelID, found, err := cache.Get(cacheKeySuffix)
if err != nil {
common.SysError(fmt.Sprintf("channel affinity cache get failed: key=%s, err=%v", cacheKeyFull, err))
return 0, false
}
if found {
return channelID, true
}
return 0, false
}
return 0, false
}
func ShouldSkipRetryAfterChannelAffinityFailure(c *gin.Context) bool {
if c == nil {
return false
}
v, ok := c.Get(ginKeyChannelAffinitySkipRetry)
if !ok {
return false
}
b, ok := v.(bool)
if !ok {
return false
}
return b
}
func MarkChannelAffinityUsed(c *gin.Context, selectedGroup string, channelID int) {
if c == nil || channelID <= 0 {
return
}
meta, ok := getChannelAffinityMeta(c)
if !ok {
return
}
c.Set(ginKeyChannelAffinitySkipRetry, meta.SkipRetry)
info := map[string]interface{}{
"reason": meta.RuleName,
"rule_name": meta.RuleName,
"using_group": meta.UsingGroup,
"selected_group": selectedGroup,
"model": meta.ModelName,
"request_path": meta.RequestPath,
"channel_id": channelID,
"key_source": meta.KeySourceType,
"key_key": meta.KeySourceKey,
"key_path": meta.KeySourcePath,
"key_hint": meta.KeyHint,
"key_fp": meta.KeyFingerprint,
}
c.Set(ginKeyChannelAffinityLogInfo, info)
}
func AppendChannelAffinityAdminInfo(c *gin.Context, adminInfo map[string]interface{}) {
if c == nil || adminInfo == nil {
return
}
anyInfo, ok := c.Get(ginKeyChannelAffinityLogInfo)
if !ok || anyInfo == nil {
return
}
adminInfo["channel_affinity"] = anyInfo
}
func RecordChannelAffinity(c *gin.Context, channelID int) {
if channelID <= 0 {
return
}
setting := operation_setting.GetChannelAffinitySetting()
if setting == nil || !setting.Enabled {
return
}
if setting.SwitchOnSuccess && c != nil {
if successChannelID := c.GetInt("channel_id"); successChannelID > 0 {
channelID = successChannelID
}
}
cacheKey, ttlSeconds, ok := getChannelAffinityContext(c)
if !ok {
return
}
if ttlSeconds <= 0 {
ttlSeconds = setting.DefaultTTLSeconds
}
if ttlSeconds <= 0 {
ttlSeconds = 3600
}
cache := getChannelAffinityCache()
if err := cache.SetWithTTL(cacheKey, channelID, time.Duration(ttlSeconds)*time.Second); err != nil {
common.SysError(fmt.Sprintf("channel affinity cache set failed: key=%s, err=%v", cacheKey, err))
}
}
type ChannelAffinityUsageCacheStats struct {
RuleName string `json:"rule_name"`
UsingGroup string `json:"using_group"`
KeyFingerprint string `json:"key_fp"`
Hit int64 `json:"hit"`
Total int64 `json:"total"`
WindowSeconds int64 `json:"window_seconds"`
PromptTokens int64 `json:"prompt_tokens"`
CompletionTokens int64 `json:"completion_tokens"`
TotalTokens int64 `json:"total_tokens"`
CachedTokens int64 `json:"cached_tokens"`
PromptCacheHitTokens int64 `json:"prompt_cache_hit_tokens"`
LastSeenAt int64 `json:"last_seen_at"`
}
type ChannelAffinityUsageCacheCounters struct {
Hit int64 `json:"hit"`
Total int64 `json:"total"`
WindowSeconds int64 `json:"window_seconds"`
PromptTokens int64 `json:"prompt_tokens"`
CompletionTokens int64 `json:"completion_tokens"`
TotalTokens int64 `json:"total_tokens"`
CachedTokens int64 `json:"cached_tokens"`
PromptCacheHitTokens int64 `json:"prompt_cache_hit_tokens"`
LastSeenAt int64 `json:"last_seen_at"`
}
var channelAffinityUsageCacheStatsLocks [64]sync.Mutex
func ObserveChannelAffinityUsageCacheFromContext(c *gin.Context, usage *dto.Usage) {
statsCtx, ok := GetChannelAffinityStatsContext(c)
if !ok {
return
}
observeChannelAffinityUsageCache(statsCtx, usage)
}
func GetChannelAffinityUsageCacheStats(ruleName, usingGroup, keyFp string) ChannelAffinityUsageCacheStats {
ruleName = strings.TrimSpace(ruleName)
usingGroup = strings.TrimSpace(usingGroup)
keyFp = strings.TrimSpace(keyFp)
entryKey := channelAffinityUsageCacheEntryKey(ruleName, usingGroup, keyFp)
if entryKey == "" {
return ChannelAffinityUsageCacheStats{
RuleName: ruleName,
UsingGroup: usingGroup,
KeyFingerprint: keyFp,
}
}
cache := getChannelAffinityUsageCacheStatsCache()
v, found, err := cache.Get(entryKey)
if err != nil || !found {
return ChannelAffinityUsageCacheStats{
RuleName: ruleName,
UsingGroup: usingGroup,
KeyFingerprint: keyFp,
}
}
return ChannelAffinityUsageCacheStats{
RuleName: ruleName,
UsingGroup: usingGroup,
KeyFingerprint: keyFp,
Hit: v.Hit,
Total: v.Total,
WindowSeconds: v.WindowSeconds,
PromptTokens: v.PromptTokens,
CompletionTokens: v.CompletionTokens,
TotalTokens: v.TotalTokens,
CachedTokens: v.CachedTokens,
PromptCacheHitTokens: v.PromptCacheHitTokens,
LastSeenAt: v.LastSeenAt,
}
}
func observeChannelAffinityUsageCache(statsCtx ChannelAffinityStatsContext, usage *dto.Usage) {
entryKey := channelAffinityUsageCacheEntryKey(statsCtx.RuleName, statsCtx.UsingGroup, statsCtx.KeyFingerprint)
if entryKey == "" {
return
}
windowSeconds := statsCtx.TTLSeconds
if windowSeconds <= 0 {
return
}
cache := getChannelAffinityUsageCacheStatsCache()
ttl := time.Duration(windowSeconds) * time.Second
lock := channelAffinityUsageCacheStatsLock(entryKey)
lock.Lock()
defer lock.Unlock()
prev, found, err := cache.Get(entryKey)
if err != nil {
return
}
next := prev
if !found {
next = ChannelAffinityUsageCacheCounters{}
}
next.Total++
hit, cachedTokens, promptCacheHitTokens := usageCacheSignals(usage)
if hit {
next.Hit++
}
next.WindowSeconds = windowSeconds
next.LastSeenAt = time.Now().Unix()
next.CachedTokens += cachedTokens
next.PromptCacheHitTokens += promptCacheHitTokens
next.PromptTokens += int64(usagePromptTokens(usage))
next.CompletionTokens += int64(usageCompletionTokens(usage))
next.TotalTokens += int64(usageTotalTokens(usage))
_ = cache.SetWithTTL(entryKey, next, ttl)
}
func channelAffinityUsageCacheEntryKey(ruleName, usingGroup, keyFp string) string {
ruleName = strings.TrimSpace(ruleName)
usingGroup = strings.TrimSpace(usingGroup)
keyFp = strings.TrimSpace(keyFp)
if ruleName == "" || keyFp == "" {
return ""
}
return ruleName + "\n" + usingGroup + "\n" + keyFp
}
func usageCacheSignals(usage *dto.Usage) (hit bool, cachedTokens int64, promptCacheHitTokens int64) {
if usage == nil {
return false, 0, 0
}
cached := int64(0)
if usage.PromptTokensDetails.CachedTokens > 0 {
cached = int64(usage.PromptTokensDetails.CachedTokens)
} else if usage.InputTokensDetails != nil && usage.InputTokensDetails.CachedTokens > 0 {
cached = int64(usage.InputTokensDetails.CachedTokens)
}
pcht := int64(0)
if usage.PromptCacheHitTokens > 0 {
pcht = int64(usage.PromptCacheHitTokens)
}
return cached > 0 || pcht > 0, cached, pcht
}
func usagePromptTokens(usage *dto.Usage) int {
if usage == nil {
return 0
}
if usage.PromptTokens > 0 {
return usage.PromptTokens
}
return usage.InputTokens
}
func usageCompletionTokens(usage *dto.Usage) int {
if usage == nil {
return 0
}
if usage.CompletionTokens > 0 {
return usage.CompletionTokens
}
return usage.OutputTokens
}
func usageTotalTokens(usage *dto.Usage) int {
if usage == nil {
return 0
}
if usage.TotalTokens > 0 {
return usage.TotalTokens
}
pt := usagePromptTokens(usage)
ct := usageCompletionTokens(usage)
if pt > 0 || ct > 0 {
return pt + ct
}
return 0
}
func getChannelAffinityUsageCacheStatsCache() *cachex.HybridCache[ChannelAffinityUsageCacheCounters] {
channelAffinityUsageCacheStatsOnce.Do(func() {
setting := operation_setting.GetChannelAffinitySetting()
capacity := 100_000
defaultTTLSeconds := 3600
if setting != nil {
if setting.MaxEntries > 0 {
capacity = setting.MaxEntries
}
if setting.DefaultTTLSeconds > 0 {
defaultTTLSeconds = setting.DefaultTTLSeconds
}
}
channelAffinityUsageCacheStatsCache = cachex.NewHybridCache[ChannelAffinityUsageCacheCounters](cachex.HybridCacheConfig[ChannelAffinityUsageCacheCounters]{
Namespace: cachex.Namespace(channelAffinityUsageCacheStatsNamespace),
Redis: common.RDB,
RedisEnabled: func() bool {
return common.RedisEnabled && common.RDB != nil
},
RedisCodec: cachex.JSONCodec[ChannelAffinityUsageCacheCounters]{},
Memory: func() *hot.HotCache[string, ChannelAffinityUsageCacheCounters] {
return hot.NewHotCache[string, ChannelAffinityUsageCacheCounters](hot.LRU, capacity).
WithTTL(time.Duration(defaultTTLSeconds) * time.Second).
WithJanitor().
Build()
},
})
})
return channelAffinityUsageCacheStatsCache
}
func channelAffinityUsageCacheStatsLock(key string) *sync.Mutex {
h := fnv.New32a()
_, _ = h.Write([]byte(key))
idx := h.Sum32() % uint32(len(channelAffinityUsageCacheStatsLocks))
return &channelAffinityUsageCacheStatsLocks[idx]
}

Some files were not shown because too many files have changed in this diff Show More