mirror of
https://github.com/QuantumNous/new-api.git
synced 2026-04-10 07:07:26 +00:00
Compare commits
11 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b05bb899f1 | ||
|
|
c51a30b862 | ||
|
|
9c4d3a6359 | ||
|
|
6936a795a6 | ||
|
|
74defce481 | ||
|
|
1c4d7fd84b | ||
|
|
f907c25b21 | ||
|
|
1b64db5521 | ||
|
|
d608a6f123 | ||
|
|
361b0abec9 | ||
|
|
e01b517843 |
@@ -110,6 +110,7 @@ For detailed configuration instructions, please refer to [Installation Guide-Env
|
||||
- `AZURE_DEFAULT_API_VERSION`: Azure channel default API version, default is `2025-04-01-preview`
|
||||
- `NOTIFICATION_LIMIT_DURATION_MINUTE`: Notification limit duration, default is `10` minutes
|
||||
- `NOTIFY_LIMIT_COUNT`: Maximum number of user notifications within the specified duration, default is `2`
|
||||
- `ERROR_LOG_ENABLED=true`: Whether to record and display error logs, default is `false`
|
||||
|
||||
## Deployment
|
||||
|
||||
|
||||
@@ -110,6 +110,7 @@ New API提供了丰富的功能,详细特性请参考[特性说明](https://do
|
||||
- `AZURE_DEFAULT_API_VERSION`:Azure渠道默认API版本,默认 `2025-04-01-preview`
|
||||
- `NOTIFICATION_LIMIT_DURATION_MINUTE`:通知限制持续时间,默认 `10`分钟
|
||||
- `NOTIFY_LIMIT_COUNT`:用户通知在指定持续时间内的最大数量,默认 `2`
|
||||
- `ERROR_LOG_ENABLED=true`: 是否记录并显示错误日志,默认`false`
|
||||
|
||||
## 部署
|
||||
|
||||
|
||||
@@ -43,7 +43,7 @@ type GeneralOpenAIRequest struct {
|
||||
ResponseFormat *ResponseFormat `json:"response_format,omitempty"`
|
||||
EncodingFormat any `json:"encoding_format,omitempty"`
|
||||
Seed float64 `json:"seed,omitempty"`
|
||||
ParallelTooCalls bool `json:"parallel_tool_calls,omitempty"`
|
||||
ParallelTooCalls *bool `json:"parallel_tool_calls,omitempty"`
|
||||
Tools []ToolCallRequest `json:"tools,omitempty"`
|
||||
ToolChoice any `json:"tool_choice,omitempty"`
|
||||
User string `json:"user,omitempty"`
|
||||
|
||||
@@ -104,6 +104,65 @@ func DoWssRequest(a Adaptor, c *gin.Context, info *common.RelayInfo, requestBody
|
||||
return targetConn, nil
|
||||
}
|
||||
|
||||
func startPingKeepAlive(c *gin.Context, pingInterval time.Duration) context.CancelFunc {
|
||||
pingerCtx, stopPinger := context.WithCancel(context.Background())
|
||||
|
||||
gopool.Go(func() {
|
||||
defer func() {
|
||||
if common2.DebugEnabled {
|
||||
println("SSE ping goroutine stopped.")
|
||||
}
|
||||
}()
|
||||
|
||||
if pingInterval <= 0 {
|
||||
pingInterval = helper.DefaultPingInterval
|
||||
}
|
||||
|
||||
ticker := time.NewTicker(pingInterval)
|
||||
// 退出时清理 ticker
|
||||
defer ticker.Stop()
|
||||
|
||||
var pingMutex sync.Mutex
|
||||
if common2.DebugEnabled {
|
||||
println("SSE ping goroutine started")
|
||||
}
|
||||
|
||||
for {
|
||||
select {
|
||||
// 发送 ping 数据
|
||||
case <-ticker.C:
|
||||
if err := sendPingData(c, &pingMutex); err != nil {
|
||||
return
|
||||
}
|
||||
// 收到退出信号
|
||||
case <-pingerCtx.Done():
|
||||
return
|
||||
// request 结束
|
||||
case <-c.Request.Context().Done():
|
||||
return
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
return stopPinger
|
||||
}
|
||||
|
||||
func sendPingData(c *gin.Context, mutex *sync.Mutex) error {
|
||||
mutex.Lock()
|
||||
defer mutex.Unlock()
|
||||
|
||||
err := helper.PingData(c)
|
||||
if err != nil {
|
||||
common2.LogError(c, "SSE ping error: "+err.Error())
|
||||
return err
|
||||
}
|
||||
|
||||
if common2.DebugEnabled {
|
||||
println("SSE ping data sent.")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func doRequest(c *gin.Context, req *http.Request, info *common.RelayInfo) (*http.Response, error) {
|
||||
var client *http.Client
|
||||
var err error
|
||||
@@ -115,68 +174,28 @@ func doRequest(c *gin.Context, req *http.Request, info *common.RelayInfo) (*http
|
||||
} else {
|
||||
client = service.GetHttpClient()
|
||||
}
|
||||
// 流式请求 ping 保活
|
||||
var stopPinger func()
|
||||
generalSettings := operation_setting.GetGeneralSetting()
|
||||
pingEnabled := generalSettings.PingIntervalEnabled
|
||||
var pingerWg sync.WaitGroup
|
||||
|
||||
if info.IsStream {
|
||||
helper.SetEventStreamHeaders(c)
|
||||
pingInterval := time.Duration(generalSettings.PingIntervalSeconds) * time.Second
|
||||
var pingerCtx context.Context
|
||||
pingerCtx, stopPinger = context.WithCancel(c.Request.Context())
|
||||
|
||||
if pingEnabled {
|
||||
pingerWg.Add(1)
|
||||
gopool.Go(func() {
|
||||
defer pingerWg.Done()
|
||||
if pingInterval <= 0 {
|
||||
pingInterval = helper.DefaultPingInterval
|
||||
}
|
||||
|
||||
ticker := time.NewTicker(pingInterval)
|
||||
defer ticker.Stop()
|
||||
var pingMutex sync.Mutex
|
||||
if common2.DebugEnabled {
|
||||
println("SSE ping goroutine started")
|
||||
}
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ticker.C:
|
||||
pingMutex.Lock()
|
||||
err2 := helper.PingData(c)
|
||||
pingMutex.Unlock()
|
||||
if err2 != nil {
|
||||
common2.LogError(c, "SSE ping error: "+err.Error())
|
||||
return
|
||||
}
|
||||
if common2.DebugEnabled {
|
||||
println("SSE ping data sent.")
|
||||
}
|
||||
case <-pingerCtx.Done():
|
||||
if common2.DebugEnabled {
|
||||
println("SSE ping goroutine stopped.")
|
||||
}
|
||||
return
|
||||
}
|
||||
}
|
||||
})
|
||||
// 处理流式请求的 ping 保活
|
||||
generalSettings := operation_setting.GetGeneralSetting()
|
||||
if generalSettings.PingIntervalEnabled {
|
||||
pingInterval := time.Duration(generalSettings.PingIntervalSeconds) * time.Second
|
||||
stopPinger := startPingKeepAlive(c, pingInterval)
|
||||
defer stopPinger()
|
||||
}
|
||||
}
|
||||
|
||||
resp, err := client.Do(req)
|
||||
// request结束后停止ping
|
||||
if info.IsStream && pingEnabled {
|
||||
stopPinger()
|
||||
pingerWg.Wait()
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if resp == nil {
|
||||
return nil, errors.New("resp is nil")
|
||||
}
|
||||
|
||||
_ = req.Body.Close()
|
||||
_ = c.Request.Body.Close()
|
||||
return resp, nil
|
||||
|
||||
@@ -39,15 +39,22 @@ func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest, info *relaycommon
|
||||
}
|
||||
|
||||
if model_setting.GetGeminiSettings().ThinkingAdapterEnabled {
|
||||
if strings.HasSuffix(info.OriginModelName, "-thinking") {
|
||||
budgetTokens := model_setting.GetGeminiSettings().ThinkingAdapterBudgetTokensPercentage * float64(geminiRequest.GenerationConfig.MaxOutputTokens)
|
||||
if budgetTokens == 0 || budgetTokens > 24576 {
|
||||
budgetTokens = 24576
|
||||
}
|
||||
geminiRequest.GenerationConfig.ThinkingConfig = &GeminiThinkingConfig{
|
||||
ThinkingBudget: common.GetPointer(int(budgetTokens)),
|
||||
IncludeThoughts: true,
|
||||
}
|
||||
if strings.HasSuffix(info.OriginModelName, "-thinking") {
|
||||
// 如果模型名以 gemini-2.5-pro 开头,不设置 ThinkingBudget
|
||||
if strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro") {
|
||||
geminiRequest.GenerationConfig.ThinkingConfig = &GeminiThinkingConfig{
|
||||
IncludeThoughts: true,
|
||||
}
|
||||
} else {
|
||||
budgetTokens := model_setting.GetGeminiSettings().ThinkingAdapterBudgetTokensPercentage * float64(geminiRequest.GenerationConfig.MaxOutputTokens)
|
||||
if budgetTokens == 0 || budgetTokens > 24576 {
|
||||
budgetTokens = 24576
|
||||
}
|
||||
geminiRequest.GenerationConfig.ThinkingConfig = &GeminiThinkingConfig{
|
||||
ThinkingBudget: common.GetPointer(int(budgetTokens)),
|
||||
IncludeThoughts: true,
|
||||
}
|
||||
}
|
||||
} else if strings.HasSuffix(info.OriginModelName, "-nothinking") {
|
||||
geminiRequest.GenerationConfig.ThinkingConfig = &GeminiThinkingConfig{
|
||||
ThinkingBudget: common.GetPointer(0),
|
||||
|
||||
@@ -273,36 +273,25 @@ func OpenaiHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayI
|
||||
}
|
||||
|
||||
func OpenaiTTSHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (*dto.OpenAIErrorWithStatusCode, *dto.Usage) {
|
||||
responseBody, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return service.OpenAIErrorWrapper(err, "read_response_body_failed", http.StatusInternalServerError), nil
|
||||
}
|
||||
err = resp.Body.Close()
|
||||
if err != nil {
|
||||
return service.OpenAIErrorWrapper(err, "close_response_body_failed", http.StatusInternalServerError), nil
|
||||
}
|
||||
// Reset response body
|
||||
resp.Body = io.NopCloser(bytes.NewBuffer(responseBody))
|
||||
// We shouldn't set the header before we parse the response body, because the parse part may fail.
|
||||
// And then we will have to send an error response, but in this case, the header has already been set.
|
||||
// So the httpClient will be confused by the response.
|
||||
// For example, Postman will report error, and we cannot check the response at all.
|
||||
// the status code has been judged before, if there is a body reading failure,
|
||||
// it should be regarded as a non-recoverable error, so it should not return err for external retry.
|
||||
// Analogous to nginx's load balancing, it will only retry if it can't be requested or
|
||||
// if the upstream returns a specific status code, once the upstream has already written the header,
|
||||
// the subsequent failure of the response body should be regarded as a non-recoverable error,
|
||||
// and can be terminated directly.
|
||||
defer resp.Body.Close()
|
||||
usage := &dto.Usage{}
|
||||
usage.PromptTokens = info.PromptTokens
|
||||
usage.TotalTokens = info.PromptTokens
|
||||
for k, v := range resp.Header {
|
||||
c.Writer.Header().Set(k, v[0])
|
||||
}
|
||||
c.Writer.WriteHeader(resp.StatusCode)
|
||||
_, err = io.Copy(c.Writer, resp.Body)
|
||||
c.Writer.WriteHeaderNow()
|
||||
_, err := io.Copy(c.Writer, resp.Body)
|
||||
if err != nil {
|
||||
return service.OpenAIErrorWrapper(err, "copy_response_body_failed", http.StatusInternalServerError), nil
|
||||
common.LogError(c, err.Error())
|
||||
}
|
||||
err = resp.Body.Close()
|
||||
if err != nil {
|
||||
return service.OpenAIErrorWrapper(err, "close_response_body_failed", http.StatusInternalServerError), nil
|
||||
}
|
||||
|
||||
usage := &dto.Usage{}
|
||||
usage.PromptTokens = info.PromptTokens
|
||||
usage.TotalTokens = info.PromptTokens
|
||||
return nil, usage
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user