Merge pull request #1134 from QuantumNous/fix_ping_keepalive

fix: 流式请求ping
2026-04-10 07:07:26 +00:00 · 2025-05-31 22:16:16 +08:00 · 2025-05-31 22:13:17 +08:00 · 2025-05-31 18:44:48 +08:00 · 2025-05-31 18:44:24 +08:00 · 2025-05-31 18:43:57 +08:00
6 changed files with 100 additions and 83 deletions
--- a/README.en.md
+++ b/README.en.md
@@ -110,6 +110,7 @@ For detailed configuration instructions, please refer to [Installation Guide-Env
 - `AZURE_DEFAULT_API_VERSION`: Azure channel default API version, default is `2025-04-01-preview`
 - `NOTIFICATION_LIMIT_DURATION_MINUTE`: Notification limit duration, default is `10` minutes
 - `NOTIFY_LIMIT_COUNT`: Maximum number of user notifications within the specified duration, default is `2`
+- `ERROR_LOG_ENABLED=true`: Whether to record and display error logs, default is `false`

 ## Deployment

--- a/README.md
+++ b/README.md
@@ -110,6 +110,7 @@ New API提供了丰富的功能，详细特性请参考[特性说明](https://do
 - `AZURE_DEFAULT_API_VERSION`：Azure渠道默认API版本，默认 `2025-04-01-preview`
 - `NOTIFICATION_LIMIT_DURATION_MINUTE`：通知限制持续时间，默认 `10`分钟
 - `NOTIFY_LIMIT_COUNT`：用户通知在指定持续时间内的最大数量，默认 `2`
+- `ERROR_LOG_ENABLED=true`：是否记录并显示错误日志，默认 `false`

 ## 部署

--- a/dto/openai_request.go
+++ b/dto/openai_request.go
@@ -43,7 +43,7 @@ type GeneralOpenAIRequest struct {
 	ResponseFormat   *ResponseFormat   `json:"response_format,omitempty"`
 	EncodingFormat   any               `json:"encoding_format,omitempty"`
 	Seed             float64           `json:"seed,omitempty"`
-	ParallelTooCalls bool              `json:"parallel_tool_calls,omitempty"`
+	ParallelTooCalls *bool             `json:"parallel_tool_calls,omitempty"`
 	Tools            []ToolCallRequest `json:"tools,omitempty"`
 	ToolChoice       any               `json:"tool_choice,omitempty"`
 	User             string            `json:"user,omitempty"`
--- a/relay/channel/api_request.go
+++ b/relay/channel/api_request.go
@@ -104,6 +104,65 @@ func DoWssRequest(a Adaptor, c *gin.Context, info *common.RelayInfo, requestBody
 	return targetConn, nil
 }

+func startPingKeepAlive(c *gin.Context, pingInterval time.Duration) context.CancelFunc { // Starts a background SSE ping loop for c; call the returned func to stop it.
+	pingerCtx, stopPinger := context.WithCancel(context.Background()) // not derived from the request context; the loop watches c.Request.Context() separately
+
+	gopool.Go(func() {
+		defer func() {
+			if common2.DebugEnabled {
+				println("SSE ping goroutine stopped.")
+			}
+		}()
+
+		if pingInterval <= 0 {
+			pingInterval = helper.DefaultPingInterval // fall back to the default for non-positive intervals
+		}
+
+		ticker := time.NewTicker(pingInterval)
+		// Stop the ticker when this goroutine exits.
+		defer ticker.Stop()
+
+		var pingMutex sync.Mutex // NOTE(review): local to this goroutine, so it only serializes pings sent from this loop
+		if common2.DebugEnabled {
+			println("SSE ping goroutine started")
+		}
+
+		for {
+			select {
+			// Ticker fired: send one ping; exit the loop if sending fails.
+			case <-ticker.C:
+				if err := sendPingData(c, &pingMutex); err != nil {
+					return
+				}
+			// Stop signal: the returned cancel func was called.
+			case <-pingerCtx.Done():
+				return
+			// The incoming request's context is done.
+			case <-c.Request.Context().Done():
+				return
+			}
+		}
+	})
+
+	return stopPinger // cancels pingerCtx only; it does not wait for the goroutine to exit
+}
+
+func sendPingData(c *gin.Context, mutex *sync.Mutex) error { // Sends one ping via helper.PingData while holding mutex; returns its error, if any.
+	mutex.Lock()
+	defer mutex.Unlock() // released on every return path
+
+	err := helper.PingData(c)
+	if err != nil {
+		common2.LogError(c, "SSE ping error: "+err.Error()) // log the failure before returning it
+		return err
+	}
+
+	if common2.DebugEnabled {
+		println("SSE ping data sent.")
+	}
+	return nil
+}
+
 func doRequest(c *gin.Context, req *http.Request, info *common.RelayInfo) (*http.Response, error) {
 	var client *http.Client
 	var err error
@@ -115,68 +174,28 @@ func doRequest(c *gin.Context, req *http.Request, info *common.RelayInfo) (*http
 	} else {
 		client = service.GetHttpClient()
 	}
-	// 流式请求 ping 保活
-	var stopPinger func()
-	generalSettings := operation_setting.GetGeneralSetting()
-	pingEnabled := generalSettings.PingIntervalEnabled
-	var pingerWg sync.WaitGroup
+
 	if info.IsStream {
 		helper.SetEventStreamHeaders(c)
-		pingInterval := time.Duration(generalSettings.PingIntervalSeconds) * time.Second
-		var pingerCtx context.Context
-		pingerCtx, stopPinger = context.WithCancel(c.Request.Context())

-		if pingEnabled {
-			pingerWg.Add(1)
-			gopool.Go(func() {
-				defer pingerWg.Done()
-				if pingInterval <= 0 {
-					pingInterval = helper.DefaultPingInterval
-				}
-
-				ticker := time.NewTicker(pingInterval)
-				defer ticker.Stop()
-				var pingMutex sync.Mutex
-				if common2.DebugEnabled {
-					println("SSE ping goroutine started")
-				}
-
-				for {
-					select {
-					case <-ticker.C:
-						pingMutex.Lock()
-						err2 := helper.PingData(c)
-						pingMutex.Unlock()
-						if err2 != nil {
-							common2.LogError(c, "SSE ping error: "+err.Error())
-							return
-						}
-						if common2.DebugEnabled {
-							println("SSE ping data sent.")
-						}
-					case <-pingerCtx.Done():
-						if common2.DebugEnabled {
-							println("SSE ping goroutine stopped.")
-						}
-						return
-					}
-				}
-			})
+		// 处理流式请求的 ping 保活
+		generalSettings := operation_setting.GetGeneralSetting()
+		if generalSettings.PingIntervalEnabled {
+			pingInterval := time.Duration(generalSettings.PingIntervalSeconds) * time.Second
+			stopPinger := startPingKeepAlive(c, pingInterval)
+			defer stopPinger()
 		}
 	}

 	resp, err := client.Do(req)
-	// request结束后停止ping
-	if info.IsStream && pingEnabled {
-		stopPinger()
-		pingerWg.Wait()
-	}
+
 	if err != nil {
 		return nil, err
 	}
 	if resp == nil {
 		return nil, errors.New("resp is nil")
 	}
+
 	_ = req.Body.Close()
 	_ = c.Request.Body.Close()
 	return resp, nil
--- a/relay/channel/gemini/relay-gemini.go
+++ b/relay/channel/gemini/relay-gemini.go
@@ -39,15 +39,22 @@ func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest, info *relaycommon
 	}

 	if model_setting.GetGeminiSettings().ThinkingAdapterEnabled {
-		if strings.HasSuffix(info.OriginModelName, "-thinking") {
-			budgetTokens := model_setting.GetGeminiSettings().ThinkingAdapterBudgetTokensPercentage * float64(geminiRequest.GenerationConfig.MaxOutputTokens)
-			if budgetTokens == 0 || budgetTokens > 24576 {
-				budgetTokens = 24576
-			}
-			geminiRequest.GenerationConfig.ThinkingConfig = &GeminiThinkingConfig{
-				ThinkingBudget:  common.GetPointer(int(budgetTokens)),
-				IncludeThoughts: true,
-			}
+	        if strings.HasSuffix(info.OriginModelName, "-thinking") {
+	            // 如果模型名以 gemini-2.5-pro 开头，不设置 ThinkingBudget
+	            if strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro") {
+	                geminiRequest.GenerationConfig.ThinkingConfig = &GeminiThinkingConfig{
+	                    IncludeThoughts: true,
+	                }
+	            } else {
+	                budgetTokens := model_setting.GetGeminiSettings().ThinkingAdapterBudgetTokensPercentage * float64(geminiRequest.GenerationConfig.MaxOutputTokens)
+	                if budgetTokens == 0 || budgetTokens > 24576 {
+	                    budgetTokens = 24576
+	                }
+	                geminiRequest.GenerationConfig.ThinkingConfig = &GeminiThinkingConfig{
+	                    ThinkingBudget:  common.GetPointer(int(budgetTokens)),
+	                    IncludeThoughts: true,
+	                }
+	            }
 		} else if strings.HasSuffix(info.OriginModelName, "-nothinking") {
 			geminiRequest.GenerationConfig.ThinkingConfig = &GeminiThinkingConfig{
 				ThinkingBudget: common.GetPointer(0),
--- a/relay/channel/openai/relay-openai.go
+++ b/relay/channel/openai/relay-openai.go
@@ -273,36 +273,25 @@ func OpenaiHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayI
 }

 func OpenaiTTSHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (*dto.OpenAIErrorWithStatusCode, *dto.Usage) {
-	responseBody, err := io.ReadAll(resp.Body)
-	if err != nil {
-		return service.OpenAIErrorWrapper(err, "read_response_body_failed", http.StatusInternalServerError), nil
-	}
-	err = resp.Body.Close()
-	if err != nil {
-		return service.OpenAIErrorWrapper(err, "close_response_body_failed", http.StatusInternalServerError), nil
-	}
-	// Reset response body
-	resp.Body = io.NopCloser(bytes.NewBuffer(responseBody))
-	// We shouldn't set the header before we parse the response body, because the parse part may fail.
-	// And then we will have to send an error response, but in this case, the header has already been set.
-	// So the httpClient will be confused by the response.
-	// For example, Postman will report error, and we cannot check the response at all.
+	// The status code has already been checked before this point, so a failure while reading the body
+	// is treated as a non-recoverable error and is not returned as an err for an external retry.
+	// This is analogous to nginx's load balancing: it retries only when the upstream cannot be reached
+	// or returns a specific status code. Once the upstream has already written the header,
+	// a subsequent failure of the response body is regarded as a non-recoverable error,
+	// and the request can be terminated directly.
+	defer resp.Body.Close()
+	usage := &dto.Usage{}
+	usage.PromptTokens = info.PromptTokens
+	usage.TotalTokens = info.PromptTokens
 	for k, v := range resp.Header {
 		c.Writer.Header().Set(k, v[0])
 	}
 	c.Writer.WriteHeader(resp.StatusCode)
-	_, err = io.Copy(c.Writer, resp.Body)
+	c.Writer.WriteHeaderNow()
+	_, err := io.Copy(c.Writer, resp.Body)
 	if err != nil {
-		return service.OpenAIErrorWrapper(err, "copy_response_body_failed", http.StatusInternalServerError), nil
+		common.LogError(c, err.Error())
 	}
-	err = resp.Body.Close()
-	if err != nil {
-		return service.OpenAIErrorWrapper(err, "close_response_body_failed", http.StatusInternalServerError), nil
-	}
-
-	usage := &dto.Usage{}
-	usage.PromptTokens = info.PromptTokens
-	usage.TotalTokens = info.PromptTokens
 	return nil, usage
 }
Author	SHA1	Message	Date
Calcium-Ion	b05bb899f1	Merge pull request #1134 from QuantumNous/fix_ping_keepalive fix: 流式请求ping	2025-05-31 22:16:16 +08:00
creamlike1024	c51a30b862	fix: 流式请求ping	2025-05-31 22:13:17 +08:00
Calcium-Ion	9c4d3a6359	Merge pull request #1122 from akkuman/feat/stream-tts feat: streaming response for tts	2025-05-31 18:44:48 +08:00
Calcium-Ion	6936a795a6	Merge pull request #1123 from RedwindA/patch-3 Add `ERROR_LOG_ENABLED` description in README	2025-05-31 18:44:24 +08:00
Calcium-Ion	74defce481	Merge pull request #1130 from xqx121/main Fix: Gemini2.5pro ThinkingConfig	2025-05-31 18:43:57 +08:00
xqx121	1c4d7fd84b	Fix: Gemini2.5pro ThinkingConfig	2025-05-31 17:50:00 +08:00
RedwindA	f907c25b21	Add `ERROR_LOG_ENABLED` description	2025-05-29 12:35:13 +08:00
RedwindA	1b64db5521	Add `ERROR_LOG_ENABLED` description	2025-05-29 12:33:27 +08:00
Akkuman	d608a6f123	feat: streaming response for tts	2025-05-29 10:56:01 +08:00
creamlike1024	361b0abec9	fix: pingerCtx 泄漏	2025-05-28 21:34:45 +08:00
CaIon	e01b517843	fix: Change ParallelTooCalls from bool to *bool in GeneralOpenAIRequest for optional handling	2025-05-28 21:12:55 +08:00