chore: Ignore .zed and debug binaries in .gitignore

Merge pull request #2070 from QuantumNous/ali-channel-support-stream-options
Ali channel support stream options
2026-03-30 21:01:18 +00:00 · 2025-10-21 16:40:22 +08:00 · 2025-10-20 23:24:33 +08:00 · 2025-10-20 17:48:35 +08:00 · 2025-10-20 17:48:08 +08:00 · 2025-10-20 16:26:50 +08:00
17 changed files with 678 additions and 211 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
 .idea
 .vscode
+.zed
 upload
 *.exe
 *.db
@@ -10,10 +11,11 @@ web/dist
 .env
 one-api
 new-api
+/__debug_bin*
 .DS_Store
 tiktoken_cache
 .eslintcache
 .gocache

 electron/node_modules
-electron/dist
+electron/dist
--- a/common/api_type.go
+++ b/common/api_type.go
@@ -69,6 +69,8 @@ func ChannelType2APIType(channelType int) (int, bool) {
 		apiType = constant.APITypeMoonshot
 	case constant.ChannelTypeSubmodel:
 		apiType = constant.APITypeSubmodel
+	case constant.ChannelTypeMiniMax:
+		apiType = constant.APITypeMiniMax
 	}
 	if apiType == -1 {
 		return constant.APITypeOpenAI, false
--- a/constant/api_type.go
+++ b/constant/api_type.go
@@ -33,5 +33,6 @@ const (
 	APITypeJimeng
 	APITypeMoonshot
 	APITypeSubmodel
+	APITypeMiniMax
 	APITypeDummy // this one is only for count, do not add any channel after this
 )
--- a/dto/audio.go
+++ b/dto/audio.go
@@ -1,17 +1,27 @@
 package dto
 
 import (
+	"encoding/json"
+
 	"github.com/QuantumNous/new-api/types"
 
 	"github.com/gin-gonic/gin"
 )
 
+// AudioRequest is the OpenAI-style audio request handed to each channel
+// adaptor's ConvertAudioRequest.
+//
+// Metadata carries vendor-specific fields: adaptors that support it unmarshal
+// it on top of the request they generated, so it can override derived values.
 type AudioRequest struct {
-	Model          string  `json:"model"`
-	Input          string  `json:"input"`
-	Voice          string  `json:"voice"`
-	Speed          float64 `json:"speed,omitempty"`
-	ResponseFormat string  `json:"response_format,omitempty"`
+	Model          string          `json:"model"`
+	Input          string          `json:"input"`
+	Voice          string          `json:"voice"`
+	Instructions   string          `json:"instructions,omitempty"`
+	ResponseFormat string          `json:"response_format,omitempty"`
+	Speed          float64         `json:"speed,omitempty"`
+	StreamFormat   string          `json:"stream_format,omitempty"`
+	Metadata       json.RawMessage `json:"metadata,omitempty"`
 }
 
 func (r *AudioRequest) GetTokenCountMeta() *types.TokenCountMeta {
--- a/relay/channel/ali/text.go
+++ b/relay/channel/ali/text.go
@@ -1,20 +1,7 @@
 package ali

 import (
-	"bufio"
-	"encoding/json"
-	"io"
-	"net/http"
-	"strings"
-
-	"github.com/QuantumNous/new-api/common"
 	"github.com/QuantumNous/new-api/dto"
-	"github.com/QuantumNous/new-api/relay/helper"
-	"github.com/QuantumNous/new-api/service"
-
-	"github.com/QuantumNous/new-api/types"
-
-	"github.com/gin-gonic/gin"
 )

 // https://help.aliyun.com/document_detail/613695.html?spm=a2c4g.2399480.0.0.1adb778fAdzP9w#341800c0f8w0r
@@ -29,180 +16,3 @@ func requestOpenAI2Ali(request dto.GeneralOpenAIRequest) *dto.GeneralOpenAIReque
 	}
 	return &request
 }
-
-func embeddingRequestOpenAI2Ali(request dto.EmbeddingRequest) *AliEmbeddingRequest {
-	return &AliEmbeddingRequest{
-		Model: request.Model,
-		Input: struct {
-			Texts []string `json:"texts"`
-		}{
-			Texts: request.ParseInput(),
-		},
-	}
-}
-
-func aliEmbeddingHandler(c *gin.Context, resp *http.Response) (*types.NewAPIError, *dto.Usage) {
-	var fullTextResponse dto.FlexibleEmbeddingResponse
-	err := json.NewDecoder(resp.Body).Decode(&fullTextResponse)
-	if err != nil {
-		return types.NewOpenAIError(err, types.ErrorCodeBadResponseBody, http.StatusInternalServerError), nil
-	}
-
-	service.CloseResponseBodyGracefully(resp)
-
-	model := c.GetString("model")
-	if model == "" {
-		model = "text-embedding-v4"
-	}
-	jsonResponse, err := json.Marshal(fullTextResponse)
-	if err != nil {
-		return types.NewError(err, types.ErrorCodeBadResponseBody), nil
-	}
-	c.Writer.Header().Set("Content-Type", "application/json")
-	c.Writer.WriteHeader(resp.StatusCode)
-	c.Writer.Write(jsonResponse)
-	return nil, &fullTextResponse.Usage
-}
-
-func embeddingResponseAli2OpenAI(response *AliEmbeddingResponse, model string) *dto.OpenAIEmbeddingResponse {
-	openAIEmbeddingResponse := dto.OpenAIEmbeddingResponse{
-		Object: "list",
-		Data:   make([]dto.OpenAIEmbeddingResponseItem, 0, len(response.Output.Embeddings)),
-		Model:  model,
-		Usage:  dto.Usage{TotalTokens: response.Usage.TotalTokens},
-	}
-
-	for _, item := range response.Output.Embeddings {
-		openAIEmbeddingResponse.Data = append(openAIEmbeddingResponse.Data, dto.OpenAIEmbeddingResponseItem{
-			Object:    `embedding`,
-			Index:     item.TextIndex,
-			Embedding: item.Embedding,
-		})
-	}
-	return &openAIEmbeddingResponse
-}
-
-func responseAli2OpenAI(response *AliResponse) *dto.OpenAITextResponse {
-	choice := dto.OpenAITextResponseChoice{
-		Index: 0,
-		Message: dto.Message{
-			Role:    "assistant",
-			Content: response.Output.Text,
-		},
-		FinishReason: response.Output.FinishReason,
-	}
-	fullTextResponse := dto.OpenAITextResponse{
-		Id:      response.RequestId,
-		Object:  "chat.completion",
-		Created: common.GetTimestamp(),
-		Choices: []dto.OpenAITextResponseChoice{choice},
-		Usage: dto.Usage{
-			PromptTokens:     response.Usage.InputTokens,
-			CompletionTokens: response.Usage.OutputTokens,
-			TotalTokens:      response.Usage.InputTokens + response.Usage.OutputTokens,
-		},
-	}
-	return &fullTextResponse
-}
-
-func streamResponseAli2OpenAI(aliResponse *AliResponse) *dto.ChatCompletionsStreamResponse {
-	var choice dto.ChatCompletionsStreamResponseChoice
-	choice.Delta.SetContentString(aliResponse.Output.Text)
-	if aliResponse.Output.FinishReason != "null" {
-		finishReason := aliResponse.Output.FinishReason
-		choice.FinishReason = &finishReason
-	}
-	response := dto.ChatCompletionsStreamResponse{
-		Id:      aliResponse.RequestId,
-		Object:  "chat.completion.chunk",
-		Created: common.GetTimestamp(),
-		Model:   "ernie-bot",
-		Choices: []dto.ChatCompletionsStreamResponseChoice{choice},
-	}
-	return &response
-}
-
-func aliStreamHandler(c *gin.Context, resp *http.Response) (*types.NewAPIError, *dto.Usage) {
-	var usage dto.Usage
-	scanner := bufio.NewScanner(resp.Body)
-	scanner.Split(bufio.ScanLines)
-	dataChan := make(chan string)
-	stopChan := make(chan bool)
-	go func() {
-		for scanner.Scan() {
-			data := scanner.Text()
-			if len(data) < 5 { // ignore blank line or wrong format
-				continue
-			}
-			if data[:5] != "data:" {
-				continue
-			}
-			data = data[5:]
-			dataChan <- data
-		}
-		stopChan <- true
-	}()
-	helper.SetEventStreamHeaders(c)
-	lastResponseText := ""
-	c.Stream(func(w io.Writer) bool {
-		select {
-		case data := <-dataChan:
-			var aliResponse AliResponse
-			err := json.Unmarshal([]byte(data), &aliResponse)
-			if err != nil {
-				common.SysLog("error unmarshalling stream response: " + err.Error())
-				return true
-			}
-			if aliResponse.Usage.OutputTokens != 0 {
-				usage.PromptTokens = aliResponse.Usage.InputTokens
-				usage.CompletionTokens = aliResponse.Usage.OutputTokens
-				usage.TotalTokens = aliResponse.Usage.InputTokens + aliResponse.Usage.OutputTokens
-			}
-			response := streamResponseAli2OpenAI(&aliResponse)
-			response.Choices[0].Delta.SetContentString(strings.TrimPrefix(response.Choices[0].Delta.GetContentString(), lastResponseText))
-			lastResponseText = aliResponse.Output.Text
-			jsonResponse, err := json.Marshal(response)
-			if err != nil {
-				common.SysLog("error marshalling stream response: " + err.Error())
-				return true
-			}
-			c.Render(-1, common.CustomEvent{Data: "data: " + string(jsonResponse)})
-			return true
-		case <-stopChan:
-			c.Render(-1, common.CustomEvent{Data: "data: [DONE]"})
-			return false
-		}
-	})
-	service.CloseResponseBodyGracefully(resp)
-	return nil, &usage
-}
-
-func aliHandler(c *gin.Context, resp *http.Response) (*types.NewAPIError, *dto.Usage) {
-	var aliResponse AliResponse
-	responseBody, err := io.ReadAll(resp.Body)
-	if err != nil {
-		return types.NewOpenAIError(err, types.ErrorCodeReadResponseBodyFailed, http.StatusInternalServerError), nil
-	}
-	service.CloseResponseBodyGracefully(resp)
-	err = json.Unmarshal(responseBody, &aliResponse)
-	if err != nil {
-		return types.NewOpenAIError(err, types.ErrorCodeBadResponseBody, http.StatusInternalServerError), nil
-	}
-	if aliResponse.Code != "" {
-		return types.WithOpenAIError(types.OpenAIError{
-			Message: aliResponse.Message,
-			Type:    "ali_error",
-			Param:   aliResponse.RequestId,
-			Code:    aliResponse.Code,
-		}, resp.StatusCode), nil
-	}
-	fullTextResponse := responseAli2OpenAI(&aliResponse)
-	jsonResponse, err := common.Marshal(fullTextResponse)
-	if err != nil {
-		return types.NewError(err, types.ErrorCodeBadResponseBody), nil
-	}
-	c.Writer.Header().Set("Content-Type", "application/json")
-	c.Writer.WriteHeader(resp.StatusCode)
-	_, err = c.Writer.Write(jsonResponse)
-	return nil, &fullTextResponse.Usage
-}
--- a/relay/channel/minimax/adaptor.go
+++ b/relay/channel/minimax/adaptor.go
@@ -0,0 +1,158 @@
+package minimax
+
+import (
+	"bytes"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io"
+	"net/http"
+
+	"github.com/QuantumNous/new-api/dto"
+	"github.com/QuantumNous/new-api/relay/channel"
+	"github.com/QuantumNous/new-api/relay/channel/openai"
+	relaycommon "github.com/QuantumNous/new-api/relay/common"
+	"github.com/QuantumNous/new-api/relay/constant"
+	"github.com/QuantumNous/new-api/types"
+
+	"github.com/gin-gonic/gin"
+)
+
+// Adaptor relays chat-completion and text-to-speech requests to MiniMax.
+// It holds no state.
+type Adaptor struct {
+}
+
+// ConvertGeminiRequest is not supported by this adaptor and always fails.
+func (a *Adaptor) ConvertGeminiRequest(*gin.Context, *relaycommon.RelayInfo, *dto.GeminiChatRequest) (any, error) {
+	return nil, errors.New("not implemented")
+}
+
+// ConvertClaudeRequest is not supported by this adaptor and always fails.
+func (a *Adaptor) ConvertClaudeRequest(c *gin.Context, info *relaycommon.RelayInfo, req *dto.ClaudeRequest) (any, error) {
+	return nil, errors.New("not implemented")
+}
+
+// ConvertAudioRequest builds the JSON body of a MiniMax text-to-speech call
+// from an OpenAI-style audio request. Only RelayModeAudioSpeech is accepted.
+//
+// When request.Metadata is non-empty it is unmarshalled on top of the generated
+// request, so any field it names overrides the value derived here. The value
+// "hex" or "url" is stored in the gin context under "response_format".
+func (a *Adaptor) ConvertAudioRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.AudioRequest) (io.Reader, error) {
+	if info.RelayMode != constant.RelayModeAudioSpeech {
+		return nil, errors.New("unsupported audio relay mode")
+	}
+
+	voiceID := request.Voice
+	speed := request.Speed
+	outputFormat := request.ResponseFormat
+
+	// NOTE(review): ResponseFormat is sent both as audio_setting.format and as
+	// output_format, and the "hex"/"url" normalization further down only affects
+	// the context value, not the body. Confirm against the MiniMax T2A API
+	// whether output_format should be normalized before marshalling.
+	minimaxRequest := MiniMaxTTSRequest{
+		Model: info.OriginModelName,
+		Text:  request.Input,
+		VoiceSetting: VoiceSetting{
+			VoiceID: voiceID,
+			Speed:   speed,
+		},
+		AudioSetting: &AudioSetting{
+			Format: outputFormat,
+		},
+		OutputFormat: outputFormat,
+	}
+
+	// Merge vendor-specific fields supplied through the metadata extension.
+	if len(request.Metadata) > 0 {
+		if err := json.Unmarshal(request.Metadata, &minimaxRequest); err != nil {
+			return nil, fmt.Errorf("error unmarshalling metadata to minimax request: %w", err)
+		}
+	}
+
+	jsonData, err := json.Marshal(minimaxRequest)
+	if err != nil {
+		return nil, fmt.Errorf("error marshalling minimax request: %w", err)
+	}
+	if outputFormat != "hex" {
+		outputFormat = "url"
+	}
+
+	c.Set("response_format", outputFormat)
+
+	return bytes.NewReader(jsonData), nil
+}
+
+// ConvertImageRequest returns the request unchanged.
+func (a *Adaptor) ConvertImageRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.ImageRequest) (any, error) {
+	return request, nil
+}
+
+// Init is a no-op.
+func (a *Adaptor) Init(info *relaycommon.RelayInfo) {
+}
+
+// GetRequestURL returns the upstream URL chosen by the package-level GetRequestURL.
+func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
+	return GetRequestURL(info)
+}
+
+// SetupRequestHeader applies the shared relay headers and authenticates with
+// the channel API key as a bearer token.
+func (a *Adaptor) SetupRequestHeader(c *gin.Context, req *http.Header, info *relaycommon.RelayInfo) error {
+	channel.SetupApiRequestHeader(info, c, req)
+	req.Set("Authorization", "Bearer "+info.ApiKey)
+	return nil
+}
+
+// ConvertOpenAIRequest forwards the request unchanged; a nil request is an error.
+func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayInfo, request *dto.GeneralOpenAIRequest) (any, error) {
+	if request == nil {
+		return nil, errors.New("request is nil")
+	}
+	return request, nil
+}
+
+// ConvertRerankRequest is not supported by this adaptor. It reports an error,
+// like the other unsupported conversions, instead of returning a nil request.
+func (a *Adaptor) ConvertRerankRequest(c *gin.Context, relayMode int, request dto.RerankRequest) (any, error) {
+	return nil, errors.New("not implemented")
+}
+
+// ConvertEmbeddingRequest returns the request unchanged.
+func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.EmbeddingRequest) (any, error) {
+	return request, nil
+}
+
+// ConvertOpenAIResponsesRequest is not supported by this adaptor and always fails.
+func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
+	return nil, errors.New("not implemented")
+}
+
+// DoRequest sends the prepared body upstream through channel.DoApiRequest.
+func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error) {
+	return channel.DoApiRequest(a, c, info, requestBody)
+}
+
+// DoResponse handles text-to-speech responses with handleTTSResponse and
+// delegates every other relay mode to the OpenAI adaptor.
+func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (usage any, err *types.NewAPIError) {
+	if info.RelayMode == constant.RelayModeAudioSpeech {
+		return handleTTSResponse(c, resp, info)
+	}
+
+	adaptor := openai.Adaptor{}
+	return adaptor.DoResponse(c, resp, info)
+}
+
+// GetModelList returns the package-level ModelList.
+func (a *Adaptor) GetModelList() []string {
+	return ModelList
+}
+
+// GetChannelName returns the package-level ChannelName.
+func (a *Adaptor) GetChannelName() string {
+	return ChannelName
+}
--- a/relay/channel/minimax/constants.go
+++ b/relay/channel/minimax/constants.go
@@ -8,6 +8,12 @@ var ModelList = []string{
 	"abab6-chat",
 	"abab5.5-chat",
 	"abab5.5s-chat",
+	"speech-2.5-hd-preview",
+	"speech-2.5-turbo-preview",
+	"speech-02-hd",
+	"speech-02-turbo",
+	"speech-01-hd",
+	"speech-01-turbo",
 }

 var ChannelName = "minimax"
--- a/relay/channel/minimax/relay-minimax.go
+++ b/relay/channel/minimax/relay-minimax.go
@@ -3,9 +3,26 @@ package minimax
 import (
 	"fmt"
 
+	channelconstant "github.com/QuantumNous/new-api/constant"
 	relaycommon "github.com/QuantumNous/new-api/relay/common"
+	"github.com/QuantumNous/new-api/relay/constant"
 )
 
+// GetRequestURL returns the MiniMax endpoint for the request's relay mode:
+// chat completions or text-to-speech. The channel's base URL is used when set,
+// otherwise the default registered for the MiniMax channel type. Any other
+// relay mode is an error.
 func GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
-	return fmt.Sprintf("%s/v1/text/chatcompletion_v2", info.ChannelBaseUrl), nil
+	host := info.ChannelBaseUrl
+	if host == "" {
+		host = channelconstant.ChannelBaseURLs[channelconstant.ChannelTypeMiniMax]
+	}
+
+	if info.RelayMode == constant.RelayModeChatCompletions {
+		return fmt.Sprintf("%s/v1/text/chatcompletion_v2", host), nil
+	}
+	if info.RelayMode == constant.RelayModeAudioSpeech {
+		return fmt.Sprintf("%s/v1/t2a_v2", host), nil
+	}
+	return "", fmt.Errorf("unsupported relay mode: %d", info.RelayMode)
 }
--- a/relay/channel/minimax/tts.go
+++ b/relay/channel/minimax/tts.go
@@ -0,0 +1,222 @@
+package minimax
+
+import (
+	"encoding/hex"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io"
+	"net/http"
+	"strings"
+
+	"github.com/QuantumNous/new-api/dto"
+	relaycommon "github.com/QuantumNous/new-api/relay/common"
+	"github.com/QuantumNous/new-api/types"
+	"github.com/gin-gonic/gin"
+)
+
+// MiniMaxTTSRequest is the JSON body sent to the MiniMax text-to-speech endpoint.
+type MiniMaxTTSRequest struct {
+	Model             string             `json:"model"`
+	Text              string             `json:"text"`
+	Stream            bool               `json:"stream,omitempty"`
+	StreamOptions     *StreamOptions     `json:"stream_options,omitempty"`
+	VoiceSetting      VoiceSetting       `json:"voice_setting"`
+	PronunciationDict *PronunciationDict `json:"pronunciation_dict,omitempty"`
+	AudioSetting      *AudioSetting      `json:"audio_setting,omitempty"`
+	TimbreWeights     []TimbreWeight     `json:"timbre_weights,omitempty"`
+	LanguageBoost     string             `json:"language_boost,omitempty"`
+	VoiceModify       *VoiceModify       `json:"voice_modify,omitempty"`
+	SubtitleEnable    bool               `json:"subtitle_enable,omitempty"`
+	OutputFormat      string             `json:"output_format,omitempty"`
+	AigcWatermark     bool               `json:"aigc_watermark,omitempty"`
+}
+
+// StreamOptions is the stream_options object of a TTS request.
+type StreamOptions struct {
+	ExcludeAggregatedAudio bool `json:"exclude_aggregated_audio,omitempty"`
+}
+
+// VoiceSetting is the voice_setting object of a TTS request.
+type VoiceSetting struct {
+	VoiceID           string  `json:"voice_id"`
+	Speed             float64 `json:"speed,omitempty"`
+	Vol               float64 `json:"vol,omitempty"`
+	Pitch             int     `json:"pitch,omitempty"`
+	Emotion           string  `json:"emotion,omitempty"`
+	TextNormalization bool    `json:"text_normalization,omitempty"`
+	LatexRead         bool    `json:"latex_read,omitempty"`
+}
+
+// PronunciationDict is the pronunciation_dict object of a TTS request.
+type PronunciationDict struct {
+	Tone []string `json:"tone,omitempty"`
+}
+
+// AudioSetting is the audio_setting object of a TTS request.
+type AudioSetting struct {
+	SampleRate int    `json:"sample_rate,omitempty"`
+	Bitrate    int    `json:"bitrate,omitempty"`
+	Format     string `json:"format,omitempty"`
+	Channel    int    `json:"channel,omitempty"`
+	ForceCbr   bool   `json:"force_cbr,omitempty"`
+}
+
+// TimbreWeight is one element of the timbre_weights array of a TTS request.
+type TimbreWeight struct {
+	VoiceID string `json:"voice_id"`
+	Weight  int    `json:"weight"`
+}
+
+// VoiceModify is the voice_modify object of a TTS request.
+type VoiceModify struct {
+	Pitch        int    `json:"pitch,omitempty"`
+	Intensity    int    `json:"intensity,omitempty"`
+	Timbre       int    `json:"timbre,omitempty"`
+	SoundEffects string `json:"sound_effects,omitempty"`
+}
+
+// MiniMaxTTSResponse is the JSON body returned by the MiniMax text-to-speech endpoint.
+type MiniMaxTTSResponse struct {
+	Data      MiniMaxTTSData   `json:"data"`
+	ExtraInfo MiniMaxExtraInfo `json:"extra_info"`
+	TraceID   string           `json:"trace_id"`
+	BaseResp  MiniMaxBaseResp  `json:"base_resp"`
+}
+
+// MiniMaxTTSData is the data object of a TTS response. Audio carries either a
+// URL or hex-encoded audio bytes; handleTTSResponse tells them apart by prefix.
+type MiniMaxTTSData struct {
+	Audio  string `json:"audio"`
+	Status int    `json:"status"`
+}
+
+// MiniMaxExtraInfo is the extra_info object of a TTS response.
+type MiniMaxExtraInfo struct {
+	UsageCharacters int64 `json:"usage_characters"`
+}
+
+// MiniMaxBaseResp is the base_resp object of a TTS response. A non-zero
+// StatusCode is treated as an upstream failure.
+type MiniMaxBaseResp struct {
+	StatusCode int64  `json:"status_code"`
+	StatusMsg  string `json:"status_msg"`
+}
+
+// getContentTypeByFormat maps an audio format name to its MIME type. Unknown
+// formats fall back to "audio/mpeg".
+func getContentTypeByFormat(format string) string {
+	switch format {
+	case "wav":
+		return "audio/wav"
+	case "flac":
+		return "audio/flac"
+	case "aac":
+		return "audio/aac"
+	case "pcm":
+		return "audio/pcm"
+	default:
+		// "mp3" and anything unrecognized.
+		return "audio/mpeg"
+	}
+}
+
+// handleTTSResponse turns a MiniMax text-to-speech response into the client
+// response. A non-zero base_resp status or an empty audio field is reported as
+// an error. Audio that starts with "http" is answered with a 302 redirect to
+// that URL; anything else is hex-decoded and written as "audio/mpeg".
+//
+// The returned usage is a *dto.Usage whose TotalTokens is the upstream
+// usage_characters count.
+func handleTTSResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (usage any, err *types.NewAPIError) {
+	// Register the close before reading so the body is released on every path,
+	// including a failed read.
+	defer resp.Body.Close()
+
+	body, readErr := io.ReadAll(resp.Body)
+	if readErr != nil {
+		return nil, types.NewErrorWithStatusCode(
+			fmt.Errorf("failed to read minimax response: %w", readErr),
+			types.ErrorCodeReadResponseBodyFailed,
+			http.StatusInternalServerError,
+		)
+	}
+
+	var minimaxResp MiniMaxTTSResponse
+	if unmarshalErr := json.Unmarshal(body, &minimaxResp); unmarshalErr != nil {
+		return nil, types.NewErrorWithStatusCode(
+			fmt.Errorf("failed to unmarshal minimax TTS response: %w", unmarshalErr),
+			types.ErrorCodeBadResponseBody,
+			http.StatusInternalServerError,
+		)
+	}
+
+	// A non-zero base_resp.status_code is an upstream error.
+	if minimaxResp.BaseResp.StatusCode != 0 {
+		return nil, types.NewErrorWithStatusCode(
+			fmt.Errorf("minimax TTS error: %d - %s", minimaxResp.BaseResp.StatusCode, minimaxResp.BaseResp.StatusMsg),
+			types.ErrorCodeBadResponse,
+			http.StatusBadRequest,
+		)
+	}
+
+	if minimaxResp.Data.Audio == "" {
+		return nil, types.NewErrorWithStatusCode(
+			errors.New("no audio data in minimax TTS response"),
+			types.ErrorCodeBadResponse,
+			http.StatusBadRequest,
+		)
+	}
+
+	if strings.HasPrefix(minimaxResp.Data.Audio, "http") {
+		// URL output: let the client fetch the audio itself.
+		c.Redirect(http.StatusFound, minimaxResp.Data.Audio)
+	} else {
+		audioData, decodeErr := hex.DecodeString(minimaxResp.Data.Audio)
+		if decodeErr != nil {
+			return nil, types.NewErrorWithStatusCode(
+				fmt.Errorf("failed to decode hex audio data: %w", decodeErr),
+				types.ErrorCodeBadResponse,
+				http.StatusInternalServerError,
+			)
+		}
+
+		// The requested audio format is not passed to this function, so the
+		// bytes are always labelled as MP3.
+		c.Data(http.StatusOK, "audio/mpeg", audioData)
+	}
+
+	usage = &dto.Usage{
+		PromptTokens:     info.PromptTokens,
+		CompletionTokens: 0,
+		TotalTokens:      int(minimaxResp.ExtraInfo.UsageCharacters),
+	}
+
+	return usage, nil
+}
+
+// handleChatCompletionResponse copies the upstream status, headers and body to
+// the client unchanged, with the content type set to "application/json". It
+// reports no usage.
+func handleChatCompletionResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (usage any, err *types.NewAPIError) {
+	// Register the close before reading so the body is released on every path.
+	defer resp.Body.Close()
+
+	body, readErr := io.ReadAll(resp.Body)
+	if readErr != nil {
+		return nil, types.NewErrorWithStatusCode(
+			fmt.Errorf("failed to read minimax response: %w", readErr),
+			types.ErrorCodeReadResponseBodyFailed,
+			http.StatusInternalServerError,
+		)
+	}
+
+	for key, values := range resp.Header {
+		for _, value := range values {
+			c.Header(key, value)
+		}
+	}
+
+	c.Data(resp.StatusCode, "application/json", body)
+	return nil, nil
+}
--- a/relay/channel/ollama/stream.go
+++ b/relay/channel/ollama/stream.go
@@ -121,7 +121,14 @@ func ollamaStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http
 			if chunk.Message != nil && len(chunk.Message.Thinking) > 0 {
 				raw := strings.TrimSpace(string(chunk.Message.Thinking))
 				if raw != "" && raw != "null" {
-					delta.Choices[0].Delta.SetReasoningContent(raw)
+					// Unmarshal the JSON string to get the actual content without quotes
+					var thinkingContent string
+					if err := json.Unmarshal(chunk.Message.Thinking, &thinkingContent); err == nil {
+						delta.Choices[0].Delta.SetReasoningContent(thinkingContent)
+					} else {
+						// Fallback to raw string if it's not a JSON string
+						delta.Choices[0].Delta.SetReasoningContent(raw)
+					}
 				}
 			}
 			// tool calls
@@ -209,7 +216,14 @@ func ollamaChatHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.R
 		if ck.Message != nil && len(ck.Message.Thinking) > 0 {
 			raw := strings.TrimSpace(string(ck.Message.Thinking))
 			if raw != "" && raw != "null" {
-				reasoningBuilder.WriteString(raw)
+				// Unmarshal the JSON string to get the actual content without quotes
+				var thinkingContent string
+				if err := json.Unmarshal(ck.Message.Thinking, &thinkingContent); err == nil {
+					reasoningBuilder.WriteString(thinkingContent)
+				} else {
+					// Fallback to raw string if it's not a JSON string
+					reasoningBuilder.WriteString(raw)
+				}
 			}
 		}
 		if ck.Message != nil && ck.Message.Content != "" {
@@ -229,7 +243,14 @@ func ollamaChatHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.R
 			if len(single.Message.Thinking) > 0 {
 				raw := strings.TrimSpace(string(single.Message.Thinking))
 				if raw != "" && raw != "null" {
-					reasoningBuilder.WriteString(raw)
+					// Unmarshal the JSON string to get the actual content without quotes
+					var thinkingContent string
+					if err := json.Unmarshal(single.Message.Thinking, &thinkingContent); err == nil {
+						reasoningBuilder.WriteString(thinkingContent)
+					} else {
+						// Fallback to raw string if it's not a JSON string
+						reasoningBuilder.WriteString(raw)
+					}
 				}
 			}
 			aggContent.WriteString(single.Message.Content)
--- a/relay/channel/openai/adaptor.go
+++ b/relay/channel/openai/adaptor.go
@@ -18,7 +18,7 @@ import (
 	"github.com/QuantumNous/new-api/relay/channel"
 	"github.com/QuantumNous/new-api/relay/channel/ai360"
 	"github.com/QuantumNous/new-api/relay/channel/lingyiwanwu"
-	"github.com/QuantumNous/new-api/relay/channel/minimax"
+	//"github.com/QuantumNous/new-api/relay/channel/minimax"
 	"github.com/QuantumNous/new-api/relay/channel/openrouter"
 	"github.com/QuantumNous/new-api/relay/channel/xinference"
 	relaycommon "github.com/QuantumNous/new-api/relay/common"
@@ -161,8 +161,8 @@ func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
 			requestURL = fmt.Sprintf("/openai/realtime?deployment=%s&api-version=%s", model_, apiVersion)
 		}
 		return relaycommon.GetFullRequestURL(info.ChannelBaseUrl, requestURL, info.ChannelType), nil
-	case constant.ChannelTypeMiniMax:
-		return minimax.GetRequestURL(info)
+	//case constant.ChannelTypeMiniMax:
+	//	return minimax.GetRequestURL(info)
 	case constant.ChannelTypeCustom:
 		url := info.ChannelBaseUrl
 		url = strings.Replace(url, "{model}", info.UpstreamModelName, -1)
@@ -599,8 +599,8 @@ func (a *Adaptor) GetModelList() []string {
 		return ai360.ModelList
 	case constant.ChannelTypeLingYiWanWu:
 		return lingyiwanwu.ModelList
-	case constant.ChannelTypeMiniMax:
-		return minimax.ModelList
+	//case constant.ChannelTypeMiniMax:
+	//	return minimax.ModelList
 	case constant.ChannelTypeXinference:
 		return xinference.ModelList
 	case constant.ChannelTypeOpenRouter:
@@ -616,8 +616,8 @@ func (a *Adaptor) GetChannelName() string {
 		return ai360.ChannelName
 	case constant.ChannelTypeLingYiWanWu:
 		return lingyiwanwu.ChannelName
-	case constant.ChannelTypeMiniMax:
-		return minimax.ChannelName
+	//case constant.ChannelTypeMiniMax:
+	//	return minimax.ChannelName
 	case constant.ChannelTypeXinference:
 		return xinference.ChannelName
 	case constant.ChannelTypeOpenRouter:
--- a/relay/channel/volcengine/adaptor.go
+++ b/relay/channel/volcengine/adaptor.go
@@ -37,8 +37,64 @@ func (a *Adaptor) ConvertClaudeRequest(c *gin.Context, info *relaycommon.RelayIn
 }
 
+// ConvertAudioRequest builds the JSON body of a Volcengine text-to-speech call
+// from an OpenAI-style audio request. Only RelayModeAudioSpeech is accepted.
+//
+// The app ID and token come from parseVolcengineAuth(info.ApiKey). The mapped
+// encoding is stored in the gin context under "response_format". When
+// request.Metadata is non-empty it is unmarshalled on top of the generated
+// request, so any field it names overrides the value derived here.
 func (a *Adaptor) ConvertAudioRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.AudioRequest) (io.Reader, error) {
-	//TODO implement me
-	return nil, errors.New("not implemented")
+	if info.RelayMode != constant.RelayModeAudioSpeech {
+		return nil, errors.New("unsupported audio relay mode")
+	}
+
+	appID, token, err := parseVolcengineAuth(info.ApiKey)
+	if err != nil {
+		return nil, err
+	}
+
+	voiceType := mapVoiceType(request.Voice)
+	speedRatio := request.Speed
+	encoding := mapEncoding(request.ResponseFormat)
+
+	c.Set("response_format", encoding)
+
+	volcRequest := VolcengineTTSRequest{
+		App: VolcengineTTSApp{
+			AppID:   appID,
+			Token:   token,
+			Cluster: "volcano_tts",
+		},
+		User: VolcengineTTSUser{
+			UID: "openai_relay_user",
+		},
+		Audio: VolcengineTTSAudio{
+			VoiceType:  voiceType,
+			Encoding:   encoding,
+			SpeedRatio: speedRatio,
+			Rate:       24000,
+		},
+		Request: VolcengineTTSReqInfo{
+			ReqID:     generateRequestID(),
+			Text:      request.Input,
+			Operation: "query",
+			Model:     info.OriginModelName,
+		},
+	}
+
+	// Merge vendor-specific fields supplied through the metadata extension.
+	if len(request.Metadata) > 0 {
+		if err = json.Unmarshal(request.Metadata, &volcRequest); err != nil {
+			return nil, fmt.Errorf("error unmarshalling metadata to volcengine request: %w", err)
+		}
+	}
+
+	jsonData, err := json.Marshal(volcRequest)
+	if err != nil {
+		return nil, fmt.Errorf("error marshalling volcengine request: %w", err)
+	}
+
+	return bytes.NewReader(jsonData), nil
 }
 
 func (a *Adaptor) ConvertImageRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.ImageRequest) (any, error) {
@@ -190,7 +239,6 @@ func (a *Adaptor) Init(info *relaycommon.RelayInfo) {
 }

 func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
-	// 支持自定义域名，如果未设置则使用默认域名
 	baseUrl := info.ChannelBaseUrl
 	if baseUrl == "" {
 		baseUrl = channelconstant.ChannelBaseURLs[channelconstant.ChannelTypeVolcEngine]
@@ -217,6 +265,12 @@ func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
 			return fmt.Sprintf("%s/api/v3/images/edits", baseUrl), nil
 		case constant.RelayModeRerank:
 			return fmt.Sprintf("%s/api/v3/rerank", baseUrl), nil
+		case constant.RelayModeAudioSpeech:
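+			// Use the official TTS endpoint only when baseUrl is Volcengine's default official URL; otherwise pass through to the base URL's /v1/audio/speech endpoint.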
+			if baseUrl == channelconstant.ChannelBaseURLs[channelconstant.ChannelTypeVolcEngine] {
+				return "https://openspeech.bytedance.com/api/v1/tts", nil
+			}
+			return fmt.Sprintf("%s/v1/audio/speech", baseUrl), nil
 		default:
 		}
 	}
@@ -225,6 +279,22 @@ func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
 
+// SetupRequestHeader applies the shared relay headers and then sets the
+// Authorization header. For text-to-speech the API key is split on "|" and
+// its second part is sent as "Bearer;<token>"; if the key does not have
+// exactly two parts no Authorization header is set. Every other relay mode
+// sends the whole key as a standard bearer token.
 func (a *Adaptor) SetupRequestHeader(c *gin.Context, req *http.Header, info *relaycommon.RelayInfo) error {
 	channel.SetupApiRequestHeader(info, c, req)
+
+	if info.RelayMode == constant.RelayModeAudioSpeech {
+		parts := strings.Split(info.ApiKey, "|")
+		if len(parts) == 2 {
+			// The separator after "Bearer" is a semicolon, not a space.
+			req.Set("Authorization", "Bearer;"+parts[1])
+		}
+		req.Set("Content-Type", "application/json")
+		return nil
+	}
+
 	req.Set("Authorization", "Bearer "+info.ApiKey)
 	return nil
 }
@@ -260,6 +324,11 @@ func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, request
 }

 func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (usage any, err *types.NewAPIError) {
+	if info.RelayMode == constant.RelayModeAudioSpeech {
+		encoding := mapEncoding(c.GetString("response_format"))
+		return handleTTSResponse(c, resp, info, encoding)
+	}
+
 	adaptor := openai.Adaptor{}
 	usage, err = adaptor.DoResponse(c, resp, info)
 	return
--- a/relay/channel/volcengine/tts.go
+++ b/relay/channel/volcengine/tts.go
@@ -0,0 +1,194 @@
+package volcengine
+
+import (
+	"encoding/base64"
+	"encoding/json"
+	"errors"
+	"io"
+	"net/http"
+	"strings"
+
+	"github.com/QuantumNous/new-api/dto"
+	relaycommon "github.com/QuantumNous/new-api/relay/common"
+	"github.com/QuantumNous/new-api/types"
+	"github.com/gin-gonic/gin"
+	"github.com/google/uuid"
+)
+
+// VolcengineTTSRequest is the top-level JSON payload for a Volcengine TTS
+// request. It groups the payload into four always-serialized sections:
+// "app", "user", "audio" and "request".
+type VolcengineTTSRequest struct {
+	App     VolcengineTTSApp     `json:"app"`
+	User    VolcengineTTSUser    `json:"user"`
+	Audio   VolcengineTTSAudio   `json:"audio"`
+	Request VolcengineTTSReqInfo `json:"request"`
+}
+
+// VolcengineTTSApp is the "app" section of a TTS request.
+// NOTE(review): AppID and Token presumably carry the two halves of the
+// "appid|access_token" channel key split by parseVolcengineAuth — confirm at
+// the call site that builds the request.
+type VolcengineTTSApp struct {
+	AppID   string `json:"appid"`
+	Token   string `json:"token"`
+	Cluster string `json:"cluster"`
+}
+
+// VolcengineTTSUser is the "user" section of a TTS request; it carries a
+// single user identifier serialized as "uid".
+type VolcengineTTSUser struct {
+	UID string `json:"uid"`
+}
+
+// VolcengineTTSAudio is the "audio" section of a TTS request.
+//
+// VoiceType, Encoding, SpeedRatio and Rate have no omitempty tag and are
+// therefore always serialized, even when they hold their zero value. The
+// remaining fields are optional and dropped from the JSON when zero.
+type VolcengineTTSAudio struct {
+	VoiceType        string  `json:"voice_type"`
+	Encoding         string  `json:"encoding"`
+	SpeedRatio       float64 `json:"speed_ratio"`
+	Rate             int     `json:"rate"`
+	Bitrate          int     `json:"bitrate,omitempty"`
+	LoudnessRatio    float64 `json:"loudness_ratio,omitempty"`
+	EnableEmotion    bool    `json:"enable_emotion,omitempty"`
+	Emotion          string  `json:"emotion,omitempty"`
+	EmotionScale     float64 `json:"emotion_scale,omitempty"`
+	ExplicitLanguage string  `json:"explicit_language,omitempty"`
+	ContextLanguage  string  `json:"context_language,omitempty"`
+}
+
+// VolcengineTTSReqInfo is the "request" section of a TTS request.
+//
+// ReqID, Text and Operation are always serialized; the other fields are
+// omitted when empty. WithTimestamp is untyped so it can hold whatever JSON
+// value the caller supplies. ExtraParam is a pointer so that a nil value
+// omits the whole "extra_param" object.
+type VolcengineTTSReqInfo struct {
+	ReqID           string                   `json:"reqid"`
+	Text            string                   `json:"text"`
+	Operation       string                   `json:"operation"`
+	Model           string                   `json:"model,omitempty"`
+	TextType        string                   `json:"text_type,omitempty"`
+	SilenceDuration float64                  `json:"silence_duration,omitempty"`
+	WithTimestamp   interface{}              `json:"with_timestamp,omitempty"`
+	ExtraParam      *VolcengineTTSExtraParam `json:"extra_param,omitempty"`
+}
+
+// VolcengineTTSExtraParam holds the optional "extra_param" settings of a TTS
+// request. Every field is tagged omitempty, so only non-zero settings are
+// serialized. MuteCutThreshold and MuteCutRemainMs are sent as strings, not
+// numbers.
+type VolcengineTTSExtraParam struct {
+	DisableMarkdownFilter      bool                      `json:"disable_markdown_filter,omitempty"`
+	EnableLatexTn              bool                      `json:"enable_latex_tn,omitempty"`
+	MuteCutThreshold           string                    `json:"mute_cut_threshold,omitempty"`
+	MuteCutRemainMs            string                    `json:"mute_cut_remain_ms,omitempty"`
+	DisableEmojiFilter         bool                      `json:"disable_emoji_filter,omitempty"`
+	UnsupportedCharRatioThresh float64                   `json:"unsupported_char_ratio_thresh,omitempty"`
+	AigcWatermark              bool                      `json:"aigc_watermark,omitempty"`
+	CacheConfig                *VolcengineTTSCacheConfig `json:"cache_config,omitempty"`
+}
+
+// VolcengineTTSCacheConfig is the optional "cache_config" object nested in
+// VolcengineTTSExtraParam. Both fields are omitted from the JSON when zero.
+type VolcengineTTSCacheConfig struct {
+	TextType int  `json:"text_type,omitempty"`
+	UseCache bool `json:"use_cache,omitempty"`
+}
+
+// VolcengineTTSResponse is the JSON reply decoded by handleTTSResponse.
+//
+// handleTTSResponse treats Code 3000 as success and reports Message as the
+// error text for any other code. Data holds the audio as a standard base64
+// string. Addition is optional and may be nil.
+type VolcengineTTSResponse struct {
+	ReqID    string                     `json:"reqid"`
+	Code     int                        `json:"code"`
+	Message  string                     `json:"message"`
+	Sequence int                        `json:"sequence"`
+	Data     string                     `json:"data"`
+	Addition *VolcengineTTSAdditionInfo `json:"addition,omitempty"`
+}
+
+// VolcengineTTSAdditionInfo is the optional "addition" object of a TTS
+// response. Duration is delivered as a string.
+// NOTE(review): the unit of Duration is not visible here — confirm against
+// the upstream API documentation before parsing it.
+type VolcengineTTSAdditionInfo struct {
+	Duration string `json:"duration"`
+}
+
+// openAIToVolcengineVoiceMap translates the OpenAI voice names to Volcengine
+// voice type identifiers. Lookups are exact and case-sensitive; mapVoiceType
+// passes any name not listed here through unchanged.
+var openAIToVolcengineVoiceMap = map[string]string{
+	"alloy":   "zh_male_M392_conversation_wvae_bigtts",
+	"echo":    "zh_male_wenhao_mars_bigtts",
+	"fable":   "zh_female_tianmei_mars_bigtts",
+	"onyx":    "zh_male_zhibei_mars_bigtts",
+	"nova":    "zh_female_shuangkuaisisi_mars_bigtts",
+	"shimmer": "zh_female_cancan_mars_bigtts",
+}
+
+// responseFormatToEncodingMap translates an OpenAI-style response_format
+// value to the encoding name sent to Volcengine.
+//
+// "aac" and "flac" are mapped to "mp3", so a client asking for either of
+// those formats receives MP3 audio.
+// NOTE(review): presumably because the upstream has no aac/flac encoding —
+// confirm against the upstream API documentation.
+var responseFormatToEncodingMap = map[string]string{
+	"mp3":  "mp3",
+	"opus": "ogg_opus",
+	"aac":  "mp3",
+	"flac": "mp3",
+	"wav":  "wav",
+	"pcm":  "pcm",
+}
+
+// parseVolcengineAuth splits a channel key of the form "appid|access_token".
+//
+// The key must contain exactly one "|" separator; anything else yields an
+// error and empty results. The two halves are returned verbatim (they are
+// neither trimmed nor checked for emptiness).
+func parseVolcengineAuth(apiKey string) (appID, token string, err error) {
+	// Exactly one separator is equivalent to the key splitting into two parts.
+	if strings.Count(apiKey, "|") != 1 {
+		return "", "", errors.New("invalid api key format, expected: appid|access_token")
+	}
+	sep := strings.IndexByte(apiKey, '|')
+	return apiKey[:sep], apiKey[sep+1:], nil
+}
+
+// mapVoiceType resolves an OpenAI voice name to its Volcengine voice type
+// via openAIToVolcengineVoiceMap. Names without a mapping are returned
+// unchanged, so callers can pass a Volcengine voice type directly.
+func mapVoiceType(openAIVoice string) string {
+	mapped, known := openAIToVolcengineVoiceMap[openAIVoice]
+	if !known {
+		return openAIVoice
+	}
+	return mapped
+}
+
+// mapEncoding resolves a response_format value to a Volcengine encoding via
+// responseFormatToEncodingMap. Unknown or empty formats default to "mp3".
+func mapEncoding(responseFormat string) string {
+	mapped, known := responseFormatToEncodingMap[responseFormat]
+	if !known {
+		return "mp3"
+	}
+	return mapped
+}
+
+// getContentTypeByEncoding returns the HTTP Content-Type for a Volcengine
+// audio encoding name. The match is exact and case-sensitive; any encoding
+// not listed yields "application/octet-stream".
+func getContentTypeByEncoding(encoding string) string {
+	switch encoding {
+	case "mp3":
+		return "audio/mpeg"
+	case "ogg_opus":
+		return "audio/ogg"
+	case "wav":
+		return "audio/wav"
+	case "pcm":
+		return "audio/pcm"
+	default:
+		return "application/octet-stream"
+	}
+}
+
+// handleTTSResponse converts a Volcengine TTS JSON reply into a raw audio
+// response for the client.
+//
+// It reads and decodes resp.Body as a VolcengineTTSResponse, requires the
+// upstream code to be 3000, base64-decodes the audio payload and writes it to
+// c with the Content-Type that matches encoding.
+//
+// On success it returns a *dto.Usage that bills only info.PromptTokens (no
+// completion tokens). On failure nothing is written to c and a NewAPIError
+// describing the failing step is returned; read, parse and decode failures
+// map to HTTP 500, a non-3000 upstream code maps to HTTP 400.
+func handleTTSResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo, encoding string) (usage any, err *types.NewAPIError) {
+	// Register the close before reading so the body is released even when the
+	// read itself fails.
+	defer resp.Body.Close()
+
+	body, readErr := io.ReadAll(resp.Body)
+	if readErr != nil {
+		return nil, types.NewErrorWithStatusCode(
+			errors.New("failed to read volcengine response: "+readErr.Error()),
+			types.ErrorCodeReadResponseBodyFailed,
+			http.StatusInternalServerError,
+		)
+	}
+
+	var volcResp VolcengineTTSResponse
+	if unmarshalErr := json.Unmarshal(body, &volcResp); unmarshalErr != nil {
+		return nil, types.NewErrorWithStatusCode(
+			errors.New("failed to parse volcengine response: "+unmarshalErr.Error()),
+			types.ErrorCodeBadResponseBody,
+			http.StatusInternalServerError,
+		)
+	}
+
+	// Any code other than 3000 is treated as an upstream failure.
+	if volcResp.Code != 3000 {
+		message := volcResp.Message
+		if message == "" {
+			// Never surface an empty error string to the client.
+			message = "volcengine tts request failed"
+		}
+		return nil, types.NewErrorWithStatusCode(
+			errors.New(message),
+			types.ErrorCodeBadResponse,
+			http.StatusBadRequest,
+		)
+	}
+
+	audioData, decodeErr := base64.StdEncoding.DecodeString(volcResp.Data)
+	if decodeErr != nil {
+		return nil, types.NewErrorWithStatusCode(
+			errors.New("failed to decode audio data: "+decodeErr.Error()),
+			types.ErrorCodeBadResponseBody,
+			http.StatusInternalServerError,
+		)
+	}
+
+	// c.Data sets the Content-Type header itself, so no separate c.Header
+	// call is needed.
+	c.Data(http.StatusOK, getContentTypeByEncoding(encoding), audioData)
+
+	usage = &dto.Usage{
+		PromptTokens:     info.PromptTokens,
+		CompletionTokens: 0,
+		TotalTokens:      info.PromptTokens,
+	}
+
+	return usage, nil
+}
+
+// generateRequestID returns a freshly generated UUID in its canonical string
+// form.
+func generateRequestID() string {
+	id := uuid.New()
+	return id.String()
+}
--- a/relay/common/relay_info.go
+++ b/relay/common/relay_info.go
@@ -263,6 +263,7 @@ var streamSupportedChannels = map[int]bool{
 	constant.ChannelTypeDeepSeek:   true,
 	constant.ChannelTypeBaiduV2:    true,
 	constant.ChannelTypeZhipu_v4:   true,
+	constant.ChannelTypeAli:        true,
 }

 func GenRelayInfoWs(c *gin.Context, ws *websocket.Conn) *RelayInfo {
--- a/relay/helper/valid_request.go
+++ b/relay/helper/valid_request.go
@@ -22,8 +22,10 @@ func GetAndValidateRequest(c *gin.Context, format types.RelayFormat) (request dt
 	case types.RelayFormatOpenAI:
 		request, err = GetAndValidateTextRequest(c, relayMode)
 	case types.RelayFormatGemini:
-		if strings.Contains(c.Request.URL.Path, ":embedContent") || strings.Contains(c.Request.URL.Path, ":batchEmbedContents") {
+		if strings.Contains(c.Request.URL.Path, ":embedContent") {
 			request, err = GetAndValidateGeminiEmbeddingRequest(c)
+		} else if strings.Contains(c.Request.URL.Path, ":batchEmbedContents") {
+			request, err = GetAndValidateGeminiBatchEmbeddingRequest(c)
 		} else {
 			request, err = GetAndValidateGeminiRequest(c)
 		}
@@ -319,3 +321,12 @@ func GetAndValidateGeminiEmbeddingRequest(c *gin.Context) (*dto.GeminiEmbeddingR
 	}
 	return request, nil
 }
+
+// GetAndValidateGeminiBatchEmbeddingRequest decodes the request body into a
+// dto.GeminiBatchEmbeddingRequest using common.UnmarshalBodyReusable.
+//
+// It returns the decoding error unchanged; no further field validation is
+// performed here.
+func GetAndValidateGeminiBatchEmbeddingRequest(c *gin.Context) (*dto.GeminiBatchEmbeddingRequest, error) {
+	batchReq := new(dto.GeminiBatchEmbeddingRequest)
+	if decodeErr := common.UnmarshalBodyReusable(c, batchReq); decodeErr != nil {
+		return nil, decodeErr
+	}
+	return batchReq, nil
+}
--- a/relay/relay_adaptor.go
+++ b/relay/relay_adaptor.go
@@ -18,6 +18,7 @@ import (
 	"github.com/QuantumNous/new-api/relay/channel/gemini"
 	"github.com/QuantumNous/new-api/relay/channel/jimeng"
 	"github.com/QuantumNous/new-api/relay/channel/jina"
+	"github.com/QuantumNous/new-api/relay/channel/minimax"
 	"github.com/QuantumNous/new-api/relay/channel/mistral"
 	"github.com/QuantumNous/new-api/relay/channel/mokaai"
 	"github.com/QuantumNous/new-api/relay/channel/moonshot"
@@ -108,6 +109,8 @@ func GetAdaptor(apiType int) channel.Adaptor {
 		return &moonshot.Adaptor{} // Moonshot uses Claude API
 	case constant.APITypeSubmodel:
 		return &submodel.Adaptor{}
+	case constant.APITypeMiniMax:
+		return &minimax.Adaptor{}
 	}
 	return nil
 }
--- a/web/src/components/table/channels/modals/EditChannelModal.jsx
+++ b/web/src/components/table/channels/modals/EditChannelModal.jsx
@@ -107,6 +107,8 @@ function type2secretPrompt(type) {
      return '按照如下格式输入：AppId|SecretId|SecretKey';
    case 33:
      return '按照如下格式输入：Ak|Sk|Region';
+    case 45:
+        return '请输入渠道对应的鉴权密钥, 豆包语音输入：AppId|AccessToken';
    case 50:
      return '按照如下格式输入: AccessKey|SecretKey, 如果上游是New API，则直接输ApiKey';
    case 51:
Author	SHA1	Message	Date
creamlike1024	ce1fde8500	chore: Ignore .zed and debug binaries in .gitignore	2025-10-21 16:40:22 +08:00
Seefs	4661399639	Merge pull request #2070 from QuantumNous/ali-channel-support-stream-options Ali channel support stream options	2025-10-20 23:24:33 +08:00
IcedTangerine	78d8d458ca	Merge pull request #2081 from feitianbubu/pr/add-miniMax-tts 增加MiniMax语音合成TTS支持	2025-10-20 17:48:35 +08:00
IcedTangerine	e20a287c4b	chore: Comment out debug log in adaptor.go Comment out the debug log for MiniMax TTS Request.	2025-10-20 17:48:08 +08:00
feitianbubu	c7ab0f4f3d	feat: opt minimax tts req struct	2025-10-20 16:26:50 +08:00
feitianbubu	0d1057830b	feat: add minimax api adaptor	2025-10-20 16:26:50 +08:00
feitianbubu	dd1cac3f2e	feat: add minimax tts	2025-10-20 16:26:50 +08:00
creamlike1024	cdbc7a9510	refactor: remove unused functions and imports from ali text handler	2025-10-18 17:00:28 +08:00
creamlike1024	c693bfee5e	feat: add support for Ali channel in streamSupportedChannels	2025-10-18 17:00:08 +08:00
IcedTangerine	7156bf2382	Merge pull request #2068 from feitianbubu/pr/doubao-speech-emotion 豆包语音2.0音色支持情感,情绪,音量	2025-10-18 14:30:17 +08:00
Seefs	c216527f23	Merge pull request #2065 from somnifex/main fix: handle JSON parsing for thinking content in ollama stream	2025-10-18 13:02:56 +08:00
Seefs	b1de0f49df	Merge pull request #2061 from QuantumNous/fix-gemini-batch-embedding-token-count fix: gemini batch embedding token not counted	2025-10-18 12:54:44 +08:00
feitianbubu	525ca09f2c	fix: doubao audio speedRadio to speed	2025-10-18 01:48:36 +08:00
feitianbubu	92fc973bc3	feat: AudioRequest add metadata support custom params	2025-10-18 01:48:36 +08:00
feitianbubu	22ff8e2cbe	feat: sync latest openai speech struct https://platform.openai.com/docs/api-reference/audio/createSpeech	2025-10-18 01:48:36 +08:00
IcedTangerine	1ec664a348	Merge pull request #2067 from feitianbubu/pr/add-doubao-audio 新增支持豆包语音合成2.0功能	2025-10-18 00:14:11 +08:00
IcedTangerine	6a24c37c0e	Fix error message for invalid API key format	2025-10-18 00:13:28 +08:00
feitianbubu	8965fc49c9	feat: add doubao audio token input prompt	2025-10-17 22:06:46 +08:00
feitianbubu	735386c0b9	feat: add doubao tts usage token	2025-10-17 22:06:45 +08:00
feitianbubu	58c4da0ddf	feat: switch to official TTS only when baseUrl is Volcano's official URL	2025-10-17 22:06:45 +08:00
feitianbubu	fe68488b1c	feat: add doubao audio tts	2025-10-17 22:06:45 +08:00
somnifex	25af6e6f77	fix: handle JSON parsing for thinking content in ollama stream and chat handlers	2025-10-17 18:35:08 +08:00
creamlike1024	e2d3b46a3a	fix: gemini batch embedding token not counted	2025-10-17 15:51:04 +08:00