Merge pull request #2081 from feitianbubu/pr/add-miniMax-tts

增加MiniMax语音合成TTS支持
This commit is contained in:
IcedTangerine
2025-10-20 17:48:35 +08:00
committed by GitHub
8 changed files with 360 additions and 8 deletions

View File

@@ -69,6 +69,8 @@ func ChannelType2APIType(channelType int) (int, bool) {
apiType = constant.APITypeMoonshot
case constant.ChannelTypeSubmodel:
apiType = constant.APITypeSubmodel
case constant.ChannelTypeMiniMax:
apiType = constant.APITypeMiniMax
}
if apiType == -1 {
return constant.APITypeOpenAI, false

View File

@@ -33,5 +33,6 @@ const (
APITypeJimeng
APITypeMoonshot
APITypeSubmodel
APITypeMiniMax
APITypeDummy // this one is only for count, do not add any channel after this
)

View File

@@ -0,0 +1,132 @@
package minimax
import (
"bytes"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"github.com/QuantumNous/new-api/dto"
"github.com/QuantumNous/new-api/relay/channel"
"github.com/QuantumNous/new-api/relay/channel/openai"
relaycommon "github.com/QuantumNous/new-api/relay/common"
"github.com/QuantumNous/new-api/relay/constant"
"github.com/QuantumNous/new-api/types"
"github.com/gin-gonic/gin"
)
// Adaptor is the channel adaptor for MiniMax. It holds no state; GetAdaptor
// returns a fresh &minimax.Adaptor{} for constant.APITypeMiniMax.
type Adaptor struct {
}
// ConvertGeminiRequest is not supported by the MiniMax adaptor. It ignores
// its arguments and always returns a "not implemented" error.
func (a *Adaptor) ConvertGeminiRequest(*gin.Context, *relaycommon.RelayInfo, *dto.GeminiChatRequest) (any, error) {
return nil, errors.New("not implemented")
}
// ConvertClaudeRequest is not supported by the MiniMax adaptor. It ignores
// its arguments and always returns a "not implemented" error.
func (a *Adaptor) ConvertClaudeRequest(c *gin.Context, info *relaycommon.RelayInfo, req *dto.ClaudeRequest) (any, error) {
return nil, errors.New("not implemented")
}
// ConvertAudioRequest translates an OpenAI-style speech request into the JSON
// body of a MiniMax TTS request and returns it as a reader.
//
// Only constant.RelayModeAudioSpeech is accepted; any other relay mode yields
// an error. The request's voice, speed and response format seed the payload,
// after which request.Metadata (when non-empty) is unmarshalled on top of it,
// so vendor-specific fields can be supplied or overridden by the caller.
// The gin context key "response_format" is set to "hex" when the requested
// response format is "hex" and to "url" in every other case.
//
// NOTE(review): the same ResponseFormat value is written to both
// audio_setting.format and output_format, and the hex/url normalisation is
// applied only to the context value, after the payload has been marshalled.
// Confirm against the MiniMax API that this is intended.
func (a *Adaptor) ConvertAudioRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.AudioRequest) (io.Reader, error) {
	if info.RelayMode != constant.RelayModeAudioSpeech {
		return nil, errors.New("unsupported audio relay mode")
	}

	format := request.ResponseFormat

	var payload MiniMaxTTSRequest
	payload.Model = info.OriginModelName
	payload.Text = request.Input
	payload.VoiceSetting.VoiceID = request.Voice
	payload.VoiceSetting.Speed = request.Speed
	payload.AudioSetting = &AudioSetting{Format: format}
	payload.OutputFormat = format

	// Vendor-specific metadata is layered over the defaults built above.
	if len(request.Metadata) != 0 {
		err := json.Unmarshal(request.Metadata, &payload)
		if err != nil {
			return nil, fmt.Errorf("error unmarshalling metadata to minimax request: %w", err)
		}
	}

	encoded, err := json.Marshal(payload)
	if err != nil {
		return nil, fmt.Errorf("error marshalling minimax request: %w", err)
	}

	// Store the normalised format on the context: "hex" stays, anything else
	// becomes "url".
	transport := "url"
	if format == "hex" {
		transport = "hex"
	}
	c.Set("response_format", transport)

	return bytes.NewReader(encoded), nil
}
// ConvertImageRequest returns the image request unchanged; no MiniMax-specific
// conversion is applied.
func (a *Adaptor) ConvertImageRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.ImageRequest) (any, error) {
return request, nil
}
// Init is a no-op: the MiniMax adaptor has no per-request state to set up.
func (a *Adaptor) Init(info *relaycommon.RelayInfo) {
}
// GetRequestURL returns the upstream URL for this request by delegating to
// the package-level GetRequestURL.
func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
return GetRequestURL(info)
}
// SetupRequestHeader prepares the outgoing request headers. It first calls
// channel.SetupApiRequestHeader and then sets the Authorization header to
// "Bearer " followed by info.ApiKey. It always returns nil.
func (a *Adaptor) SetupRequestHeader(c *gin.Context, req *http.Header, info *relaycommon.RelayInfo) error {
channel.SetupApiRequestHeader(info, c, req)
req.Set("Authorization", "Bearer "+info.ApiKey)
return nil
}
// ConvertOpenAIRequest passes the OpenAI-style request through unchanged.
// A nil request is rejected with an error.
func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayInfo, request *dto.GeneralOpenAIRequest) (any, error) {
if request == nil {
return nil, errors.New("request is nil")
}
return request, nil
}
// ConvertRerankRequest is not supported by the MiniMax adaptor.
//
// It returns a "not implemented" error, matching the other unsupported
// converters on this adaptor, instead of reporting success with a nil
// payload that a caller could mistake for a valid (empty) request body.
func (a *Adaptor) ConvertRerankRequest(c *gin.Context, relayMode int, request dto.RerankRequest) (any, error) {
	return nil, errors.New("not implemented")
}
// ConvertEmbeddingRequest returns the embedding request unchanged; no
// MiniMax-specific conversion is applied.
func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.EmbeddingRequest) (any, error) {
return request, nil
}
// ConvertOpenAIResponsesRequest is not supported by the MiniMax adaptor. It
// always returns a "not implemented" error.
func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
return nil, errors.New("not implemented")
}
// DoRequest sends the prepared request body upstream by delegating to
// channel.DoApiRequest with this adaptor, and returns its result as-is.
func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error) {
return channel.DoApiRequest(a, c, info, requestBody)
}
// DoResponse dispatches the upstream response to the matching handler:
// speech-synthesis responses go to handleTTSResponse, and every other relay
// mode is delegated to the OpenAI adaptor's DoResponse.
func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (usage any, err *types.NewAPIError) {
	switch info.RelayMode {
	case constant.RelayModeAudioSpeech:
		return handleTTSResponse(c, resp, info)
	default:
		fallback := openai.Adaptor{}
		return fallback.DoResponse(c, resp, info)
	}
}
// GetModelList returns the package-level ModelList.
func (a *Adaptor) GetModelList() []string {
return ModelList
}
// GetChannelName returns the package-level ChannelName.
func (a *Adaptor) GetChannelName() string {
return ChannelName
}

View File

@@ -8,6 +8,12 @@ var ModelList = []string{
"abab6-chat",
"abab5.5-chat",
"abab5.5s-chat",
"speech-2.5-hd-preview",
"speech-2.5-turbo-preview",
"speech-02-hd",
"speech-02-turbo",
"speech-01-hd",
"speech-01-turbo",
}
// ChannelName is the name reported for the MiniMax channel; it is what
// Adaptor.GetChannelName returns.
var ChannelName = "minimax"

View File

@@ -3,9 +3,23 @@ package minimax
import (
"fmt"
channelconstant "github.com/QuantumNous/new-api/constant"
relaycommon "github.com/QuantumNous/new-api/relay/common"
"github.com/QuantumNous/new-api/relay/constant"
)
// GetRequestURL builds the MiniMax endpoint URL for the relay mode in info.
//
// info.ChannelBaseUrl is used as the base; when it is empty the entry for
// ChannelTypeMiniMax in channelconstant.ChannelBaseURLs is used instead.
// Chat completions map to /v1/text/chatcompletion_v2 and speech synthesis to
// /v1/t2a_v2. Any other relay mode returns an error naming the mode.
//
// The previous unconditional return of the chat-completion URL at the top of
// the function has been removed: it made the base-URL fallback and the
// relay-mode switch unreachable, so speech requests were sent to the chat
// endpoint.
func GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
	baseUrl := info.ChannelBaseUrl
	if baseUrl == "" {
		baseUrl = channelconstant.ChannelBaseURLs[channelconstant.ChannelTypeMiniMax]
	}
	switch info.RelayMode {
	case constant.RelayModeChatCompletions:
		return fmt.Sprintf("%s/v1/text/chatcompletion_v2", baseUrl), nil
	case constant.RelayModeAudioSpeech:
		return fmt.Sprintf("%s/v1/t2a_v2", baseUrl), nil
	default:
		return "", fmt.Errorf("unsupported relay mode: %d", info.RelayMode)
	}
}

View File

@@ -0,0 +1,194 @@
package minimax
import (
"encoding/hex"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"strings"
"github.com/QuantumNous/new-api/dto"
relaycommon "github.com/QuantumNous/new-api/relay/common"
"github.com/QuantumNous/new-api/types"
"github.com/gin-gonic/gin"
)
// MiniMaxTTSRequest is the JSON body that ConvertAudioRequest marshals for a
// MiniMax speech request. Fields tagged omitempty are left out of the JSON
// when they hold their zero value.
type MiniMaxTTSRequest struct {
Model string `json:"model"`
Text string `json:"text"`
Stream bool `json:"stream,omitempty"`
StreamOptions *StreamOptions `json:"stream_options,omitempty"`
VoiceSetting VoiceSetting `json:"voice_setting"`
PronunciationDict *PronunciationDict `json:"pronunciation_dict,omitempty"`
AudioSetting *AudioSetting `json:"audio_setting,omitempty"`
TimbreWeights []TimbreWeight `json:"timbre_weights,omitempty"`
LanguageBoost string `json:"language_boost,omitempty"`
VoiceModify *VoiceModify `json:"voice_modify,omitempty"`
SubtitleEnable bool `json:"subtitle_enable,omitempty"`
OutputFormat string `json:"output_format,omitempty"`
AigcWatermark bool `json:"aigc_watermark,omitempty"`
}
// StreamOptions is the "stream_options" object of MiniMaxTTSRequest.
// NOTE(review): the field's effect is defined by the MiniMax API, not by
// code in this file — confirm against the upstream documentation.
type StreamOptions struct {
ExcludeAggregatedAudio bool `json:"exclude_aggregated_audio,omitempty"`
}
// VoiceSetting is the "voice_setting" object of MiniMaxTTSRequest.
// ConvertAudioRequest fills VoiceID and Speed from the incoming audio
// request; the remaining fields are only populated when supplied through the
// request metadata.
type VoiceSetting struct {
VoiceID string `json:"voice_id"`
Speed float64 `json:"speed,omitempty"`
Vol float64 `json:"vol,omitempty"`
Pitch int `json:"pitch,omitempty"`
Emotion string `json:"emotion,omitempty"`
TextNormalization bool `json:"text_normalization,omitempty"`
LatexRead bool `json:"latex_read,omitempty"`
}
// PronunciationDict is the "pronunciation_dict" object of MiniMaxTTSRequest.
// NOTE(review): the expected format of the Tone entries is defined by the
// MiniMax API — confirm against the upstream documentation.
type PronunciationDict struct {
Tone []string `json:"tone,omitempty"`
}
// AudioSetting is the "audio_setting" object of MiniMaxTTSRequest.
// ConvertAudioRequest sets Format from the incoming request's response
// format; the other fields are only populated when supplied through the
// request metadata.
type AudioSetting struct {
SampleRate int `json:"sample_rate,omitempty"`
Bitrate int `json:"bitrate,omitempty"`
Format string `json:"format,omitempty"`
Channel int `json:"channel,omitempty"`
ForceCbr bool `json:"force_cbr,omitempty"`
}
// TimbreWeight is one element of MiniMaxTTSRequest's "timbre_weights" array,
// pairing a voice ID with a weight. Both fields are always serialized.
type TimbreWeight struct {
VoiceID string `json:"voice_id"`
Weight int `json:"weight"`
}
// VoiceModify is the "voice_modify" object of MiniMaxTTSRequest.
// NOTE(review): valid ranges and values for these fields are defined by the
// MiniMax API — confirm against the upstream documentation.
type VoiceModify struct {
Pitch int `json:"pitch,omitempty"`
Intensity int `json:"intensity,omitempty"`
Timbre int `json:"timbre,omitempty"`
SoundEffects string `json:"sound_effects,omitempty"`
}
// MiniMaxTTSResponse is the JSON document that handleTTSResponse decodes
// from the upstream TTS response body.
type MiniMaxTTSResponse struct {
Data MiniMaxTTSData `json:"data"`
ExtraInfo MiniMaxExtraInfo `json:"extra_info"`
TraceID string `json:"trace_id"`
BaseResp MiniMaxBaseResp `json:"base_resp"`
}
// MiniMaxTTSData is the "data" object of MiniMaxTTSResponse.
// handleTTSResponse treats Audio as a URL when it starts with "http" and as
// hex-encoded audio bytes otherwise. Status is decoded but not read by the
// code in this file.
type MiniMaxTTSData struct {
Audio string `json:"audio"`
Status int `json:"status"`
}
// MiniMaxExtraInfo is the "extra_info" object of MiniMaxTTSResponse.
// handleTTSResponse reports UsageCharacters as the TotalTokens of the usage
// it returns.
type MiniMaxExtraInfo struct {
UsageCharacters int64 `json:"usage_characters"`
}
// MiniMaxBaseResp is the "base_resp" object of MiniMaxTTSResponse.
// handleTTSResponse treats any non-zero StatusCode as a failure and includes
// StatusMsg in the resulting error.
type MiniMaxBaseResp struct {
StatusCode int64 `json:"status_code"`
StatusMsg string `json:"status_msg"`
}
// getContentTypeByFormat maps an audio format name to its MIME type.
// The match is exact (case-sensitive); any format that is not recognised,
// including the empty string, falls back to "audio/mpeg".
func getContentTypeByFormat(format string) string {
	switch format {
	case "mp3":
		return "audio/mpeg"
	case "wav":
		return "audio/wav"
	case "flac":
		return "audio/flac"
	case "aac":
		return "audio/aac"
	case "pcm":
		return "audio/pcm"
	default:
		// Unknown formats are served as mp3.
		return "audio/mpeg"
	}
}
// handleTTSResponse relays a MiniMax TTS response to the client.
//
// The upstream body is read in full and decoded as a MiniMaxTTSResponse.
// A non-zero base_resp.status_code or an empty data.audio is reported as an
// error. When data.audio starts with "http" the client is redirected to it
// with 302 Found; otherwise it is decoded as hex and written with status 200
// and content type "audio/mpeg".
//
// On success the returned usage is a *dto.Usage whose PromptTokens comes
// from info.PromptTokens and whose TotalTokens is the upstream
// extra_info.usage_characters value.
//
// The response body is always closed, including when reading it fails.
func handleTTSResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (usage any, err *types.NewAPIError) {
	// Register the close before reading so the body is released even when
	// the read itself fails.
	defer resp.Body.Close()

	body, readErr := io.ReadAll(resp.Body)
	if readErr != nil {
		return nil, types.NewErrorWithStatusCode(
			fmt.Errorf("failed to read minimax response: %w", readErr),
			types.ErrorCodeReadResponseBodyFailed,
			http.StatusInternalServerError,
		)
	}

	// Parse response
	var minimaxResp MiniMaxTTSResponse
	if unmarshalErr := json.Unmarshal(body, &minimaxResp); unmarshalErr != nil {
		return nil, types.NewErrorWithStatusCode(
			fmt.Errorf("failed to unmarshal minimax TTS response: %w", unmarshalErr),
			types.ErrorCodeBadResponseBody,
			http.StatusInternalServerError,
		)
	}

	// Check base_resp status code
	if minimaxResp.BaseResp.StatusCode != 0 {
		return nil, types.NewErrorWithStatusCode(
			fmt.Errorf("minimax TTS error: %d - %s", minimaxResp.BaseResp.StatusCode, minimaxResp.BaseResp.StatusMsg),
			types.ErrorCodeBadResponse,
			http.StatusBadRequest,
		)
	}

	// Check if we have audio data
	if minimaxResp.Data.Audio == "" {
		return nil, types.NewErrorWithStatusCode(
			fmt.Errorf("no audio data in minimax TTS response"),
			types.ErrorCodeBadResponse,
			http.StatusBadRequest,
		)
	}

	if strings.HasPrefix(minimaxResp.Data.Audio, "http") {
		// The audio field holds a download URL: send the client there.
		c.Redirect(http.StatusFound, minimaxResp.Data.Audio)
	} else {
		// Handle hex-encoded audio data
		audioData, decodeErr := hex.DecodeString(minimaxResp.Data.Audio)
		if decodeErr != nil {
			return nil, types.NewErrorWithStatusCode(
				fmt.Errorf("failed to decode hex audio data: %w", decodeErr),
				types.ErrorCodeBadResponse,
				http.StatusInternalServerError,
			)
		}

		// Content type is fixed to mp3; the requested audio format is not
		// available in this function.
		contentType := "audio/mpeg"
		c.Data(http.StatusOK, contentType, audioData)
	}

	usage = &dto.Usage{
		PromptTokens:     info.PromptTokens,
		CompletionTokens: 0,
		TotalTokens:      int(minimaxResp.ExtraInfo.UsageCharacters),
	}

	return usage, nil
}
// handleChatCompletionResponse forwards an upstream chat-completion response
// to the client without interpreting it.
//
// The upstream body is read in full, the upstream headers are copied onto
// the client response, and the body is written with the upstream status code
// and content type "application/json". It returns a nil usage and a nil
// error on success, and an error if the body cannot be read.
//
// The response body is always closed, including when reading it fails.
func handleChatCompletionResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (usage any, err *types.NewAPIError) {
	// Register the close before reading so the body is released even when
	// the read itself fails.
	defer resp.Body.Close()

	body, readErr := io.ReadAll(resp.Body)
	if readErr != nil {
		return nil, types.NewErrorWithStatusCode(
			errors.New("failed to read minimax response"),
			types.ErrorCodeReadResponseBodyFailed,
			http.StatusInternalServerError,
		)
	}

	// Set response headers
	// NOTE(review): c.Header is called once per value; if it replaces rather
	// than appends, only the last value of a multi-valued header survives —
	// confirm against gin's Context.Header behaviour.
	for key, values := range resp.Header {
		for _, value := range values {
			c.Header(key, value)
		}
	}

	c.Data(resp.StatusCode, "application/json", body)
	return nil, nil
}

View File

@@ -18,7 +18,7 @@ import (
"github.com/QuantumNous/new-api/relay/channel"
"github.com/QuantumNous/new-api/relay/channel/ai360"
"github.com/QuantumNous/new-api/relay/channel/lingyiwanwu"
"github.com/QuantumNous/new-api/relay/channel/minimax"
//"github.com/QuantumNous/new-api/relay/channel/minimax"
"github.com/QuantumNous/new-api/relay/channel/openrouter"
"github.com/QuantumNous/new-api/relay/channel/xinference"
relaycommon "github.com/QuantumNous/new-api/relay/common"
@@ -161,8 +161,8 @@ func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
requestURL = fmt.Sprintf("/openai/realtime?deployment=%s&api-version=%s", model_, apiVersion)
}
return relaycommon.GetFullRequestURL(info.ChannelBaseUrl, requestURL, info.ChannelType), nil
case constant.ChannelTypeMiniMax:
return minimax.GetRequestURL(info)
//case constant.ChannelTypeMiniMax:
// return minimax.GetRequestURL(info)
case constant.ChannelTypeCustom:
url := info.ChannelBaseUrl
url = strings.Replace(url, "{model}", info.UpstreamModelName, -1)
@@ -599,8 +599,8 @@ func (a *Adaptor) GetModelList() []string {
return ai360.ModelList
case constant.ChannelTypeLingYiWanWu:
return lingyiwanwu.ModelList
case constant.ChannelTypeMiniMax:
return minimax.ModelList
//case constant.ChannelTypeMiniMax:
// return minimax.ModelList
case constant.ChannelTypeXinference:
return xinference.ModelList
case constant.ChannelTypeOpenRouter:
@@ -616,8 +616,8 @@ func (a *Adaptor) GetChannelName() string {
return ai360.ChannelName
case constant.ChannelTypeLingYiWanWu:
return lingyiwanwu.ChannelName
case constant.ChannelTypeMiniMax:
return minimax.ChannelName
//case constant.ChannelTypeMiniMax:
// return minimax.ChannelName
case constant.ChannelTypeXinference:
return xinference.ChannelName
case constant.ChannelTypeOpenRouter:

View File

@@ -18,6 +18,7 @@ import (
"github.com/QuantumNous/new-api/relay/channel/gemini"
"github.com/QuantumNous/new-api/relay/channel/jimeng"
"github.com/QuantumNous/new-api/relay/channel/jina"
"github.com/QuantumNous/new-api/relay/channel/minimax"
"github.com/QuantumNous/new-api/relay/channel/mistral"
"github.com/QuantumNous/new-api/relay/channel/mokaai"
"github.com/QuantumNous/new-api/relay/channel/moonshot"
@@ -108,6 +109,8 @@ func GetAdaptor(apiType int) channel.Adaptor {
return &moonshot.Adaptor{} // Moonshot uses Claude API
case constant.APITypeSubmodel:
return &submodel.Adaptor{}
case constant.APITypeMiniMax:
return &minimax.Adaptor{}
}
return nil
}