diff --git a/common/api_type.go b/common/api_type.go
index 6d7a70529..8dbf4a900 100644
--- a/common/api_type.go
+++ b/common/api_type.go
@@ -69,6 +69,8 @@ func ChannelType2APIType(channelType int) (int, bool) {
 		apiType = constant.APITypeMoonshot
 	case constant.ChannelTypeSubmodel:
 		apiType = constant.APITypeSubmodel
+	case constant.ChannelTypeMiniMax:
+		apiType = constant.APITypeMiniMax
 	}
 	if apiType == -1 {
 		return constant.APITypeOpenAI, false
diff --git a/constant/api_type.go b/constant/api_type.go
index 130ae9455..156ccc83c 100644
--- a/constant/api_type.go
+++ b/constant/api_type.go
@@ -33,5 +33,6 @@ const (
 	APITypeJimeng
 	APITypeMoonshot
 	APITypeSubmodel
+	APITypeMiniMax
 	APITypeDummy // this one is only for count, do not add any channel after this
 )
diff --git a/relay/channel/minimax/adaptor.go b/relay/channel/minimax/adaptor.go
new file mode 100644
index 000000000..8235abc05
--- /dev/null
+++ b/relay/channel/minimax/adaptor.go
@@ -0,0 +1,163 @@
+package minimax
+
+import (
+	"bytes"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io"
+	"net/http"
+
+	"github.com/QuantumNous/new-api/dto"
+	"github.com/QuantumNous/new-api/relay/channel"
+	"github.com/QuantumNous/new-api/relay/channel/openai"
+	relaycommon "github.com/QuantumNous/new-api/relay/common"
+	"github.com/QuantumNous/new-api/relay/constant"
+	"github.com/QuantumNous/new-api/types"
+
+	"github.com/gin-gonic/gin"
+)
+
+// Adaptor relays chat-completion and text-to-speech requests to MiniMax.
+// It carries no state, so the zero value is ready to use.
+type Adaptor struct {
+}
+
+// ConvertGeminiRequest is not supported by the MiniMax channel.
+func (a *Adaptor) ConvertGeminiRequest(*gin.Context, *relaycommon.RelayInfo, *dto.GeminiChatRequest) (any, error) {
+	return nil, errors.New("not implemented")
+}
+
+// ConvertClaudeRequest is not supported by the MiniMax channel.
+func (a *Adaptor) ConvertClaudeRequest(c *gin.Context, info *relaycommon.RelayInfo, req *dto.ClaudeRequest) (any, error) {
+	return nil, errors.New("not implemented")
+}
+
+// ConvertAudioRequest builds the JSON body for a MiniMax text-to-speech call
+// from an OpenAI-style speech request. Only RelayModeAudioSpeech is accepted.
+//
+// Fields present in request.Metadata are decoded on top of the generated
+// request, so vendor-specific options (and overrides of the mapped fields) can
+// be supplied by the caller.
+func (a *Adaptor) ConvertAudioRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.AudioRequest) (io.Reader, error) {
+	if info.RelayMode != constant.RelayModeAudioSpeech {
+		return nil, errors.New("unsupported audio relay mode")
+	}
+
+	outputFormat := request.ResponseFormat
+
+	// NOTE(review): response_format is forwarded both as the audio codec and as
+	// output_format, while only the context value below is normalized to
+	// "hex"/"url" -- confirm this matches what the upstream API expects.
+	minimaxRequest := MiniMaxTTSRequest{
+		Model: info.OriginModelName,
+		Text:  request.Input,
+		VoiceSetting: VoiceSetting{
+			VoiceID: request.Voice,
+			Speed:   request.Speed,
+		},
+		AudioSetting: &AudioSetting{
+			Format: outputFormat,
+		},
+		OutputFormat: outputFormat,
+	}
+
+	// Merge vendor-specific metadata from the extension field into the request.
+	if len(request.Metadata) > 0 {
+		if err := json.Unmarshal(request.Metadata, &minimaxRequest); err != nil {
+			return nil, fmt.Errorf("error unmarshalling metadata to minimax request: %w", err)
+		}
+	}
+
+	jsonData, err := json.Marshal(minimaxRequest)
+	if err != nil {
+		return nil, fmt.Errorf("error marshalling minimax request: %w", err)
+	}
+
+	// Remember the codec actually requested upstream so the response handler
+	// can label hex-decoded audio correctly. Metadata may have replaced or
+	// nulled audio_setting, hence the nil check.
+	if minimaxRequest.AudioSetting != nil {
+		c.Set(ctxKeyAudioFormat, minimaxRequest.AudioSetting.Format)
+	}
+
+	if outputFormat != "hex" {
+		outputFormat = "url"
+	}
+	c.Set("response_format", outputFormat)
+
+	return bytes.NewReader(jsonData), nil
+}
+
+// ConvertImageRequest forwards the image request unchanged.
+func (a *Adaptor) ConvertImageRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.ImageRequest) (any, error) {
+	return request, nil
+}
+
+// Init is a no-op; the adaptor keeps no per-request state.
+func (a *Adaptor) Init(info *relaycommon.RelayInfo) {
+}
+
+// GetRequestURL returns the upstream MiniMax endpoint for the relay mode in
+// info; see the package-level GetRequestURL.
+func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
+	return GetRequestURL(info)
+}
+
+// SetupRequestHeader applies the common relay headers and authenticates with
+// the channel API key as a bearer token.
+func (a *Adaptor) SetupRequestHeader(c *gin.Context, req *http.Header, info *relaycommon.RelayInfo) error {
+	channel.SetupApiRequestHeader(info, c, req)
+	req.Set("Authorization", "Bearer "+info.ApiKey)
+	return nil
+}
+
+// ConvertOpenAIRequest forwards the OpenAI-style request unchanged and
+// rejects a nil request.
+func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayInfo, request *dto.GeneralOpenAIRequest) (any, error) {
+	if request == nil {
+		return nil, errors.New("request is nil")
+	}
+	return request, nil
+}
+
+// ConvertRerankRequest is not supported by the MiniMax channel.
+func (a *Adaptor) ConvertRerankRequest(c *gin.Context, relayMode int, request dto.RerankRequest) (any, error) {
+	return nil, errors.New("not implemented")
+}
+
+// ConvertEmbeddingRequest forwards the embedding request unchanged.
+func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.EmbeddingRequest) (any, error) {
+	return request, nil
+}
+
+// ConvertOpenAIResponsesRequest is not supported by the MiniMax channel.
+func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
+	return nil, errors.New("not implemented")
+}
+
+// DoRequest sends requestBody upstream through the shared API request helper.
+func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error) {
+	return channel.DoApiRequest(a, c, info, requestBody)
+}
+
+// DoResponse handles speech responses with the MiniMax TTS handler and
+// delegates every other relay mode to the OpenAI adaptor.
+func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (usage any, err *types.NewAPIError) {
+	if info.RelayMode == constant.RelayModeAudioSpeech {
+		return handleTTSResponse(c, resp, info)
+	}
+
+	adaptor := openai.Adaptor{}
+	return adaptor.DoResponse(c, resp, info)
+}
+
+// GetModelList returns the models offered by the MiniMax channel.
+func (a *Adaptor) GetModelList() []string {
+	return ModelList
+}
+
+// GetChannelName returns the channel's identifier.
+func (a *Adaptor) GetChannelName() string {
+	return ChannelName
+}
diff --git a/relay/channel/minimax/constants.go b/relay/channel/minimax/constants.go
index c480cac95..df420d4b1 100644
--- a/relay/channel/minimax/constants.go
+++ b/relay/channel/minimax/constants.go
@@ -8,6 +8,12 @@ var ModelList = []string{
 	"abab6-chat",
 	"abab5.5-chat",
 	"abab5.5s-chat",
+	"speech-2.5-hd-preview",
+	"speech-2.5-turbo-preview",
+	"speech-02-hd",
+	"speech-02-turbo",
+	"speech-01-hd",
+	"speech-01-turbo",
 }
 
 var ChannelName = "minimax"
diff --git a/relay/channel/minimax/relay-minimax.go b/relay/channel/minimax/relay-minimax.go
index 033d10334..b314e69d7 100644
--- a/relay/channel/minimax/relay-minimax.go
+++ b/relay/channel/minimax/relay-minimax.go
@@ -3,9 +3,26 @@ package minimax
 import (
 	"fmt"
 
+	channelconstant "github.com/QuantumNous/new-api/constant"
 	relaycommon "github.com/QuantumNous/new-api/relay/common"
+	"github.com/QuantumNous/new-api/relay/constant"
 )
 
+// GetRequestURL returns the MiniMax endpoint for the relay mode in info,
+// using the base URL registered for the MiniMax channel type when the channel
+// has none. Only chat completions and speech synthesis are supported.
 func GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
-	return fmt.Sprintf("%s/v1/text/chatcompletion_v2", info.ChannelBaseUrl), nil
+	baseURL := info.ChannelBaseUrl
+	if baseURL == "" {
+		baseURL = channelconstant.ChannelBaseURLs[channelconstant.ChannelTypeMiniMax]
+	}
+
+	switch info.RelayMode {
+	case constant.RelayModeChatCompletions:
+		return fmt.Sprintf("%s/v1/text/chatcompletion_v2", baseURL), nil
+	case constant.RelayModeAudioSpeech:
+		return fmt.Sprintf("%s/v1/t2a_v2", baseURL), nil
+	default:
+		return "", fmt.Errorf("unsupported relay mode: %d", info.RelayMode)
+	}
 }
diff --git a/relay/channel/minimax/tts.go b/relay/channel/minimax/tts.go
new file mode 100644
index 000000000..4a52d2145
--- /dev/null
+++ b/relay/channel/minimax/tts.go
@@ -0,0 +1,231 @@
+package minimax
+
+import (
+	"encoding/hex"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io"
+	"net/http"
+	"strings"
+
+	"github.com/QuantumNous/new-api/dto"
+	relaycommon "github.com/QuantumNous/new-api/relay/common"
+	"github.com/QuantumNous/new-api/types"
+	"github.com/gin-gonic/gin"
+)
+
+// ctxKeyAudioFormat is the gin context key under which ConvertAudioRequest
+// stores the audio codec requested from MiniMax for the response handler.
+const ctxKeyAudioFormat = "minimax_audio_format"
+
+// MiniMaxTTSRequest is the JSON body sent to the MiniMax text-to-speech
+// endpoint.
+type MiniMaxTTSRequest struct {
+	Model             string             `json:"model"`
+	Text              string             `json:"text"`
+	Stream            bool               `json:"stream,omitempty"`
+	StreamOptions     *StreamOptions     `json:"stream_options,omitempty"`
+	VoiceSetting      VoiceSetting       `json:"voice_setting"`
+	PronunciationDict *PronunciationDict `json:"pronunciation_dict,omitempty"`
+	AudioSetting      *AudioSetting      `json:"audio_setting,omitempty"`
+	TimbreWeights     []TimbreWeight     `json:"timbre_weights,omitempty"`
+	LanguageBoost     string             `json:"language_boost,omitempty"`
+	VoiceModify       *VoiceModify       `json:"voice_modify,omitempty"`
+	SubtitleEnable    bool               `json:"subtitle_enable,omitempty"`
+	OutputFormat      string             `json:"output_format,omitempty"`
+	AigcWatermark     bool               `json:"aigc_watermark,omitempty"`
+}
+
+// StreamOptions holds the stream_options object of a TTS request.
+type StreamOptions struct {
+	ExcludeAggregatedAudio bool `json:"exclude_aggregated_audio,omitempty"`
+}
+
+// VoiceSetting holds the voice_setting object of a TTS request.
+type VoiceSetting struct {
+	VoiceID           string  `json:"voice_id"`
+	Speed             float64 `json:"speed,omitempty"`
+	Vol               float64 `json:"vol,omitempty"`
+	Pitch             int     `json:"pitch,omitempty"`
+	Emotion           string  `json:"emotion,omitempty"`
+	TextNormalization bool    `json:"text_normalization,omitempty"`
+	LatexRead         bool    `json:"latex_read,omitempty"`
+}
+
+// PronunciationDict holds the pronunciation_dict object of a TTS request.
+type PronunciationDict struct {
+	Tone []string `json:"tone,omitempty"`
+}
+
+// AudioSetting holds the audio_setting object of a TTS request.
+type AudioSetting struct {
+	SampleRate int    `json:"sample_rate,omitempty"`
+	Bitrate    int    `json:"bitrate,omitempty"`
+	Format     string `json:"format,omitempty"`
+	Channel    int    `json:"channel,omitempty"`
+	ForceCbr   bool   `json:"force_cbr,omitempty"`
+}
+
+// TimbreWeight is one entry of the timbre_weights array of a TTS request.
+type TimbreWeight struct {
+	VoiceID string `json:"voice_id"`
+	Weight  int    `json:"weight"`
+}
+
+// VoiceModify holds the voice_modify object of a TTS request.
+type VoiceModify struct {
+	Pitch        int    `json:"pitch,omitempty"`
+	Intensity    int    `json:"intensity,omitempty"`
+	Timbre       int    `json:"timbre,omitempty"`
+	SoundEffects string `json:"sound_effects,omitempty"`
+}
+
+// MiniMaxTTSResponse is the JSON body returned by the MiniMax text-to-speech
+// endpoint.
+type MiniMaxTTSResponse struct {
+	Data      MiniMaxTTSData   `json:"data"`
+	ExtraInfo MiniMaxExtraInfo `json:"extra_info"`
+	TraceID   string           `json:"trace_id"`
+	BaseResp  MiniMaxBaseResp  `json:"base_resp"`
+}
+
+// MiniMaxTTSData is the data object of a TTS response. Audio carries either a
+// URL or hex-encoded audio bytes.
+type MiniMaxTTSData struct {
+	Audio  string `json:"audio"`
+	Status int    `json:"status"`
+}
+
+// MiniMaxExtraInfo is the extra_info object of a TTS response.
+type MiniMaxExtraInfo struct {
+	UsageCharacters int64 `json:"usage_characters"`
+}
+
+// MiniMaxBaseResp is the base_resp status object of a TTS response; a
+// StatusCode of 0 is treated as success.
+type MiniMaxBaseResp struct {
+	StatusCode int64  `json:"status_code"`
+	StatusMsg  string `json:"status_msg"`
+}
+
+// getContentTypeByFormat maps an audio format name to its MIME type, falling
+// back to audio/mpeg for empty or unrecognized formats.
+func getContentTypeByFormat(format string) string {
+	switch format {
+	case "mp3":
+		return "audio/mpeg"
+	case "wav":
+		return "audio/wav"
+	case "flac":
+		return "audio/flac"
+	case "aac":
+		return "audio/aac"
+	case "pcm":
+		return "audio/pcm"
+	default:
+		return "audio/mpeg" // default to mp3
+	}
+}
+
+// handleTTSResponse turns a MiniMax text-to-speech response into the reply
+// sent to the client and reports usage.
+//
+// When the audio field is a URL the client is redirected to it (302);
+// otherwise the field is hex-decoded and written with a Content-Type derived
+// from the format recorded by ConvertAudioRequest (audio/mpeg when unknown).
+// A non-zero base_resp.status_code or an empty audio field yields an error.
+func handleTTSResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (usage any, err *types.NewAPIError) {
+	// Defer the close before reading so the body is released on every path,
+	// including a failed read.
+	defer resp.Body.Close()
+
+	body, readErr := io.ReadAll(resp.Body)
+	if readErr != nil {
+		return nil, types.NewErrorWithStatusCode(
+			fmt.Errorf("failed to read minimax response: %w", readErr),
+			types.ErrorCodeReadResponseBodyFailed,
+			http.StatusInternalServerError,
+		)
+	}
+
+	var minimaxResp MiniMaxTTSResponse
+	if unmarshalErr := json.Unmarshal(body, &minimaxResp); unmarshalErr != nil {
+		return nil, types.NewErrorWithStatusCode(
+			fmt.Errorf("failed to unmarshal minimax TTS response: %w", unmarshalErr),
+			types.ErrorCodeBadResponseBody,
+			http.StatusInternalServerError,
+		)
+	}
+
+	// A non-zero base_resp status code signals an upstream failure.
+	if minimaxResp.BaseResp.StatusCode != 0 {
+		return nil, types.NewErrorWithStatusCode(
+			fmt.Errorf("minimax TTS error: %d - %s", minimaxResp.BaseResp.StatusCode, minimaxResp.BaseResp.StatusMsg),
+			types.ErrorCodeBadResponse,
+			http.StatusBadRequest,
+		)
+	}
+
+	audio := minimaxResp.Data.Audio
+	if audio == "" {
+		return nil, types.NewErrorWithStatusCode(
+			errors.New("no audio data in minimax TTS response"),
+			types.ErrorCodeBadResponse,
+			http.StatusBadRequest,
+		)
+	}
+
+	if strings.HasPrefix(audio, "http") {
+		c.Redirect(http.StatusFound, audio)
+	} else {
+		// Anything that is not a URL is expected to be hex-encoded audio.
+		audioData, decodeErr := hex.DecodeString(audio)
+		if decodeErr != nil {
+			return nil, types.NewErrorWithStatusCode(
+				fmt.Errorf("failed to decode hex audio data: %w", decodeErr),
+				types.ErrorCodeBadResponse,
+				http.StatusInternalServerError,
+			)
+		}
+
+		contentType := getContentTypeByFormat(c.GetString(ctxKeyAudioFormat))
+		c.Data(http.StatusOK, contentType, audioData)
+	}
+
+	// TotalTokens carries the character count reported in extra_info.
+	usage = &dto.Usage{
+		PromptTokens:     info.PromptTokens,
+		CompletionTokens: 0,
+		TotalTokens:      int(minimaxResp.ExtraInfo.UsageCharacters),
+	}
+
+	return usage, nil
+}
+
+// handleChatCompletionResponse relays an upstream chat-completion response to
+// the client as-is: upstream headers are copied, then the body is written
+// with the upstream status code as application/json. No usage is reported.
+func handleChatCompletionResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (usage any, err *types.NewAPIError) {
+	// Defer the close before reading so the body is released on every path.
+	defer resp.Body.Close()
+
+	body, readErr := io.ReadAll(resp.Body)
+	if readErr != nil {
+		return nil, types.NewErrorWithStatusCode(
+			fmt.Errorf("failed to read minimax response: %w", readErr),
+			types.ErrorCodeReadResponseBodyFailed,
+			http.StatusInternalServerError,
+		)
+	}
+
+	// Copy the upstream headers onto the client response.
+	for key, values := range resp.Header {
+		for _, value := range values {
+			c.Header(key, value)
+		}
+	}
+
+	c.Data(resp.StatusCode, "application/json", body)
+	return nil, nil
+}
diff --git a/relay/channel/openai/adaptor.go b/relay/channel/openai/adaptor.go
index ec40fa25b..4e41c866a 100644
--- a/relay/channel/openai/adaptor.go
+++ b/relay/channel/openai/adaptor.go
@@ -18,7 +18,6 @@ import (
 	"github.com/QuantumNous/new-api/relay/channel"
 	"github.com/QuantumNous/new-api/relay/channel/ai360"
 	"github.com/QuantumNous/new-api/relay/channel/lingyiwanwu"
-	"github.com/QuantumNous/new-api/relay/channel/minimax"
 	"github.com/QuantumNous/new-api/relay/channel/openrouter"
 	"github.com/QuantumNous/new-api/relay/channel/xinference"
 	relaycommon "github.com/QuantumNous/new-api/relay/common"
@@ -161,8 +160,6 @@ func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
 			requestURL = fmt.Sprintf("/openai/realtime?deployment=%s&api-version=%s", model_, apiVersion)
 		}
 		return relaycommon.GetFullRequestURL(info.ChannelBaseUrl, requestURL, info.ChannelType), nil
-	case constant.ChannelTypeMiniMax:
-		return minimax.GetRequestURL(info)
 	case constant.ChannelTypeCustom:
 		url := info.ChannelBaseUrl
 		url = strings.Replace(url, "{model}", info.UpstreamModelName, -1)
@@ -599,8 +596,6 @@ func (a *Adaptor) GetModelList() []string {
 		return ai360.ModelList
 	case constant.ChannelTypeLingYiWanWu:
 		return lingyiwanwu.ModelList
-	case constant.ChannelTypeMiniMax:
-		return minimax.ModelList
 	case constant.ChannelTypeXinference:
 		return xinference.ModelList
 	case constant.ChannelTypeOpenRouter:
@@ -616,8 +611,6 @@ func (a *Adaptor) GetChannelName() string {
 		return ai360.ChannelName
 	case constant.ChannelTypeLingYiWanWu:
 		return lingyiwanwu.ChannelName
-	case constant.ChannelTypeMiniMax:
-		return minimax.ChannelName
 	case constant.ChannelTypeXinference:
 		return xinference.ChannelName
 	case constant.ChannelTypeOpenRouter:
diff --git a/relay/relay_adaptor.go b/relay/relay_adaptor.go
index 485abe5af..736a58223 100644
--- a/relay/relay_adaptor.go
+++ b/relay/relay_adaptor.go
@@ -18,6 +18,7 @@ import (
 	"github.com/QuantumNous/new-api/relay/channel/gemini"
 	"github.com/QuantumNous/new-api/relay/channel/jimeng"
 	"github.com/QuantumNous/new-api/relay/channel/jina"
+	"github.com/QuantumNous/new-api/relay/channel/minimax"
 	"github.com/QuantumNous/new-api/relay/channel/mistral"
 	"github.com/QuantumNous/new-api/relay/channel/mokaai"
 	"github.com/QuantumNous/new-api/relay/channel/moonshot"
@@ -108,6 +109,8 @@ func GetAdaptor(apiType int) channel.Adaptor {
 		return &moonshot.Adaptor{} // Moonshot uses Claude API
 	case constant.APITypeSubmodel:
 		return &submodel.Adaptor{}
+	case constant.APITypeMiniMax:
+		return &minimax.Adaptor{}
 	}
 	return nil
 }