mirror of
https://github.com/QuantumNous/new-api.git
synced 2026-04-19 08:58:37 +00:00
feat: add minimax tts
This commit is contained in:
130
relay/channel/minimax/adaptor.go
Normal file
130
relay/channel/minimax/adaptor.go
Normal file
@@ -0,0 +1,130 @@
|
|||||||
|
package minimax
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
|
||||||
|
"github.com/QuantumNous/new-api/dto"
|
||||||
|
"github.com/QuantumNous/new-api/relay/channel"
|
||||||
|
relaycommon "github.com/QuantumNous/new-api/relay/common"
|
||||||
|
"github.com/QuantumNous/new-api/relay/constant"
|
||||||
|
"github.com/QuantumNous/new-api/types"
|
||||||
|
|
||||||
|
"github.com/gin-gonic/gin"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Adaptor is the MiniMax channel adaptor. It holds no state; its methods
// convert incoming requests and relay the upstream responses.
type Adaptor struct{}
|
||||||
|
|
||||||
|
func (a *Adaptor) ConvertGeminiRequest(*gin.Context, *relaycommon.RelayInfo, *dto.GeminiChatRequest) (any, error) {
|
||||||
|
return nil, errors.New("not implemented")
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *Adaptor) ConvertClaudeRequest(c *gin.Context, info *relaycommon.RelayInfo, req *dto.ClaudeRequest) (any, error) {
|
||||||
|
return nil, errors.New("not implemented")
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *Adaptor) ConvertAudioRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.AudioRequest) (io.Reader, error) {
|
||||||
|
if info.RelayMode != constant.RelayModeAudioSpeech {
|
||||||
|
return nil, errors.New("unsupported audio relay mode")
|
||||||
|
}
|
||||||
|
|
||||||
|
voiceID := mapVoiceType(request.Voice)
|
||||||
|
speed := request.Speed
|
||||||
|
outputFormat := mapOutputFormat(request.ResponseFormat)
|
||||||
|
|
||||||
|
c.Set("response_format", outputFormat)
|
||||||
|
|
||||||
|
minimaxRequest := MiniMaxTTSRequest{
|
||||||
|
Model: getTTSModel(info.OriginModelName),
|
||||||
|
Text: request.Input,
|
||||||
|
VoiceID: voiceID,
|
||||||
|
Speed: speed,
|
||||||
|
OutputFormat: outputFormat,
|
||||||
|
}
|
||||||
|
|
||||||
|
// 同步扩展字段的厂商自定义metadata
|
||||||
|
if len(request.Metadata) > 0 {
|
||||||
|
if err := json.Unmarshal(request.Metadata, &minimaxRequest); err != nil {
|
||||||
|
return nil, fmt.Errorf("error unmarshalling metadata to minimax request: %w", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
jsonData, err := json.Marshal(minimaxRequest)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("error marshalling minimax request: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Debug: log the request structure
|
||||||
|
fmt.Printf("MiniMax TTS Request: %s\n", string(jsonData))
|
||||||
|
|
||||||
|
return bytes.NewReader(jsonData), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *Adaptor) ConvertImageRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.ImageRequest) (any, error) {
|
||||||
|
return request, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *Adaptor) Init(info *relaycommon.RelayInfo) {
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
|
||||||
|
return GetRequestURL(info)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *Adaptor) SetupRequestHeader(c *gin.Context, req *http.Header, info *relaycommon.RelayInfo) error {
|
||||||
|
channel.SetupApiRequestHeader(info, c, req)
|
||||||
|
|
||||||
|
if info.RelayMode == constant.RelayModeAudioSpeech {
|
||||||
|
req.Set("Content-Type", "application/json")
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
req.Set("Authorization", "Bearer "+info.ApiKey)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayInfo, request *dto.GeneralOpenAIRequest) (any, error) {
|
||||||
|
if request == nil {
|
||||||
|
return nil, errors.New("request is nil")
|
||||||
|
}
|
||||||
|
return request, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *Adaptor) ConvertRerankRequest(c *gin.Context, relayMode int, request dto.RerankRequest) (any, error) {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.EmbeddingRequest) (any, error) {
|
||||||
|
return request, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
|
||||||
|
return nil, errors.New("not implemented")
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error) {
|
||||||
|
return channel.DoApiRequest(a, c, info, requestBody)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (usage any, err *types.NewAPIError) {
|
||||||
|
if info.RelayMode == constant.RelayModeAudioSpeech {
|
||||||
|
return handleTTSResponse(c, resp, info)
|
||||||
|
}
|
||||||
|
|
||||||
|
// For chat completions, just pass through the response
|
||||||
|
// MiniMax API is compatible with OpenAI format
|
||||||
|
return handleChatCompletionResponse(c, resp, info)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *Adaptor) GetModelList() []string {
|
||||||
|
return ModelList
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *Adaptor) GetChannelName() string {
|
||||||
|
return ChannelName
|
||||||
|
}
|
||||||
@@ -8,6 +8,12 @@ var ModelList = []string{
|
|||||||
"abab6-chat",
|
"abab6-chat",
|
||||||
"abab5.5-chat",
|
"abab5.5-chat",
|
||||||
"abab5.5s-chat",
|
"abab5.5s-chat",
|
||||||
|
"speech-2.5-hd-preview",
|
||||||
|
"speech-2.5-turbo-preview",
|
||||||
|
"speech-02-hd",
|
||||||
|
"speech-02-turbo",
|
||||||
|
"speech-01-hd",
|
||||||
|
"speech-01-turbo",
|
||||||
}
|
}
|
||||||
|
|
||||||
var ChannelName = "minimax"
|
// ChannelName is the channel identifier reported by Adaptor.GetChannelName.
var ChannelName = "minimax"
|
||||||
|
|||||||
@@ -3,9 +3,23 @@ package minimax
|
|||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
|
||||||
|
channelconstant "github.com/QuantumNous/new-api/constant"
|
||||||
relaycommon "github.com/QuantumNous/new-api/relay/common"
|
relaycommon "github.com/QuantumNous/new-api/relay/common"
|
||||||
|
"github.com/QuantumNous/new-api/relay/constant"
|
||||||
)
|
)
|
||||||
|
|
||||||
func GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
|
func GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
|
||||||
return fmt.Sprintf("%s/v1/text/chatcompletion_v2", info.ChannelBaseUrl), nil
|
baseUrl := info.ChannelBaseUrl
|
||||||
|
if baseUrl == "" {
|
||||||
|
baseUrl = channelconstant.ChannelBaseURLs[channelconstant.ChannelTypeMiniMax]
|
||||||
|
}
|
||||||
|
|
||||||
|
switch info.RelayMode {
|
||||||
|
case constant.RelayModeChatCompletions:
|
||||||
|
return fmt.Sprintf("%s/v1/text/chatcompletion_v2", baseUrl), nil
|
||||||
|
case constant.RelayModeAudioSpeech:
|
||||||
|
return fmt.Sprintf("%s/v1/t2a_v2", baseUrl), nil
|
||||||
|
default:
|
||||||
|
return "", fmt.Errorf("unsupported relay mode: %d", info.RelayMode)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
212
relay/channel/minimax/tts.go
Normal file
212
relay/channel/minimax/tts.go
Normal file
@@ -0,0 +1,212 @@
|
|||||||
|
package minimax
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/base64"
|
||||||
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
|
||||||
|
"github.com/QuantumNous/new-api/dto"
|
||||||
|
relaycommon "github.com/QuantumNous/new-api/relay/common"
|
||||||
|
"github.com/QuantumNous/new-api/types"
|
||||||
|
"github.com/gin-gonic/gin"
|
||||||
|
)
|
||||||
|
|
||||||
|
// MiniMaxTTSRequest is the JSON body built by ConvertAudioRequest for a
// text-to-speech call. Model, Text and VoiceID are always serialised; the
// remaining fields are omitted from the JSON when they hold their zero value.
type MiniMaxTTSRequest struct {
	// Always-present fields.
	Model   string `json:"model"`
	Text    string `json:"text"`
	VoiceID string `json:"voice_id"`

	// Optional tuning fields (dropped when zero).
	Speed           float64 `json:"speed,omitempty"`
	Vol             float64 `json:"vol,omitempty"`
	Pitch           int     `json:"pitch,omitempty"`
	AudioSampleRate int     `json:"audio_sample_rate,omitempty"`
	OutputFormat    string  `json:"output_format,omitempty"`
}
|
||||||
|
|
||||||
|
type MiniMaxTTSResponse struct {
|
||||||
|
Created int `json:"created"`
|
||||||
|
Data []MiniMaxTTSData `json:"data"`
|
||||||
|
ID string `json:"id"`
|
||||||
|
Model string `json:"model"`
|
||||||
|
Object string `json:"object"`
|
||||||
|
Usage MiniMaxTTSUsage `json:"usage"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// MiniMaxTTSData is one entry of MiniMaxTTSResponse.Data. Audio carries the
// audio payload, which handleTTSResponse decodes as standard base64.
type MiniMaxTTSData struct {
	Index        int    `json:"index"`
	Audio        string `json:"audio"`
	Text         string `json:"text"`
	FinishReason string `json:"finish_reason"`
}
|
||||||
|
|
||||||
|
// MiniMaxTTSUsage is the usage section of a text-to-speech response; only the
// total token count is decoded.
type MiniMaxTTSUsage struct {
	TotalTokens int `json:"total_tokens"`
}
|
||||||
|
|
||||||
|
type MiniMaxTTSErrorResponse struct {
|
||||||
|
Error MiniMaxTTSError `json:"error"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// MiniMaxTTSError is the error object nested in MiniMaxTTSErrorResponse.
// handleTTSResponse reports Message to the client when Code is non-empty.
type MiniMaxTTSError struct {
	Code    string `json:"code"`
	Message string `json:"message"`
	Type    string `json:"type"`
}
|
||||||
|
|
||||||
|
// openAIToMiniMaxVoiceMap translates the voice name of an incoming request
// into a MiniMax voice_id. Names missing from this table are handled by
// mapVoiceType's default.
var openAIToMiniMaxVoiceMap = map[string]string{
	// OpenAI voice names.
	"alloy":   "male-qn-qingse",
	"echo":    "male-qn-jingying",
	"onyx":    "male-qn-badao",
	"fable":   "female-shaonv",
	"nova":    "female-shaonv-jingpin",
	"shimmer": "female-yujie",

	// Generic aliases.
	"voice-1": "male-qn-qingse",
	"voice-2": "female-shaonv",
}
|
||||||
|
|
||||||
|
// responseFormatToOutputFormatMap translates the response_format of an
// incoming request into the output format requested from MiniMax. Every
// format maps to itself except "opus", which is served as "mp3".
var responseFormatToOutputFormatMap = map[string]string{
	"aac":  "aac",
	"flac": "flac",
	"mp3":  "mp3",
	"opus": "mp3",
	"pcm":  "pcm",
	"wav":  "wav",
}
|
||||||
|
|
||||||
|
// modelToTTSModelMap translates the requested model name into the model name
// sent to MiniMax. Names missing from this table fall back to getTTSModel's
// default, so every speech model the channel advertises in ModelList must be
// listed here — otherwise a request for it is silently downgraded.
var modelToTTSModelMap = map[string]string{
	// MiniMax speech models, passed through unchanged.
	"speech-2.5-hd-preview":    "speech-2.5-hd-preview",
	"speech-2.5-turbo-preview": "speech-2.5-turbo-preview",
	"speech-02-hd":             "speech-02-hd",
	"speech-02-turbo":          "speech-02-turbo",
	"speech-01-hd":             "speech-01-hd",
	"speech-01-turbo":          "speech-01-turbo",
	"speech-01":                "speech-01",

	// OpenAI model names.
	"tts-1":    "speech-01-turbo",
	"tts-1-hd": "speech-01",
}
|
||||||
|
|
||||||
|
func mapVoiceType(openAIVoice string) string {
|
||||||
|
if voice, ok := openAIToMiniMaxVoiceMap[openAIVoice]; ok {
|
||||||
|
return voice
|
||||||
|
}
|
||||||
|
return "female-shaonv" // default voice
|
||||||
|
}
|
||||||
|
|
||||||
|
func mapOutputFormat(responseFormat string) string {
|
||||||
|
if format, ok := responseFormatToOutputFormatMap[responseFormat]; ok {
|
||||||
|
return format
|
||||||
|
}
|
||||||
|
return "mp3" // default format
|
||||||
|
}
|
||||||
|
|
||||||
|
func getTTSModel(modelName string) string {
|
||||||
|
if ttsModel, ok := modelToTTSModelMap[modelName]; ok {
|
||||||
|
return ttsModel
|
||||||
|
}
|
||||||
|
return "speech-01-turbo" // default model
|
||||||
|
}
|
||||||
|
|
||||||
|
// getContentTypeByFormat returns the MIME type for an audio format name.
// "mp3" and every unrecognised format yield "audio/mpeg".
func getContentTypeByFormat(format string) string {
	switch format {
	case "wav":
		return "audio/wav"
	case "flac":
		return "audio/flac"
	case "aac":
		return "audio/aac"
	case "pcm":
		return "audio/pcm"
	default:
		// Covers "mp3" as well as anything unknown.
		return "audio/mpeg"
	}
}
|
||||||
|
|
||||||
|
func handleTTSResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (usage any, err *types.NewAPIError) {
|
||||||
|
body, readErr := io.ReadAll(resp.Body)
|
||||||
|
if readErr != nil {
|
||||||
|
return nil, types.NewErrorWithStatusCode(
|
||||||
|
errors.New("failed to read minimax response"),
|
||||||
|
types.ErrorCodeReadResponseBodyFailed,
|
||||||
|
http.StatusInternalServerError,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
// First try to parse as error response
|
||||||
|
var errorResp MiniMaxTTSErrorResponse
|
||||||
|
if unmarshalErr := json.Unmarshal(body, &errorResp); unmarshalErr == nil && errorResp.Error.Code != "" {
|
||||||
|
return nil, types.NewErrorWithStatusCode(
|
||||||
|
errors.New(errorResp.Error.Message),
|
||||||
|
types.ErrorCodeBadResponse,
|
||||||
|
http.StatusBadRequest,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse as successful response
|
||||||
|
var minimaxResp MiniMaxTTSResponse
|
||||||
|
if unmarshalErr := json.Unmarshal(body, &minimaxResp); unmarshalErr != nil {
|
||||||
|
return nil, types.NewErrorWithStatusCode(
|
||||||
|
errors.New("failed to parse minimax response"),
|
||||||
|
types.ErrorCodeBadResponseBody,
|
||||||
|
http.StatusInternalServerError,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if we have audio data
|
||||||
|
if len(minimaxResp.Data) == 0 || minimaxResp.Data[0].Audio == "" {
|
||||||
|
return nil, types.NewErrorWithStatusCode(
|
||||||
|
errors.New("no audio data in response"),
|
||||||
|
types.ErrorCodeBadResponse,
|
||||||
|
http.StatusBadRequest,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Decode base64 audio data
|
||||||
|
audioData, decodeErr := base64.StdEncoding.DecodeString(minimaxResp.Data[0].Audio)
|
||||||
|
if decodeErr != nil {
|
||||||
|
return nil, types.NewErrorWithStatusCode(
|
||||||
|
errors.New("failed to decode audio data"),
|
||||||
|
types.ErrorCodeBadResponseBody,
|
||||||
|
http.StatusInternalServerError,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get output format from context or default to mp3
|
||||||
|
outputFormat := c.GetString("response_format")
|
||||||
|
if outputFormat == "" {
|
||||||
|
outputFormat = "mp3"
|
||||||
|
}
|
||||||
|
|
||||||
|
contentType := getContentTypeByFormat(outputFormat)
|
||||||
|
c.Header("Content-Type", contentType)
|
||||||
|
c.Data(http.StatusOK, contentType, audioData)
|
||||||
|
|
||||||
|
usage = &dto.Usage{
|
||||||
|
PromptTokens: info.PromptTokens,
|
||||||
|
CompletionTokens: 0,
|
||||||
|
TotalTokens: minimaxResp.Usage.TotalTokens,
|
||||||
|
}
|
||||||
|
|
||||||
|
return usage, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func handleChatCompletionResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (usage any, err *types.NewAPIError) {
|
||||||
|
body, readErr := io.ReadAll(resp.Body)
|
||||||
|
if readErr != nil {
|
||||||
|
return nil, types.NewErrorWithStatusCode(
|
||||||
|
errors.New("failed to read minimax response"),
|
||||||
|
types.ErrorCodeReadResponseBodyFailed,
|
||||||
|
http.StatusInternalServerError,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
// Set response headers
|
||||||
|
for key, values := range resp.Header {
|
||||||
|
for _, value := range values {
|
||||||
|
c.Header(key, value)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
c.Data(resp.StatusCode, "application/json", body)
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user