// Patch summary (commentary only; everything from the first "diff --git" header onward is unchanged).
//
// dto/audio.go
//   - Imports encoding/json.
//   - AudioRequest gains three optional fields: Instructions ("instructions"),
//     StreamFormat ("stream_format") and Metadata ("metadata", json.RawMessage).
//     Speed and ResponseFormat swap declaration order; the JSON tags of the
//     existing fields are unchanged.
//
// relay/channel/volcengine/adaptor.go (ConvertAudioRequest)
//   - speedRatio is now request.Speed as-is instead of mapSpeedRatio(request.Speed).
//   - When request.Metadata is non-empty it is json.Unmarshal'ed into volcRequest
//     just before volcRequest is marshalled; a decode failure is returned as a
//     wrapped error. The added Chinese comment reads: "sync the vendor-specific
//     custom metadata from the extension field".
//
// relay/channel/volcengine/tts.go
//   - Removes mapSpeedRatio, which returned 1.0 for speed == 0 and limited other
//     values to the range [0.1, 2.0]. Its Chinese comment reads: "[0.1, 2],
//     default 1; one decimal place is usually enough".
//
// NOTE(review): with mapSpeedRatio removed, an omitted speed (zero value 0) and
// out-of-range speeds are forwarded unchanged unless the metadata overrides them;
// presumably the request field is omitempty or the upstream applies its own
// default - confirm against the volcRequest definition, which is not visible here.
// NOTE(review): metadata is decoded onto the whole volcRequest, so it can
// presumably overwrite any field that struct exposes through a JSON key - confirm
// this is intended for caller-supplied input.
// NOTE(review): as stored here the patch has lost its original line breaks; the
// text below is kept exactly as found.
diff --git a/dto/audio.go b/dto/audio.go index d2ee3abe9..ea51516f8 100644 --- a/dto/audio.go +++ b/dto/audio.go @@ -1,17 +1,22 @@ package dto import ( + "encoding/json" + "github.com/QuantumNous/new-api/types" "github.com/gin-gonic/gin" ) type AudioRequest struct { - Model string `json:"model"` - Input string `json:"input"` - Voice string `json:"voice"` - Speed float64 `json:"speed,omitempty"` - ResponseFormat string `json:"response_format,omitempty"` + Model string `json:"model"` + Input string `json:"input"` + Voice string `json:"voice"` + Instructions string `json:"instructions,omitempty"` + ResponseFormat string `json:"response_format,omitempty"` + Speed float64 `json:"speed,omitempty"` + StreamFormat string `json:"stream_format,omitempty"` + Metadata json.RawMessage `json:"metadata,omitempty"` } func (r *AudioRequest) GetTokenCountMeta() *types.TokenCountMeta { diff --git a/relay/channel/volcengine/adaptor.go b/relay/channel/volcengine/adaptor.go index 94cb776e8..c5d9e5dd6 100644 --- a/relay/channel/volcengine/adaptor.go +++ b/relay/channel/volcengine/adaptor.go @@ -47,7 +47,7 @@ func (a *Adaptor) ConvertAudioRequest(c *gin.Context, info *relaycommon.RelayInf } voiceType := mapVoiceType(request.Voice) - speedRatio := mapSpeedRatio(request.Speed) + speedRatio := request.Speed encoding := mapEncoding(request.ResponseFormat) c.Set("response_format", encoding) @@ -75,6 +75,13 @@ func (a *Adaptor) ConvertAudioRequest(c *gin.Context, info *relaycommon.RelayInf }, } + // 同步扩展字段的厂商自定义metadata + if len(request.Metadata) > 0 { + if err = json.Unmarshal(request.Metadata, &volcRequest); err != nil { + return nil, fmt.Errorf("error unmarshalling metadata to volcengine request: %w", err) + } + } + jsonData, err := json.Marshal(volcRequest) if err != nil { return nil, fmt.Errorf("error marshalling volcengine request: %w", err) diff --git a/relay/channel/volcengine/tts.go b/relay/channel/volcengine/tts.go index 4cd591396..328512845 100644 --- 
a/relay/channel/volcengine/tts.go +++ b/relay/channel/volcengine/tts.go @@ -119,20 +119,6 @@ func mapVoiceType(openAIVoice string) string { return openAIVoice } -// [0.1,2],默认为 1,通常保留一位小数即可 -func mapSpeedRatio(speed float64) float64 { - if speed == 0 { - return 1.0 - } - if speed < 0.1 { - return 0.1 - } - if speed > 2.0 { - return 2.0 - } - return speed -} - func mapEncoding(responseFormat string) string { if encoding, ok := responseFormatToEncodingMap[responseFormat]; ok { return encoding