Compare commits

...

23 Commits

Author SHA1 Message Date
creamlike1024
ce1fde8500 chore: Ignore .zed and debug binaries in .gitignore 2025-10-21 16:40:22 +08:00
Seefs
4661399639 Merge pull request #2070 from QuantumNous/ali-channel-support-stream-options
Ali channel support stream options
2025-10-20 23:24:33 +08:00
IcedTangerine
78d8d458ca Merge pull request #2081 from feitianbubu/pr/add-miniMax-tts
增加MiniMax语音合成TTS支持
2025-10-20 17:48:35 +08:00
IcedTangerine
e20a287c4b chore: Comment out debug log in adaptor.go
Comment out the debug log for MiniMax TTS Request.
2025-10-20 17:48:08 +08:00
feitianbubu
c7ab0f4f3d feat: opt minimax tts req struct 2025-10-20 16:26:50 +08:00
feitianbubu
0d1057830b feat: add minimax api adaptor 2025-10-20 16:26:50 +08:00
feitianbubu
dd1cac3f2e feat: add minimax tts 2025-10-20 16:26:50 +08:00
creamlike1024
cdbc7a9510 refactor: remove unused functions and imports from ali text handler 2025-10-18 17:00:28 +08:00
creamlike1024
c693bfee5e feat: add support for Ali channel in streamSupportedChannels 2025-10-18 17:00:08 +08:00
IcedTangerine
7156bf2382 Merge pull request #2068 from feitianbubu/pr/doubao-speech-emotion
豆包语音2.0音色支持情感,情绪,音量
2025-10-18 14:30:17 +08:00
Seefs
c216527f23 Merge pull request #2065 from somnifex/main
fix: handle JSON parsing for thinking content in ollama stream
2025-10-18 13:02:56 +08:00
Seefs
b1de0f49df Merge pull request #2061 from QuantumNous/fix-gemini-batch-embedding-token-count
fix: gemini batch embedding token not counted
2025-10-18 12:54:44 +08:00
feitianbubu
525ca09f2c fix: doubao audio speedRadio to speed 2025-10-18 01:48:36 +08:00
feitianbubu
92fc973bc3 feat: AudioRequest add metadata support custom params 2025-10-18 01:48:36 +08:00
feitianbubu
22ff8e2cbe feat: sync latest openai speech struct
https://platform.openai.com/docs/api-reference/audio/createSpeech
2025-10-18 01:48:36 +08:00
IcedTangerine
1ec664a348 Merge pull request #2067 from feitianbubu/pr/add-doubao-audio
新增支持豆包语音合成2.0功能
2025-10-18 00:14:11 +08:00
IcedTangerine
6a24c37c0e Fix error message for invalid API key format 2025-10-18 00:13:28 +08:00
feitianbubu
8965fc49c9 feat: add doubao audio token input prompt 2025-10-17 22:06:46 +08:00
feitianbubu
735386c0b9 feat: add doubao tts usage token 2025-10-17 22:06:45 +08:00
feitianbubu
58c4da0ddf feat: switch to official TTS only when baseUrl is Volcano's official URL 2025-10-17 22:06:45 +08:00
feitianbubu
fe68488b1c feat: add doubao audio tts 2025-10-17 22:06:45 +08:00
somnifex
25af6e6f77 fix: handle JSON parsing for thinking content in ollama stream and chat handlers 2025-10-17 18:35:08 +08:00
creamlike1024
e2d3b46a3a fix: gemini batch embedding token not counted 2025-10-17 15:51:04 +08:00
17 changed files with 678 additions and 211 deletions

4
.gitignore vendored
View File

@@ -1,5 +1,6 @@
.idea
.vscode
.zed
upload
*.exe
*.db
@@ -10,10 +11,11 @@ web/dist
.env
one-api
new-api
/__debug_bin*
.DS_Store
tiktoken_cache
.eslintcache
.gocache
electron/node_modules
electron/dist
electron/dist

View File

@@ -69,6 +69,8 @@ func ChannelType2APIType(channelType int) (int, bool) {
apiType = constant.APITypeMoonshot
case constant.ChannelTypeSubmodel:
apiType = constant.APITypeSubmodel
case constant.ChannelTypeMiniMax:
apiType = constant.APITypeMiniMax
}
if apiType == -1 {
return constant.APITypeOpenAI, false

View File

@@ -33,5 +33,6 @@ const (
APITypeJimeng
APITypeMoonshot
APITypeSubmodel
APITypeMiniMax
APITypeDummy // this one is only for count, do not add any channel after this
)

View File

@@ -1,17 +1,22 @@
package dto
import (
"encoding/json"
"github.com/QuantumNous/new-api/types"
"github.com/gin-gonic/gin"
)
// AudioRequest is the OpenAI-style speech request body accepted by the relay.
//
// Each field is declared exactly once; a struct that lists the same field
// twice does not compile.
type AudioRequest struct {
	Model          string  `json:"model"`
	Input          string  `json:"input"`
	Voice          string  `json:"voice"`
	Instructions   string  `json:"instructions,omitempty"`
	ResponseFormat string  `json:"response_format,omitempty"`
	Speed          float64 `json:"speed,omitempty"`
	StreamFormat   string  `json:"stream_format,omitempty"`
	// Metadata is kept as raw JSON so a channel adaptor can unmarshal it
	// onto its own vendor request type.
	Metadata json.RawMessage `json:"metadata,omitempty"`
}
func (r *AudioRequest) GetTokenCountMeta() *types.TokenCountMeta {

View File

@@ -1,20 +1,7 @@
package ali
import (
"bufio"
"encoding/json"
"io"
"net/http"
"strings"
"github.com/QuantumNous/new-api/common"
"github.com/QuantumNous/new-api/dto"
"github.com/QuantumNous/new-api/relay/helper"
"github.com/QuantumNous/new-api/service"
"github.com/QuantumNous/new-api/types"
"github.com/gin-gonic/gin"
)
// https://help.aliyun.com/document_detail/613695.html?spm=a2c4g.2399480.0.0.1adb778fAdzP9w#341800c0f8w0r
@@ -29,180 +16,3 @@ func requestOpenAI2Ali(request dto.GeneralOpenAIRequest) *dto.GeneralOpenAIReque
}
return &request
}
// embeddingRequestOpenAI2Ali builds an AliEmbeddingRequest from an OpenAI-style
// embedding request: the model name is copied and the parsed inputs become
// Input.Texts.
func embeddingRequestOpenAI2Ali(request dto.EmbeddingRequest) *AliEmbeddingRequest {
	aliRequest := &AliEmbeddingRequest{Model: request.Model}
	aliRequest.Input.Texts = request.ParseInput()
	return aliRequest
}
// aliEmbeddingHandler decodes the upstream embedding response, re-encodes it
// as JSON to the client with the upstream status code, and returns the usage
// reported in that response.
//
// The response body is released on every path, including the decode-failure
// return.
func aliEmbeddingHandler(c *gin.Context, resp *http.Response) (*types.NewAPIError, *dto.Usage) {
	defer service.CloseResponseBodyGracefully(resp)

	var fullTextResponse dto.FlexibleEmbeddingResponse
	if err := json.NewDecoder(resp.Body).Decode(&fullTextResponse); err != nil {
		return types.NewOpenAIError(err, types.ErrorCodeBadResponseBody, http.StatusInternalServerError), nil
	}

	jsonResponse, err := json.Marshal(fullTextResponse)
	if err != nil {
		return types.NewError(err, types.ErrorCodeBadResponseBody), nil
	}

	c.Writer.Header().Set("Content-Type", "application/json")
	c.Writer.WriteHeader(resp.StatusCode)
	// Headers are already sent, so a failed write can only be logged.
	if _, err := c.Writer.Write(jsonResponse); err != nil {
		common.SysLog("error writing embedding response: " + err.Error())
	}
	return nil, &fullTextResponse.Usage
}
// embeddingResponseAli2OpenAI maps an Ali embedding response onto the OpenAI
// list shape. Each upstream embedding becomes one item carrying its text
// index; only TotalTokens is populated in Usage.
func embeddingResponseAli2OpenAI(response *AliEmbeddingResponse, model string) *dto.OpenAIEmbeddingResponse {
	embeddings := response.Output.Embeddings
	items := make([]dto.OpenAIEmbeddingResponseItem, 0, len(embeddings))
	for i := range embeddings {
		items = append(items, dto.OpenAIEmbeddingResponseItem{
			Object:    `embedding`,
			Index:     embeddings[i].TextIndex,
			Embedding: embeddings[i].Embedding,
		})
	}
	return &dto.OpenAIEmbeddingResponse{
		Object: "list",
		Data:   items,
		Model:  model,
		Usage:  dto.Usage{TotalTokens: response.Usage.TotalTokens},
	}
}
// responseAli2OpenAI wraps a non-streaming Ali text response as a single-choice
// OpenAI chat completion. TotalTokens is the sum of the upstream input and
// output token counts.
func responseAli2OpenAI(response *AliResponse) *dto.OpenAITextResponse {
	inputTokens, outputTokens := response.Usage.InputTokens, response.Usage.OutputTokens
	message := dto.Message{
		Role:    "assistant",
		Content: response.Output.Text,
	}
	return &dto.OpenAITextResponse{
		Id:      response.RequestId,
		Object:  "chat.completion",
		Created: common.GetTimestamp(),
		Choices: []dto.OpenAITextResponseChoice{{
			Index:        0,
			Message:      message,
			FinishReason: response.Output.FinishReason,
		}},
		Usage: dto.Usage{
			PromptTokens:     inputTokens,
			CompletionTokens: outputTokens,
			TotalTokens:      inputTokens + outputTokens,
		},
	}
}
// streamResponseAli2OpenAI converts one Ali stream event into an OpenAI
// chat.completion.chunk with a single choice. FinishReason is set unless the
// upstream value is the literal string "null".
//
// NOTE(review): Model is hard-coded to "ernie-bot" — looks like a leftover
// from another channel; confirm before relying on it.
func streamResponseAli2OpenAI(aliResponse *AliResponse) *dto.ChatCompletionsStreamResponse {
	var choice dto.ChatCompletionsStreamResponseChoice
	choice.Delta.SetContentString(aliResponse.Output.Text)
	if reason := aliResponse.Output.FinishReason; reason != "null" {
		choice.FinishReason = &reason
	}
	return &dto.ChatCompletionsStreamResponse{
		Id:      aliResponse.RequestId,
		Object:  "chat.completion.chunk",
		Created: common.GetTimestamp(),
		Model:   "ernie-bot",
		Choices: []dto.ChatCompletionsStreamResponseChoice{choice},
	}
}
// aliStreamHandler relays an Ali "data:" event stream to the client as
// OpenAI-style chunks and returns the last usage seen with a non-zero output
// token count.
//
// Each event's text is trimmed of the previous event's text before being sent,
// so the client receives only the newly added suffix.
//
// The reader goroutine is tied to the handler's lifetime through done: if the
// client disconnects and c.Stream returns early, the goroutine stops instead
// of blocking forever on an unbuffered send.
func aliStreamHandler(c *gin.Context, resp *http.Response) (*types.NewAPIError, *dto.Usage) {
	var usage dto.Usage
	scanner := bufio.NewScanner(resp.Body)
	scanner.Split(bufio.ScanLines)
	dataChan := make(chan string)
	stopChan := make(chan bool)
	done := make(chan struct{})
	go func() {
		for scanner.Scan() {
			data := scanner.Text()
			// Ignore blank lines and anything that is not a data event.
			if !strings.HasPrefix(data, "data:") {
				continue
			}
			select {
			case dataChan <- data[5:]:
			case <-done:
				return
			}
		}
		select {
		case <-done:
			// Handler already left; a read error now is just the body being closed.
			return
		default:
		}
		if err := scanner.Err(); err != nil {
			common.SysLog("error reading stream response: " + err.Error())
		}
		select {
		case stopChan <- true:
		case <-done:
		}
	}()
	helper.SetEventStreamHeaders(c)
	lastResponseText := ""
	c.Stream(func(w io.Writer) bool {
		select {
		case data := <-dataChan:
			var aliResponse AliResponse
			err := json.Unmarshal([]byte(data), &aliResponse)
			if err != nil {
				common.SysLog("error unmarshalling stream response: " + err.Error())
				return true
			}
			if aliResponse.Usage.OutputTokens != 0 {
				usage.PromptTokens = aliResponse.Usage.InputTokens
				usage.CompletionTokens = aliResponse.Usage.OutputTokens
				usage.TotalTokens = aliResponse.Usage.InputTokens + aliResponse.Usage.OutputTokens
			}
			response := streamResponseAli2OpenAI(&aliResponse)
			response.Choices[0].Delta.SetContentString(strings.TrimPrefix(response.Choices[0].Delta.GetContentString(), lastResponseText))
			lastResponseText = aliResponse.Output.Text
			jsonResponse, err := json.Marshal(response)
			if err != nil {
				common.SysLog("error marshalling stream response: " + err.Error())
				return true
			}
			c.Render(-1, common.CustomEvent{Data: "data: " + string(jsonResponse)})
			return true
		case <-stopChan:
			c.Render(-1, common.CustomEvent{Data: "data: [DONE]"})
			return false
		}
	})
	// Signal the reader before closing the body so it exits quietly.
	close(done)
	service.CloseResponseBodyGracefully(resp)
	return nil, &usage
}
// aliHandler handles a non-streaming Ali text response. An upstream payload
// with a non-empty Code is returned as an "ali_error"; otherwise the payload
// is converted to the OpenAI shape, written to the client with the upstream
// status code, and its usage is returned.
//
// The body is released on every path, including a failed read.
func aliHandler(c *gin.Context, resp *http.Response) (*types.NewAPIError, *dto.Usage) {
	defer service.CloseResponseBodyGracefully(resp)

	responseBody, err := io.ReadAll(resp.Body)
	if err != nil {
		return types.NewOpenAIError(err, types.ErrorCodeReadResponseBodyFailed, http.StatusInternalServerError), nil
	}

	var aliResponse AliResponse
	if err = json.Unmarshal(responseBody, &aliResponse); err != nil {
		return types.NewOpenAIError(err, types.ErrorCodeBadResponseBody, http.StatusInternalServerError), nil
	}
	if aliResponse.Code != "" {
		return types.WithOpenAIError(types.OpenAIError{
			Message: aliResponse.Message,
			Type:    "ali_error",
			Param:   aliResponse.RequestId,
			Code:    aliResponse.Code,
		}, resp.StatusCode), nil
	}

	fullTextResponse := responseAli2OpenAI(&aliResponse)
	jsonResponse, err := common.Marshal(fullTextResponse)
	if err != nil {
		return types.NewError(err, types.ErrorCodeBadResponseBody), nil
	}
	c.Writer.Header().Set("Content-Type", "application/json")
	c.Writer.WriteHeader(resp.StatusCode)
	// Headers are already sent, so a failed write can only be logged.
	if _, err = c.Writer.Write(jsonResponse); err != nil {
		common.SysLog("error writing ali response: " + err.Error())
	}
	return nil, &fullTextResponse.Usage
}

View File

@@ -0,0 +1,132 @@
package minimax
import (
"bytes"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"github.com/QuantumNous/new-api/dto"
"github.com/QuantumNous/new-api/relay/channel"
"github.com/QuantumNous/new-api/relay/channel/openai"
relaycommon "github.com/QuantumNous/new-api/relay/common"
"github.com/QuantumNous/new-api/relay/constant"
"github.com/QuantumNous/new-api/types"
"github.com/gin-gonic/gin"
)
// Adaptor is the channel adaptor for MiniMax. It has no fields; all behavior
// lives in its methods.
type Adaptor struct {
}
// ConvertGeminiRequest always fails with "not implemented"; this adaptor has
// no Gemini request conversion.
func (a *Adaptor) ConvertGeminiRequest(*gin.Context, *relaycommon.RelayInfo, *dto.GeminiChatRequest) (any, error) {
return nil, errors.New("not implemented")
}
// ConvertClaudeRequest always fails with "not implemented"; this adaptor has
// no Claude request conversion.
func (a *Adaptor) ConvertClaudeRequest(c *gin.Context, info *relaycommon.RelayInfo, req *dto.ClaudeRequest) (any, error) {
return nil, errors.New("not implemented")
}
// ConvertAudioRequest turns an OpenAI-style speech request into the JSON body
// of a MiniMaxTTSRequest. Only the audio-speech relay mode is accepted.
//
// The request's ResponseFormat is copied into both AudioSetting.Format and
// OutputFormat. If request.Metadata is non-empty it is unmarshalled onto the
// MiniMax request, so its fields override the values derived above.
//
// On success the gin context key "response_format" is set to "hex" when the
// requested format is exactly "hex", and to "url" otherwise.
func (a *Adaptor) ConvertAudioRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.AudioRequest) (io.Reader, error) {
	if info.RelayMode != constant.RelayModeAudioSpeech {
		return nil, errors.New("unsupported audio relay mode")
	}

	var payload MiniMaxTTSRequest
	payload.Model = info.OriginModelName
	payload.Text = request.Input
	payload.VoiceSetting = VoiceSetting{VoiceID: request.Voice, Speed: request.Speed}
	payload.AudioSetting = &AudioSetting{Format: request.ResponseFormat}
	payload.OutputFormat = request.ResponseFormat

	// Apply vendor-specific overrides carried in the metadata extension field.
	if len(request.Metadata) > 0 {
		if err := json.Unmarshal(request.Metadata, &payload); err != nil {
			return nil, fmt.Errorf("error unmarshalling metadata to minimax request: %w", err)
		}
	}

	encoded, err := json.Marshal(payload)
	if err != nil {
		return nil, fmt.Errorf("error marshalling minimax request: %w", err)
	}

	responseFormat := "url"
	if request.ResponseFormat == "hex" {
		responseFormat = "hex"
	}
	c.Set("response_format", responseFormat)

	return bytes.NewReader(encoded), nil
}
// ConvertImageRequest returns the image request unchanged.
func (a *Adaptor) ConvertImageRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.ImageRequest) (any, error) {
return request, nil
}
// Init is a no-op; the adaptor keeps no per-request state.
func (a *Adaptor) Init(info *relaycommon.RelayInfo) {
}
// GetRequestURL delegates to the package-level GetRequestURL.
func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
return GetRequestURL(info)
}
// SetupRequestHeader applies the shared channel headers through
// channel.SetupApiRequestHeader and then sets Authorization to
// "Bearer <info.ApiKey>". It always returns nil.
func (a *Adaptor) SetupRequestHeader(c *gin.Context, req *http.Header, info *relaycommon.RelayInfo) error {
channel.SetupApiRequestHeader(info, c, req)
req.Set("Authorization", "Bearer "+info.ApiKey)
return nil
}
// ConvertOpenAIRequest passes the OpenAI request through unchanged and rejects
// a nil request.
func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayInfo, request *dto.GeneralOpenAIRequest) (any, error) {
	if request != nil {
		return request, nil
	}
	return nil, errors.New("request is nil")
}
// ConvertRerankRequest returns (nil, nil): no converted body and no error.
// NOTE(review): the other unsupported conversions on this adaptor return a
// "not implemented" error — confirm callers tolerate a nil request here.
func (a *Adaptor) ConvertRerankRequest(c *gin.Context, relayMode int, request dto.RerankRequest) (any, error) {
return nil, nil
}
// ConvertEmbeddingRequest returns the embedding request unchanged.
func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.EmbeddingRequest) (any, error) {
return request, nil
}
// ConvertOpenAIResponsesRequest always fails with "not implemented".
func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
return nil, errors.New("not implemented")
}
// DoRequest sends the prepared body through the shared channel.DoApiRequest
// helper, passing this adaptor along.
func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error) {
return channel.DoApiRequest(a, c, info, requestBody)
}
// DoResponse routes the upstream response by relay mode: audio-speech
// responses go to handleTTSResponse, everything else is handled by a fresh
// openai.Adaptor.
func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (usage any, err *types.NewAPIError) {
	if info.RelayMode != constant.RelayModeAudioSpeech {
		fallback := openai.Adaptor{}
		return fallback.DoResponse(c, resp, info)
	}
	return handleTTSResponse(c, resp, info)
}
// GetModelList returns the package-level ModelList.
func (a *Adaptor) GetModelList() []string {
return ModelList
}
// GetChannelName returns the package-level ChannelName.
func (a *Adaptor) GetChannelName() string {
return ChannelName
}

View File

@@ -8,6 +8,12 @@ var ModelList = []string{
"abab6-chat",
"abab5.5-chat",
"abab5.5s-chat",
"speech-2.5-hd-preview",
"speech-2.5-turbo-preview",
"speech-02-hd",
"speech-02-turbo",
"speech-01-hd",
"speech-01-turbo",
}
var ChannelName = "minimax"

View File

@@ -3,9 +3,23 @@ package minimax
import (
"fmt"
channelconstant "github.com/QuantumNous/new-api/constant"
relaycommon "github.com/QuantumNous/new-api/relay/common"
"github.com/QuantumNous/new-api/relay/constant"
)
// GetRequestURL builds the MiniMax endpoint URL for the relay mode in info.
//
// An empty info.ChannelBaseUrl falls back to the default base URL registered
// for the MiniMax channel type. Chat completions map to
// /v1/text/chatcompletion_v2 and audio speech to /v1/t2a_v2; any other relay
// mode is an error.
//
// There is no unconditional return ahead of the switch, so the fallback and
// the per-mode routing are actually reached.
func GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
	baseUrl := info.ChannelBaseUrl
	if baseUrl == "" {
		baseUrl = channelconstant.ChannelBaseURLs[channelconstant.ChannelTypeMiniMax]
	}

	switch info.RelayMode {
	case constant.RelayModeChatCompletions:
		return fmt.Sprintf("%s/v1/text/chatcompletion_v2", baseUrl), nil
	case constant.RelayModeAudioSpeech:
		return fmt.Sprintf("%s/v1/t2a_v2", baseUrl), nil
	default:
		return "", fmt.Errorf("unsupported relay mode: %d", info.RelayMode)
	}
}

View File

@@ -0,0 +1,194 @@
package minimax
import (
"encoding/hex"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"strings"
"github.com/QuantumNous/new-api/dto"
relaycommon "github.com/QuantumNous/new-api/relay/common"
"github.com/QuantumNous/new-api/types"
"github.com/gin-gonic/gin"
)
// MiniMaxTTSRequest is the JSON body sent for MiniMax speech synthesis.
// ConvertAudioRequest fills Model, Text, VoiceSetting, AudioSetting and
// OutputFormat; the remaining fields are only populated when the caller
// supplies them through the request metadata.
type MiniMaxTTSRequest struct {
Model string `json:"model"`
Text string `json:"text"`
Stream bool `json:"stream,omitempty"`
StreamOptions *StreamOptions `json:"stream_options,omitempty"`
VoiceSetting VoiceSetting `json:"voice_setting"`
PronunciationDict *PronunciationDict `json:"pronunciation_dict,omitempty"`
AudioSetting *AudioSetting `json:"audio_setting,omitempty"`
TimbreWeights []TimbreWeight `json:"timbre_weights,omitempty"`
LanguageBoost string `json:"language_boost,omitempty"`
VoiceModify *VoiceModify `json:"voice_modify,omitempty"`
SubtitleEnable bool `json:"subtitle_enable,omitempty"`
// NOTE(review): presumably "url" or "hex" upstream — confirm against the
// MiniMax API; ConvertAudioRequest currently copies the audio format here.
OutputFormat string `json:"output_format,omitempty"`
AigcWatermark bool `json:"aigc_watermark,omitempty"`
}
// StreamOptions is the optional stream_options object of a MiniMaxTTSRequest.
type StreamOptions struct {
ExcludeAggregatedAudio bool `json:"exclude_aggregated_audio,omitempty"`
}
// VoiceSetting is the voice_setting object. ConvertAudioRequest sets VoiceID
// and Speed from the OpenAI request; every field except VoiceID is omitted
// from the JSON when zero.
type VoiceSetting struct {
VoiceID string `json:"voice_id"`
Speed float64 `json:"speed,omitempty"`
Vol float64 `json:"vol,omitempty"`
Pitch int `json:"pitch,omitempty"`
Emotion string `json:"emotion,omitempty"`
TextNormalization bool `json:"text_normalization,omitempty"`
LatexRead bool `json:"latex_read,omitempty"`
}
// PronunciationDict is the optional pronunciation_dict object.
type PronunciationDict struct {
Tone []string `json:"tone,omitempty"`
}
// AudioSetting is the audio_setting object. ConvertAudioRequest sets Format
// from the OpenAI response_format; all fields are omitted when zero.
type AudioSetting struct {
SampleRate int `json:"sample_rate,omitempty"`
Bitrate int `json:"bitrate,omitempty"`
Format string `json:"format,omitempty"`
Channel int `json:"channel,omitempty"`
ForceCbr bool `json:"force_cbr,omitempty"`
}
// TimbreWeight is one entry of timbre_weights: a voice ID and its weight.
// Both fields are always emitted.
type TimbreWeight struct {
VoiceID string `json:"voice_id"`
Weight int `json:"weight"`
}
// VoiceModify is the optional voice_modify object.
type VoiceModify struct {
Pitch int `json:"pitch,omitempty"`
Intensity int `json:"intensity,omitempty"`
Timbre int `json:"timbre,omitempty"`
SoundEffects string `json:"sound_effects,omitempty"`
}
// MiniMaxTTSResponse is the JSON envelope decoded by handleTTSResponse.
type MiniMaxTTSResponse struct {
Data MiniMaxTTSData `json:"data"`
ExtraInfo MiniMaxExtraInfo `json:"extra_info"`
TraceID string `json:"trace_id"`
BaseResp MiniMaxBaseResp `json:"base_resp"`
}
// MiniMaxTTSData carries the synthesized audio. handleTTSResponse treats an
// Audio value starting with "http" as a URL and anything else as hex-encoded
// bytes.
type MiniMaxTTSData struct {
Audio string `json:"audio"`
Status int `json:"status"`
}
// MiniMaxExtraInfo carries usage data; handleTTSResponse reports
// UsageCharacters as the usage's TotalTokens.
type MiniMaxExtraInfo struct {
UsageCharacters int64 `json:"usage_characters"`
}
// MiniMaxBaseResp is the status object; handleTTSResponse treats any non-zero
// StatusCode as an error.
type MiniMaxBaseResp struct {
StatusCode int64 `json:"status_code"`
StatusMsg string `json:"status_msg"`
}
// getContentTypeByFormat returns the MIME type for an audio format name.
// Unrecognized formats, including the empty string, yield "audio/mpeg".
func getContentTypeByFormat(format string) string {
	switch format {
	case "mp3":
		return "audio/mpeg"
	case "wav":
		return "audio/wav"
	case "flac":
		return "audio/flac"
	case "aac":
		return "audio/aac"
	case "pcm":
		return "audio/pcm"
	default:
		return "audio/mpeg" // default to mp3
	}
}
// handleTTSResponse relays a MiniMax speech-synthesis response to the client.
//
// The upstream JSON is decoded into MiniMaxTTSResponse. A non-zero
// base_resp.status_code or an empty audio field is an error. Audio beginning
// with "http" is answered with a 302 redirect to that URL; anything else is
// hex-decoded and written as "audio/mpeg".
//
// The returned usage carries info.PromptTokens as PromptTokens and the
// upstream usage_characters as TotalTokens.
//
// The body close is deferred before the read so a failed read does not leak
// the upstream connection.
func handleTTSResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (usage any, err *types.NewAPIError) {
	defer resp.Body.Close()

	body, readErr := io.ReadAll(resp.Body)
	if readErr != nil {
		return nil, types.NewErrorWithStatusCode(
			fmt.Errorf("failed to read minimax response: %w", readErr),
			types.ErrorCodeReadResponseBodyFailed,
			http.StatusInternalServerError,
		)
	}

	// Parse response
	var minimaxResp MiniMaxTTSResponse
	if unmarshalErr := json.Unmarshal(body, &minimaxResp); unmarshalErr != nil {
		return nil, types.NewErrorWithStatusCode(
			fmt.Errorf("failed to unmarshal minimax TTS response: %w", unmarshalErr),
			types.ErrorCodeBadResponseBody,
			http.StatusInternalServerError,
		)
	}

	// Check base_resp status code
	if minimaxResp.BaseResp.StatusCode != 0 {
		return nil, types.NewErrorWithStatusCode(
			fmt.Errorf("minimax TTS error: %d - %s", minimaxResp.BaseResp.StatusCode, minimaxResp.BaseResp.StatusMsg),
			types.ErrorCodeBadResponse,
			http.StatusBadRequest,
		)
	}

	// Check if we have audio data
	if minimaxResp.Data.Audio == "" {
		return nil, types.NewErrorWithStatusCode(
			fmt.Errorf("no audio data in minimax TTS response"),
			types.ErrorCodeBadResponse,
			http.StatusBadRequest,
		)
	}

	if strings.HasPrefix(minimaxResp.Data.Audio, "http") {
		c.Redirect(http.StatusFound, minimaxResp.Data.Audio)
	} else {
		// Handle hex-encoded audio data
		audioData, decodeErr := hex.DecodeString(minimaxResp.Data.Audio)
		if decodeErr != nil {
			return nil, types.NewErrorWithStatusCode(
				fmt.Errorf("failed to decode hex audio data: %w", decodeErr),
				types.ErrorCodeBadResponse,
				http.StatusInternalServerError,
			)
		}

		// NOTE(review): the content type is fixed to mp3 regardless of the
		// requested audio format — confirm whether that is intended.
		contentType := "audio/mpeg"
		c.Data(http.StatusOK, contentType, audioData)
	}

	usage = &dto.Usage{
		PromptTokens:     info.PromptTokens,
		CompletionTokens: 0,
		TotalTokens:      int(minimaxResp.ExtraInfo.UsageCharacters),
	}
	return usage, nil
}
// handleChatCompletionResponse copies the upstream response to the client
// verbatim: every upstream header is set on the client response and the body
// is written with the upstream status code as "application/json". It reports
// no usage.
//
// The body close is deferred before the read so a failed read does not leak
// the upstream connection.
func handleChatCompletionResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (usage any, err *types.NewAPIError) {
	defer resp.Body.Close()

	body, readErr := io.ReadAll(resp.Body)
	if readErr != nil {
		return nil, types.NewErrorWithStatusCode(
			errors.New("failed to read minimax response"),
			types.ErrorCodeReadResponseBodyFailed,
			http.StatusInternalServerError,
		)
	}

	// Set response headers
	for key, values := range resp.Header {
		for _, value := range values {
			c.Header(key, value)
		}
	}

	c.Data(resp.StatusCode, "application/json", body)
	return nil, nil
}

View File

@@ -121,7 +121,14 @@ func ollamaStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http
if chunk.Message != nil && len(chunk.Message.Thinking) > 0 {
raw := strings.TrimSpace(string(chunk.Message.Thinking))
if raw != "" && raw != "null" {
delta.Choices[0].Delta.SetReasoningContent(raw)
// Unmarshal the JSON string to get the actual content without quotes
var thinkingContent string
if err := json.Unmarshal(chunk.Message.Thinking, &thinkingContent); err == nil {
delta.Choices[0].Delta.SetReasoningContent(thinkingContent)
} else {
// Fallback to raw string if it's not a JSON string
delta.Choices[0].Delta.SetReasoningContent(raw)
}
}
}
// tool calls
@@ -209,7 +216,14 @@ func ollamaChatHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.R
if ck.Message != nil && len(ck.Message.Thinking) > 0 {
raw := strings.TrimSpace(string(ck.Message.Thinking))
if raw != "" && raw != "null" {
reasoningBuilder.WriteString(raw)
// Unmarshal the JSON string to get the actual content without quotes
var thinkingContent string
if err := json.Unmarshal(ck.Message.Thinking, &thinkingContent); err == nil {
reasoningBuilder.WriteString(thinkingContent)
} else {
// Fallback to raw string if it's not a JSON string
reasoningBuilder.WriteString(raw)
}
}
}
if ck.Message != nil && ck.Message.Content != "" {
@@ -229,7 +243,14 @@ func ollamaChatHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.R
if len(single.Message.Thinking) > 0 {
raw := strings.TrimSpace(string(single.Message.Thinking))
if raw != "" && raw != "null" {
reasoningBuilder.WriteString(raw)
// Unmarshal the JSON string to get the actual content without quotes
var thinkingContent string
if err := json.Unmarshal(single.Message.Thinking, &thinkingContent); err == nil {
reasoningBuilder.WriteString(thinkingContent)
} else {
// Fallback to raw string if it's not a JSON string
reasoningBuilder.WriteString(raw)
}
}
}
aggContent.WriteString(single.Message.Content)

View File

@@ -18,7 +18,7 @@ import (
"github.com/QuantumNous/new-api/relay/channel"
"github.com/QuantumNous/new-api/relay/channel/ai360"
"github.com/QuantumNous/new-api/relay/channel/lingyiwanwu"
"github.com/QuantumNous/new-api/relay/channel/minimax"
//"github.com/QuantumNous/new-api/relay/channel/minimax"
"github.com/QuantumNous/new-api/relay/channel/openrouter"
"github.com/QuantumNous/new-api/relay/channel/xinference"
relaycommon "github.com/QuantumNous/new-api/relay/common"
@@ -161,8 +161,8 @@ func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
requestURL = fmt.Sprintf("/openai/realtime?deployment=%s&api-version=%s", model_, apiVersion)
}
return relaycommon.GetFullRequestURL(info.ChannelBaseUrl, requestURL, info.ChannelType), nil
case constant.ChannelTypeMiniMax:
return minimax.GetRequestURL(info)
//case constant.ChannelTypeMiniMax:
// return minimax.GetRequestURL(info)
case constant.ChannelTypeCustom:
url := info.ChannelBaseUrl
url = strings.Replace(url, "{model}", info.UpstreamModelName, -1)
@@ -599,8 +599,8 @@ func (a *Adaptor) GetModelList() []string {
return ai360.ModelList
case constant.ChannelTypeLingYiWanWu:
return lingyiwanwu.ModelList
case constant.ChannelTypeMiniMax:
return minimax.ModelList
//case constant.ChannelTypeMiniMax:
// return minimax.ModelList
case constant.ChannelTypeXinference:
return xinference.ModelList
case constant.ChannelTypeOpenRouter:
@@ -616,8 +616,8 @@ func (a *Adaptor) GetChannelName() string {
return ai360.ChannelName
case constant.ChannelTypeLingYiWanWu:
return lingyiwanwu.ChannelName
case constant.ChannelTypeMiniMax:
return minimax.ChannelName
//case constant.ChannelTypeMiniMax:
// return minimax.ChannelName
case constant.ChannelTypeXinference:
return xinference.ChannelName
case constant.ChannelTypeOpenRouter:

View File

@@ -37,8 +37,57 @@ func (a *Adaptor) ConvertClaudeRequest(c *gin.Context, info *relaycommon.RelayIn
}
// ConvertAudioRequest turns an OpenAI-style speech request into the JSON body
// of a VolcengineTTSRequest. Only the audio-speech relay mode is accepted.
//
// info.ApiKey must have the form "appid|access_token". The voice and response
// format are translated through mapVoiceType and mapEncoding, and the mapped
// encoding is stored under the gin context key "response_format". If
// request.Metadata is non-empty it is unmarshalled onto the Volcengine
// request, so its fields override the values derived above.
//
// There is no early "not implemented" return, so the conversion below is
// actually reached.
func (a *Adaptor) ConvertAudioRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.AudioRequest) (io.Reader, error) {
	if info.RelayMode != constant.RelayModeAudioSpeech {
		return nil, errors.New("unsupported audio relay mode")
	}

	appID, token, err := parseVolcengineAuth(info.ApiKey)
	if err != nil {
		return nil, err
	}

	voiceType := mapVoiceType(request.Voice)
	speedRatio := request.Speed
	encoding := mapEncoding(request.ResponseFormat)

	c.Set("response_format", encoding)

	volcRequest := VolcengineTTSRequest{
		App: VolcengineTTSApp{
			AppID:   appID,
			Token:   token,
			Cluster: "volcano_tts",
		},
		User: VolcengineTTSUser{
			UID: "openai_relay_user",
		},
		Audio: VolcengineTTSAudio{
			VoiceType:  voiceType,
			Encoding:   encoding,
			SpeedRatio: speedRatio,
			Rate:       24000,
		},
		Request: VolcengineTTSReqInfo{
			ReqID:     generateRequestID(),
			Text:      request.Input,
			Operation: "query",
			Model:     info.OriginModelName,
		},
	}

	// Apply vendor-specific overrides carried in the metadata extension field.
	if len(request.Metadata) > 0 {
		if err = json.Unmarshal(request.Metadata, &volcRequest); err != nil {
			return nil, fmt.Errorf("error unmarshalling metadata to volcengine request: %w", err)
		}
	}

	jsonData, err := json.Marshal(volcRequest)
	if err != nil {
		return nil, fmt.Errorf("error marshalling volcengine request: %w", err)
	}

	return bytes.NewReader(jsonData), nil
}
func (a *Adaptor) ConvertImageRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.ImageRequest) (any, error) {
@@ -190,7 +239,6 @@ func (a *Adaptor) Init(info *relaycommon.RelayInfo) {
}
func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
// 支持自定义域名,如果未设置则使用默认域名
baseUrl := info.ChannelBaseUrl
if baseUrl == "" {
baseUrl = channelconstant.ChannelBaseURLs[channelconstant.ChannelTypeVolcEngine]
@@ -217,6 +265,12 @@ func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
return fmt.Sprintf("%s/api/v3/images/edits", baseUrl), nil
case constant.RelayModeRerank:
return fmt.Sprintf("%s/api/v3/rerank", baseUrl), nil
case constant.RelayModeAudioSpeech:
// 只有当 baseUrl 是火山默认的官方Url时才改为官方的的TTS接口否则走透传的New接口
if baseUrl == channelconstant.ChannelBaseURLs[channelconstant.ChannelTypeVolcEngine] {
return "https://openspeech.bytedance.com/api/v1/tts", nil
}
return fmt.Sprintf("%s/v1/audio/speech", baseUrl), nil
default:
}
}
@@ -225,6 +279,16 @@ func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
// SetupRequestHeader applies the shared channel headers and then the
// Authorization header.
//
// Outside audio-speech mode the key is sent as "Bearer <key>". In audio-speech
// mode a key of the form "appid|access_token" is sent as "Bearer;<token>" and
// Content-Type is forced to application/json. A key that is not in that
// two-part form is sent as a plain "Bearer <key>", so the request never goes
// out without an Authorization header.
func (a *Adaptor) SetupRequestHeader(c *gin.Context, req *http.Header, info *relaycommon.RelayInfo) error {
	channel.SetupApiRequestHeader(info, c, req)

	if info.RelayMode != constant.RelayModeAudioSpeech {
		req.Set("Authorization", "Bearer "+info.ApiKey)
		return nil
	}

	if parts := strings.Split(info.ApiKey, "|"); len(parts) == 2 {
		req.Set("Authorization", "Bearer;"+parts[1])
	} else {
		req.Set("Authorization", "Bearer "+info.ApiKey)
	}
	req.Set("Content-Type", "application/json")
	return nil
}
@@ -260,6 +324,11 @@ func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, request
}
func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (usage any, err *types.NewAPIError) {
if info.RelayMode == constant.RelayModeAudioSpeech {
encoding := mapEncoding(c.GetString("response_format"))
return handleTTSResponse(c, resp, info, encoding)
}
adaptor := openai.Adaptor{}
usage, err = adaptor.DoResponse(c, resp, info)
return

View File

@@ -0,0 +1,194 @@
package volcengine
import (
"encoding/base64"
"encoding/json"
"errors"
"io"
"net/http"
"strings"
"github.com/QuantumNous/new-api/dto"
relaycommon "github.com/QuantumNous/new-api/relay/common"
"github.com/QuantumNous/new-api/types"
"github.com/gin-gonic/gin"
"github.com/google/uuid"
)
// VolcengineTTSRequest is the JSON body built by ConvertAudioRequest for
// Volcengine speech synthesis: credentials, user, audio options and the
// request itself.
type VolcengineTTSRequest struct {
App VolcengineTTSApp `json:"app"`
User VolcengineTTSUser `json:"user"`
Audio VolcengineTTSAudio `json:"audio"`
Request VolcengineTTSReqInfo `json:"request"`
}
// VolcengineTTSApp holds the credentials parsed from the channel key
// ("appid|access_token") plus the cluster name.
type VolcengineTTSApp struct {
AppID string `json:"appid"`
Token string `json:"token"`
Cluster string `json:"cluster"`
}
// VolcengineTTSUser identifies the calling user to the upstream.
type VolcengineTTSUser struct {
UID string `json:"uid"`
}
// VolcengineTTSAudio holds the audio options. VoiceType, Encoding, SpeedRatio
// and Rate are always emitted; the remaining fields are omitted when zero.
type VolcengineTTSAudio struct {
VoiceType string `json:"voice_type"`
Encoding string `json:"encoding"`
SpeedRatio float64 `json:"speed_ratio"`
Rate int `json:"rate"`
Bitrate int `json:"bitrate,omitempty"`
LoudnessRatio float64 `json:"loudness_ratio,omitempty"`
EnableEmotion bool `json:"enable_emotion,omitempty"`
Emotion string `json:"emotion,omitempty"`
EmotionScale float64 `json:"emotion_scale,omitempty"`
ExplicitLanguage string `json:"explicit_language,omitempty"`
ContextLanguage string `json:"context_language,omitempty"`
}
// VolcengineTTSReqInfo describes the synthesis request: its ID, the text to
// speak and the operation name, plus optional tuning fields.
type VolcengineTTSReqInfo struct {
ReqID string `json:"reqid"`
Text string `json:"text"`
Operation string `json:"operation"`
Model string `json:"model,omitempty"`
TextType string `json:"text_type,omitempty"`
SilenceDuration float64 `json:"silence_duration,omitempty"`
// WithTimestamp is untyped, so whatever JSON value the caller supplies is
// forwarded as-is.
WithTimestamp interface{} `json:"with_timestamp,omitempty"`
ExtraParam *VolcengineTTSExtraParam `json:"extra_param,omitempty"`
}
// VolcengineTTSExtraParam is the optional extra_param object; every field is
// omitted when zero.
type VolcengineTTSExtraParam struct {
DisableMarkdownFilter bool `json:"disable_markdown_filter,omitempty"`
EnableLatexTn bool `json:"enable_latex_tn,omitempty"`
MuteCutThreshold string `json:"mute_cut_threshold,omitempty"`
MuteCutRemainMs string `json:"mute_cut_remain_ms,omitempty"`
DisableEmojiFilter bool `json:"disable_emoji_filter,omitempty"`
UnsupportedCharRatioThresh float64 `json:"unsupported_char_ratio_thresh,omitempty"`
AigcWatermark bool `json:"aigc_watermark,omitempty"`
CacheConfig *VolcengineTTSCacheConfig `json:"cache_config,omitempty"`
}
// VolcengineTTSCacheConfig is the optional cache_config object nested in
// VolcengineTTSExtraParam.
type VolcengineTTSCacheConfig struct {
TextType int `json:"text_type,omitempty"`
UseCache bool `json:"use_cache,omitempty"`
}
// VolcengineTTSResponse is the JSON envelope decoded by handleTTSResponse,
// which treats Code 3000 as success and base64-decodes Data into the audio
// bytes.
type VolcengineTTSResponse struct {
ReqID string `json:"reqid"`
Code int `json:"code"`
Message string `json:"message"`
Sequence int `json:"sequence"`
Data string `json:"data"`
Addition *VolcengineTTSAdditionInfo `json:"addition,omitempty"`
}
// VolcengineTTSAdditionInfo is the optional addition object of a response.
// NOTE(review): Duration arrives as a string; its unit is not visible here.
type VolcengineTTSAdditionInfo struct {
Duration string `json:"duration"`
}
// openAIToVolcengineVoiceMap translates OpenAI voice names into Volcengine
// voice type identifiers. mapVoiceType passes through any name not listed.
var openAIToVolcengineVoiceMap = map[string]string{
"alloy": "zh_male_M392_conversation_wvae_bigtts",
"echo": "zh_male_wenhao_mars_bigtts",
"fable": "zh_female_tianmei_mars_bigtts",
"onyx": "zh_male_zhibei_mars_bigtts",
"nova": "zh_female_shuangkuaisisi_mars_bigtts",
"shimmer": "zh_female_cancan_mars_bigtts",
}
// responseFormatToEncodingMap translates OpenAI response_format values into
// Volcengine encoding names. Note that "aac" and "flac" both map to "mp3".
// NOTE(review): presumably because the upstream lacks those encodings —
// confirm.
var responseFormatToEncodingMap = map[string]string{
"mp3": "mp3",
"opus": "ogg_opus",
"aac": "mp3",
"flac": "mp3",
"wav": "wav",
"pcm": "pcm",
}
// parseVolcengineAuth splits a channel key of the form "appid|access_token"
// into its two halves. The key must contain exactly one "|"; either half may
// be empty. Any other shape is rejected.
func parseVolcengineAuth(apiKey string) (appID, token string, err error) {
	if strings.Count(apiKey, "|") != 1 {
		return "", "", errors.New("invalid api key format, expected: appid|access_token")
	}
	sep := strings.IndexByte(apiKey, '|')
	return apiKey[:sep], apiKey[sep+1:], nil
}
// mapVoiceType returns the Volcengine voice type for an OpenAI voice name, or
// the input unchanged when it has no entry in openAIToVolcengineVoiceMap.
func mapVoiceType(openAIVoice string) string {
	mapped, found := openAIToVolcengineVoiceMap[openAIVoice]
	if !found {
		return openAIVoice
	}
	return mapped
}
// mapEncoding returns the Volcengine encoding for an OpenAI response format,
// falling back to "mp3" when the format has no entry in
// responseFormatToEncodingMap.
func mapEncoding(responseFormat string) string {
	mapped, found := responseFormatToEncodingMap[responseFormat]
	if !found {
		return "mp3"
	}
	return mapped
}
// getContentTypeByEncoding returns the MIME type for a Volcengine encoding
// name. Unrecognized encodings yield "application/octet-stream".
func getContentTypeByEncoding(encoding string) string {
	switch encoding {
	case "mp3":
		return "audio/mpeg"
	case "ogg_opus":
		return "audio/ogg"
	case "wav":
		return "audio/wav"
	case "pcm":
		return "audio/pcm"
	default:
		return "application/octet-stream"
	}
}
// handleTTSResponse relays a Volcengine speech-synthesis response to the
// client.
//
// The upstream JSON is decoded into VolcengineTTSResponse. Any code other
// than 3000 is returned as an error carrying the upstream message. Otherwise
// Data is base64-decoded and written with the content type derived from
// encoding.
//
// The returned usage carries info.PromptTokens as both PromptTokens and
// TotalTokens.
//
// The body close is deferred before the read so a failed read does not leak
// the upstream connection.
func handleTTSResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo, encoding string) (usage any, err *types.NewAPIError) {
	defer resp.Body.Close()

	body, readErr := io.ReadAll(resp.Body)
	if readErr != nil {
		return nil, types.NewErrorWithStatusCode(
			errors.New("failed to read volcengine response"),
			types.ErrorCodeReadResponseBodyFailed,
			http.StatusInternalServerError,
		)
	}

	var volcResp VolcengineTTSResponse
	if unmarshalErr := json.Unmarshal(body, &volcResp); unmarshalErr != nil {
		return nil, types.NewErrorWithStatusCode(
			errors.New("failed to parse volcengine response"),
			types.ErrorCodeBadResponseBody,
			http.StatusInternalServerError,
		)
	}

	if volcResp.Code != 3000 {
		return nil, types.NewErrorWithStatusCode(
			errors.New(volcResp.Message),
			types.ErrorCodeBadResponse,
			http.StatusBadRequest,
		)
	}

	audioData, decodeErr := base64.StdEncoding.DecodeString(volcResp.Data)
	if decodeErr != nil {
		return nil, types.NewErrorWithStatusCode(
			errors.New("failed to decode audio data"),
			types.ErrorCodeBadResponseBody,
			http.StatusInternalServerError,
		)
	}

	contentType := getContentTypeByEncoding(encoding)
	// Set the header explicitly before c.Data as well.
	c.Header("Content-Type", contentType)
	c.Data(http.StatusOK, contentType, audioData)

	usage = &dto.Usage{
		PromptTokens:     info.PromptTokens,
		CompletionTokens: 0,
		TotalTokens:      info.PromptTokens,
	}
	return usage, nil
}
// generateRequestID returns a newly generated UUID in string form; it is used
// as the reqid of each Volcengine TTS request.
func generateRequestID() string {
return uuid.New().String()
}

View File

@@ -263,6 +263,7 @@ var streamSupportedChannels = map[int]bool{
constant.ChannelTypeDeepSeek: true,
constant.ChannelTypeBaiduV2: true,
constant.ChannelTypeZhipu_v4: true,
constant.ChannelTypeAli: true,
}
func GenRelayInfoWs(c *gin.Context, ws *websocket.Conn) *RelayInfo {

View File

@@ -22,8 +22,10 @@ func GetAndValidateRequest(c *gin.Context, format types.RelayFormat) (request dt
case types.RelayFormatOpenAI:
request, err = GetAndValidateTextRequest(c, relayMode)
case types.RelayFormatGemini:
if strings.Contains(c.Request.URL.Path, ":embedContent") || strings.Contains(c.Request.URL.Path, ":batchEmbedContents") {
if strings.Contains(c.Request.URL.Path, ":embedContent") {
request, err = GetAndValidateGeminiEmbeddingRequest(c)
} else if strings.Contains(c.Request.URL.Path, ":batchEmbedContents") {
request, err = GetAndValidateGeminiBatchEmbeddingRequest(c)
} else {
request, err = GetAndValidateGeminiRequest(c)
}
@@ -319,3 +321,12 @@ func GetAndValidateGeminiEmbeddingRequest(c *gin.Context) (*dto.GeminiEmbeddingR
}
return request, nil
}
// GetAndValidateGeminiBatchEmbeddingRequest decodes the request body into a
// GeminiBatchEmbeddingRequest through common.UnmarshalBodyReusable. It returns
// the decode error as-is and performs no further validation.
func GetAndValidateGeminiBatchEmbeddingRequest(c *gin.Context) (*dto.GeminiBatchEmbeddingRequest, error) {
	var request dto.GeminiBatchEmbeddingRequest
	if err := common.UnmarshalBodyReusable(c, &request); err != nil {
		return nil, err
	}
	return &request, nil
}

View File

@@ -18,6 +18,7 @@ import (
"github.com/QuantumNous/new-api/relay/channel/gemini"
"github.com/QuantumNous/new-api/relay/channel/jimeng"
"github.com/QuantumNous/new-api/relay/channel/jina"
"github.com/QuantumNous/new-api/relay/channel/minimax"
"github.com/QuantumNous/new-api/relay/channel/mistral"
"github.com/QuantumNous/new-api/relay/channel/mokaai"
"github.com/QuantumNous/new-api/relay/channel/moonshot"
@@ -108,6 +109,8 @@ func GetAdaptor(apiType int) channel.Adaptor {
return &moonshot.Adaptor{} // Moonshot uses Claude API
case constant.APITypeSubmodel:
return &submodel.Adaptor{}
case constant.APITypeMiniMax:
return &minimax.Adaptor{}
}
return nil
}

View File

@@ -107,6 +107,8 @@ function type2secretPrompt(type) {
return '按照如下格式输入AppId|SecretId|SecretKey';
case 33:
return '按照如下格式输入Ak|Sk|Region';
case 45:
return '请输入渠道对应的鉴权密钥, 豆包语音输入AppId|AccessToken';
case 50:
return '按照如下格式输入: AccessKey|SecretKey, 如果上游是New API则直接输ApiKey';
case 51: