mirror of
https://github.com/QuantumNous/new-api.git
synced 2026-04-19 03:07:27 +00:00
391 lines
12 KiB
Go
391 lines
12 KiB
Go
package volcengine
|
||
|
||
import (
|
||
"bytes"
|
||
"encoding/json"
|
||
"errors"
|
||
"fmt"
|
||
"io"
|
||
"mime/multipart"
|
||
"net/http"
|
||
"net/textproto"
|
||
"path/filepath"
|
||
"strings"
|
||
|
||
channelconstant "github.com/QuantumNous/new-api/constant"
|
||
"github.com/QuantumNous/new-api/dto"
|
||
"github.com/QuantumNous/new-api/relay/channel"
|
||
"github.com/QuantumNous/new-api/relay/channel/openai"
|
||
relaycommon "github.com/QuantumNous/new-api/relay/common"
|
||
"github.com/QuantumNous/new-api/relay/constant"
|
||
"github.com/QuantumNous/new-api/types"
|
||
|
||
"github.com/gin-gonic/gin"
|
||
)
|
||
|
||
// Adaptor is the relay adaptor for the Volcengine channel. It has no fields;
// everything a method needs is passed in through its arguments.
type Adaptor struct{}
|
||
|
||
func (a *Adaptor) ConvertGeminiRequest(*gin.Context, *relaycommon.RelayInfo, *dto.GeminiChatRequest) (any, error) {
|
||
//TODO implement me
|
||
return nil, errors.New("not implemented")
|
||
}
|
||
|
||
func (a *Adaptor) ConvertClaudeRequest(c *gin.Context, info *relaycommon.RelayInfo, req *dto.ClaudeRequest) (any, error) {
|
||
adaptor := openai.Adaptor{}
|
||
return adaptor.ConvertClaudeRequest(c, info, req)
|
||
}
|
||
|
||
func (a *Adaptor) ConvertAudioRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.AudioRequest) (io.Reader, error) {
|
||
if info.RelayMode != constant.RelayModeAudioSpeech {
|
||
return nil, errors.New("unsupported audio relay mode")
|
||
}
|
||
|
||
appID, token, err := parseVolcengineAuth(info.ApiKey)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
|
||
voiceType := mapVoiceType(request.Voice)
|
||
speedRatio := request.Speed
|
||
encoding := mapEncoding(request.ResponseFormat)
|
||
|
||
c.Set("response_format", encoding)
|
||
|
||
volcRequest := VolcengineTTSRequest{
|
||
App: VolcengineTTSApp{
|
||
AppID: appID,
|
||
Token: token,
|
||
Cluster: "volcano_tts",
|
||
},
|
||
User: VolcengineTTSUser{
|
||
UID: "openai_relay_user",
|
||
},
|
||
Audio: VolcengineTTSAudio{
|
||
VoiceType: voiceType,
|
||
Encoding: encoding,
|
||
SpeedRatio: speedRatio,
|
||
Rate: 24000,
|
||
},
|
||
Request: VolcengineTTSReqInfo{
|
||
ReqID: generateRequestID(),
|
||
Text: request.Input,
|
||
Operation: "submit", // WebSocket uses "submit"
|
||
Model: info.OriginModelName,
|
||
},
|
||
}
|
||
|
||
// 同步扩展字段的厂商自定义metadata
|
||
if len(request.Metadata) > 0 {
|
||
if err = json.Unmarshal(request.Metadata, &volcRequest); err != nil {
|
||
return nil, fmt.Errorf("error unmarshalling metadata to volcengine request: %w", err)
|
||
}
|
||
}
|
||
|
||
// Store the request in context for WebSocket handler
|
||
c.Set("volcengine_tts_request", volcRequest)
|
||
|
||
// Return nil as WebSocket doesn't use traditional request body
|
||
return nil, nil
|
||
}
|
||
|
||
func (a *Adaptor) ConvertImageRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.ImageRequest) (any, error) {
|
||
switch info.RelayMode {
|
||
case constant.RelayModeImagesGenerations:
|
||
return request, nil
|
||
case constant.RelayModeImagesEdits:
|
||
|
||
var requestBody bytes.Buffer
|
||
writer := multipart.NewWriter(&requestBody)
|
||
|
||
writer.WriteField("model", request.Model)
|
||
// 获取所有表单字段
|
||
formData := c.Request.PostForm
|
||
// 遍历表单字段并打印输出
|
||
for key, values := range formData {
|
||
if key == "model" {
|
||
continue
|
||
}
|
||
for _, value := range values {
|
||
writer.WriteField(key, value)
|
||
}
|
||
}
|
||
|
||
// Parse the multipart form to handle both single image and multiple images
|
||
if err := c.Request.ParseMultipartForm(32 << 20); err != nil { // 32MB max memory
|
||
return nil, errors.New("failed to parse multipart form")
|
||
}
|
||
|
||
if c.Request.MultipartForm != nil && c.Request.MultipartForm.File != nil {
|
||
// Check if "image" field exists in any form, including array notation
|
||
var imageFiles []*multipart.FileHeader
|
||
var exists bool
|
||
|
||
// First check for standard "image" field
|
||
if imageFiles, exists = c.Request.MultipartForm.File["image"]; !exists || len(imageFiles) == 0 {
|
||
// If not found, check for "image[]" field
|
||
if imageFiles, exists = c.Request.MultipartForm.File["image[]"]; !exists || len(imageFiles) == 0 {
|
||
// If still not found, iterate through all fields to find any that start with "image["
|
||
foundArrayImages := false
|
||
for fieldName, files := range c.Request.MultipartForm.File {
|
||
if strings.HasPrefix(fieldName, "image[") && len(files) > 0 {
|
||
foundArrayImages = true
|
||
for _, file := range files {
|
||
imageFiles = append(imageFiles, file)
|
||
}
|
||
}
|
||
}
|
||
|
||
// If no image fields found at all
|
||
if !foundArrayImages && (len(imageFiles) == 0) {
|
||
return nil, errors.New("image is required")
|
||
}
|
||
}
|
||
}
|
||
|
||
// Process all image files
|
||
for i, fileHeader := range imageFiles {
|
||
file, err := fileHeader.Open()
|
||
if err != nil {
|
||
return nil, fmt.Errorf("failed to open image file %d: %w", i, err)
|
||
}
|
||
defer file.Close()
|
||
|
||
// If multiple images, use image[] as the field name
|
||
fieldName := "image"
|
||
if len(imageFiles) > 1 {
|
||
fieldName = "image[]"
|
||
}
|
||
|
||
// Determine MIME type based on file extension
|
||
mimeType := detectImageMimeType(fileHeader.Filename)
|
||
|
||
// Create a form file with the appropriate content type
|
||
h := make(textproto.MIMEHeader)
|
||
h.Set("Content-Disposition", fmt.Sprintf(`form-data; name="%s"; filename="%s"`, fieldName, fileHeader.Filename))
|
||
h.Set("Content-Type", mimeType)
|
||
|
||
part, err := writer.CreatePart(h)
|
||
if err != nil {
|
||
return nil, fmt.Errorf("create form part failed for image %d: %w", i, err)
|
||
}
|
||
|
||
if _, err := io.Copy(part, file); err != nil {
|
||
return nil, fmt.Errorf("copy file failed for image %d: %w", i, err)
|
||
}
|
||
}
|
||
|
||
// Handle mask file if present
|
||
if maskFiles, exists := c.Request.MultipartForm.File["mask"]; exists && len(maskFiles) > 0 {
|
||
maskFile, err := maskFiles[0].Open()
|
||
if err != nil {
|
||
return nil, errors.New("failed to open mask file")
|
||
}
|
||
defer maskFile.Close()
|
||
|
||
// Determine MIME type for mask file
|
||
mimeType := detectImageMimeType(maskFiles[0].Filename)
|
||
|
||
// Create a form file with the appropriate content type
|
||
h := make(textproto.MIMEHeader)
|
||
h.Set("Content-Disposition", fmt.Sprintf(`form-data; name="mask"; filename="%s"`, maskFiles[0].Filename))
|
||
h.Set("Content-Type", mimeType)
|
||
|
||
maskPart, err := writer.CreatePart(h)
|
||
if err != nil {
|
||
return nil, errors.New("create form file failed for mask")
|
||
}
|
||
|
||
if _, err := io.Copy(maskPart, maskFile); err != nil {
|
||
return nil, errors.New("copy mask file failed")
|
||
}
|
||
}
|
||
} else {
|
||
return nil, errors.New("no multipart form data found")
|
||
}
|
||
|
||
// 关闭 multipart 编写器以设置分界线
|
||
writer.Close()
|
||
c.Request.Header.Set("Content-Type", writer.FormDataContentType())
|
||
return bytes.NewReader(requestBody.Bytes()), nil
|
||
|
||
default:
|
||
return request, nil
|
||
}
|
||
}
|
||
|
||
// detectImageMimeType picks an image MIME type from the file name's
// extension (case-insensitive):
//
//   - any extension beginning with ".jp" (".jpg", ".jpeg", ...) -> "image/jpeg"
//   - ".png"  -> "image/png"
//   - ".webp" -> "image/webp"
//   - anything else, including no extension -> "image/png"
func detectImageMimeType(filename string) string {
	suffix := strings.ToLower(filepath.Ext(filename))

	if suffix == ".webp" {
		return "image/webp"
	}
	// ".jpg" and ".jpeg" both fall under the ".jp" prefix rule.
	if strings.HasPrefix(suffix, ".jp") {
		return "image/jpeg"
	}
	// ".png" and every unrecognised extension share the PNG fallback.
	return "image/png"
}
|
||
|
||
func (a *Adaptor) Init(info *relaycommon.RelayInfo) {
|
||
}
|
||
|
||
func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
|
||
baseUrl := info.ChannelBaseUrl
|
||
if baseUrl == "" {
|
||
baseUrl = channelconstant.ChannelBaseURLs[channelconstant.ChannelTypeVolcEngine]
|
||
}
|
||
|
||
switch info.RelayFormat {
|
||
case types.RelayFormatClaude:
|
||
if strings.HasPrefix(info.UpstreamModelName, "bot") {
|
||
return fmt.Sprintf("%s/api/v3/bots/chat/completions", baseUrl), nil
|
||
}
|
||
return fmt.Sprintf("%s/api/v3/chat/completions", baseUrl), nil
|
||
default:
|
||
switch info.RelayMode {
|
||
case constant.RelayModeChatCompletions:
|
||
if strings.HasPrefix(info.UpstreamModelName, "bot") {
|
||
return fmt.Sprintf("%s/api/v3/bots/chat/completions", baseUrl), nil
|
||
}
|
||
return fmt.Sprintf("%s/api/v3/chat/completions", baseUrl), nil
|
||
case constant.RelayModeEmbeddings:
|
||
return fmt.Sprintf("%s/api/v3/embeddings", baseUrl), nil
|
||
case constant.RelayModeImagesGenerations:
|
||
return fmt.Sprintf("%s/api/v3/images/generations", baseUrl), nil
|
||
case constant.RelayModeImagesEdits:
|
||
return fmt.Sprintf("%s/api/v3/images/edits", baseUrl), nil
|
||
case constant.RelayModeRerank:
|
||
return fmt.Sprintf("%s/api/v3/rerank", baseUrl), nil
|
||
case constant.RelayModeAudioSpeech:
|
||
// 只有当 baseUrl 是火山默认的官方Url时才改为官方的的TTS接口,否则走透传的New接口
|
||
if baseUrl == channelconstant.ChannelBaseURLs[channelconstant.ChannelTypeVolcEngine] {
|
||
return "wss://openspeech.bytedance.com/api/v1/tts/ws_binary", nil
|
||
}
|
||
return fmt.Sprintf("%s/v1/audio/speech", baseUrl), nil
|
||
default:
|
||
}
|
||
}
|
||
return "", fmt.Errorf("unsupported relay mode: %d", info.RelayMode)
|
||
}
|
||
|
||
func (a *Adaptor) SetupRequestHeader(c *gin.Context, req *http.Header, info *relaycommon.RelayInfo) error {
|
||
channel.SetupApiRequestHeader(info, c, req)
|
||
|
||
if info.RelayMode == constant.RelayModeAudioSpeech {
|
||
parts := strings.Split(info.ApiKey, "|")
|
||
if len(parts) == 2 {
|
||
req.Set("Authorization", "Bearer;"+parts[1])
|
||
}
|
||
req.Set("Content-Type", "application/json")
|
||
return nil
|
||
}
|
||
|
||
req.Set("Authorization", "Bearer "+info.ApiKey)
|
||
return nil
|
||
}
|
||
|
||
func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayInfo, request *dto.GeneralOpenAIRequest) (any, error) {
|
||
if request == nil {
|
||
return nil, errors.New("request is nil")
|
||
}
|
||
// 适配 方舟deepseek混合模型 的 thinking 后缀
|
||
if strings.HasSuffix(info.UpstreamModelName, "-thinking") && strings.HasPrefix(info.UpstreamModelName, "deepseek") {
|
||
info.UpstreamModelName = strings.TrimSuffix(info.UpstreamModelName, "-thinking")
|
||
request.Model = info.UpstreamModelName
|
||
request.THINKING = json.RawMessage(`{"type": "enabled"}`)
|
||
}
|
||
return request, nil
|
||
}
|
||
|
||
func (a *Adaptor) ConvertRerankRequest(c *gin.Context, relayMode int, request dto.RerankRequest) (any, error) {
|
||
return nil, nil
|
||
}
|
||
|
||
func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.EmbeddingRequest) (any, error) {
|
||
return request, nil
|
||
}
|
||
|
||
func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
|
||
// TODO implement me
|
||
return nil, errors.New("not implemented")
|
||
}
|
||
|
||
func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error) {
|
||
// For TTS with WebSocket, skip traditional HTTP request
|
||
if info.RelayMode == constant.RelayModeAudioSpeech {
|
||
baseUrl := info.ChannelBaseUrl
|
||
if baseUrl == "" {
|
||
baseUrl = channelconstant.ChannelBaseURLs[channelconstant.ChannelTypeVolcEngine]
|
||
}
|
||
// Only use WebSocket for official Volcengine endpoint
|
||
if baseUrl == channelconstant.ChannelBaseURLs[channelconstant.ChannelTypeVolcEngine] {
|
||
return nil, nil // WebSocket handling will be done in DoResponse
|
||
}
|
||
}
|
||
return channel.DoApiRequest(a, c, info, requestBody)
|
||
}
|
||
|
||
func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (usage any, err *types.NewAPIError) {
|
||
if info.RelayMode == constant.RelayModeAudioSpeech {
|
||
encoding := mapEncoding(c.GetString("response_format"))
|
||
|
||
// Check if this is WebSocket mode (resp will be nil for WebSocket)
|
||
if resp == nil {
|
||
// Get the WebSocket URL
|
||
requestURL, urlErr := a.GetRequestURL(info)
|
||
if urlErr != nil {
|
||
return nil, types.NewErrorWithStatusCode(
|
||
urlErr,
|
||
types.ErrorCodeBadRequestBody,
|
||
http.StatusInternalServerError,
|
||
)
|
||
}
|
||
|
||
// Retrieve the volcengine request from context
|
||
volcRequestInterface, exists := c.Get("volcengine_tts_request")
|
||
if !exists {
|
||
return nil, types.NewErrorWithStatusCode(
|
||
errors.New("volcengine TTS request not found in context"),
|
||
types.ErrorCodeBadRequestBody,
|
||
http.StatusInternalServerError,
|
||
)
|
||
}
|
||
|
||
volcRequest, ok := volcRequestInterface.(VolcengineTTSRequest)
|
||
if !ok {
|
||
return nil, types.NewErrorWithStatusCode(
|
||
errors.New("invalid volcengine TTS request type"),
|
||
types.ErrorCodeBadRequestBody,
|
||
http.StatusInternalServerError,
|
||
)
|
||
}
|
||
|
||
// Handle WebSocket streaming
|
||
return handleTTSWebSocketResponse(c, requestURL, volcRequest, info, encoding)
|
||
}
|
||
|
||
// Handle traditional HTTP response
|
||
return handleTTSResponse(c, resp, info, encoding)
|
||
}
|
||
|
||
adaptor := openai.Adaptor{}
|
||
usage, err = adaptor.DoResponse(c, resp, info)
|
||
return
|
||
}
|
||
|
||
func (a *Adaptor) GetModelList() []string {
|
||
return ModelList
|
||
}
|
||
|
||
func (a *Adaptor) GetChannelName() string {
|
||
return ChannelName
|
||
}
|