diff --git a/relay/channel/api_request.go b/relay/channel/api_request.go
index 1ff1e2392..22426c69e 100644
--- a/relay/channel/api_request.go
+++ b/relay/channel/api_request.go
@@ -27,6 +27,8 @@ import (
 func SetupApiRequestHeader(info *common.RelayInfo, c *gin.Context, req *http.Header) {
 	if info.RelayMode == constant.RelayModeAudioTranscription || info.RelayMode == constant.RelayModeAudioTranslation {
 		// multipart/form-data
+	} else if info.RelayMode == constant.RelayModeImagesEdits {
+		// multipart/form-data
 	} else if info.RelayMode == constant.RelayModeRealtime {
 		// websocket
 	} else {
diff --git a/relay/channel/gemini/adaptor.go b/relay/channel/gemini/adaptor.go
index 021ed0623..fcc99b662 100644
--- a/relay/channel/gemini/adaptor.go
+++ b/relay/channel/gemini/adaptor.go
@@ -142,11 +142,31 @@ func processSizeParameters(size, quality string) ImageConfig {
 }
 
 func (a *Adaptor) ConvertImageRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.ImageRequest) (any, error) {
-	if strings.HasPrefix(info.UpstreamModelName, "gemini-3-pro-image") {
+	if model_setting.IsGeminiModelSupportImagine(info.UpstreamModelName) {
+		var content any
+		// Attach the first uploaded "image" form file next to the prompt. Any
+		// failure to read one from the form falls back to a text-only prompt.
+		if base64Data, err := relaycommon.GetImageBase64sFromForm(c); err == nil {
+			content = []any{
+				dto.MediaContent{
+					Type: dto.ContentTypeText,
+					Text: request.Prompt,
+				},
+				dto.MediaContent{
+					Type: dto.ContentTypeFile,
+					File: &dto.MessageFile{
+						FileData: base64Data.String(),
+					},
+				},
+			}
+		} else {
+			content = request.Prompt
+		}
+
 		chatRequest := dto.GeneralOpenAIRequest{
 			Model: request.Model,
 			Messages: []dto.Message{
-				{Role: "user", Content: request.Prompt},
+				{Role: "user", Content: content},
 			},
 			N: int(request.N),
 		}
diff --git a/relay/channel/gemini/relay-gemini.go b/relay/channel/gemini/relay-gemini.go
index 7776847be..9da33b308 100644
--- a/relay/channel/gemini/relay-gemini.go
+++ b/relay/channel/gemini/relay-gemini.go
@@ -183,7 +183,7 @@ func ThinkingAdaptor(geminiRequest *dto.GeminiChatRequest, info *relaycommon.Rel
 }
 
 // Setting safety to the lowest possible values since Gemini is already powerless enough
-func CovertOpenAI2Gemini(c *gin.Context, textRequest dto.GeneralOpenAIRequest, info *relaycommon.RelayInfo) (*dto.GeminiChatRequest, error) {
+func CovertOpenAI2Gemini(c *gin.Context, textRequest dto.GeneralOpenAIRequest, info *relaycommon.RelayInfo, base64Data ...*relaycommon.Base64Data) (*dto.GeminiChatRequest, error) {
 
 	geminiRequest := dto.GeminiChatRequest{
 		Contents: make([]dto.GeminiChatContent, 0, len(textRequest.Messages)),
@@ -464,10 +464,11 @@ func CovertOpenAI2Gemini(c *gin.Context, textRequest dto.GeneralOpenAIRequest, i
 					})
 				}
 			} else if part.Type == dto.ContentTypeFile {
-				if part.GetFile().FileId != "" {
+				file := part.GetFile()
+				if file.FileId != "" {
 					return nil, fmt.Errorf("only base64 file is supported in gemini")
 				}
-				format, base64String, err := service.DecodeBase64FileData(part.GetFile().FileData)
+				format, base64String, err := service.DecodeBase64FileData(file.FileData)
 				if err != nil {
 					return nil, fmt.Errorf("decode base64 file data failed: %s", err.Error())
 				}
diff --git a/relay/common/relay_utils.go b/relay/common/relay_utils.go
index b662f9053..1159298c4 100644
--- a/relay/common/relay_utils.go
+++ b/relay/common/relay_utils.go
@@ -1,7 +1,10 @@
 package common
 
 import (
+	"encoding/base64"
+	"errors"
 	"fmt"
+	"io"
 	"net/http"
 	"strconv"
 	"strings"
@@ -226,3 +229,69 @@ func ValidateBasicTaskRequest(c *gin.Context, info *RelayInfo, action string) *d
 	storeTaskRequest(c, info, action, req)
 	return nil
 }
+
+// GetImagesBase64sFromForm returns every file uploaded under the "image"
+// multipart form field, base64-encoded with a content-sniffed MIME type.
+func GetImagesBase64sFromForm(c *gin.Context) ([]*Base64Data, error) {
+	return GetBase64sFromForm(c, "image")
+}
+
+// GetImageBase64sFromForm returns only the first file uploaded under the
+// "image" multipart form field.
+func GetImageBase64sFromForm(c *gin.Context) (*Base64Data, error) {
+	base64s, err := GetImagesBase64sFromForm(c)
+	if err != nil {
+		return nil, err
+	}
+	// Indexing is safe: GetBase64sFromForm returns an error, never an empty slice.
+	return base64s[0], nil
+}
+
+// Base64Data is a base64-encoded payload together with its MIME type.
+type Base64Data struct {
+	MimeType string
+	Data     string
+}
+
+// String renders the payload as a data URL: data:<MimeType>;base64,<Data>.
+func (m Base64Data) String() string {
+	return fmt.Sprintf("data:%s;base64,%s", m.MimeType, m.Data)
+}
+
+// GetBase64sFromForm reads every file uploaded under fieldName in the
+// request's multipart form and returns each one base64-encoded, with its
+// MIME type sniffed from the content. It fails when the form cannot be
+// parsed, the field holds no files, or a file cannot be opened or read.
+func GetBase64sFromForm(c *gin.Context, fieldName string) ([]*Base64Data, error) {
+	mf := c.Request.MultipartForm
+	if mf == nil {
+		// The form has not been populated yet; have gin parse it.
+		if _, err := c.MultipartForm(); err != nil {
+			return nil, fmt.Errorf("failed to parse image edit form request: %w", err)
+		}
+		mf = c.Request.MultipartForm
+	}
+	imageFiles, exists := mf.File[fieldName]
+	if !exists || len(imageFiles) == 0 {
+		return nil, errors.New("field " + fieldName + " is not found or empty")
+	}
+	imageBase64s := make([]*Base64Data, 0, len(imageFiles))
+	for _, file := range imageFiles {
+		image, err := file.Open()
+		if err != nil {
+			return nil, fmt.Errorf("failed to open image file: %w", err)
+		}
+		imageData, err := io.ReadAll(image)
+		// Close each upload as soon as it is read. A defer here would keep
+		// every file open until the whole function returns.
+		_ = image.Close()
+		if err != nil {
+			return nil, fmt.Errorf("failed to read image file: %w", err)
+		}
+		imageBase64s = append(imageBase64s, &Base64Data{
+			MimeType: http.DetectContentType(imageData),
+			Data:     base64.StdEncoding.EncodeToString(imageData),
+		})
+	}
+	return imageBase64s, nil
+}
diff --git a/relay/helper/valid_request.go b/relay/helper/valid_request.go
index 3bdfa6ff4..e6e8dc989 100644
--- a/relay/helper/valid_request.go
+++ b/relay/helper/valid_request.go
@@ -141,6 +141,7 @@ func GetAndValidOpenAIImageRequest(c *gin.Context, relayMode int) (*dto.ImageReq
 			imageRequest.N = uint(common.String2Int(formData.Get("n")))
 			imageRequest.Quality = formData.Get("quality")
 			imageRequest.Size = formData.Get("size")
+			imageRequest.ResponseFormat = formData.Get("response_format")
 			if imageValue := formData.Get("image"); imageValue != "" {
 				imageRequest.Image, _ = json.Marshal(imageValue)
 			}