Compare commits

...

8 Commits

Author SHA1 Message Date
Calcium-Ion
b19b2d62df Merge pull request #2339 from QuantumNous/revert-2330-pr/fix-nano-banana-err
Revert "fix: nano-banana not compatible imageSize"
2025-11-30 18:48:09 +08:00
Calcium-Ion
f9c8624f2c Merge pull request #2338 from QuantumNous/revert-2321-pr/gemini-image-edit
Revert "Gemini Image系列支持图像编辑"
2025-11-30 18:48:01 +08:00
Calcium-Ion
6c8253156b Merge pull request #2337 from QuantumNous/revert-2315-pr/gemini-veo3.1-i2v
Revert "Gemini Veo3.1[AI Studio]增加图生视频支持"
2025-11-30 18:47:50 +08:00
Calcium-Ion
a66b314f5b Merge pull request #2336 from QuantumNous/revert-2309-pr/fix-gemini-ImageConfig
Revert "fix: gemini image correct generationConfig"
2025-11-30 18:47:39 +08:00
Seefs
e29ff0060d Revert "fix: nano-banana not compatible imageSize" 2025-11-30 18:46:10 +08:00
Seefs
d4a2c2ab54 Revert "Gemini Image系列支持图像编辑" 2025-11-30 18:45:54 +08:00
Seefs
ded463ee57 Revert "Gemini Veo3.1[AI Studio]增加图生视频支持" 2025-11-30 18:45:37 +08:00
Seefs
e337936227 Revert "fix: gemini image correct generationConfig" 2025-11-30 18:45:23 +08:00
6 changed files with 38 additions and 178 deletions

View File

@@ -27,8 +27,6 @@ import (
func SetupApiRequestHeader(info *common.RelayInfo, c *gin.Context, req *http.Header) {
if info.RelayMode == constant.RelayModeAudioTranscription || info.RelayMode == constant.RelayModeAudioTranslation {
// multipart/form-data
} else if info.RelayMode == constant.RelayModeImagesEdits {
// multipart/form-data
} else if info.RelayMode == constant.RelayModeRealtime {
// websocket
} else {

View File

@@ -6,7 +6,6 @@ import (
"fmt"
"io"
"net/http"
"slices"
"strings"
"github.com/QuantumNous/new-api/dto"
@@ -87,25 +86,12 @@ func getImageSizeMapping() QualityMapping {
func getSizeMappings() map[string]SizeMapping {
return map[string]SizeMapping{
// Gemini 2.5 Flash Image - default 1K resolutions
"1024x1024": {AspectRatio: "1:1", ImageSize: ""},
"832x1248": {AspectRatio: "2:3", ImageSize: ""},
"1248x832": {AspectRatio: "3:2", ImageSize: ""},
"864x1184": {AspectRatio: "3:4", ImageSize: ""},
"1184x864": {AspectRatio: "4:3", ImageSize: ""},
"896x1152": {AspectRatio: "4:5", ImageSize: ""},
"1152x896": {AspectRatio: "5:4", ImageSize: ""},
"768x1344": {AspectRatio: "9:16", ImageSize: ""},
"1344x768": {AspectRatio: "16:9", ImageSize: ""},
"1536x672": {AspectRatio: "21:9", ImageSize: ""},
// Gemini 3 Pro Image Preview resolutions
"1536x1024": {AspectRatio: "3:2", ImageSize: ""},
"1024x1536": {AspectRatio: "2:3", ImageSize: ""},
"1024x1792": {AspectRatio: "9:16", ImageSize: ""},
"1792x1024": {AspectRatio: "16:9", ImageSize: ""},
"2048x2048": {AspectRatio: "1:1", ImageSize: "2K"},
"4096x4096": {AspectRatio: "1:1", ImageSize: "4K"},
"2048x2048": {AspectRatio: "", ImageSize: "2K"},
"4096x4096": {AspectRatio: "", ImageSize: "4K"},
}
}
@@ -143,48 +129,24 @@ func processSizeParameters(size, quality string) ImageConfig {
}
func (a *Adaptor) ConvertImageRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.ImageRequest) (any, error) {
if model_setting.IsGeminiModelSupportImagine(info.UpstreamModelName) {
var content any
if base64Data, err := relaycommon.GetImageBase64sFromForm(c); err == nil {
content = []any{
dto.MediaContent{
Type: dto.ContentTypeText,
Text: request.Prompt,
},
dto.MediaContent{
Type: dto.ContentTypeFile,
File: &dto.MessageFile{
FileData: base64Data.String(),
},
},
}
} else {
content = request.Prompt
}
if strings.HasPrefix(info.UpstreamModelName, "gemini-3-pro-image") {
chatRequest := dto.GeneralOpenAIRequest{
Model: request.Model,
Messages: []dto.Message{
{Role: "user", Content: content},
{Role: "user", Content: request.Prompt},
},
N: int(request.N),
}
config := processSizeParameters(strings.TrimSpace(request.Size), request.Quality)
// 兼容 nano-banana 传quality[imageSize]会报错 An internal error has occurred. Please retry or report in https://developers.generativeai.google/guide/troubleshooting
if slices.Contains([]string{"nano-banana", "gemini-2.5-flash-image"}, info.UpstreamModelName) {
config.ImageSize = ""
}
googleGenerationConfig := map[string]interface{}{
"responseModalities": []string{"TEXT", "IMAGE"},
"imageConfig": config,
"response_modalities": []string{"TEXT", "IMAGE"},
"image_config": config,
}
extraBody := map[string]interface{}{
"google": map[string]interface{}{
"generationConfig": googleGenerationConfig,
"generation_config": googleGenerationConfig,
},
}
chatRequest.ExtraBody, _ = json.Marshal(extraBody)

View File

@@ -183,7 +183,7 @@ func ThinkingAdaptor(geminiRequest *dto.GeminiChatRequest, info *relaycommon.Rel
}
// Setting safety to the lowest possible values since Gemini is already powerless enough
func CovertOpenAI2Gemini(c *gin.Context, textRequest dto.GeneralOpenAIRequest, info *relaycommon.RelayInfo, base64Data ...*relaycommon.Base64Data) (*dto.GeminiChatRequest, error) {
func CovertOpenAI2Gemini(c *gin.Context, textRequest dto.GeneralOpenAIRequest, info *relaycommon.RelayInfo) (*dto.GeminiChatRequest, error) {
geminiRequest := dto.GeminiChatRequest{
Contents: make([]dto.GeminiChatContent, 0, len(textRequest.Messages)),
@@ -239,16 +239,6 @@ func CovertOpenAI2Gemini(c *gin.Context, textRequest dto.GeneralOpenAIRequest, i
}
}
}
if generationConfig, ok := googleBody["generationConfig"].(map[string]any); ok {
generationConfigBytes, err := json.Marshal(generationConfig)
if err != nil {
return nil, fmt.Errorf("failed to marshal generationConfig: %w", err)
}
if err := json.Unmarshal(generationConfigBytes, &geminiRequest.GenerationConfig); err != nil {
return nil, fmt.Errorf("failed to unmarshal generationConfig: %w", err)
}
}
}
}
}
@@ -464,11 +454,10 @@ func CovertOpenAI2Gemini(c *gin.Context, textRequest dto.GeneralOpenAIRequest, i
})
}
} else if part.Type == dto.ContentTypeFile {
file := part.GetFile()
if file.FileId != "" {
if part.GetFile().FileId != "" {
return nil, fmt.Errorf("only base64 file is supported in gemini")
}
format, base64String, err := service.DecodeBase64FileData(file.FileData)
format, base64String, err := service.DecodeBase64FileData(part.GetFile().FileData)
if err != nil {
return nil, fmt.Errorf("decode base64 file data failed: %s", err.Error())
}

View File

@@ -24,9 +24,13 @@ import (
"github.com/pkg/errors"
)
// VideoGenerationConfig represents the video generation configuration
// ============================
// Request / Response structures
// ============================
// GeminiVideoGenerationConfig represents the video generation configuration
// Based on: https://ai.google.dev/gemini-api/docs/video
type VideoGenerationConfig struct {
type GeminiVideoGenerationConfig struct {
AspectRatio string `json:"aspectRatio,omitempty"` // "16:9" or "9:16"
DurationSeconds float64 `json:"durationSeconds,omitempty"` // 4, 6, or 8 (as number)
NegativePrompt string `json:"negativePrompt,omitempty"` // unwanted elements
@@ -34,21 +38,15 @@ type VideoGenerationConfig struct {
Resolution string `json:"resolution,omitempty"` // video resolution
}
type Image struct {
BytesBase64Encoded string `json:"bytesBase64Encoded,omitempty"`
MimeType string `json:"mimeType,omitempty"`
// GeminiVideoRequest represents a single video generation instance
type GeminiVideoRequest struct {
Prompt string `json:"prompt"`
}
type VideoRequest struct {
Prompt string `json:"prompt"`
Image *Image `json:"image,omitempty"`
LastFrame *Image `json:"lastFrame,omitempty"`
}
// VideoPayload represents the complete video generation request payload
type VideoPayload struct {
Instances []VideoRequest `json:"instances"`
Parameters VideoGenerationConfig `json:"parameters,omitempty"`
// GeminiVideoPayload represents the complete video generation request payload
type GeminiVideoPayload struct {
Instances []GeminiVideoRequest `json:"instances"`
Parameters GeminiVideoGenerationConfig `json:"parameters,omitempty"`
}
type submitResponse struct {
@@ -77,8 +75,6 @@ type operationResponse struct {
URI string `json:"uri"`
} `json:"video"`
} `json:"generatedSamples"`
RaiMediaFilteredCount int `json:"raiMediaFilteredCount"`
RaiMediaFilteredReasons []string `json:"raiMediaFilteredReasons"`
} `json:"generateVideoResponse"`
} `json:"response"`
Error struct {
@@ -104,7 +100,8 @@ func (a *TaskAdaptor) Init(info *relaycommon.RelayInfo) {
// ValidateRequestAndSetAction parses body, validates fields and sets default action.
func (a *TaskAdaptor) ValidateRequestAndSetAction(c *gin.Context, info *relaycommon.RelayInfo) (taskErr *dto.TaskError) {
return relaycommon.ValidateBasicTaskRequest(c, info, constant.TaskActionGenerate)
// Use the standard validation method for TaskSubmitReq
return relaycommon.ValidateBasicTaskRequest(c, info, constant.TaskActionTextGenerate)
}
// BuildRequestURL constructs the upstream URL.
@@ -140,21 +137,13 @@ func (a *TaskAdaptor) BuildRequestBody(c *gin.Context, info *relaycommon.RelayIn
}
// Create structured video generation request
body := VideoPayload{
Instances: []VideoRequest{
body := GeminiVideoPayload{
Instances: []GeminiVideoRequest{
{Prompt: req.Prompt},
},
Parameters: VideoGenerationConfig{},
Parameters: GeminiVideoGenerationConfig{},
}
if len(req.Images) > 0 {
body.Instances[0].Image = a.convertImage(req.Images[0])
}
if len(req.Images) > 1 {
body.Instances[0].LastFrame = a.convertImage(req.Images[1])
}
// Parse metadata for additional configuration
metadata := req.Metadata
medaBytes, err := json.Marshal(metadata)
if err != nil {
@@ -258,19 +247,20 @@ func (a *TaskAdaptor) ParseTaskResult(respBody []byte) (*relaycommon.TaskInfo, e
return ti, nil
}
if len(op.Response.GenerateVideoResponse.GeneratedSamples) == 0 {
ti.Status = model.TaskStatusFailure
ti.Reason = fmt.Sprintf("no generated video url found: %s", strings.Join(op.Response.GenerateVideoResponse.RaiMediaFilteredReasons, "; "))
} else {
if uri := op.Response.GenerateVideoResponse.GeneratedSamples[0].Video.URI; uri != "" {
ti.RemoteUrl = uri
}
ti.Status = model.TaskStatusSuccess
}
ti.Status = model.TaskStatusSuccess
ti.Progress = "100%"
taskID := encodeLocalTaskID(op.Name)
ti.TaskID = taskID
ti.Url = fmt.Sprintf("%s/v1/videos/%s/content", system_setting.ServerAddress, taskID)
// Extract URL from generateVideoResponse if available
if len(op.Response.GenerateVideoResponse.GeneratedSamples) > 0 {
if uri := op.Response.GenerateVideoResponse.GeneratedSamples[0].Video.URI; uri != "" {
ti.RemoteUrl = uri
}
}
return ti, nil
}
@@ -299,30 +289,6 @@ func (a *TaskAdaptor) ConvertToOpenAIVideo(task *model.Task) ([]byte, error) {
return common.Marshal(video)
}
// convertImage builds the Image payload for an image reference given as a string.
//
// It returns nil when imageStr is empty or whitespace only. Otherwise the
// result starts out as the input string itself with MIME type "image/png",
// and is refined in two cases:
//   - a "data:image/..." URI containing exactly one ";base64," separator is
//     split into its MIME type and its payload;
//   - a string starting with "http" is fetched through service.GetImageFromUrl,
//     and the fetched MIME type and data are used when the fetch succeeds.
//
// If neither refinement applies (or the fetch fails) the defaults are kept.
func (a *TaskAdaptor) convertImage(imageStr string) *Image {
	if len(strings.TrimSpace(imageStr)) == 0 {
		return nil
	}

	// Defaults: pass the input through unchanged and label it as PNG.
	mime, payload := "image/png", imageStr

	switch {
	case strings.HasPrefix(imageStr, "data:image/"):
		// Only a well-formed URI with a single ";base64," marker is unpacked.
		if pieces := strings.Split(imageStr, ";base64,"); len(pieces) == 2 {
			mime = strings.TrimPrefix(pieces[0], "data:")
			payload = pieces[1]
		}
	case strings.HasPrefix(imageStr, "http"):
		// Best effort: a failed download silently keeps the defaults above.
		if fetchedMime, fetchedData, err := service.GetImageFromUrl(imageStr); err == nil {
			mime, payload = fetchedMime, fetchedData
		}
	}

	return &Image{
		MimeType:           mime,
		BytesBase64Encoded: payload,
	}
}
// ============================
// helpers
// ============================

View File

@@ -1,10 +1,7 @@
package common
import (
"encoding/base64"
"errors"
"fmt"
"io"
"net/http"
"strconv"
"strings"
@@ -229,54 +226,3 @@ func ValidateBasicTaskRequest(c *gin.Context, info *RelayInfo, action string) *d
storeTaskRequest(c, info, action, req)
return nil
}
// GetImagesBase64sFromForm returns the base64-encoded contents of every file
// uploaded under the multipart form field "image". It delegates to
// GetBase64sFromForm and returns that function's result and error unchanged.
func GetImagesBase64sFromForm(c *gin.Context) ([]*Base64Data, error) {
	const imageField = "image"
	return GetBase64sFromForm(c, imageField)
}
// GetImageBase64sFromForm returns the first file uploaded under the multipart
// form field "image", base64-encoded. Any error from reading the form is
// returned as-is with a nil result.
func GetImageBase64sFromForm(c *gin.Context) (*Base64Data, error) {
	uploaded, err := GetBase64sFromForm(c, "image")
	if err != nil {
		return nil, err
	}
	// Indexing is safe: GetBase64sFromForm reports an error when the field
	// carries no files, so a nil error implies at least one element.
	first := uploaded[0]
	return first, nil
}
// Base64Data pairs a base64-encoded payload with the MIME type of the
// content it encodes.
type Base64Data struct {
	MimeType string // media type, e.g. "image/png"
	Data     string // base64 text of the content
}

// String renders the value as a data URI of the form
// "data:<MimeType>;base64,<Data>".
func (d Base64Data) String() string {
	// All operands are strings, so fmt.Sprint joins them without separators.
	return fmt.Sprint("data:", d.MimeType, ";base64,", d.Data)
}
// GetBase64sFromForm reads every file uploaded under the multipart form field
// fieldName and returns each one base64-encoded (standard encoding) together
// with a MIME type sniffed from its content via http.DetectContentType.
//
// If the request's multipart form has not been parsed yet it is parsed here.
// An error is returned when parsing fails, when the field is missing or has
// no files, or when any file cannot be opened or read; the underlying cause
// is wrapped so callers can inspect it with errors.Is / errors.As.
func GetBase64sFromForm(c *gin.Context, fieldName string) ([]*Base64Data, error) {
	mf := c.Request.MultipartForm
	if mf == nil {
		if _, err := c.MultipartForm(); err != nil {
			return nil, fmt.Errorf("failed to parse image edit form request: %w", err)
		}
		mf = c.Request.MultipartForm
	}

	// A missing key yields a nil slice, so one length check covers both the
	// "not found" and the "empty" case.
	imageFiles := mf.File[fieldName]
	if len(imageFiles) == 0 {
		return nil, errors.New("field " + fieldName + " is not found or empty")
	}

	imageBase64s := make([]*Base64Data, 0, len(imageFiles))
	for _, file := range imageFiles {
		image, err := file.Open()
		if err != nil {
			return nil, fmt.Errorf("failed to open image file: %w", err)
		}

		imageData, err := io.ReadAll(image)
		// Close immediately instead of deferring: a defer inside the loop
		// would keep every uploaded file open until the function returns.
		// The handle is read-only, so a close error carries nothing actionable.
		_ = image.Close()
		if err != nil {
			return nil, fmt.Errorf("failed to read image file: %w", err)
		}

		imageBase64s = append(imageBase64s, &Base64Data{
			MimeType: http.DetectContentType(imageData),
			Data:     base64.StdEncoding.EncodeToString(imageData),
		})
	}
	return imageBase64s, nil
}

View File

@@ -141,7 +141,6 @@ func GetAndValidOpenAIImageRequest(c *gin.Context, relayMode int) (*dto.ImageReq
imageRequest.N = uint(common.String2Int(formData.Get("n")))
imageRequest.Quality = formData.Get("quality")
imageRequest.Size = formData.Get("size")
imageRequest.ResponseFormat = formData.Get("response_format")
if imageValue := formData.Get("image"); imageValue != "" {
imageRequest.Image, _ = json.Marshal(imageValue)
}