feat: gemini video veo3.1 add i2v

This commit is contained in:
feitianbubu
2025-11-26 20:40:12 +08:00
parent 95a7749e1d
commit 4a367edfde

View File

@@ -24,13 +24,9 @@ import (
"github.com/pkg/errors"
)
// ============================
// Request / Response structures
// ============================
// GeminiVideoGenerationConfig represents the video generation configuration
// VideoGenerationConfig represents the video generation configuration
// Based on: https://ai.google.dev/gemini-api/docs/video
type GeminiVideoGenerationConfig struct {
type VideoGenerationConfig struct {
AspectRatio string `json:"aspectRatio,omitempty"` // "16:9" or "9:16"
DurationSeconds float64 `json:"durationSeconds,omitempty"` // 4, 6, or 8 (as number)
NegativePrompt string `json:"negativePrompt,omitempty"` // unwanted elements
@@ -38,15 +34,21 @@ type GeminiVideoGenerationConfig struct {
Resolution string `json:"resolution,omitempty"` // video resolution
}
// GeminiVideoRequest represents a single video generation instance
type GeminiVideoRequest struct {
Prompt string `json:"prompt"`
type Image struct {
BytesBase64Encoded string `json:"bytesBase64Encoded,omitempty"`
MimeType string `json:"mimeType,omitempty"`
}
// GeminiVideoPayload represents the complete video generation request payload
type GeminiVideoPayload struct {
Instances []GeminiVideoRequest `json:"instances"`
Parameters GeminiVideoGenerationConfig `json:"parameters,omitempty"`
type VideoRequest struct {
Prompt string `json:"prompt"`
Image *Image `json:"image,omitempty"`
LastFrame *Image `json:"lastFrame,omitempty"`
}
// VideoPayload represents the complete video generation request payload
type VideoPayload struct {
Instances []VideoRequest `json:"instances"`
Parameters VideoGenerationConfig `json:"parameters,omitempty"`
}
type submitResponse struct {
@@ -100,8 +102,7 @@ func (a *TaskAdaptor) Init(info *relaycommon.RelayInfo) {
// ValidateRequestAndSetAction parses body, validates fields and sets default action.
func (a *TaskAdaptor) ValidateRequestAndSetAction(c *gin.Context, info *relaycommon.RelayInfo) (taskErr *dto.TaskError) {
// Use the standard validation method for TaskSubmitReq
return relaycommon.ValidateBasicTaskRequest(c, info, constant.TaskActionTextGenerate)
return relaycommon.ValidateBasicTaskRequest(c, info, constant.TaskActionGenerate)
}
// BuildRequestURL constructs the upstream URL.
@@ -137,13 +138,21 @@ func (a *TaskAdaptor) BuildRequestBody(c *gin.Context, info *relaycommon.RelayIn
}
// Create structured video generation request
body := GeminiVideoPayload{
Instances: []GeminiVideoRequest{
body := VideoPayload{
Instances: []VideoRequest{
{Prompt: req.Prompt},
},
Parameters: GeminiVideoGenerationConfig{},
Parameters: VideoGenerationConfig{},
}
if len(req.Images) > 0 {
body.Instances[0].Image = a.convertImage(req.Images[0])
}
if len(req.Images) > 1 {
body.Instances[0].LastFrame = a.convertImage(req.Images[1])
}
// Parse metadata for additional configuration
metadata := req.Metadata
medaBytes, err := json.Marshal(metadata)
if err != nil {
@@ -289,6 +298,30 @@ func (a *TaskAdaptor) ConvertToOpenAIVideo(task *model.Task) ([]byte, error) {
return common.Marshal(video)
}
func (a *TaskAdaptor) convertImage(imageStr string) *Image {
if strings.TrimSpace(imageStr) == "" {
return nil
}
img := &Image{
MimeType: "image/png",
BytesBase64Encoded: imageStr,
}
if strings.HasPrefix(imageStr, "data:image/") {
parts := strings.Split(imageStr, ";base64,")
if len(parts) == 2 {
img.MimeType = strings.TrimPrefix(parts[0], "data:")
img.BytesBase64Encoded = parts[1]
}
} else if strings.HasPrefix(imageStr, "http") {
mimeType, data, err := service.GetImageFromUrl(imageStr)
if err == nil {
img.MimeType = mimeType
img.BytesBase64Encoded = data
}
}
return img
}
// ============================
// helpers
// ============================