diff --git a/relay/channel/task/gemini/adaptor.go b/relay/channel/task/gemini/adaptor.go index 0fa9dda4b..3cecce2db 100644 --- a/relay/channel/task/gemini/adaptor.go +++ b/relay/channel/task/gemini/adaptor.go @@ -24,13 +24,9 @@ import ( "github.com/pkg/errors" ) -// ============================ -// Request / Response structures -// ============================ - -// GeminiVideoGenerationConfig represents the video generation configuration +// VideoGenerationConfig represents the video generation configuration // Based on: https://ai.google.dev/gemini-api/docs/video -type GeminiVideoGenerationConfig struct { +type VideoGenerationConfig struct { AspectRatio string `json:"aspectRatio,omitempty"` // "16:9" or "9:16" DurationSeconds float64 `json:"durationSeconds,omitempty"` // 4, 6, or 8 (as number) NegativePrompt string `json:"negativePrompt,omitempty"` // unwanted elements @@ -38,15 +34,21 @@ type GeminiVideoGenerationConfig struct { Resolution string `json:"resolution,omitempty"` // video resolution } -// GeminiVideoRequest represents a single video generation instance -type GeminiVideoRequest struct { - Prompt string `json:"prompt"` +type Image struct { + BytesBase64Encoded string `json:"bytesBase64Encoded,omitempty"` + MimeType string `json:"mimeType,omitempty"` } -// GeminiVideoPayload represents the complete video generation request payload -type GeminiVideoPayload struct { - Instances []GeminiVideoRequest `json:"instances"` - Parameters GeminiVideoGenerationConfig `json:"parameters,omitempty"` +type VideoRequest struct { + Prompt string `json:"prompt"` + Image *Image `json:"image,omitempty"` + LastFrame *Image `json:"lastFrame,omitempty"` +} + +// VideoPayload represents the complete video generation request payload +type VideoPayload struct { + Instances []VideoRequest `json:"instances"` + Parameters VideoGenerationConfig `json:"parameters,omitempty"` } type submitResponse struct { @@ -100,8 +102,7 @@ func (a *TaskAdaptor) Init(info *relaycommon.RelayInfo) { // ValidateRequestAndSetAction parses body, validates fields and sets default action. func (a *TaskAdaptor) ValidateRequestAndSetAction(c *gin.Context, info *relaycommon.RelayInfo) (taskErr *dto.TaskError) { - // Use the standard validation method for TaskSubmitReq - return relaycommon.ValidateBasicTaskRequest(c, info, constant.TaskActionTextGenerate) + return relaycommon.ValidateBasicTaskRequest(c, info, constant.TaskActionGenerate) } // BuildRequestURL constructs the upstream URL. @@ -137,13 +138,21 @@ func (a *TaskAdaptor) BuildRequestBody(c *gin.Context, info *relaycommon.RelayIn } // Create structured video generation request - body := GeminiVideoPayload{ - Instances: []GeminiVideoRequest{ + body := VideoPayload{ + Instances: []VideoRequest{ {Prompt: req.Prompt}, }, - Parameters: GeminiVideoGenerationConfig{}, + Parameters: VideoGenerationConfig{}, } + if len(req.Images) > 0 { + body.Instances[0].Image = a.convertImage(req.Images[0]) + } + if len(req.Images) > 1 { + body.Instances[0].LastFrame = a.convertImage(req.Images[1]) + } + + // Parse metadata for additional configuration metadata := req.Metadata medaBytes, err := json.Marshal(metadata) if err != nil { @@ -289,6 +298,30 @@ func (a *TaskAdaptor) ConvertToOpenAIVideo(task *model.Task) ([]byte, error) { return common.Marshal(video) } +func (a *TaskAdaptor) convertImage(imageStr string) *Image { + if strings.TrimSpace(imageStr) == "" { + return nil + } + img := &Image{ + MimeType: "image/png", + BytesBase64Encoded: imageStr, + } + if strings.HasPrefix(imageStr, "data:image/") { + parts := strings.Split(imageStr, ";base64,") + if len(parts) == 2 { + img.MimeType = strings.TrimPrefix(parts[0], "data:") + img.BytesBase64Encoded = parts[1] + } + } else if strings.HasPrefix(imageStr, "http") { + mimeType, data, err := service.GetImageFromUrl(imageStr) + if err == nil { + img.MimeType = mimeType + img.BytesBase64Encoded = data + } + } + return img +} + // ============================ // helpers // ============================