feat(gemini): update request structures for Veo predictLongRunning

- Refactored the request URL and body construction methods to align with the Veo predictLongRunning endpoint.
- Introduced new data structures for Veo instances and parameters, replacing the previous Gemini video generation configurations.
- Updated the Vertex adaptor to utilize the new Veo request payload format.
This commit is contained in:
CaIon
2026-02-28 18:42:54 +08:00
parent 99bb41e310
commit 21cfc1ca38
3 changed files with 51 additions and 67 deletions

View File

@@ -44,13 +44,13 @@ func (a *TaskAdaptor) ValidateRequestAndSetAction(c *gin.Context, info *relaycom
return relaycommon.ValidateBasicTaskRequest(c, info, constant.TaskActionTextGenerate)
}
// BuildRequestURL constructs the Gemini API generateVideos endpoint.
// BuildRequestURL constructs the Gemini API predictLongRunning endpoint for Veo.
func (a *TaskAdaptor) BuildRequestURL(info *relaycommon.RelayInfo) (string, error) {
modelName := info.UpstreamModelName
version := model_setting.GetGeminiVersionSetting(modelName)
return fmt.Sprintf(
"%s/%s/models/%s:generateVideos",
"%s/%s/models/%s:predictLongRunning",
a.baseURL,
version,
modelName,
@@ -65,7 +65,7 @@ func (a *TaskAdaptor) BuildRequestHeader(c *gin.Context, req *http.Request, info
return nil
}
// BuildRequestBody converts request into the Gemini API generateVideos format.
// BuildRequestBody converts request into the Veo predictLongRunning format.
func (a *TaskAdaptor) BuildRequestBody(c *gin.Context, info *relaycommon.RelayInfo) (io.Reader, error) {
v, ok := c.Get("task_request")
if !ok {
@@ -76,34 +76,36 @@ func (a *TaskAdaptor) BuildRequestBody(c *gin.Context, info *relaycommon.RelayIn
return nil, fmt.Errorf("unexpected task_request type")
}
body := GeminiVideoPayload{
Prompt: req.Prompt,
Config: &GeminiVideoGenerationConfig{},
}
instance := VeoInstance{Prompt: req.Prompt}
if img := ExtractMultipartImage(c, info); img != nil {
body.Image = img
instance.Image = img
} else if len(req.Images) > 0 {
if parsed := ParseImageInput(req.Images[0]); parsed != nil {
body.Image = parsed
instance.Image = parsed
info.Action = constant.TaskActionGenerate
}
}
if err := taskcommon.UnmarshalMetadata(req.Metadata, body.Config); err != nil {
params := &VeoParameters{}
if err := taskcommon.UnmarshalMetadata(req.Metadata, params); err != nil {
return nil, errors.Wrap(err, "unmarshal metadata failed")
}
if body.Config.DurationSeconds == 0 && req.Duration > 0 {
body.Config.DurationSeconds = req.Duration
if params.DurationSeconds == 0 && req.Duration > 0 {
params.DurationSeconds = req.Duration
}
if body.Config.Resolution == "" && req.Size != "" {
body.Config.Resolution = SizeToVeoResolution(req.Size)
if params.Resolution == "" && req.Size != "" {
params.Resolution = SizeToVeoResolution(req.Size)
}
if body.Config.AspectRatio == "" && req.Size != "" {
body.Config.AspectRatio = SizeToVeoAspectRatio(req.Size)
if params.AspectRatio == "" && req.Size != "" {
params.AspectRatio = SizeToVeoAspectRatio(req.Size)
}
params.Resolution = strings.ToLower(params.Resolution)
params.SampleCount = 1
body := VeoRequestPayload{
Instances: []VeoInstance{instance},
Parameters: params,
}
body.Config.Resolution = strings.ToLower(body.Config.Resolution)
body.Config.NumberOfVideos = 1
data, err := common.Marshal(body)
if err != nil {

View File

@@ -1,16 +1,5 @@
package gemini
// GeminiVideoGenerationConfig represents the Gemini API GenerateVideosConfig.
// Reference: https://ai.google.dev/gemini-api/docs/video
type GeminiVideoGenerationConfig struct {
AspectRatio string `json:"aspectRatio,omitempty"`
DurationSeconds int `json:"durationSeconds,omitempty"`
NegativePrompt string `json:"negativePrompt,omitempty"`
PersonGeneration string `json:"personGeneration,omitempty"`
Resolution string `json:"resolution,omitempty"`
NumberOfVideos int `json:"numberOfVideos,omitempty"`
}
// VeoImageInput represents an image input for Veo image-to-video.
// Used by both Gemini and Vertex adaptors.
type VeoImageInput struct {
@@ -18,17 +7,36 @@ type VeoImageInput struct {
MimeType string `json:"mimeType"`
}
// GeminiVideoPayload is the top-level request body for the Gemini API
// models/{model}:generateVideos endpoint.
type GeminiVideoPayload struct {
Model string `json:"model,omitempty"`
Prompt string `json:"prompt"`
Image *VeoImageInput `json:"image,omitempty"`
Config *GeminiVideoGenerationConfig `json:"config,omitempty"`
// VeoInstance represents a single instance in the Veo predictLongRunning request.
type VeoInstance struct {
Prompt string `json:"prompt"`
Image *VeoImageInput `json:"image,omitempty"`
// TODO: support referenceImages (style/asset references, up to 3 images)
// TODO: support lastFrame (first+last frame interpolation, Veo 3.1)
}
// VeoParameters represents the parameters block for Veo predictLongRunning.
type VeoParameters struct {
SampleCount int `json:"sampleCount"`
DurationSeconds int `json:"durationSeconds,omitempty"`
AspectRatio string `json:"aspectRatio,omitempty"`
Resolution string `json:"resolution,omitempty"`
NegativePrompt string `json:"negativePrompt,omitempty"`
PersonGeneration string `json:"personGeneration,omitempty"`
StorageUri string `json:"storageUri,omitempty"`
CompressionQuality string `json:"compressionQuality,omitempty"`
ResizeMode string `json:"resizeMode,omitempty"`
Seed *int `json:"seed,omitempty"`
GenerateAudio *bool `json:"generateAudio,omitempty"`
}
// VeoRequestPayload is the top-level request body for the Veo
// predictLongRunning endpoint (used by both Gemini and Vertex).
type VeoRequestPayload struct {
Instances []VeoInstance `json:"instances"`
Parameters *VeoParameters `json:"parameters,omitempty"`
}
type submitResponse struct {
Name string `json:"name"`
}

View File

@@ -27,32 +27,6 @@ import (
// Request / Response structures
// ============================
type veoInstance struct {
Prompt string `json:"prompt"`
Image *geminitask.VeoImageInput `json:"image,omitempty"`
// TODO: support referenceImages (style/asset references, up to 3 images)
// TODO: support lastFrame (first+last frame interpolation, Veo 3.1)
}
type veoParameters struct {
SampleCount int `json:"sampleCount"`
DurationSeconds int `json:"durationSeconds,omitempty"`
AspectRatio string `json:"aspectRatio,omitempty"`
Resolution string `json:"resolution,omitempty"`
NegativePrompt string `json:"negativePrompt,omitempty"`
PersonGeneration string `json:"personGeneration,omitempty"`
StorageUri string `json:"storageUri,omitempty"`
CompressionQuality string `json:"compressionQuality,omitempty"`
ResizeMode string `json:"resizeMode,omitempty"`
Seed *int `json:"seed,omitempty"`
GenerateAudio *bool `json:"generateAudio,omitempty"`
}
type requestPayload struct {
Instances []veoInstance `json:"instances"`
Parameters *veoParameters `json:"parameters,omitempty"`
}
type fetchOperationPayload struct {
OperationName string `json:"operationName"`
}
@@ -186,7 +160,7 @@ func (a *TaskAdaptor) BuildRequestBody(c *gin.Context, info *relaycommon.RelayIn
}
req := v.(relaycommon.TaskSubmitReq)
instance := veoInstance{Prompt: req.Prompt}
instance := geminitask.VeoInstance{Prompt: req.Prompt}
if img := geminitask.ExtractMultipartImage(c, info); img != nil {
instance.Image = img
} else if len(req.Images) > 0 {
@@ -196,7 +170,7 @@ func (a *TaskAdaptor) BuildRequestBody(c *gin.Context, info *relaycommon.RelayIn
}
}
params := &veoParameters{}
params := &geminitask.VeoParameters{}
if err := taskcommon.UnmarshalMetadata(req.Metadata, params); err != nil {
return nil, fmt.Errorf("unmarshal metadata failed: %w", err)
}
@@ -212,8 +186,8 @@ func (a *TaskAdaptor) BuildRequestBody(c *gin.Context, info *relaycommon.RelayIn
params.Resolution = strings.ToLower(params.Resolution)
params.SampleCount = 1
body := requestPayload{
Instances: []veoInstance{instance},
body := geminitask.VeoRequestPayload{
Instances: []geminitask.VeoInstance{instance},
Parameters: params,
}