mirror of
https://github.com/QuantumNous/new-api.git
synced 2026-04-19 11:08:37 +00:00
feat(gemini): update request structures for Veo predictLongRunning
- Refactored the request URL and body construction methods to align with the Veo predictLongRunning endpoint.
- Introduced new data structures for Veo instances and parameters, replacing the previous Gemini video generation configurations.
- Updated the Vertex adaptor to utilize the new Veo request payload format.
This commit is contained in:
@@ -44,13 +44,13 @@ func (a *TaskAdaptor) ValidateRequestAndSetAction(c *gin.Context, info *relaycom
|
|||||||
return relaycommon.ValidateBasicTaskRequest(c, info, constant.TaskActionTextGenerate)
|
return relaycommon.ValidateBasicTaskRequest(c, info, constant.TaskActionTextGenerate)
|
||||||
}
|
}
|
||||||
|
|
||||||
// BuildRequestURL constructs the Gemini API generateVideos endpoint.
|
// BuildRequestURL constructs the Gemini API predictLongRunning endpoint for Veo.
|
||||||
func (a *TaskAdaptor) BuildRequestURL(info *relaycommon.RelayInfo) (string, error) {
|
func (a *TaskAdaptor) BuildRequestURL(info *relaycommon.RelayInfo) (string, error) {
|
||||||
modelName := info.UpstreamModelName
|
modelName := info.UpstreamModelName
|
||||||
version := model_setting.GetGeminiVersionSetting(modelName)
|
version := model_setting.GetGeminiVersionSetting(modelName)
|
||||||
|
|
||||||
return fmt.Sprintf(
|
return fmt.Sprintf(
|
||||||
"%s/%s/models/%s:generateVideos",
|
"%s/%s/models/%s:predictLongRunning",
|
||||||
a.baseURL,
|
a.baseURL,
|
||||||
version,
|
version,
|
||||||
modelName,
|
modelName,
|
||||||
@@ -65,7 +65,7 @@ func (a *TaskAdaptor) BuildRequestHeader(c *gin.Context, req *http.Request, info
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// BuildRequestBody converts request into the Gemini API generateVideos format.
|
// BuildRequestBody converts request into the Veo predictLongRunning format.
|
||||||
func (a *TaskAdaptor) BuildRequestBody(c *gin.Context, info *relaycommon.RelayInfo) (io.Reader, error) {
|
func (a *TaskAdaptor) BuildRequestBody(c *gin.Context, info *relaycommon.RelayInfo) (io.Reader, error) {
|
||||||
v, ok := c.Get("task_request")
|
v, ok := c.Get("task_request")
|
||||||
if !ok {
|
if !ok {
|
||||||
@@ -76,34 +76,36 @@ func (a *TaskAdaptor) BuildRequestBody(c *gin.Context, info *relaycommon.RelayIn
|
|||||||
return nil, fmt.Errorf("unexpected task_request type")
|
return nil, fmt.Errorf("unexpected task_request type")
|
||||||
}
|
}
|
||||||
|
|
||||||
body := GeminiVideoPayload{
|
instance := VeoInstance{Prompt: req.Prompt}
|
||||||
Prompt: req.Prompt,
|
|
||||||
Config: &GeminiVideoGenerationConfig{},
|
|
||||||
}
|
|
||||||
|
|
||||||
if img := ExtractMultipartImage(c, info); img != nil {
|
if img := ExtractMultipartImage(c, info); img != nil {
|
||||||
body.Image = img
|
instance.Image = img
|
||||||
} else if len(req.Images) > 0 {
|
} else if len(req.Images) > 0 {
|
||||||
if parsed := ParseImageInput(req.Images[0]); parsed != nil {
|
if parsed := ParseImageInput(req.Images[0]); parsed != nil {
|
||||||
body.Image = parsed
|
instance.Image = parsed
|
||||||
info.Action = constant.TaskActionGenerate
|
info.Action = constant.TaskActionGenerate
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := taskcommon.UnmarshalMetadata(req.Metadata, body.Config); err != nil {
|
params := &VeoParameters{}
|
||||||
|
if err := taskcommon.UnmarshalMetadata(req.Metadata, params); err != nil {
|
||||||
return nil, errors.Wrap(err, "unmarshal metadata failed")
|
return nil, errors.Wrap(err, "unmarshal metadata failed")
|
||||||
}
|
}
|
||||||
if body.Config.DurationSeconds == 0 && req.Duration > 0 {
|
if params.DurationSeconds == 0 && req.Duration > 0 {
|
||||||
body.Config.DurationSeconds = req.Duration
|
params.DurationSeconds = req.Duration
|
||||||
}
|
}
|
||||||
if body.Config.Resolution == "" && req.Size != "" {
|
if params.Resolution == "" && req.Size != "" {
|
||||||
body.Config.Resolution = SizeToVeoResolution(req.Size)
|
params.Resolution = SizeToVeoResolution(req.Size)
|
||||||
}
|
}
|
||||||
if body.Config.AspectRatio == "" && req.Size != "" {
|
if params.AspectRatio == "" && req.Size != "" {
|
||||||
body.Config.AspectRatio = SizeToVeoAspectRatio(req.Size)
|
params.AspectRatio = SizeToVeoAspectRatio(req.Size)
|
||||||
|
}
|
||||||
|
params.Resolution = strings.ToLower(params.Resolution)
|
||||||
|
params.SampleCount = 1
|
||||||
|
|
||||||
|
body := VeoRequestPayload{
|
||||||
|
Instances: []VeoInstance{instance},
|
||||||
|
Parameters: params,
|
||||||
}
|
}
|
||||||
body.Config.Resolution = strings.ToLower(body.Config.Resolution)
|
|
||||||
body.Config.NumberOfVideos = 1
|
|
||||||
|
|
||||||
data, err := common.Marshal(body)
|
data, err := common.Marshal(body)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
@@ -1,16 +1,5 @@
|
|||||||
package gemini
|
package gemini
|
||||||
|
|
||||||
// GeminiVideoGenerationConfig represents the Gemini API GenerateVideosConfig.
|
|
||||||
// Reference: https://ai.google.dev/gemini-api/docs/video
|
|
||||||
type GeminiVideoGenerationConfig struct {
|
|
||||||
AspectRatio string `json:"aspectRatio,omitempty"`
|
|
||||||
DurationSeconds int `json:"durationSeconds,omitempty"`
|
|
||||||
NegativePrompt string `json:"negativePrompt,omitempty"`
|
|
||||||
PersonGeneration string `json:"personGeneration,omitempty"`
|
|
||||||
Resolution string `json:"resolution,omitempty"`
|
|
||||||
NumberOfVideos int `json:"numberOfVideos,omitempty"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// VeoImageInput represents an image input for Veo image-to-video.
|
// VeoImageInput represents an image input for Veo image-to-video.
|
||||||
// Used by both Gemini and Vertex adaptors.
|
// Used by both Gemini and Vertex adaptors.
|
||||||
type VeoImageInput struct {
|
type VeoImageInput struct {
|
||||||
@@ -18,17 +7,36 @@ type VeoImageInput struct {
|
|||||||
MimeType string `json:"mimeType"`
|
MimeType string `json:"mimeType"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// GeminiVideoPayload is the top-level request body for the Gemini API
|
// VeoInstance represents a single instance in the Veo predictLongRunning request.
|
||||||
// models/{model}:generateVideos endpoint.
|
type VeoInstance struct {
|
||||||
type GeminiVideoPayload struct {
|
Prompt string `json:"prompt"`
|
||||||
Model string `json:"model,omitempty"`
|
Image *VeoImageInput `json:"image,omitempty"`
|
||||||
Prompt string `json:"prompt"`
|
|
||||||
Image *VeoImageInput `json:"image,omitempty"`
|
|
||||||
Config *GeminiVideoGenerationConfig `json:"config,omitempty"`
|
|
||||||
// TODO: support referenceImages (style/asset references, up to 3 images)
|
// TODO: support referenceImages (style/asset references, up to 3 images)
|
||||||
// TODO: support lastFrame (first+last frame interpolation, Veo 3.1)
|
// TODO: support lastFrame (first+last frame interpolation, Veo 3.1)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// VeoParameters represents the parameters block for Veo predictLongRunning.
|
||||||
|
type VeoParameters struct {
|
||||||
|
SampleCount int `json:"sampleCount"`
|
||||||
|
DurationSeconds int `json:"durationSeconds,omitempty"`
|
||||||
|
AspectRatio string `json:"aspectRatio,omitempty"`
|
||||||
|
Resolution string `json:"resolution,omitempty"`
|
||||||
|
NegativePrompt string `json:"negativePrompt,omitempty"`
|
||||||
|
PersonGeneration string `json:"personGeneration,omitempty"`
|
||||||
|
StorageUri string `json:"storageUri,omitempty"`
|
||||||
|
CompressionQuality string `json:"compressionQuality,omitempty"`
|
||||||
|
ResizeMode string `json:"resizeMode,omitempty"`
|
||||||
|
Seed *int `json:"seed,omitempty"`
|
||||||
|
GenerateAudio *bool `json:"generateAudio,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// VeoRequestPayload is the top-level request body for the Veo
|
||||||
|
// predictLongRunning endpoint (used by both Gemini and Vertex).
|
||||||
|
type VeoRequestPayload struct {
|
||||||
|
Instances []VeoInstance `json:"instances"`
|
||||||
|
Parameters *VeoParameters `json:"parameters,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
type submitResponse struct {
|
type submitResponse struct {
|
||||||
Name string `json:"name"`
|
Name string `json:"name"`
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -27,32 +27,6 @@ import (
|
|||||||
// Request / Response structures
|
// Request / Response structures
|
||||||
// ============================
|
// ============================
|
||||||
|
|
||||||
type veoInstance struct {
|
|
||||||
Prompt string `json:"prompt"`
|
|
||||||
Image *geminitask.VeoImageInput `json:"image,omitempty"`
|
|
||||||
// TODO: support referenceImages (style/asset references, up to 3 images)
|
|
||||||
// TODO: support lastFrame (first+last frame interpolation, Veo 3.1)
|
|
||||||
}
|
|
||||||
|
|
||||||
type veoParameters struct {
|
|
||||||
SampleCount int `json:"sampleCount"`
|
|
||||||
DurationSeconds int `json:"durationSeconds,omitempty"`
|
|
||||||
AspectRatio string `json:"aspectRatio,omitempty"`
|
|
||||||
Resolution string `json:"resolution,omitempty"`
|
|
||||||
NegativePrompt string `json:"negativePrompt,omitempty"`
|
|
||||||
PersonGeneration string `json:"personGeneration,omitempty"`
|
|
||||||
StorageUri string `json:"storageUri,omitempty"`
|
|
||||||
CompressionQuality string `json:"compressionQuality,omitempty"`
|
|
||||||
ResizeMode string `json:"resizeMode,omitempty"`
|
|
||||||
Seed *int `json:"seed,omitempty"`
|
|
||||||
GenerateAudio *bool `json:"generateAudio,omitempty"`
|
|
||||||
}
|
|
||||||
|
|
||||||
type requestPayload struct {
|
|
||||||
Instances []veoInstance `json:"instances"`
|
|
||||||
Parameters *veoParameters `json:"parameters,omitempty"`
|
|
||||||
}
|
|
||||||
|
|
||||||
type fetchOperationPayload struct {
|
type fetchOperationPayload struct {
|
||||||
OperationName string `json:"operationName"`
|
OperationName string `json:"operationName"`
|
||||||
}
|
}
|
||||||
@@ -186,7 +160,7 @@ func (a *TaskAdaptor) BuildRequestBody(c *gin.Context, info *relaycommon.RelayIn
|
|||||||
}
|
}
|
||||||
req := v.(relaycommon.TaskSubmitReq)
|
req := v.(relaycommon.TaskSubmitReq)
|
||||||
|
|
||||||
instance := veoInstance{Prompt: req.Prompt}
|
instance := geminitask.VeoInstance{Prompt: req.Prompt}
|
||||||
if img := geminitask.ExtractMultipartImage(c, info); img != nil {
|
if img := geminitask.ExtractMultipartImage(c, info); img != nil {
|
||||||
instance.Image = img
|
instance.Image = img
|
||||||
} else if len(req.Images) > 0 {
|
} else if len(req.Images) > 0 {
|
||||||
@@ -196,7 +170,7 @@ func (a *TaskAdaptor) BuildRequestBody(c *gin.Context, info *relaycommon.RelayIn
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
params := &veoParameters{}
|
params := &geminitask.VeoParameters{}
|
||||||
if err := taskcommon.UnmarshalMetadata(req.Metadata, params); err != nil {
|
if err := taskcommon.UnmarshalMetadata(req.Metadata, params); err != nil {
|
||||||
return nil, fmt.Errorf("unmarshal metadata failed: %w", err)
|
return nil, fmt.Errorf("unmarshal metadata failed: %w", err)
|
||||||
}
|
}
|
||||||
@@ -212,8 +186,8 @@ func (a *TaskAdaptor) BuildRequestBody(c *gin.Context, info *relaycommon.RelayIn
|
|||||||
params.Resolution = strings.ToLower(params.Resolution)
|
params.Resolution = strings.ToLower(params.Resolution)
|
||||||
params.SampleCount = 1
|
params.SampleCount = 1
|
||||||
|
|
||||||
body := requestPayload{
|
body := geminitask.VeoRequestPayload{
|
||||||
Instances: []veoInstance{instance},
|
Instances: []geminitask.VeoInstance{instance},
|
||||||
Parameters: params,
|
Parameters: params,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user