mirror of
https://github.com/QuantumNous/new-api.git
synced 2026-03-30 05:02:17 +00:00
feat(gemini): implement video generation configuration and billing estimation
- Added Gemini video generation configuration structures and payloads. - Introduced functions for parsing and resolving video duration and resolution from metadata. - Enhanced the Vertex adaptor to support Gemini video generation requests and billing estimation based on duration and resolution. - Updated model pricing settings for new Gemini video models.
This commit is contained in:
@@ -43,6 +43,7 @@ func (m *OpenAIVideo) SetMetadata(k string, v any) {
|
||||
func NewOpenAIVideo() *OpenAIVideo {
|
||||
return &OpenAIVideo{
|
||||
Object: "video",
|
||||
Status: VideoStatusQueued,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -22,64 +22,6 @@ import (
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
// ============================
|
||||
// Request / Response structures
|
||||
// ============================
|
||||
|
||||
// GeminiVideoGenerationConfig represents the video generation configuration
|
||||
// Based on: https://ai.google.dev/gemini-api/docs/video
|
||||
type GeminiVideoGenerationConfig struct {
|
||||
AspectRatio string `json:"aspectRatio,omitempty"` // "16:9" or "9:16"
|
||||
DurationSeconds float64 `json:"durationSeconds,omitempty"` // 4, 6, or 8 (as number)
|
||||
NegativePrompt string `json:"negativePrompt,omitempty"` // unwanted elements
|
||||
PersonGeneration string `json:"personGeneration,omitempty"` // "allow_all" for text-to-video, "allow_adult" for image-to-video
|
||||
Resolution string `json:"resolution,omitempty"` // video resolution
|
||||
}
|
||||
|
||||
// GeminiVideoRequest represents a single video generation instance
|
||||
type GeminiVideoRequest struct {
|
||||
Prompt string `json:"prompt"`
|
||||
}
|
||||
|
||||
// GeminiVideoPayload represents the complete video generation request payload
|
||||
type GeminiVideoPayload struct {
|
||||
Instances []GeminiVideoRequest `json:"instances"`
|
||||
Parameters GeminiVideoGenerationConfig `json:"parameters,omitempty"`
|
||||
}
|
||||
|
||||
type submitResponse struct {
|
||||
Name string `json:"name"`
|
||||
}
|
||||
|
||||
type operationVideo struct {
|
||||
MimeType string `json:"mimeType"`
|
||||
BytesBase64Encoded string `json:"bytesBase64Encoded"`
|
||||
Encoding string `json:"encoding"`
|
||||
}
|
||||
|
||||
type operationResponse struct {
|
||||
Name string `json:"name"`
|
||||
Done bool `json:"done"`
|
||||
Response struct {
|
||||
Type string `json:"@type"`
|
||||
RaiMediaFilteredCount int `json:"raiMediaFilteredCount"`
|
||||
Videos []operationVideo `json:"videos"`
|
||||
BytesBase64Encoded string `json:"bytesBase64Encoded"`
|
||||
Encoding string `json:"encoding"`
|
||||
Video string `json:"video"`
|
||||
GenerateVideoResponse struct {
|
||||
GeneratedSamples []struct {
|
||||
Video struct {
|
||||
URI string `json:"uri"`
|
||||
} `json:"video"`
|
||||
} `json:"generatedSamples"`
|
||||
} `json:"generateVideoResponse"`
|
||||
} `json:"response"`
|
||||
Error struct {
|
||||
Message string `json:"message"`
|
||||
} `json:"error"`
|
||||
}
|
||||
|
||||
// ============================
|
||||
// Adaptor implementation
|
||||
// ============================
|
||||
@@ -99,17 +41,16 @@ func (a *TaskAdaptor) Init(info *relaycommon.RelayInfo) {
|
||||
|
||||
// ValidateRequestAndSetAction parses body, validates fields and sets default action.
|
||||
func (a *TaskAdaptor) ValidateRequestAndSetAction(c *gin.Context, info *relaycommon.RelayInfo) (taskErr *dto.TaskError) {
|
||||
// Use the standard validation method for TaskSubmitReq
|
||||
return relaycommon.ValidateBasicTaskRequest(c, info, constant.TaskActionTextGenerate)
|
||||
}
|
||||
|
||||
// BuildRequestURL constructs the upstream URL.
|
||||
// BuildRequestURL constructs the Gemini API generateVideos endpoint.
|
||||
func (a *TaskAdaptor) BuildRequestURL(info *relaycommon.RelayInfo) (string, error) {
|
||||
modelName := info.UpstreamModelName
|
||||
version := model_setting.GetGeminiVersionSetting(modelName)
|
||||
|
||||
return fmt.Sprintf(
|
||||
"%s/%s/models/%s:predictLongRunning",
|
||||
"%s/%s/models/%s:generateVideos",
|
||||
a.baseURL,
|
||||
version,
|
||||
modelName,
|
||||
@@ -124,7 +65,7 @@ func (a *TaskAdaptor) BuildRequestHeader(c *gin.Context, req *http.Request, info
|
||||
return nil
|
||||
}
|
||||
|
||||
// BuildRequestBody converts request into Gemini specific format.
|
||||
// BuildRequestBody converts request into the Gemini API generateVideos format.
|
||||
func (a *TaskAdaptor) BuildRequestBody(c *gin.Context, info *relaycommon.RelayInfo) (io.Reader, error) {
|
||||
v, ok := c.Get("task_request")
|
||||
if !ok {
|
||||
@@ -135,18 +76,34 @@ func (a *TaskAdaptor) BuildRequestBody(c *gin.Context, info *relaycommon.RelayIn
|
||||
return nil, fmt.Errorf("unexpected task_request type")
|
||||
}
|
||||
|
||||
// Create structured video generation request
|
||||
body := GeminiVideoPayload{
|
||||
Instances: []GeminiVideoRequest{
|
||||
{Prompt: req.Prompt},
|
||||
},
|
||||
Parameters: GeminiVideoGenerationConfig{},
|
||||
Prompt: req.Prompt,
|
||||
Config: &GeminiVideoGenerationConfig{},
|
||||
}
|
||||
|
||||
metadata := req.Metadata
|
||||
if err := taskcommon.UnmarshalMetadata(metadata, &body.Parameters); err != nil {
|
||||
if img := ExtractMultipartImage(c, info); img != nil {
|
||||
body.Image = img
|
||||
} else if len(req.Images) > 0 {
|
||||
if parsed := ParseImageInput(req.Images[0]); parsed != nil {
|
||||
body.Image = parsed
|
||||
info.Action = constant.TaskActionGenerate
|
||||
}
|
||||
}
|
||||
|
||||
if err := taskcommon.UnmarshalMetadata(req.Metadata, body.Config); err != nil {
|
||||
return nil, errors.Wrap(err, "unmarshal metadata failed")
|
||||
}
|
||||
if body.Config.DurationSeconds == 0 && req.Duration > 0 {
|
||||
body.Config.DurationSeconds = req.Duration
|
||||
}
|
||||
if body.Config.Resolution == "" && req.Size != "" {
|
||||
body.Config.Resolution = SizeToVeoResolution(req.Size)
|
||||
}
|
||||
if body.Config.AspectRatio == "" && req.Size != "" {
|
||||
body.Config.AspectRatio = SizeToVeoAspectRatio(req.Size)
|
||||
}
|
||||
body.Config.Resolution = strings.ToLower(body.Config.Resolution)
|
||||
body.Config.NumberOfVideos = 1
|
||||
|
||||
data, err := common.Marshal(body)
|
||||
if err != nil {
|
||||
@@ -186,14 +143,40 @@ func (a *TaskAdaptor) DoResponse(c *gin.Context, resp *http.Response, info *rela
|
||||
}
|
||||
|
||||
func (a *TaskAdaptor) GetModelList() []string {
|
||||
return []string{"veo-3.0-generate-001", "veo-3.1-generate-preview", "veo-3.1-fast-generate-preview"}
|
||||
return []string{
|
||||
"veo-3.0-generate-001",
|
||||
"veo-3.0-fast-generate-001",
|
||||
"veo-3.1-generate-preview",
|
||||
"veo-3.1-fast-generate-preview",
|
||||
}
|
||||
}
|
||||
|
||||
func (a *TaskAdaptor) GetChannelName() string {
|
||||
return "gemini"
|
||||
}
|
||||
|
||||
// FetchTask fetch task status
|
||||
// EstimateBilling returns OtherRatios based on durationSeconds and resolution.
|
||||
func (a *TaskAdaptor) EstimateBilling(c *gin.Context, info *relaycommon.RelayInfo) map[string]float64 {
|
||||
v, ok := c.Get("task_request")
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
req, ok := v.(relaycommon.TaskSubmitReq)
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
|
||||
seconds := ResolveVeoDuration(req.Metadata, req.Duration, req.Seconds)
|
||||
resolution := ResolveVeoResolution(req.Metadata, req.Size)
|
||||
resRatio := VeoResolutionRatio(info.UpstreamModelName, resolution)
|
||||
|
||||
return map[string]float64{
|
||||
"seconds": float64(seconds),
|
||||
"resolution": resRatio,
|
||||
}
|
||||
}
|
||||
|
||||
// FetchTask polls task status via the Gemini operations GET endpoint.
|
||||
func (a *TaskAdaptor) FetchTask(baseUrl, key string, body map[string]any, proxy string) (*http.Response, error) {
|
||||
taskID, ok := body["task_id"].(string)
|
||||
if !ok {
|
||||
@@ -205,7 +188,6 @@ func (a *TaskAdaptor) FetchTask(baseUrl, key string, body map[string]any, proxy
|
||||
return nil, fmt.Errorf("decode task_id failed: %w", err)
|
||||
}
|
||||
|
||||
// For Gemini API, we use GET request to the operations endpoint
|
||||
version := model_setting.GetGeminiVersionSetting("default")
|
||||
url := fmt.Sprintf("%s/%s/%s", baseUrl, version, upstreamName)
|
||||
|
||||
@@ -249,11 +231,9 @@ func (a *TaskAdaptor) ParseTaskResult(respBody []byte) (*relaycommon.TaskInfo, e
|
||||
ti.Progress = "100%"
|
||||
|
||||
ti.TaskID = taskcommon.EncodeLocalTaskID(op.Name)
|
||||
// Url intentionally left empty — the caller constructs the proxy URL using the public task ID
|
||||
|
||||
// Extract URL from generateVideoResponse if available
|
||||
if len(op.Response.GenerateVideoResponse.GeneratedSamples) > 0 {
|
||||
if uri := op.Response.GenerateVideoResponse.GeneratedSamples[0].Video.URI; uri != "" {
|
||||
if len(op.Response.GenerateVideoResponse.GeneratedVideos) > 0 {
|
||||
if uri := op.Response.GenerateVideoResponse.GeneratedVideos[0].Video.URI; uri != "" {
|
||||
ti.RemoteUrl = uri
|
||||
}
|
||||
}
|
||||
@@ -262,8 +242,6 @@ func (a *TaskAdaptor) ParseTaskResult(respBody []byte) (*relaycommon.TaskInfo, e
|
||||
}
|
||||
|
||||
func (a *TaskAdaptor) ConvertToOpenAIVideo(task *model.Task) ([]byte, error) {
|
||||
// Use GetUpstreamTaskID() to get the real upstream operation name for model extraction.
|
||||
// task.TaskID is now a public task_xxxx ID, no longer a base64-encoded upstream name.
|
||||
upstreamTaskID := task.GetUpstreamTaskID()
|
||||
upstreamName, err := taskcommon.DecodeLocalTaskID(upstreamTaskID)
|
||||
if err != nil {
|
||||
|
||||
138
relay/channel/task/gemini/billing.go
Normal file
138
relay/channel/task/gemini/billing.go
Normal file
@@ -0,0 +1,138 @@
|
||||
package gemini
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// ParseVeoDurationSeconds extracts durationSeconds from metadata.
|
||||
// Returns 8 (Veo default) when not specified or invalid.
|
||||
func ParseVeoDurationSeconds(metadata map[string]any) int {
|
||||
if metadata == nil {
|
||||
return 8
|
||||
}
|
||||
v, ok := metadata["durationSeconds"]
|
||||
if !ok {
|
||||
return 8
|
||||
}
|
||||
switch n := v.(type) {
|
||||
case float64:
|
||||
if int(n) > 0 {
|
||||
return int(n)
|
||||
}
|
||||
case int:
|
||||
if n > 0 {
|
||||
return n
|
||||
}
|
||||
}
|
||||
return 8
|
||||
}
|
||||
|
||||
// ParseVeoResolution extracts resolution from metadata.
|
||||
// Returns "720p" when not specified.
|
||||
func ParseVeoResolution(metadata map[string]any) string {
|
||||
if metadata == nil {
|
||||
return "720p"
|
||||
}
|
||||
v, ok := metadata["resolution"]
|
||||
if !ok {
|
||||
return "720p"
|
||||
}
|
||||
if s, ok := v.(string); ok && s != "" {
|
||||
return strings.ToLower(s)
|
||||
}
|
||||
return "720p"
|
||||
}
|
||||
|
||||
// ResolveVeoDuration returns the effective duration in seconds.
|
||||
// Priority: metadata["durationSeconds"] > stdDuration > stdSeconds > default (8).
|
||||
func ResolveVeoDuration(metadata map[string]any, stdDuration int, stdSeconds string) int {
|
||||
if metadata != nil {
|
||||
if _, exists := metadata["durationSeconds"]; exists {
|
||||
if d := ParseVeoDurationSeconds(metadata); d > 0 {
|
||||
return d
|
||||
}
|
||||
}
|
||||
}
|
||||
if stdDuration > 0 {
|
||||
return stdDuration
|
||||
}
|
||||
if s, err := strconv.Atoi(stdSeconds); err == nil && s > 0 {
|
||||
return s
|
||||
}
|
||||
return 8
|
||||
}
|
||||
|
||||
// ResolveVeoResolution returns the effective resolution string (lowercase).
|
||||
// Priority: metadata["resolution"] > SizeToVeoResolution(stdSize) > default ("720p").
|
||||
func ResolveVeoResolution(metadata map[string]any, stdSize string) string {
|
||||
if metadata != nil {
|
||||
if _, exists := metadata["resolution"]; exists {
|
||||
if r := ParseVeoResolution(metadata); r != "" {
|
||||
return r
|
||||
}
|
||||
}
|
||||
}
|
||||
if stdSize != "" {
|
||||
return SizeToVeoResolution(stdSize)
|
||||
}
|
||||
return "720p"
|
||||
}
|
||||
|
||||
// SizeToVeoResolution converts a "WxH" size string to a Veo resolution label.
|
||||
func SizeToVeoResolution(size string) string {
|
||||
parts := strings.SplitN(strings.ToLower(size), "x", 2)
|
||||
if len(parts) != 2 {
|
||||
return "720p"
|
||||
}
|
||||
w, _ := strconv.Atoi(parts[0])
|
||||
h, _ := strconv.Atoi(parts[1])
|
||||
maxDim := w
|
||||
if h > maxDim {
|
||||
maxDim = h
|
||||
}
|
||||
if maxDim >= 3840 {
|
||||
return "4k"
|
||||
}
|
||||
if maxDim >= 1920 {
|
||||
return "1080p"
|
||||
}
|
||||
return "720p"
|
||||
}
|
||||
|
||||
// SizeToVeoAspectRatio converts a "WxH" size string to a Veo aspect ratio.
|
||||
func SizeToVeoAspectRatio(size string) string {
|
||||
parts := strings.SplitN(strings.ToLower(size), "x", 2)
|
||||
if len(parts) != 2 {
|
||||
return "16:9"
|
||||
}
|
||||
w, _ := strconv.Atoi(parts[0])
|
||||
h, _ := strconv.Atoi(parts[1])
|
||||
if w <= 0 || h <= 0 {
|
||||
return "16:9"
|
||||
}
|
||||
if h > w {
|
||||
return "9:16"
|
||||
}
|
||||
return "16:9"
|
||||
}
|
||||
|
||||
// VeoResolutionRatio returns the pricing multiplier for the given resolution.
|
||||
// Standard resolutions (720p, 1080p) return 1.0.
|
||||
// 4K returns a model-specific multiplier based on Google's official pricing.
|
||||
func VeoResolutionRatio(modelName, resolution string) float64 {
|
||||
if resolution != "4k" {
|
||||
return 1.0
|
||||
}
|
||||
// 4K multipliers derived from Vertex AI official pricing (video+audio base):
|
||||
// veo-3.1-generate: $0.60 / $0.40 = 1.5
|
||||
// veo-3.1-fast-generate: $0.35 / $0.15 ≈ 2.333
|
||||
// Veo 3.0 models do not support 4K; return 1.0 as fallback.
|
||||
if strings.Contains(modelName, "3.1-fast-generate") {
|
||||
return 2.333333
|
||||
}
|
||||
if strings.Contains(modelName, "3.1-generate") || strings.Contains(modelName, "3.1") {
|
||||
return 1.5
|
||||
}
|
||||
return 1.0
|
||||
}
|
||||
63
relay/channel/task/gemini/dto.go
Normal file
63
relay/channel/task/gemini/dto.go
Normal file
@@ -0,0 +1,63 @@
|
||||
package gemini
|
||||
|
||||
// GeminiVideoGenerationConfig represents the Gemini API GenerateVideosConfig.
|
||||
// Reference: https://ai.google.dev/gemini-api/docs/video
|
||||
type GeminiVideoGenerationConfig struct {
|
||||
AspectRatio string `json:"aspectRatio,omitempty"`
|
||||
DurationSeconds int `json:"durationSeconds,omitempty"`
|
||||
NegativePrompt string `json:"negativePrompt,omitempty"`
|
||||
PersonGeneration string `json:"personGeneration,omitempty"`
|
||||
Resolution string `json:"resolution,omitempty"`
|
||||
NumberOfVideos int `json:"numberOfVideos,omitempty"`
|
||||
}
|
||||
|
||||
// VeoImageInput represents an image input for Veo image-to-video.
|
||||
// Used by both Gemini and Vertex adaptors.
|
||||
type VeoImageInput struct {
|
||||
BytesBase64Encoded string `json:"bytesBase64Encoded"`
|
||||
MimeType string `json:"mimeType"`
|
||||
}
|
||||
|
||||
// GeminiVideoPayload is the top-level request body for the Gemini API
|
||||
// models/{model}:generateVideos endpoint.
|
||||
type GeminiVideoPayload struct {
|
||||
Model string `json:"model,omitempty"`
|
||||
Prompt string `json:"prompt"`
|
||||
Image *VeoImageInput `json:"image,omitempty"`
|
||||
Config *GeminiVideoGenerationConfig `json:"config,omitempty"`
|
||||
// TODO: support referenceImages (style/asset references, up to 3 images)
|
||||
// TODO: support lastFrame (first+last frame interpolation, Veo 3.1)
|
||||
}
|
||||
|
||||
type submitResponse struct {
|
||||
Name string `json:"name"`
|
||||
}
|
||||
|
||||
type operationVideo struct {
|
||||
MimeType string `json:"mimeType"`
|
||||
BytesBase64Encoded string `json:"bytesBase64Encoded"`
|
||||
Encoding string `json:"encoding"`
|
||||
}
|
||||
|
||||
type operationResponse struct {
|
||||
Name string `json:"name"`
|
||||
Done bool `json:"done"`
|
||||
Response struct {
|
||||
Type string `json:"@type"`
|
||||
RaiMediaFilteredCount int `json:"raiMediaFilteredCount"`
|
||||
Videos []operationVideo `json:"videos"`
|
||||
BytesBase64Encoded string `json:"bytesBase64Encoded"`
|
||||
Encoding string `json:"encoding"`
|
||||
Video string `json:"video"`
|
||||
GenerateVideoResponse struct {
|
||||
GeneratedVideos []struct {
|
||||
Video struct {
|
||||
URI string `json:"uri"`
|
||||
} `json:"video"`
|
||||
} `json:"generatedVideos"`
|
||||
} `json:"generateVideoResponse"`
|
||||
} `json:"response"`
|
||||
Error struct {
|
||||
Message string `json:"message"`
|
||||
} `json:"error"`
|
||||
}
|
||||
100
relay/channel/task/gemini/image.go
Normal file
100
relay/channel/task/gemini/image.go
Normal file
@@ -0,0 +1,100 @@
|
||||
package gemini
|
||||
|
||||
import (
|
||||
"encoding/base64"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
|
||||
"github.com/QuantumNous/new-api/constant"
|
||||
relaycommon "github.com/QuantumNous/new-api/relay/common"
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
const maxVeoImageSize = 20 * 1024 * 1024 // 20 MB
|
||||
|
||||
// ExtractMultipartImage reads the first `input_reference` file from a multipart
|
||||
// form upload and returns a VeoImageInput. Returns nil if no file is present.
|
||||
func ExtractMultipartImage(c *gin.Context, info *relaycommon.RelayInfo) *VeoImageInput {
|
||||
mf, err := c.MultipartForm()
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
files, exists := mf.File["input_reference"]
|
||||
if !exists || len(files) == 0 {
|
||||
return nil
|
||||
}
|
||||
fh := files[0]
|
||||
if fh.Size > maxVeoImageSize {
|
||||
return nil
|
||||
}
|
||||
file, err := fh.Open()
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
fileBytes, err := io.ReadAll(file)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
mimeType := fh.Header.Get("Content-Type")
|
||||
if mimeType == "" || mimeType == "application/octet-stream" {
|
||||
mimeType = http.DetectContentType(fileBytes)
|
||||
}
|
||||
|
||||
info.Action = constant.TaskActionGenerate
|
||||
return &VeoImageInput{
|
||||
BytesBase64Encoded: base64.StdEncoding.EncodeToString(fileBytes),
|
||||
MimeType: mimeType,
|
||||
}
|
||||
}
|
||||
|
||||
// ParseImageInput parses an image string (data URI or raw base64) into a
|
||||
// VeoImageInput. Returns nil if the input is empty or invalid.
|
||||
// TODO: support downloading HTTP URL images and converting to base64
|
||||
func ParseImageInput(imageStr string) *VeoImageInput {
|
||||
imageStr = strings.TrimSpace(imageStr)
|
||||
if imageStr == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
if strings.HasPrefix(imageStr, "data:") {
|
||||
return parseDataURI(imageStr)
|
||||
}
|
||||
|
||||
raw, err := base64.StdEncoding.DecodeString(imageStr)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
return &VeoImageInput{
|
||||
BytesBase64Encoded: imageStr,
|
||||
MimeType: http.DetectContentType(raw),
|
||||
}
|
||||
}
|
||||
|
||||
func parseDataURI(uri string) *VeoImageInput {
|
||||
// data:image/png;base64,iVBOR...
|
||||
rest := uri[len("data:"):]
|
||||
idx := strings.Index(rest, ",")
|
||||
if idx < 0 {
|
||||
return nil
|
||||
}
|
||||
meta := rest[:idx]
|
||||
b64 := rest[idx+1:]
|
||||
if b64 == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
mimeType := "application/octet-stream"
|
||||
parts := strings.SplitN(meta, ";", 2)
|
||||
if len(parts) >= 1 && parts[0] != "" {
|
||||
mimeType = parts[0]
|
||||
}
|
||||
|
||||
return &VeoImageInput{
|
||||
BytesBase64Encoded: b64,
|
||||
MimeType: mimeType,
|
||||
}
|
||||
}
|
||||
@@ -16,6 +16,7 @@ import (
|
||||
"github.com/QuantumNous/new-api/constant"
|
||||
"github.com/QuantumNous/new-api/dto"
|
||||
"github.com/QuantumNous/new-api/relay/channel"
|
||||
geminitask "github.com/QuantumNous/new-api/relay/channel/task/gemini"
|
||||
taskcommon "github.com/QuantumNous/new-api/relay/channel/task/taskcommon"
|
||||
vertexcore "github.com/QuantumNous/new-api/relay/channel/vertex"
|
||||
relaycommon "github.com/QuantumNous/new-api/relay/common"
|
||||
@@ -26,9 +27,34 @@ import (
|
||||
// Request / Response structures
|
||||
// ============================
|
||||
|
||||
type veoInstance struct {
|
||||
Prompt string `json:"prompt"`
|
||||
Image *geminitask.VeoImageInput `json:"image,omitempty"`
|
||||
// TODO: support referenceImages (style/asset references, up to 3 images)
|
||||
// TODO: support lastFrame (first+last frame interpolation, Veo 3.1)
|
||||
}
|
||||
|
||||
type veoParameters struct {
|
||||
SampleCount int `json:"sampleCount"`
|
||||
DurationSeconds int `json:"durationSeconds,omitempty"`
|
||||
AspectRatio string `json:"aspectRatio,omitempty"`
|
||||
Resolution string `json:"resolution,omitempty"`
|
||||
NegativePrompt string `json:"negativePrompt,omitempty"`
|
||||
PersonGeneration string `json:"personGeneration,omitempty"`
|
||||
StorageUri string `json:"storageUri,omitempty"`
|
||||
CompressionQuality string `json:"compressionQuality,omitempty"`
|
||||
ResizeMode string `json:"resizeMode,omitempty"`
|
||||
Seed *int `json:"seed,omitempty"`
|
||||
GenerateAudio *bool `json:"generateAudio,omitempty"`
|
||||
}
|
||||
|
||||
type requestPayload struct {
|
||||
Instances []map[string]any `json:"instances"`
|
||||
Parameters map[string]any `json:"parameters,omitempty"`
|
||||
Instances []veoInstance `json:"instances"`
|
||||
Parameters *veoParameters `json:"parameters,omitempty"`
|
||||
}
|
||||
|
||||
type fetchOperationPayload struct {
|
||||
OperationName string `json:"operationName"`
|
||||
}
|
||||
|
||||
type submitResponse struct {
|
||||
@@ -134,25 +160,21 @@ func (a *TaskAdaptor) BuildRequestHeader(c *gin.Context, req *http.Request, info
|
||||
return nil
|
||||
}
|
||||
|
||||
// EstimateBilling 根据用户请求中的 sampleCount 计算 OtherRatios。
|
||||
func (a *TaskAdaptor) EstimateBilling(c *gin.Context, _ *relaycommon.RelayInfo) map[string]float64 {
|
||||
sampleCount := 1
|
||||
// EstimateBilling returns OtherRatios based on durationSeconds and resolution.
|
||||
func (a *TaskAdaptor) EstimateBilling(c *gin.Context, info *relaycommon.RelayInfo) map[string]float64 {
|
||||
v, ok := c.Get("task_request")
|
||||
if ok {
|
||||
req := v.(relaycommon.TaskSubmitReq)
|
||||
if req.Metadata != nil {
|
||||
if sc, exists := req.Metadata["sampleCount"]; exists {
|
||||
if i, ok := sc.(int); ok && i > 0 {
|
||||
sampleCount = i
|
||||
}
|
||||
if f, ok := sc.(float64); ok && int(f) > 0 {
|
||||
sampleCount = int(f)
|
||||
}
|
||||
}
|
||||
}
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
req := v.(relaycommon.TaskSubmitReq)
|
||||
|
||||
seconds := geminitask.ResolveVeoDuration(req.Metadata, req.Duration, req.Seconds)
|
||||
resolution := geminitask.ResolveVeoResolution(req.Metadata, req.Size)
|
||||
resRatio := geminitask.VeoResolutionRatio(info.UpstreamModelName, resolution)
|
||||
|
||||
return map[string]float64{
|
||||
"sampleCount": float64(sampleCount),
|
||||
"seconds": float64(seconds),
|
||||
"resolution": resRatio,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -164,29 +186,35 @@ func (a *TaskAdaptor) BuildRequestBody(c *gin.Context, info *relaycommon.RelayIn
|
||||
}
|
||||
req := v.(relaycommon.TaskSubmitReq)
|
||||
|
||||
body := requestPayload{
|
||||
Instances: []map[string]any{{"prompt": req.Prompt}},
|
||||
Parameters: map[string]any{},
|
||||
}
|
||||
if req.Metadata != nil {
|
||||
if v, ok := req.Metadata["storageUri"]; ok {
|
||||
body.Parameters["storageUri"] = v
|
||||
instance := veoInstance{Prompt: req.Prompt}
|
||||
if img := geminitask.ExtractMultipartImage(c, info); img != nil {
|
||||
instance.Image = img
|
||||
} else if len(req.Images) > 0 {
|
||||
if parsed := geminitask.ParseImageInput(req.Images[0]); parsed != nil {
|
||||
instance.Image = parsed
|
||||
info.Action = constant.TaskActionGenerate
|
||||
}
|
||||
if v, ok := req.Metadata["sampleCount"]; ok {
|
||||
if i, ok := v.(int); ok {
|
||||
body.Parameters["sampleCount"] = i
|
||||
}
|
||||
if f, ok := v.(float64); ok {
|
||||
body.Parameters["sampleCount"] = int(f)
|
||||
}
|
||||
}
|
||||
}
|
||||
if _, ok := body.Parameters["sampleCount"]; !ok {
|
||||
body.Parameters["sampleCount"] = 1
|
||||
}
|
||||
|
||||
if body.Parameters["sampleCount"].(int) <= 0 {
|
||||
return nil, fmt.Errorf("sampleCount must be greater than 0")
|
||||
params := &veoParameters{}
|
||||
if err := taskcommon.UnmarshalMetadata(req.Metadata, params); err != nil {
|
||||
return nil, fmt.Errorf("unmarshal metadata failed: %w", err)
|
||||
}
|
||||
if params.DurationSeconds == 0 && req.Duration > 0 {
|
||||
params.DurationSeconds = req.Duration
|
||||
}
|
||||
if params.Resolution == "" && req.Size != "" {
|
||||
params.Resolution = geminitask.SizeToVeoResolution(req.Size)
|
||||
}
|
||||
if params.AspectRatio == "" && req.Size != "" {
|
||||
params.AspectRatio = geminitask.SizeToVeoAspectRatio(req.Size)
|
||||
}
|
||||
params.Resolution = strings.ToLower(params.Resolution)
|
||||
params.SampleCount = 1
|
||||
|
||||
body := requestPayload{
|
||||
Instances: []veoInstance{instance},
|
||||
Parameters: params,
|
||||
}
|
||||
|
||||
data, err := common.Marshal(body)
|
||||
@@ -226,7 +254,14 @@ func (a *TaskAdaptor) DoResponse(c *gin.Context, resp *http.Response, info *rela
|
||||
return localID, responseBody, nil
|
||||
}
|
||||
|
||||
func (a *TaskAdaptor) GetModelList() []string { return []string{"veo-3.0-generate-001"} }
|
||||
func (a *TaskAdaptor) GetModelList() []string {
|
||||
return []string{
|
||||
"veo-3.0-generate-001",
|
||||
"veo-3.0-fast-generate-001",
|
||||
"veo-3.1-generate-preview",
|
||||
"veo-3.1-fast-generate-preview",
|
||||
}
|
||||
}
|
||||
func (a *TaskAdaptor) GetChannelName() string { return "vertex" }
|
||||
|
||||
// FetchTask fetch task status
|
||||
@@ -254,7 +289,7 @@ func (a *TaskAdaptor) FetchTask(baseUrl, key string, body map[string]any, proxy
|
||||
} else {
|
||||
url = fmt.Sprintf("https://%s-aiplatform.googleapis.com/v1/projects/%s/locations/%s/publishers/google/models/%s:fetchPredictOperation", region, project, region, modelName)
|
||||
}
|
||||
payload := map[string]string{"operationName": upstreamName}
|
||||
payload := fetchOperationPayload{OperationName: upstreamName}
|
||||
data, err := common.Marshal(payload)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
||||
@@ -298,6 +298,10 @@ var defaultModelPrice = map[string]float64{
|
||||
"sora-2": 0.3,
|
||||
"sora-2-pro": 0.5,
|
||||
"gpt-4o-mini-tts": 0.3,
|
||||
"veo-3.0-generate-001": 0.4,
|
||||
"veo-3.0-fast-generate-001": 0.15,
|
||||
"veo-3.1-generate-preview": 0.4,
|
||||
"veo-3.1-fast-generate-preview": 0.15,
|
||||
}
|
||||
|
||||
var defaultAudioRatio = map[string]float64{
|
||||
|
||||
Reference in New Issue
Block a user