diff --git a/relay/channel/task/hailuo/adaptor.go b/relay/channel/task/hailuo/adaptor.go
index 8920e70f6..cb6f1eebd 100644
--- a/relay/channel/task/hailuo/adaptor.go
+++ b/relay/channel/task/hailuo/adaptor.go
@@ -22,6 +22,7 @@ import (
 	"github.com/QuantumNous/new-api/service"
 )
 
+// https://platform.minimaxi.com/docs/api-reference/video-generation-intro
 type TaskAdaptor struct {
 	ChannelType int
 	apiKey      string
@@ -84,7 +85,7 @@ func (a *TaskAdaptor) DoResponse(c *gin.Context, resp *http.Response, info *rela
 	}
 	_ = resp.Body.Close()
 
-	var hResp TextToVideoResponse
+	var hResp VideoResponse
 	if err := json.Unmarshal(responseBody, &hResp); err != nil {
 		taskErr = service.TaskErrorWrapper(errors.Wrapf(err, "body: %s", responseBody), "unmarshal_response_body_failed", http.StatusInternalServerError)
 		return
@@ -136,86 +137,38 @@ func (a *TaskAdaptor) GetChannelName() string {
 	return ChannelName
 }
 
-func (a *TaskAdaptor) convertToRequestPayload(req *relaycommon.TaskSubmitReq) (*TextToVideoRequest, error) {
+// convertToRequestPayload builds the upstream VideoRequest for req: duration and
+// resolution fall back to DefaultDuration and the model's default resolution, and
+// req.Metadata is then overlaid on the result (the model is never overridden).
+func (a *TaskAdaptor) convertToRequestPayload(req *relaycommon.TaskSubmitReq) (*VideoRequest, error) {
 	modelConfig := GetModelConfig(req.Model)
-	if !contains(ModelList, req.Model) {
-		return nil, fmt.Errorf("unsupported model: %s", req.Model)
-	}
-
 	duration := DefaultDuration
 	if req.Duration > 0 {
 		duration = req.Duration
 	}
-
-	if !containsInt(modelConfig.SupportedDurations, duration) {
-		return nil, fmt.Errorf("duration %d is not supported by model %s, supported durations: %v",
-			duration, req.Model, modelConfig.SupportedDurations)
-	}
-
 	resolution := modelConfig.DefaultResolution
 	if req.Size != "" {
 		resolution = a.parseResolutionFromSize(req.Size, modelConfig)
 	}
 
-	if !contains(modelConfig.SupportedResolutions, resolution) {
-		return nil, fmt.Errorf("resolution %s is not supported by model %s, supported resolutions: %v",
-			resolution, req.Model, modelConfig.SupportedResolutions)
-	}
-
-	hailuoReq := &TextToVideoRequest{
+	videoRequest := &VideoRequest{
 		Model:      req.Model,
 		Prompt:     req.Prompt,
 		Duration:   &duration,
 		Resolution: resolution,
 	}
-
-	promptOptimizer := DefaultPromptOptimizer
-	hailuoReq.PromptOptimizer = &promptOptimizer
-
-	metadata := req.Metadata
-	if metadata != nil {
-		metadataBytes, err := json.Marshal(metadata)
-		if err != nil {
-			return nil, errors.Wrap(err, "marshal metadata failed")
-		}
-
-		var metadataMap map[string]interface{}
-		if err := json.Unmarshal(metadataBytes, &metadataMap); err != nil {
-			return nil, errors.Wrap(err, "unmarshal metadata failed")
-		}
-
-		if val, exists := metadataMap["prompt_optimizer"]; exists {
-			if boolVal, ok := val.(bool); ok {
-				hailuoReq.PromptOptimizer = &boolVal
-			}
-		}
-
-		if modelConfig.HasFastPretreatment {
-			if val, exists := metadataMap["fast_pretreatment"]; exists {
-				if boolVal, ok := val.(bool); ok {
-					hailuoReq.FastPretreatment = &boolVal
-				}
-			}
-		}
-
-		if val, exists := metadataMap["callback_url"]; exists {
-			if strVal, ok := val.(string); ok {
-				hailuoReq.CallbackURL = strVal
-			}
-		}
-
-		if val, exists := metadataMap["aigc_watermark"]; exists {
-			if boolVal, ok := val.(bool); ok {
-				hailuoReq.AigcWatermark = &boolVal
-			}
-		}
+	// Overlay caller-supplied metadata (e.g. prompt_optimizer, first_frame_image)
+	// onto the typed fields. videoRequest is already a pointer, so pass it as is.
+	if err := req.UnmarshalMetadata(videoRequest); err != nil {
+		return nil, errors.Wrap(err, "unmarshal metadata to video request failed")
 	}
+	// Metadata shares JSON keys with VideoRequest; do not let a "model" key in
+	// it replace the model this request was submitted with.
+	videoRequest.Model = req.Model
+	// NOTE(review): req.Images is no longer rejected nor copied into
+	// FirstFrameImage; image input must arrive via metadata — confirm intended.
 
-	if req.HasImage() {
-		return nil, fmt.Errorf("image input is not supported by hailuo video generation")
-	}
-
-	return hailuoReq, nil
+	return videoRequest, nil
 }
 
 func (a *TaskAdaptor) parseResolutionFromSize(size string, modelConfig ModelConfig) string {
@@ -226,6 +179,8 @@ func (a *TaskAdaptor) parseResolutionFromSize(size string, modelConfig ModelConf
 		return Resolution768P
 	case strings.Contains(size, "720"):
 		return Resolution720P
+	case strings.Contains(size, "512"):
+		return Resolution512P
 	default:
 		return modelConfig.DefaultResolution
 	}
diff --git a/relay/channel/task/hailuo/constants.go b/relay/channel/task/hailuo/constants.go
index 7932e8a51..5e5408637 100644
--- a/relay/channel/task/hailuo/constants.go
+++ b/relay/channel/task/hailuo/constants.go
@@ -6,9 +6,14 @@ const (
 
 var ModelList = []string{
 	"MiniMax-Hailuo-2.3",
+	"MiniMax-Hailuo-2.3-Fast",
 	"MiniMax-Hailuo-02",
 	"T2V-01-Director",
 	"T2V-01",
+	"I2V-01-Director",
+	"I2V-01-live",
+	"I2V-01",
+	"S2V-01",
 }
 
 const (
@@ -35,13 +40,13 @@ const (
 )
 
 const (
+	Resolution512P  = "512P"
 	Resolution720P  = "720P"
 	Resolution768P  = "768P"
 	Resolution1080P = "1080P"
 )
 
 const (
-	DefaultDuration        = 6
-	DefaultResolution      = Resolution768P
-	DefaultPromptOptimizer = true
+	DefaultDuration   = 6
+	DefaultResolution = Resolution720P
 )
diff --git a/relay/channel/task/hailuo/models.go b/relay/channel/task/hailuo/models.go
index 242857015..09a97766f 100644
--- a/relay/channel/task/hailuo/models.go
+++ b/relay/channel/task/hailuo/models.go
@@ -1,17 +1,28 @@
 package hailuo
 
-type TextToVideoRequest struct {
-	Model            string `json:"model"`
-	Prompt           string `json:"prompt"`
-	PromptOptimizer  *bool  `json:"prompt_optimizer,omitempty"`
-	FastPretreatment *bool  `json:"fast_pretreatment,omitempty"`
-	Duration         *int   `json:"duration,omitempty"`
-	Resolution       string `json:"resolution,omitempty"`
-	CallbackURL      string `json:"callback_url,omitempty"`
-	AigcWatermark    *bool  `json:"aigc_watermark,omitempty"`
+// SubjectReference is one subject_reference entry of a subject-reference-to-video request.
+type SubjectReference struct {
+	Type  string   `json:"type"`  // Subject type, currently only supports "character"
+	Image []string `json:"image"` // Array of subject reference images (currently only supports single image)
 }
 
-type TextToVideoResponse struct {
+// VideoRequest is the JSON body sent upstream to create a video task; every field except Model is omitted when empty.
+type VideoRequest struct {
+	Model            string             `json:"model"`
+	Prompt           string             `json:"prompt,omitempty"`
+	PromptOptimizer  *bool              `json:"prompt_optimizer,omitempty"`
+	FastPretreatment *bool              `json:"fast_pretreatment,omitempty"`
+	Duration         *int               `json:"duration,omitempty"`
+	Resolution       string             `json:"resolution,omitempty"`
+	CallbackURL      string             `json:"callback_url,omitempty"`
+	AigcWatermark    *bool              `json:"aigc_watermark,omitempty"`
+	FirstFrameImage  string             `json:"first_frame_image,omitempty"` // For image-to-video and start-end-to-video
+	LastFrameImage   string             `json:"last_frame_image,omitempty"`  // For start-end-to-video
+	SubjectReference []SubjectReference `json:"subject_reference,omitempty"` // For subject-reference-to-video
+}
+
+// VideoResponse is the JSON body decoded from the upstream reply to a VideoRequest.
+type VideoResponse struct {
 	TaskID   string   `json:"task_id"`
 	BaseResp BaseResp `json:"base_resp"`
 }
@@ -81,11 +92,19 @@ func GetModelConfig(model string) ModelConfig {
 			HasPromptOptimizer:   true,
 			HasFastPretreatment:  true,
 		},
+		"MiniMax-Hailuo-2.3-Fast": {
+			Name:                 "MiniMax-Hailuo-2.3-Fast",
+			DefaultResolution:    Resolution768P,
+			SupportedDurations:   []int{6, 10},
+			SupportedResolutions: []string{Resolution768P, Resolution1080P},
+			HasPromptOptimizer:   true,
+			HasFastPretreatment:  true,
+		},
 		"MiniMax-Hailuo-02": {
 			Name:                 "MiniMax-Hailuo-02",
 			DefaultResolution:    Resolution768P,
 			SupportedDurations:   []int{6, 10},
-			SupportedResolutions: []string{Resolution768P, Resolution1080P},
+			SupportedResolutions: []string{Resolution512P, Resolution768P, Resolution1080P},
 			HasPromptOptimizer:   true,
 			HasFastPretreatment:  true,
 		},
@@ -105,6 +124,38 @@ func GetModelConfig(model string) ModelConfig {
 			HasPromptOptimizer:   true,
 			HasFastPretreatment:  false,
 		},
+		"I2V-01-Director": {
+			Name:                 "I2V-01-Director",
+			DefaultResolution:    Resolution720P,
+			SupportedDurations:   []int{6},
+			SupportedResolutions: []string{Resolution720P, Resolution1080P},
+			HasPromptOptimizer:   true,
+			HasFastPretreatment:  false,
+		},
+		"I2V-01-live": {
+			Name:                 "I2V-01-live",
+			DefaultResolution:    Resolution720P,
+			SupportedDurations:   []int{6},
+			SupportedResolutions: []string{Resolution720P, Resolution1080P},
+			HasPromptOptimizer:   true,
+			HasFastPretreatment:  false,
+		},
+		"I2V-01": {
+			Name:                 "I2V-01",
+			DefaultResolution:    Resolution720P,
+			SupportedDurations:   []int{6},
+			SupportedResolutions: []string{Resolution720P, Resolution1080P},
+			HasPromptOptimizer:   true,
+			HasFastPretreatment:  false,
+		},
+		"S2V-01": {
+			Name:                 "S2V-01",
+			DefaultResolution:    Resolution720P,
+			SupportedDurations:   []int{6},
+			SupportedResolutions: []string{Resolution720P},
+			HasPromptOptimizer:   true,
+			HasFastPretreatment:  false,
+		},
 	}
 
 	if config, exists := configs[model]; exists {
@@ -113,9 +164,9 @@ func GetModelConfig(model string) ModelConfig {
 
 	return ModelConfig{
 		Name:                 model,
-		DefaultResolution:    Resolution720P,
+		DefaultResolution:    DefaultResolution,
 		SupportedDurations:   []int{6},
-		SupportedResolutions: []string{Resolution720P},
+		SupportedResolutions: []string{DefaultResolution},
 		HasPromptOptimizer:   true,
 		HasFastPretreatment:  false,
 	}
diff --git a/relay/common/relay_info.go b/relay/common/relay_info.go
index 10601298c..33ef4d14c 100644
--- a/relay/common/relay_info.go
+++ b/relay/common/relay_info.go
@@ -498,11 +498,11 @@ type TaskSubmitReq struct {
 	Metadata map[string]interface{} `json:"metadata,omitempty"`
 }
 
-func (t TaskSubmitReq) GetPrompt() string {
+func (t *TaskSubmitReq) GetPrompt() string {
 	return t.Prompt
 }
 
-func (t TaskSubmitReq) HasImage() bool {
+func (t *TaskSubmitReq) HasImage() bool {
 	return len(t.Images) > 0
 }
 
@@ -537,6 +537,23 @@ func (t *TaskSubmitReq) UnmarshalJSON(data []byte) error {
 	return nil
 }
 
+// UnmarshalMetadata decodes t.Metadata into v by round-tripping it through
+// JSON, so v must be a non-nil pointer. A nil Metadata map leaves v untouched
+// and returns nil.
+func (t *TaskSubmitReq) UnmarshalMetadata(v any) error {
+	if t.Metadata == nil {
+		return nil
+	}
+	metadataBytes, err := json.Marshal(t.Metadata)
+	if err != nil {
+		return fmt.Errorf("marshal metadata failed: %w", err)
+	}
+	if err := json.Unmarshal(metadataBytes, v); err != nil {
+		return fmt.Errorf("unmarshal metadata to target failed: %w", err)
+	}
+	return nil
+}
+
 type TaskInfo struct {
 	Code int `json:"code"`