diff --git a/controller/video_proxy.go b/controller/video_proxy.go index 17e466ae8..2bfb0dc29 100644 --- a/controller/video_proxy.go +++ b/controller/video_proxy.go @@ -4,8 +4,10 @@ import ( "fmt" "io" "net/http" + "net/url" "time" + "github.com/QuantumNous/new-api/constant" "github.com/QuantumNous/new-api/logger" "github.com/QuantumNous/new-api/model" @@ -36,7 +38,7 @@ func VideoProxy(c *gin.Context) { return } if !exists || task == nil { - logger.LogError(c.Request.Context(), fmt.Sprintf("Failed to get task %s: %s", taskID, err.Error())) + logger.LogError(c.Request.Context(), fmt.Sprintf("Failed to get task %s: %v", taskID, err)) c.JSON(http.StatusNotFound, gin.H{ "error": gin.H{ "message": "Task not found", @@ -58,7 +60,7 @@ func VideoProxy(c *gin.Context) { channel, err := model.CacheGetChannel(task.ChannelId) if err != nil { - logger.LogError(c.Request.Context(), fmt.Sprintf("Failed to get channel %d: %s", task.ChannelId, err.Error())) + logger.LogError(c.Request.Context(), fmt.Sprintf("Failed to get task %s: not found", taskID)) c.JSON(http.StatusInternalServerError, gin.H{ "error": gin.H{ "message": "Failed to retrieve channel information", @@ -71,15 +73,15 @@ func VideoProxy(c *gin.Context) { if baseURL == "" { baseURL = "https://api.openai.com" } - videoURL := fmt.Sprintf("%s/v1/videos/%s/content", baseURL, task.TaskID) + var videoURL string client := &http.Client{ Timeout: 60 * time.Second, } - req, err := http.NewRequestWithContext(c.Request.Context(), http.MethodGet, videoURL, nil) + req, err := http.NewRequestWithContext(c.Request.Context(), http.MethodGet, "", nil) if err != nil { - logger.LogError(c.Request.Context(), fmt.Sprintf("Failed to create request for %s: %s", videoURL, err.Error())) + logger.LogError(c.Request.Context(), fmt.Sprintf("Failed to create request: %s", err.Error())) c.JSON(http.StatusInternalServerError, gin.H{ "error": gin.H{ "message": "Failed to create proxy request", @@ -89,7 +91,26 @@ func VideoProxy(c *gin.Context) { return } - req.Header.Set("Authorization", "Bearer "+channel.Key) + if channel.Type == constant.ChannelTypeGemini { + videoURL = fmt.Sprintf("%s&key=%s", c.Query("url"), channel.Key) + req.Header.Set("x-goog-api-key", channel.Key) + } else { + // Default (Sora, etc.): Use original logic + videoURL = fmt.Sprintf("%s/v1/videos/%s/content", baseURL, task.TaskID) + req.Header.Set("Authorization", "Bearer "+channel.Key) + } + + req.URL, err = url.Parse(videoURL) + if err != nil { + logger.LogError(c.Request.Context(), fmt.Sprintf("Failed to parse URL %s: %s", videoURL, err.Error())) + c.JSON(http.StatusInternalServerError, gin.H{ + "error": gin.H{ + "message": "Failed to create proxy request", + "type": "server_error", + }, + }) + return + } resp, err := client.Do(req) if err != nil { diff --git a/relay/channel/task/gemini/adaptor.go b/relay/channel/task/gemini/adaptor.go new file mode 100644 index 000000000..092059c67 --- /dev/null +++ b/relay/channel/task/gemini/adaptor.go @@ -0,0 +1,276 @@ +package gemini + +import ( + "bytes" + "encoding/base64" + "encoding/json" + "fmt" + "io" + "net/http" + "strings" + "time" + + "github.com/QuantumNous/new-api/constant" + "github.com/QuantumNous/new-api/dto" + "github.com/QuantumNous/new-api/model" + "github.com/QuantumNous/new-api/relay/channel" + relaycommon "github.com/QuantumNous/new-api/relay/common" + "github.com/QuantumNous/new-api/service" + "github.com/QuantumNous/new-api/setting/model_setting" + "github.com/QuantumNous/new-api/setting/system_setting" + "github.com/gin-gonic/gin" + "github.com/pkg/errors" +) + +// ============================ +// Request / Response structures +// ============================ + +// GeminiVideoGenerationConfig represents the video generation configuration +// Based on: https://ai.google.dev/gemini-api/docs/video +type GeminiVideoGenerationConfig struct { + AspectRatio string `json:"aspectRatio,omitempty"` // "16:9" or "9:16" + DurationSeconds float64 `json:"durationSeconds,omitempty"` // 4, 6, or 8 (as number) + NegativePrompt string `json:"negativePrompt,omitempty"` // unwanted elements + PersonGeneration string `json:"personGeneration,omitempty"` // "allow_all" for text-to-video, "allow_adult" for image-to-video + Resolution string `json:"resolution,omitempty"` // video resolution +} + +// GeminiVideoRequest represents a single video generation instance +type GeminiVideoRequest struct { + Prompt string `json:"prompt"` +} + +// GeminiVideoPayload represents the complete video generation request payload +type GeminiVideoPayload struct { + Instances []GeminiVideoRequest `json:"instances"` + Parameters GeminiVideoGenerationConfig `json:"parameters,omitempty"` +} + +type submitResponse struct { + Name string `json:"name"` +} + +type operationVideo struct { + MimeType string `json:"mimeType"` + BytesBase64Encoded string `json:"bytesBase64Encoded"` + Encoding string `json:"encoding"` +} + +type operationResponse struct { + Name string `json:"name"` + Done bool `json:"done"` + Response struct { + Type string `json:"@type"` + RaiMediaFilteredCount int `json:"raiMediaFilteredCount"` + Videos []operationVideo `json:"videos"` + BytesBase64Encoded string `json:"bytesBase64Encoded"` + Encoding string `json:"encoding"` + Video string `json:"video"` + GenerateVideoResponse struct { + GeneratedSamples []struct { + Video struct { + URI string `json:"uri"` + } `json:"video"` + } `json:"generatedSamples"` + } `json:"generateVideoResponse"` + } `json:"response"` + Error struct { + Message string `json:"message"` + } `json:"error"` +} + +// ============================ +// Adaptor implementation +// ============================ + +type TaskAdaptor struct { + ChannelType int + apiKey string + baseURL string +} + +func (a *TaskAdaptor) Init(info *relaycommon.RelayInfo) { + a.ChannelType = info.ChannelType + a.baseURL = info.ChannelBaseUrl + a.apiKey = info.ApiKey +} + +// ValidateRequestAndSetAction parses body, validates fields and sets default action. +func (a *TaskAdaptor) ValidateRequestAndSetAction(c *gin.Context, info *relaycommon.RelayInfo) (taskErr *dto.TaskError) { + // Use the standard validation method for TaskSubmitReq + return relaycommon.ValidateBasicTaskRequest(c, info, constant.TaskActionTextGenerate) +} + +// BuildRequestURL constructs the upstream URL. +func (a *TaskAdaptor) BuildRequestURL(info *relaycommon.RelayInfo) (string, error) { + modelName := info.OriginModelName + version := model_setting.GetGeminiVersionSetting(modelName) + + return fmt.Sprintf( + "%s/%s/models/%s:predictLongRunning", + a.baseURL, + version, + modelName, + ), nil +} + +// BuildRequestHeader sets required headers. +func (a *TaskAdaptor) BuildRequestHeader(c *gin.Context, req *http.Request, info *relaycommon.RelayInfo) error { + req.Header.Set("Content-Type", "application/json") + req.Header.Set("Accept", "application/json") + req.Header.Set("x-goog-api-key", a.apiKey) + return nil +} + +// BuildRequestBody converts request into Gemini specific format. +func (a *TaskAdaptor) BuildRequestBody(c *gin.Context, info *relaycommon.RelayInfo) (io.Reader, error) { + v, ok := c.Get("task_request") + if !ok { + return nil, fmt.Errorf("request not found in context") + } + req, ok := v.(relaycommon.TaskSubmitReq) + if !ok { + return nil, fmt.Errorf("unexpected task_request type") + } + + // Create structured video generation request + body := GeminiVideoPayload{ + Instances: []GeminiVideoRequest{ + {Prompt: req.Prompt}, + }, + Parameters: GeminiVideoGenerationConfig{}, + } + + metadata := req.Metadata + medaBytes, err := json.Marshal(metadata) + if err != nil { + return nil, errors.Wrap(err, "metadata marshal metadata failed") + } + err = json.Unmarshal(medaBytes, &body.Parameters) + if err != nil { + return nil, errors.Wrap(err, "unmarshal metadata failed") + } + + data, err := json.Marshal(body) + if err != nil { + return nil, err + } + return bytes.NewReader(data), nil +} + +// DoRequest delegates to common helper. +func (a *TaskAdaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (*http.Response, error) { + return channel.DoTaskApiRequest(a, c, info, requestBody) +} + +// DoResponse handles upstream response, returns taskID etc. +func (a *TaskAdaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (taskID string, taskData []byte, taskErr *dto.TaskError) { + responseBody, err := io.ReadAll(resp.Body) + if err != nil { + return "", nil, service.TaskErrorWrapper(err, "read_response_body_failed", http.StatusInternalServerError) + } + _ = resp.Body.Close() + + var s submitResponse + if err := json.Unmarshal(responseBody, &s); err != nil { + return "", nil, service.TaskErrorWrapper(err, "unmarshal_response_failed", http.StatusInternalServerError) + } + if strings.TrimSpace(s.Name) == "" { + return "", nil, service.TaskErrorWrapper(fmt.Errorf("missing operation name"), "invalid_response", http.StatusInternalServerError) + } + taskID = encodeLocalTaskID(s.Name) + ov := dto.NewOpenAIVideo() + ov.ID = taskID + ov.TaskID = taskID + ov.CreatedAt = time.Now().Unix() + ov.Model = info.OriginModelName + c.JSON(http.StatusOK, ov) + return taskID, responseBody, nil +} + +func (a *TaskAdaptor) GetModelList() []string { + return []string{"veo-3.0-generate-001", "veo-3.1-generate-preview", "veo-3.1-fast-generate-preview"} +} + +func (a *TaskAdaptor) GetChannelName() string { + return "gemini" +} + +// FetchTask fetch task status +func (a *TaskAdaptor) FetchTask(baseUrl, key string, body map[string]any) (*http.Response, error) { + taskID, ok := body["task_id"].(string) + if !ok { + return nil, fmt.Errorf("invalid task_id") + } + + upstreamName, err := decodeLocalTaskID(taskID) + if err != nil { + return nil, fmt.Errorf("decode task_id failed: %w", err) + } + + // For Gemini API, we use GET request to the operations endpoint + version := model_setting.GetGeminiVersionSetting("default") + url := fmt.Sprintf("%s/%s/%s", baseUrl, version, upstreamName) + + req, err := http.NewRequest(http.MethodGet, url, nil) + if err != nil { + return nil, err + } + + req.Header.Set("Accept", "application/json") + req.Header.Set("x-goog-api-key", key) + + return service.GetHttpClient().Do(req) +} + +func (a *TaskAdaptor) ParseTaskResult(respBody []byte) (*relaycommon.TaskInfo, error) { + var op operationResponse + if err := json.Unmarshal(respBody, &op); err != nil { + return nil, fmt.Errorf("unmarshal operation response failed: %w", err) + } + + ti := &relaycommon.TaskInfo{} + + if op.Error.Message != "" { + ti.Status = model.TaskStatusFailure + ti.Reason = op.Error.Message + ti.Progress = "100%" + return ti, nil + } + + if !op.Done { + ti.Status = model.TaskStatusInProgress + ti.Progress = "50%" + return ti, nil + } + + ti.Status = model.TaskStatusSuccess + ti.Progress = "100%" + + // Extract URL from generateVideoResponse if available + if len(op.Response.GenerateVideoResponse.GeneratedSamples) > 0 { + if uri := op.Response.GenerateVideoResponse.GeneratedSamples[0].Video.URI; uri != "" { + taskID := encodeLocalTaskID(op.Name) + ti.Url = fmt.Sprintf("%s/v1/videos/%s/content?url=%s", system_setting.ServerAddress, taskID, uri) + } + } + + return ti, nil +} + +// ============================ +// helpers +// ============================ + +func encodeLocalTaskID(name string) string { + return base64.RawURLEncoding.EncodeToString([]byte(name)) +} + +func decodeLocalTaskID(local string) (string, error) { + b, err := base64.RawURLEncoding.DecodeString(local) + if err != nil { + return "", err + } + return string(b), nil +} diff --git a/relay/relay_adaptor.go b/relay/relay_adaptor.go index 736a58223..6edb9a8cf 100644 --- a/relay/relay_adaptor.go +++ b/relay/relay_adaptor.go @@ -29,6 +29,7 @@ import ( "github.com/QuantumNous/new-api/relay/channel/siliconflow" "github.com/QuantumNous/new-api/relay/channel/submodel" taskdoubao "github.com/QuantumNous/new-api/relay/channel/task/doubao" + taskGemini "github.com/QuantumNous/new-api/relay/channel/task/gemini" taskjimeng "github.com/QuantumNous/new-api/relay/channel/task/jimeng" "github.com/QuantumNous/new-api/relay/channel/task/kling" tasksora "github.com/QuantumNous/new-api/relay/channel/task/sora" @@ -144,6 +145,8 @@ func GetTaskAdaptor(platform constant.TaskPlatform) channel.TaskAdaptor { return &taskdoubao.TaskAdaptor{} case constant.ChannelTypeSora, constant.ChannelTypeOpenAI: return &tasksora.TaskAdaptor{} + case constant.ChannelTypeGemini: + return &taskGemini.TaskAdaptor{} } } return nil