feat: ionet integration (#2105)

* wip ionet integrate

* wip ionet integrate

* wip ionet integrate

* ollama wip

* wip

* feat: ionet integration & ollama manage

* fix merge conflict

* wip

* fix: test conn cors

* wip

* fix ionet

* fix ionet

* wip

* fix model select

* refactor: Remove `pkg/ionet` test files and update related Go source and web UI model deployment components.

* feat: Enhance model deployment UI with styling improvements, updated text, and a new description component.

* Revert "feat: Enhance model deployment UI with styling improvements, updated text, and a new description component."

This reverts commit 8b75cb5bf0d1a534b339df8c033be9a6c7df7964.
This commit is contained in:
Seefs
2025-12-28 15:55:35 +08:00
committed by GitHub
parent 1a69a93d20
commit 725d61c5d3
51 changed files with 11895 additions and 369 deletions

View File

@@ -67,3 +67,40 @@ type OllamaEmbeddingResponse struct {
Embeddings [][]float64 `json:"embeddings"`
PromptEvalCount int `json:"prompt_eval_count,omitempty"`
}
// OllamaTagsResponse is the payload returned by Ollama's GET /api/tags
// endpoint, listing the models present on the server.
type OllamaTagsResponse struct {
Models []OllamaModel `json:"models"`
}

// OllamaModel describes one locally available model as reported by
// GET /api/tags.
type OllamaModel struct {
Name string `json:"name"`
Size int64 `json:"size"` // size on disk in bytes
Digest string `json:"digest,omitempty"`
ModifiedAt string `json:"modified_at"` // timestamp string as sent by the server; not parsed here
Details OllamaModelDetail `json:"details,omitempty"` // NOTE(review): omitempty is a no-op on a non-pointer struct when marshaling; harmless as a decode target
}

// OllamaModelDetail carries the optional model metadata nested under
// "details" in /api/tags responses.
type OllamaModelDetail struct {
ParentModel string `json:"parent_model,omitempty"`
Format string `json:"format,omitempty"`
Family string `json:"family,omitempty"`
Families []string `json:"families,omitempty"`
ParameterSize string `json:"parameter_size,omitempty"`
QuantizationLevel string `json:"quantization_level,omitempty"`
}

// OllamaPullRequest is the body sent to POST /api/pull. Stream selects
// between a single final response (false) and NDJSON progress lines (true).
type OllamaPullRequest struct {
Name string `json:"name"`
Stream bool `json:"stream,omitempty"`
}

// OllamaPullResponse is one progress record from POST /api/pull.
// Total/Completed are byte counts for the layer currently downloading.
type OllamaPullResponse struct {
Status string `json:"status"`
Digest string `json:"digest,omitempty"`
Total int64 `json:"total,omitempty"`
Completed int64 `json:"completed,omitempty"`
}

// OllamaDeleteRequest is the body sent to DELETE /api/delete.
type OllamaDeleteRequest struct {
Name string `json:"name"`
}

View File

@@ -1,11 +1,13 @@
package ollama
import (
"bufio"
"encoding/json"
"fmt"
"io"
"net/http"
"strings"
"time"
"github.com/QuantumNous/new-api/common"
"github.com/QuantumNous/new-api/dto"
@@ -283,3 +285,246 @@ func ollamaEmbeddingHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *h
service.IOCopyBytesGracefully(c, resp, out)
return usage, nil
}
// FetchOllamaModels queries the Ollama server's GET /api/tags endpoint and
// returns the list of models available on it.
//
// baseURL is the server root (a trailing "/" is tolerated). apiKey is
// optional: Ollama itself does not require authentication, but a Bearer
// token is forwarded for compatibility with authenticating proxies.
func FetchOllamaModels(baseURL, apiKey string) ([]OllamaModel, error) {
	// Trim the trailing slash so we never build "host//api/tags"
	// (matches FetchOllamaVersion's handling of baseURL).
	url := fmt.Sprintf("%s/api/tags", strings.TrimRight(baseURL, "/"))
	// Bound the request so an unreachable or hung server cannot block the
	// caller forever; listing tags is a cheap metadata call.
	client := &http.Client{Timeout: 30 * time.Second}
	request, err := http.NewRequest("GET", url, nil)
	if err != nil {
		return nil, fmt.Errorf("创建请求失败: %v", err)
	}
	// Ollama usually needs no Bearer token, but send one when provided
	// for compatibility.
	if apiKey != "" {
		request.Header.Set("Authorization", "Bearer "+apiKey)
	}
	response, err := client.Do(request)
	if err != nil {
		return nil, fmt.Errorf("请求失败: %v", err)
	}
	defer response.Body.Close()
	if response.StatusCode != http.StatusOK {
		body, _ := io.ReadAll(response.Body) // best effort: include the server's message in the error
		return nil, fmt.Errorf("服务器返回错误 %d: %s", response.StatusCode, string(body))
	}
	var tagsResponse OllamaTagsResponse
	body, err := io.ReadAll(response.Body)
	if err != nil {
		return nil, fmt.Errorf("读取响应失败: %v", err)
	}
	err = common.Unmarshal(body, &tagsResponse)
	if err != nil {
		return nil, fmt.Errorf("解析响应失败: %v", err)
	}
	return tagsResponse.Models, nil
}
// PullOllamaModel downloads a model onto the Ollama server via POST
// /api/pull and blocks until the server finishes (non-streaming mode).
//
// Suitable for small models; for progress feedback on large downloads use
// PullOllamaModelStream instead. A trailing "/" on baseURL is tolerated.
func PullOllamaModel(baseURL, apiKey, modelName string) error {
	url := fmt.Sprintf("%s/api/pull", strings.TrimRight(baseURL, "/"))
	pullRequest := OllamaPullRequest{
		Name:   modelName,
		Stream: false, // non-streaming keeps the response handling trivial
	}
	requestBody, err := common.Marshal(pullRequest)
	if err != nil {
		return fmt.Errorf("序列化请求失败: %v", err)
	}
	// 30-minute timeout: pulling a large model is a long download.
	client := &http.Client{Timeout: 30 * time.Minute}
	request, err := http.NewRequest("POST", url, strings.NewReader(string(requestBody)))
	if err != nil {
		return fmt.Errorf("创建请求失败: %v", err)
	}
	request.Header.Set("Content-Type", "application/json")
	if apiKey != "" {
		request.Header.Set("Authorization", "Bearer "+apiKey)
	}
	response, err := client.Do(request)
	if err != nil {
		return fmt.Errorf("请求失败: %v", err)
	}
	defer response.Body.Close()
	if response.StatusCode != http.StatusOK {
		body, _ := io.ReadAll(response.Body) // best effort: surface the server's message
		return fmt.Errorf("拉取模型失败 %d: %s", response.StatusCode, string(body))
	}
	return nil
}
// PullOllamaModelStream pulls a model via POST /api/pull in streaming mode,
// invoking progressCallback (if non-nil) for every NDJSON progress line the
// server emits. It returns nil only after a line with status "success" is
// seen; an "error" status or a stream that ends without success is an error.
//
// A trailing "/" on baseURL is tolerated.
func PullOllamaModelStream(baseURL, apiKey, modelName string, progressCallback func(OllamaPullResponse)) error {
	url := fmt.Sprintf("%s/api/pull", strings.TrimRight(baseURL, "/"))
	pullRequest := OllamaPullRequest{
		Name:   modelName,
		Stream: true, // ask the server for incremental progress lines
	}
	requestBody, err := common.Marshal(pullRequest)
	if err != nil {
		return fmt.Errorf("序列化请求失败: %v", err)
	}
	// One-hour timeout: covers very large model downloads.
	client := &http.Client{Timeout: time.Hour}
	request, err := http.NewRequest("POST", url, strings.NewReader(string(requestBody)))
	if err != nil {
		return fmt.Errorf("创建请求失败: %v", err)
	}
	request.Header.Set("Content-Type", "application/json")
	if apiKey != "" {
		request.Header.Set("Authorization", "Bearer "+apiKey)
	}
	response, err := client.Do(request)
	if err != nil {
		return fmt.Errorf("请求失败: %v", err)
	}
	defer response.Body.Close()
	if response.StatusCode != http.StatusOK {
		body, _ := io.ReadAll(response.Body) // best effort: surface the server's message
		return fmt.Errorf("拉取模型失败 %d: %s", response.StatusCode, string(body))
	}
	// Consume the NDJSON progress stream line by line.
	scanner := bufio.NewScanner(response.Body)
	successful := false
	for scanner.Scan() {
		line := scanner.Text()
		if strings.TrimSpace(line) == "" {
			continue
		}
		var pullResponse OllamaPullResponse
		if err := common.Unmarshal([]byte(line), &pullResponse); err != nil {
			continue // skip lines that are not valid progress records
		}
		if progressCallback != nil {
			progressCallback(pullResponse)
		}
		// Terminal states.
		// NOTE(review): Ollama may also report failures as {"error": "..."}
		// with no "status" field — those fall through here and are caught by
		// the !successful check below; confirm against the server version in use.
		if strings.EqualFold(pullResponse.Status, "error") {
			return fmt.Errorf("拉取模型失败: %s", strings.TrimSpace(line))
		}
		if strings.EqualFold(pullResponse.Status, "success") {
			successful = true
			break
		}
	}
	if err := scanner.Err(); err != nil {
		return fmt.Errorf("读取流式响应失败: %v", err)
	}
	if !successful {
		return fmt.Errorf("拉取模型未完成: 未收到成功状态")
	}
	return nil
}
// DeleteOllamaModel removes a model from the Ollama server via DELETE
// /api/delete. A trailing "/" on baseURL is tolerated.
func DeleteOllamaModel(baseURL, apiKey, modelName string) error {
	url := fmt.Sprintf("%s/api/delete", strings.TrimRight(baseURL, "/"))
	deleteRequest := OllamaDeleteRequest{
		Name: modelName,
	}
	requestBody, err := common.Marshal(deleteRequest)
	if err != nil {
		return fmt.Errorf("序列化请求失败: %v", err)
	}
	// Deleting is a quick local operation on the server; bound the request
	// so a hung server cannot block the caller forever.
	client := &http.Client{Timeout: 30 * time.Second}
	request, err := http.NewRequest("DELETE", url, strings.NewReader(string(requestBody)))
	if err != nil {
		return fmt.Errorf("创建请求失败: %v", err)
	}
	request.Header.Set("Content-Type", "application/json")
	if apiKey != "" {
		request.Header.Set("Authorization", "Bearer "+apiKey)
	}
	response, err := client.Do(request)
	if err != nil {
		return fmt.Errorf("请求失败: %v", err)
	}
	defer response.Body.Close()
	if response.StatusCode != http.StatusOK {
		body, _ := io.ReadAll(response.Body) // best effort: surface the server's message
		return fmt.Errorf("删除模型失败 %d: %s", response.StatusCode, string(body))
	}
	return nil
}
// FetchOllamaVersion asks the Ollama server for its version via GET
// /api/version and returns the reported version string.
//
// A trailing "/" on baseURL is tolerated; an empty (or slash-only) baseURL
// is rejected up front. apiKey is optional and, when set, is sent as a
// Bearer token.
func FetchOllamaVersion(baseURL, apiKey string) (string, error) {
	base := strings.TrimRight(baseURL, "/")
	if base == "" {
		return "", fmt.Errorf("baseURL 为空")
	}
	endpoint := fmt.Sprintf("%s/api/version", base)

	httpClient := &http.Client{Timeout: 10 * time.Second}
	req, err := http.NewRequest("GET", endpoint, nil)
	if err != nil {
		return "", fmt.Errorf("创建请求失败: %v", err)
	}
	if apiKey != "" {
		req.Header.Set("Authorization", "Bearer "+apiKey)
	}

	resp, err := httpClient.Do(req)
	if err != nil {
		return "", fmt.Errorf("请求失败: %v", err)
	}
	defer resp.Body.Close()

	payload, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", fmt.Errorf("读取响应失败: %v", err)
	}
	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("查询版本失败 %d: %s", resp.StatusCode, string(payload))
	}

	var parsed struct {
		Version string `json:"version"`
	}
	if err := json.Unmarshal(payload, &parsed); err != nil {
		return "", fmt.Errorf("解析响应失败: %v", err)
	}
	if parsed.Version == "" {
		return "", fmt.Errorf("未返回版本信息")
	}
	return parsed.Version, nil
}