diff --git a/common/endpoint_defaults.go b/common/endpoint_defaults.go index 6717a22fd..11ec79217 100644 --- a/common/endpoint_defaults.go +++ b/common/endpoint_defaults.go @@ -17,13 +17,14 @@ type EndpointInfo struct { // defaultEndpointInfoMap 保存内置端点的默认 Path 与 Method var defaultEndpointInfoMap = map[constant.EndpointType]EndpointInfo{ - constant.EndpointTypeOpenAI: {Path: "/v1/chat/completions", Method: "POST"}, - constant.EndpointTypeOpenAIResponse: {Path: "/v1/responses", Method: "POST"}, - constant.EndpointTypeAnthropic: {Path: "/v1/messages", Method: "POST"}, - constant.EndpointTypeGemini: {Path: "/v1beta/models/{model}:generateContent", Method: "POST"}, - constant.EndpointTypeJinaRerank: {Path: "/v1/rerank", Method: "POST"}, - constant.EndpointTypeImageGeneration: {Path: "/v1/images/generations", Method: "POST"}, - constant.EndpointTypeEmbeddings: {Path: "/v1/embeddings", Method: "POST"}, + constant.EndpointTypeOpenAI: {Path: "/v1/chat/completions", Method: "POST"}, + constant.EndpointTypeOpenAIResponse: {Path: "/v1/responses", Method: "POST"}, + constant.EndpointTypeOpenAIResponseCompact: {Path: "/v1/responses/compact", Method: "POST"}, + constant.EndpointTypeAnthropic: {Path: "/v1/messages", Method: "POST"}, + constant.EndpointTypeGemini: {Path: "/v1beta/models/{model}:generateContent", Method: "POST"}, + constant.EndpointTypeJinaRerank: {Path: "/v1/rerank", Method: "POST"}, + constant.EndpointTypeImageGeneration: {Path: "/v1/images/generations", Method: "POST"}, + constant.EndpointTypeEmbeddings: {Path: "/v1/embeddings", Method: "POST"}, } // GetDefaultEndpointInfo 返回指定端点类型的默认信息以及是否存在 diff --git a/constant/endpoint_type.go b/constant/endpoint_type.go index 29c0d31e3..8681bf06e 100644 --- a/constant/endpoint_type.go +++ b/constant/endpoint_type.go @@ -3,14 +3,15 @@ package constant type EndpointType string const ( - EndpointTypeOpenAI EndpointType = "openai" - EndpointTypeOpenAIResponse EndpointType = "openai-response" - EndpointTypeAnthropic EndpointType = "anthropic" - EndpointTypeGemini EndpointType = "gemini" - EndpointTypeJinaRerank EndpointType = "jina-rerank" - EndpointTypeImageGeneration EndpointType = "image-generation" - EndpointTypeEmbeddings EndpointType = "embeddings" - EndpointTypeOpenAIVideo EndpointType = "openai-video" + EndpointTypeOpenAI EndpointType = "openai" + EndpointTypeOpenAIResponse EndpointType = "openai-response" + EndpointTypeOpenAIResponseCompact EndpointType = "openai-response-compact" + EndpointTypeAnthropic EndpointType = "anthropic" + EndpointTypeGemini EndpointType = "gemini" + EndpointTypeJinaRerank EndpointType = "jina-rerank" + EndpointTypeImageGeneration EndpointType = "image-generation" + EndpointTypeEmbeddings EndpointType = "embeddings" + EndpointTypeOpenAIVideo EndpointType = "openai-video" //EndpointTypeMidjourney EndpointType = "midjourney-proxy" //EndpointTypeSuno EndpointType = "suno-proxy" //EndpointTypeKling EndpointType = "kling" diff --git a/controller/channel-test.go b/controller/channel-test.go index 03f9139b7..5ae04e8a0 100644 --- a/controller/channel-test.go +++ b/controller/channel-test.go @@ -26,6 +26,7 @@ import ( "github.com/QuantumNous/new-api/relay/helper" "github.com/QuantumNous/new-api/service" "github.com/QuantumNous/new-api/setting/operation_setting" + "github.com/QuantumNous/new-api/setting/ratio_setting" "github.com/QuantumNous/new-api/types" "github.com/bytedance/gopkg/util/gopool" @@ -107,6 +108,14 @@ func testChannel(channel *model.Channel, testModel string, endpointType string) if strings.Contains(strings.ToLower(testModel), "codex") { requestPath = "/v1/responses" } + + // responses compaction models (must use /v1/responses/compact) + if strings.HasSuffix(testModel, ratio_setting.CompactModelSuffix) { + requestPath = "/v1/responses/compact" + } + } + if strings.HasPrefix(requestPath, "/v1/responses/compact") { + testModel = ratio_setting.WithCompactModelSuffix(testModel) } c.Request = &http.Request{ @@ -150,6 +159,8 @@ func testChannel(channel *model.Channel, testModel string, endpointType string) relayFormat = types.RelayFormatOpenAI case constant.EndpointTypeOpenAIResponse: relayFormat = types.RelayFormatOpenAIResponses + case constant.EndpointTypeOpenAIResponseCompact: + relayFormat = types.RelayFormatOpenAIResponsesCompaction case constant.EndpointTypeAnthropic: relayFormat = types.RelayFormatClaude case constant.EndpointTypeGemini: @@ -184,6 +195,9 @@ func testChannel(channel *model.Channel, testModel string, endpointType string) if c.Request.URL.Path == "/v1/responses" { relayFormat = types.RelayFormatOpenAIResponses } + if strings.HasPrefix(c.Request.URL.Path, "/v1/responses/compact") { + relayFormat = types.RelayFormatOpenAIResponsesCompaction + } } request := buildTestRequest(testModel, endpointType, channel) @@ -215,6 +229,15 @@ func testChannel(channel *model.Channel, testModel string, endpointType string) request.SetModelName(testModel) apiType, _ := common.ChannelType2APIType(channel.Type) + if info.RelayMode == relayconstant.RelayModeResponsesCompact && + apiType != constant.APITypeOpenAI && + apiType != constant.APITypeCodex { + return testResult{ + context: c, + localErr: fmt.Errorf("responses compaction test only supports openai/codex channels, got api type %d", apiType), + newAPIError: types.NewError(fmt.Errorf("unsupported api type: %d", apiType), types.ErrorCodeInvalidApiType), + } + } adaptor := relay.GetAdaptor(apiType) if adaptor == nil { return testResult{ @@ -287,6 +310,25 @@ func testChannel(channel *model.Channel, testModel string, endpointType string) newAPIError: types.NewError(errors.New("invalid response request type"), types.ErrorCodeConvertRequestFailed), } } + case relayconstant.RelayModeResponsesCompact: + // Response compaction request - convert to OpenAIResponsesRequest before adapting + switch req := request.(type) { + case *dto.OpenAIResponsesCompactionRequest: + convertedRequest, err = adaptor.ConvertOpenAIResponsesRequest(c, info, dto.OpenAIResponsesRequest{ + Model: req.Model, + Input: req.Input, + Instructions: req.Instructions, + PreviousResponseID: req.PreviousResponseID, + }) + case *dto.OpenAIResponsesRequest: + convertedRequest, err = adaptor.ConvertOpenAIResponsesRequest(c, info, *req) + default: + return testResult{ + context: c, + localErr: errors.New("invalid response compaction request type"), + newAPIError: types.NewError(errors.New("invalid response compaction request type"), types.ErrorCodeConvertRequestFailed), + } + } default: // Chat/Completion 等其他请求类型 if generalReq, ok := request.(*dto.GeneralOpenAIRequest); ok { @@ -432,6 +474,8 @@ func testChannel(channel *model.Channel, testModel string, endpointType string) } func buildTestRequest(model string, endpointType string, channel *model.Channel) dto.Request { + testResponsesInput := json.RawMessage(`[{"role":"user","content":"hi"}]`) + // 根据端点类型构建不同的测试请求 if endpointType != "" { switch constant.EndpointType(endpointType) { @@ -463,6 +507,12 @@ func buildTestRequest(model string, endpointType string, channel *model.Channel) Model: model, Input: json.RawMessage(`[{"role":"user","content":"hi"}]`), } + case constant.EndpointTypeOpenAIResponseCompact: + // 返回 OpenAIResponsesCompactionRequest + return &dto.OpenAIResponsesCompactionRequest{ + Model: model, + Input: testResponsesInput, + } case constant.EndpointTypeAnthropic, constant.EndpointTypeGemini, constant.EndpointTypeOpenAI: // 返回 GeneralOpenAIRequest maxTokens := uint(16) @@ -504,6 +554,14 @@ func buildTestRequest(model string, endpointType string, channel *model.Channel) } } + // Responses compaction models (must use /v1/responses/compact) + if strings.HasSuffix(model, ratio_setting.CompactModelSuffix) { + return &dto.OpenAIResponsesCompactionRequest{ + Model: model, + Input: testResponsesInput, + } + } + // Responses-only models (e.g. codex series) if strings.Contains(strings.ToLower(model), "codex") { return &dto.OpenAIResponsesRequest{ diff --git a/controller/relay.go b/controller/relay.go index 906a6969b..9197847e4 100644 --- a/controller/relay.go +++ b/controller/relay.go @@ -45,7 +45,7 @@ func relayHandler(c *gin.Context, info *relaycommon.RelayInfo) *types.NewAPIErro err = relay.RerankHelper(c, info) case relayconstant.RelayModeEmbeddings: err = relay.EmbeddingHelper(c, info) - case relayconstant.RelayModeResponses: + case relayconstant.RelayModeResponses, relayconstant.RelayModeResponsesCompact: err = relay.ResponsesHelper(c, info) default: err = relay.TextHelper(c, info) diff --git a/docs/openapi/relay.json b/docs/openapi/relay.json index 166bd8533..b6dfbd312 100644 --- a/docs/openapi/relay.json +++ b/docs/openapi/relay.json @@ -284,6 +284,46 @@ } ] } + }, + "/v1/responses/compact": { + "post": { + "summary": "压缩对话 (OpenAI Responses API)", + "deprecated": false, + "description": "OpenAI Responses API,用于对长对话进行 compaction。", + "operationId": "compactResponse", + "tags": [ + "OpenAI格式(Responses)" + ], + "parameters": [], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ResponsesCompactionRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "成功压缩对话", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ResponsesCompactionResponse" + } + } + }, + "headers": {} + } + }, + "security": [ + { + "BearerAuth": [] + } + ] + } }, "/v1/images/generations": { "post": { @@ -3130,10 +3170,71 @@ } } }, - "ResponsesStreamResponse": { - "type": "object", - "properties": { - "type": { + "ResponsesCompactionResponse": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "object": { + "type": "string", + "example": "response.compaction" + }, + "created_at": { + "type": "integer" + }, + "output": { + "type": "array", + "items": { + "type": "object", + "properties": {} + } + }, + "usage": { + "$ref": "#/components/schemas/Usage" + }, + "error": { + "type": "object", + "properties": {} + } + } + }, + "ResponsesCompactionRequest": { + "type": "object", + "required": [ + "model" + ], + "properties": { + "model": { + "type": "string" + }, + "input": { + "description": "输入内容,可以是字符串或消息数组", + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "object", + "properties": {} + } + } + ] + }, + "instructions": { + "type": "string" + }, + "previous_response_id": { + "type": "string" + } + } + }, + "ResponsesStreamResponse": { + "type": "object", + "properties": { + "type": { "type": "string" }, "response": { @@ -7138,4 +7239,4 @@ "BearerAuth": [] } ] -} \ No newline at end of file +} diff --git a/dto/openai_compaction.go b/dto/openai_compaction.go new file mode 100644 index 000000000..f19df09ce --- /dev/null +++ b/dto/openai_compaction.go @@ -0,0 +1,20 @@ +package dto + +import ( + "encoding/json" + + "github.com/QuantumNous/new-api/types" +) + +type OpenAIResponsesCompactionResponse struct { + ID string `json:"id"` + Object string `json:"object"` + CreatedAt int `json:"created_at"` + Output json.RawMessage `json:"output"` + Usage *Usage `json:"usage"` + Error any `json:"error,omitempty"` +} + +func (o *OpenAIResponsesCompactionResponse) GetOpenAIError() *types.OpenAIError { + return GetOpenAIError(o.Error) +} diff --git a/dto/openai_responses_compaction_request.go b/dto/openai_responses_compaction_request.go new file mode 100644 index 000000000..7ea584ca3 --- /dev/null +++ b/dto/openai_responses_compaction_request.go @@ -0,0 +1,40 @@ +package dto + +import ( + "encoding/json" + "strings" + + "github.com/QuantumNous/new-api/types" + + "github.com/gin-gonic/gin" +) + +type OpenAIResponsesCompactionRequest struct { + Model string `json:"model"` + Input json.RawMessage `json:"input,omitempty"` + Instructions json.RawMessage `json:"instructions,omitempty"` + PreviousResponseID string `json:"previous_response_id,omitempty"` +} + +func (r *OpenAIResponsesCompactionRequest) GetTokenCountMeta() *types.TokenCountMeta { + var parts []string + if len(r.Instructions) > 0 { + parts = append(parts, string(r.Instructions)) + } + if len(r.Input) > 0 { + parts = append(parts, string(r.Input)) + } + return &types.TokenCountMeta{ + CombineText: strings.Join(parts, "\n"), + } +} + +func (r *OpenAIResponsesCompactionRequest) IsStream(c *gin.Context) bool { + return false +} + +func (r *OpenAIResponsesCompactionRequest) SetModelName(modelName string) { + if modelName != "" { + r.Model = modelName + } +} diff --git a/middleware/distributor.go b/middleware/distributor.go index 054763c9e..24dfa80aa 100644 --- a/middleware/distributor.go +++ b/middleware/distributor.go @@ -329,6 +329,10 @@ func getModelRequest(c *gin.Context) (*ModelRequest, bool, error) { modelRequest.Group = req.Group common.SetContextKey(c, constant.ContextKeyTokenGroup, modelRequest.Group) } + + if strings.HasPrefix(c.Request.URL.Path, "/v1/responses/compact") && modelRequest.Model != "" { + modelRequest.Model = ratio_setting.WithCompactModelSuffix(modelRequest.Model) + } return &modelRequest, shouldSelectChannel, nil } diff --git a/relay/channel/codex/adaptor.go b/relay/channel/codex/adaptor.go index ab61dfac7..44ec98c8e 100644 --- a/relay/channel/codex/adaptor.go +++ b/relay/channel/codex/adaptor.go @@ -53,6 +53,8 @@ func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.Rela } func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) { + isCompact := info != nil && info.RelayMode == relayconstant.RelayModeResponsesCompact + if info != nil && info.ChannelSetting.SystemPrompt != "" { systemPrompt := info.ChannelSetting.SystemPrompt @@ -88,7 +90,9 @@ func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommo } } } - + if isCompact { + return request, nil + } // codex: store must be false request.Store = json.RawMessage("false") // rm max_output_tokens @@ -102,10 +106,14 @@ func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, request } func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (usage any, err *types.NewAPIError) { - if info.RelayMode != relayconstant.RelayModeResponses { + if info.RelayMode != relayconstant.RelayModeResponses && info.RelayMode != relayconstant.RelayModeResponsesCompact { return nil, types.NewError(errors.New("codex channel: endpoint not supported"), types.ErrorCodeInvalidRequest) } + if info.RelayMode == relayconstant.RelayModeResponsesCompact { + return openai.OaiResponsesCompactionHandler(c, resp) + } + if info.IsStream { return openai.OaiResponsesStreamHandler(c, info, resp) } @@ -121,10 +129,14 @@ func (a *Adaptor) GetChannelName() string { } func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) { - if info.RelayMode != relayconstant.RelayModeResponses { - return "", errors.New("codex channel: only /v1/responses is supported") + if info.RelayMode != relayconstant.RelayModeResponses && info.RelayMode != relayconstant.RelayModeResponsesCompact { + return "", errors.New("codex channel: only /v1/responses and /v1/responses/compact are supported") } - return relaycommon.GetFullRequestURL(info.ChannelBaseUrl, "/backend-api/codex/responses", info.ChannelType), nil + path := "/backend-api/codex/responses" + if info.RelayMode == relayconstant.RelayModeResponsesCompact { + path = "/backend-api/codex/responses/compact" + } + return relaycommon.GetFullRequestURL(info.ChannelBaseUrl, path, info.ChannelType), nil } func (a *Adaptor) SetupRequestHeader(c *gin.Context, req *http.Header, info *relaycommon.RelayInfo) error { diff --git a/relay/channel/codex/constants.go b/relay/channel/codex/constants.go index 461e033a4..8cdb2c38a 100644 --- a/relay/channel/codex/constants.go +++ b/relay/channel/codex/constants.go @@ -1,9 +1,25 @@ package codex -var ModelList = []string{ +import ( + "github.com/QuantumNous/new-api/setting/ratio_setting" + "github.com/samber/lo" +) + +var baseModelList = []string{ "gpt-5", "gpt-5-codex", "gpt-5-codex-mini", "gpt-5.1", "gpt-5.1-codex", "gpt-5.1-codex-max", "gpt-5.1-codex-mini", "gpt-5.2", "gpt-5.2-codex", } +var ModelList = withCompactModelSuffix(baseModelList) + const ChannelName = "codex" + +func withCompactModelSuffix(models []string) []string { + out := make([]string, 0, len(models)*2) + out = append(out, models...) + out = append(out, lo.Map(models, func(model string, _ int) string { + return ratio_setting.WithCompactModelSuffix(model) + })...) + return lo.Uniq(out) +} diff --git a/relay/channel/openai/adaptor.go b/relay/channel/openai/adaptor.go index c031fd755..f76d67df4 100644 --- a/relay/channel/openai/adaptor.go +++ b/relay/channel/openai/adaptor.go @@ -620,6 +620,8 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycom } else { usage, err = OaiResponsesHandler(c, info, resp) } + case relayconstant.RelayModeResponsesCompact: + usage, err = OaiResponsesCompactionHandler(c, resp) default: if info.IsStream { usage, err = OaiStreamHandler(c, info, resp) diff --git a/relay/channel/openai/relay_responses_compact.go b/relay/channel/openai/relay_responses_compact.go new file mode 100644 index 000000000..390de8ed6 --- /dev/null +++ b/relay/channel/openai/relay_responses_compact.go @@ -0,0 +1,44 @@ +package openai + +import ( + "io" + "net/http" + + "github.com/QuantumNous/new-api/common" + "github.com/QuantumNous/new-api/dto" + "github.com/QuantumNous/new-api/service" + "github.com/QuantumNous/new-api/types" + + "github.com/gin-gonic/gin" +) + +func OaiResponsesCompactionHandler(c *gin.Context, resp *http.Response) (*dto.Usage, *types.NewAPIError) { + defer service.CloseResponseBodyGracefully(resp) + + responseBody, err := io.ReadAll(resp.Body) + if err != nil { + return nil, types.NewOpenAIError(err, types.ErrorCodeReadResponseBodyFailed, http.StatusInternalServerError) + } + + var compactResp dto.OpenAIResponsesCompactionResponse + if err := common.Unmarshal(responseBody, &compactResp); err != nil { + return nil, types.NewOpenAIError(err, types.ErrorCodeBadResponseBody, http.StatusInternalServerError) + } + if oaiError := compactResp.GetOpenAIError(); oaiError != nil && oaiError.Type != "" { + return nil, types.WithOpenAIError(*oaiError, resp.StatusCode) + } + + service.IOCopyBytesGracefully(c, resp, responseBody) + + usage := dto.Usage{} + if compactResp.Usage != nil { + usage.PromptTokens = compactResp.Usage.InputTokens + usage.CompletionTokens = compactResp.Usage.OutputTokens + usage.TotalTokens = compactResp.Usage.TotalTokens + if compactResp.Usage.InputTokensDetails != nil { + usage.PromptTokensDetails.CachedTokens = compactResp.Usage.InputTokensDetails.CachedTokens + } + } + + return &usage, nil +} diff --git a/relay/common/relay_info.go b/relay/common/relay_info.go index 5c24ce57a..f4365c126 100644 --- a/relay/common/relay_info.go +++ b/relay/common/relay_info.go @@ -481,6 +481,11 @@ func GenRelayInfo(c *gin.Context, relayFormat types.RelayFormat, request dto.Req break } err = errors.New("request is not a OpenAIResponsesRequest") + case types.RelayFormatOpenAIResponsesCompaction: + if request, ok := request.(*dto.OpenAIResponsesCompactionRequest); ok { + return GenRelayInfoResponsesCompaction(c, request), nil + } + return nil, errors.New("request is not a OpenAIResponsesCompactionRequest") case types.RelayFormatTask: info = genBaseRelayInfo(c, nil) case types.RelayFormatMjProxy: @@ -531,6 +536,15 @@ func (info *RelayInfo) AppendRequestConversion(format types.RelayFormat) { info.RequestConversionChain = append(info.RequestConversionChain, format) } +func GenRelayInfoResponsesCompaction(c *gin.Context, request *dto.OpenAIResponsesCompactionRequest) *RelayInfo { + info := genBaseRelayInfo(c, request) + if info.RelayMode == relayconstant.RelayModeUnknown { + info.RelayMode = relayconstant.RelayModeResponsesCompact + } + info.RelayFormat = types.RelayFormatOpenAIResponsesCompaction + return info +} + //func (info *RelayInfo) SetPromptTokens(promptTokens int) { // info.promptTokens = promptTokens //} diff --git a/relay/constant/relay_mode.go b/relay/constant/relay_mode.go index 85a1b9c5f..256715679 100644 --- a/relay/constant/relay_mode.go +++ b/relay/constant/relay_mode.go @@ -50,6 +50,8 @@ const ( RelayModeRealtime RelayModeGemini + + RelayModeResponsesCompact ) func Path2RelayMode(path string) int { @@ -70,6 +72,8 @@ func Path2RelayMode(path string) int { relayMode = RelayModeImagesEdits } else if strings.HasPrefix(path, "/v1/edits") { relayMode = RelayModeEdits + } else if strings.HasPrefix(path, "/v1/responses/compact") { + relayMode = RelayModeResponsesCompact } else if strings.HasPrefix(path, "/v1/responses") { relayMode = RelayModeResponses } else if strings.HasPrefix(path, "/v1/audio/speech") { diff --git a/relay/helper/model_mapped.go b/relay/helper/model_mapped.go index 821158fae..5d6efa094 100644 --- a/relay/helper/model_mapped.go +++ b/relay/helper/model_mapped.go @@ -4,13 +4,27 @@ import ( "encoding/json" "errors" "fmt" + "strings" "github.com/QuantumNous/new-api/dto" "github.com/QuantumNous/new-api/relay/common" + relayconstant "github.com/QuantumNous/new-api/relay/constant" + "github.com/QuantumNous/new-api/setting/ratio_setting" "github.com/gin-gonic/gin" ) func ModelMappedHelper(c *gin.Context, info *common.RelayInfo, request dto.Request) error { + if info.ChannelMeta == nil { + info.ChannelMeta = &common.ChannelMeta{} + } + + isResponsesCompact := info.RelayMode == relayconstant.RelayModeResponsesCompact + originModelName := info.OriginModelName + mappingModelName := originModelName + if isResponsesCompact && strings.HasSuffix(originModelName, ratio_setting.CompactModelSuffix) { + mappingModelName = strings.TrimSuffix(originModelName, ratio_setting.CompactModelSuffix) + } + // map model name modelMapping := c.GetString("model_mapping") if modelMapping != "" && modelMapping != "{}" { @@ -21,7 +35,7 @@ func ModelMappedHelper(c *gin.Context, info *common.RelayInfo, request dto.Reque } // 支持链式模型重定向,最终使用链尾的模型 - currentModel := info.OriginModelName + currentModel := mappingModelName visitedModels := map[string]bool{ currentModel: true, } @@ -51,6 +65,15 @@ func ModelMappedHelper(c *gin.Context, info *common.RelayInfo, request dto.Reque info.UpstreamModelName = currentModel } } + + if isResponsesCompact { + finalUpstreamModelName := mappingModelName + if info.IsModelMapped && info.UpstreamModelName != "" { + finalUpstreamModelName = info.UpstreamModelName + } + info.UpstreamModelName = finalUpstreamModelName + info.OriginModelName = ratio_setting.WithCompactModelSuffix(finalUpstreamModelName) + } if request != nil { request.SetModelName(info.UpstreamModelName) } diff --git a/relay/helper/valid_request.go b/relay/helper/valid_request.go index 3bdfa6ff4..750f74993 100644 --- a/relay/helper/valid_request.go +++ b/relay/helper/valid_request.go @@ -34,6 +34,8 @@ func GetAndValidateRequest(c *gin.Context, format types.RelayFormat) (request dt request, err = GetAndValidateClaudeRequest(c) case types.RelayFormatOpenAIResponses: request, err = GetAndValidateResponsesRequest(c) + case types.RelayFormatOpenAIResponsesCompaction: + request, err = GetAndValidateResponsesCompactionRequest(c) case types.RelayFormatOpenAIImage: request, err = GetAndValidOpenAIImageRequest(c, relayMode) @@ -125,6 +127,17 @@ func GetAndValidateResponsesRequest(c *gin.Context) (*dto.OpenAIResponsesRequest return request, nil } +func GetAndValidateResponsesCompactionRequest(c *gin.Context) (*dto.OpenAIResponsesCompactionRequest, error) { + request := &dto.OpenAIResponsesCompactionRequest{} + if err := common.UnmarshalBodyReusable(c, request); err != nil { + return nil, err + } + if request.Model == "" { + return nil, errors.New("model is required") + } + return request, nil +} + func GetAndValidOpenAIImageRequest(c *gin.Context, relayMode int) (*dto.ImageRequest, error) { imageRequest := &dto.ImageRequest{} diff --git a/relay/responses_handler.go b/relay/responses_handler.go index 769437a1d..8954bd5cc 100644 --- a/relay/responses_handler.go +++ b/relay/responses_handler.go @@ -8,8 +8,10 @@ import ( "strings" "github.com/QuantumNous/new-api/common" + appconstant "github.com/QuantumNous/new-api/constant" "github.com/QuantumNous/new-api/dto" relaycommon "github.com/QuantumNous/new-api/relay/common" + relayconstant "github.com/QuantumNous/new-api/relay/constant" "github.com/QuantumNous/new-api/relay/helper" "github.com/QuantumNous/new-api/service" "github.com/QuantumNous/new-api/setting/model_setting" @@ -20,10 +22,37 @@ import ( func ResponsesHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *types.NewAPIError) { info.InitChannelMeta(c) + if info.RelayMode == relayconstant.RelayModeResponsesCompact { + switch info.ApiType { + case appconstant.APITypeOpenAI, appconstant.APITypeCodex: + default: + return types.NewErrorWithStatusCode( + fmt.Errorf("unsupported endpoint %q for api type %d", "/v1/responses/compact", info.ApiType), + types.ErrorCodeInvalidRequest, + http.StatusBadRequest, + types.ErrOptionWithSkipRetry(), + ) + } + } - responsesReq, ok := info.Request.(*dto.OpenAIResponsesRequest) - if !ok { - return types.NewErrorWithStatusCode(fmt.Errorf("invalid request type, expected dto.OpenAIResponsesRequest, got %T", info.Request), types.ErrorCodeInvalidRequest, http.StatusBadRequest, types.ErrOptionWithSkipRetry()) + var responsesReq *dto.OpenAIResponsesRequest + switch req := info.Request.(type) { + case *dto.OpenAIResponsesRequest: + responsesReq = req + case *dto.OpenAIResponsesCompactionRequest: + responsesReq = &dto.OpenAIResponsesRequest{ + Model: req.Model, + Input: req.Input, + Instructions: req.Instructions, + PreviousResponseID: req.PreviousResponseID, + } + default: + return types.NewErrorWithStatusCode( + fmt.Errorf("invalid request type, expected dto.OpenAIResponsesRequest or dto.OpenAIResponsesCompactionRequest, got %T", info.Request), + types.ErrorCodeInvalidRequest, + http.StatusBadRequest, + types.ErrOptionWithSkipRetry(), + ) } request, err := common.DeepCopy(responsesReq) @@ -105,10 +134,28 @@ func ResponsesHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError * return newAPIError } + usageDto := usage.(*dto.Usage) + if info.RelayMode == relayconstant.RelayModeResponsesCompact { + originModelName := info.OriginModelName + originPriceData := info.PriceData + + _, err := helper.ModelPriceHelper(c, info, info.GetEstimatePromptTokens(), &types.TokenCountMeta{}) + if err != nil { + info.OriginModelName = originModelName + info.PriceData = originPriceData + return types.NewError(err, types.ErrorCodeModelPriceError, types.ErrOptionWithSkipRetry()) + } + postConsumeQuota(c, info, usageDto) + + info.OriginModelName = originModelName + info.PriceData = originPriceData + return nil + } + if strings.HasPrefix(info.OriginModelName, "gpt-4o-audio") { - service.PostAudioConsumeQuota(c, info, usage.(*dto.Usage), "") + service.PostAudioConsumeQuota(c, info, usageDto, "") } else { - postConsumeQuota(c, info, usage.(*dto.Usage)) + postConsumeQuota(c, info, usageDto) } return nil } diff --git a/router/relay-router.go b/router/relay-router.go index 267459e2c..3dc4f5c16 100644 --- a/router/relay-router.go +++ b/router/relay-router.go @@ -93,6 +93,9 @@ func SetRelayRouter(router *gin.Engine) { httpRouter.POST("/responses", func(c *gin.Context) { controller.Relay(c, types.RelayFormatOpenAIResponses) }) + httpRouter.POST("/responses/compact", func(c *gin.Context) { + controller.Relay(c, types.RelayFormatOpenAIResponsesCompaction) + }) // image related routes httpRouter.POST("/edits", func(c *gin.Context) { diff --git a/setting/ratio_setting/compact_suffix.go b/setting/ratio_setting/compact_suffix.go new file mode 100644 index 000000000..2d2fe3c34 --- /dev/null +++ b/setting/ratio_setting/compact_suffix.go @@ -0,0 +1,13 @@ +package ratio_setting + +import "strings" + +const CompactModelSuffix = "-openai-compact" +const CompactWildcardModelKey = "*" + CompactModelSuffix + +func WithCompactModelSuffix(modelName string) string { + if strings.HasSuffix(modelName, CompactModelSuffix) { + return modelName + } + return modelName + CompactModelSuffix +} diff --git a/setting/ratio_setting/model_ratio.go b/setting/ratio_setting/model_ratio.go index 039d4a021..e55556fa1 100644 --- a/setting/ratio_setting/model_ratio.go +++ b/setting/ratio_setting/model_ratio.go @@ -411,6 +411,17 @@ func GetModelPrice(name string, printErr bool) (float64, bool) { name = FormatMatchingModelName(name) + if strings.HasSuffix(name, CompactModelSuffix) { + price, ok := modelPriceMap[CompactWildcardModelKey] + if !ok { + if printErr { + common.SysError("model price not found: " + name) + } + return -1, false + } + return price, true + } + price, ok := modelPriceMap[name] if !ok { if printErr { @@ -448,6 +459,12 @@ func GetModelRatio(name string) (float64, bool, string) { ratio, ok := modelRatioMap[name] if !ok { + if strings.HasSuffix(name, CompactModelSuffix) { + if wildcardRatio, ok := modelRatioMap[CompactWildcardModelKey]; ok { + return wildcardRatio, true, name + } + return 0, true, name + } return 37.5, operation_setting.SelfUseModeEnabled, name } return ratio, true, name diff --git a/types/relay_format.go b/types/relay_format.go index 6d94a70bc..9b4c86f24 100644 --- a/types/relay_format.go +++ b/types/relay_format.go @@ -3,15 +3,16 @@ package types type RelayFormat string const ( - RelayFormatOpenAI RelayFormat = "openai" - RelayFormatClaude = "claude" - RelayFormatGemini = "gemini" - RelayFormatOpenAIResponses = "openai_responses" - RelayFormatOpenAIAudio = "openai_audio" - RelayFormatOpenAIImage = "openai_image" - RelayFormatOpenAIRealtime = "openai_realtime" - RelayFormatRerank = "rerank" - RelayFormatEmbedding = "embedding" + RelayFormatOpenAI RelayFormat = "openai" + RelayFormatClaude = "claude" + RelayFormatGemini = "gemini" + RelayFormatOpenAIResponses = "openai_responses" + RelayFormatOpenAIResponsesCompaction = "openai_responses_compaction" + RelayFormatOpenAIAudio = "openai_audio" + RelayFormatOpenAIImage = "openai_image" + RelayFormatOpenAIRealtime = "openai_realtime" + RelayFormatRerank = "rerank" + RelayFormatEmbedding = "embedding" RelayFormatTask = "task" RelayFormatMjProxy = "mj_proxy" diff --git a/web/src/components/table/channels/modals/ModelTestModal.jsx b/web/src/components/table/channels/modals/ModelTestModal.jsx index 9556d56b7..47aa66cbe 100644 --- a/web/src/components/table/channels/modals/ModelTestModal.jsx +++ b/web/src/components/table/channels/modals/ModelTestModal.jsx @@ -66,6 +66,10 @@ const ModelTestModal = ({ { value: '', label: t('自动检测') }, { value: 'openai', label: 'OpenAI (/v1/chat/completions)' }, { value: 'openai-response', label: 'OpenAI Response (/v1/responses)' }, + { + value: 'openai-response-compact', + label: 'OpenAI Response Compaction (/v1/responses/compact)', + }, { value: 'anthropic', label: 'Anthropic (/v1/messages)' }, { value: 'gemini', diff --git a/web/src/components/table/models/modals/EditModelModal.jsx b/web/src/components/table/models/modals/EditModelModal.jsx index 727a8e4e8..3d905b1af 100644 --- a/web/src/components/table/models/modals/EditModelModal.jsx +++ b/web/src/components/table/models/modals/EditModelModal.jsx @@ -45,6 +45,7 @@ const { Text, Title } = Typography; const ENDPOINT_TEMPLATE = { openai: { path: '/v1/chat/completions', method: 'POST' }, 'openai-response': { path: '/v1/responses', method: 'POST' }, + 'openai-response-compact': { path: '/v1/responses/compact', method: 'POST' }, anthropic: { path: '/v1/messages', method: 'POST' }, gemini: { path: '/v1beta/models/{model}:generateContent', method: 'POST' }, 'jina-rerank': { path: '/v1/rerank', method: 'POST' }, diff --git a/web/src/components/table/models/modals/EditPrefillGroupModal.jsx b/web/src/components/table/models/modals/EditPrefillGroupModal.jsx index 817ae40dd..bb7af45cd 100644 --- a/web/src/components/table/models/modals/EditPrefillGroupModal.jsx +++ b/web/src/components/table/models/modals/EditPrefillGroupModal.jsx @@ -43,6 +43,7 @@ const { Text, Title } = Typography; const ENDPOINT_TEMPLATE = { openai: { path: '/v1/chat/completions', method: 'POST' }, 'openai-response': { path: '/v1/responses', method: 'POST' }, + 'openai-response-compact': { path: '/v1/responses/compact', method: 'POST' }, anthropic: { path: '/v1/messages', method: 'POST' }, gemini: { path: '/v1beta/models/{model}:generateContent', method: 'POST' }, 'jina-rerank': { path: '/v1/rerank', method: 'POST' }, diff --git a/web/src/constants/common.constant.js b/web/src/constants/common.constant.js index a142a0eb5..8737f7299 100644 --- a/web/src/constants/common.constant.js +++ b/web/src/constants/common.constant.js @@ -26,6 +26,7 @@ export const TABLE_COMPACT_MODES_KEY = 'table_compact_modes'; export const API_ENDPOINTS = [ '/v1/chat/completions', '/v1/responses', + '/v1/responses/compact', '/v1/messages', '/v1beta/models', '/v1/embeddings',