mirror of
https://github.com/QuantumNous/new-api.git
synced 2026-03-30 02:25:00 +00:00
feat: openai response /v1/response/compact (#2644)
* feat: openai response /v1/response/compact * feat: /v1/response/compact bill * feat: /v1/response/compact * feat: /v1/responses/compact -> codex channel * feat: /v1/responses/compact -> codex channel * feat: /v1/responses/compact -> codex channel * feat: codex channel default models * feat: compact model price * feat: /v1/responses/compact test
This commit is contained in:
@@ -53,6 +53,8 @@ func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.Rela
|
||||
}
|
||||
|
||||
func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
|
||||
isCompact := info != nil && info.RelayMode == relayconstant.RelayModeResponsesCompact
|
||||
|
||||
if info != nil && info.ChannelSetting.SystemPrompt != "" {
|
||||
systemPrompt := info.ChannelSetting.SystemPrompt
|
||||
|
||||
@@ -88,7 +90,9 @@ func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommo
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if isCompact {
|
||||
return request, nil
|
||||
}
|
||||
// codex: store must be false
|
||||
request.Store = json.RawMessage("false")
|
||||
// rm max_output_tokens
|
||||
@@ -102,10 +106,14 @@ func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, request
|
||||
}
|
||||
|
||||
func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (usage any, err *types.NewAPIError) {
|
||||
if info.RelayMode != relayconstant.RelayModeResponses {
|
||||
if info.RelayMode != relayconstant.RelayModeResponses && info.RelayMode != relayconstant.RelayModeResponsesCompact {
|
||||
return nil, types.NewError(errors.New("codex channel: endpoint not supported"), types.ErrorCodeInvalidRequest)
|
||||
}
|
||||
|
||||
if info.RelayMode == relayconstant.RelayModeResponsesCompact {
|
||||
return openai.OaiResponsesCompactionHandler(c, resp)
|
||||
}
|
||||
|
||||
if info.IsStream {
|
||||
return openai.OaiResponsesStreamHandler(c, info, resp)
|
||||
}
|
||||
@@ -121,10 +129,14 @@ func (a *Adaptor) GetChannelName() string {
|
||||
}
|
||||
|
||||
func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
|
||||
if info.RelayMode != relayconstant.RelayModeResponses {
|
||||
return "", errors.New("codex channel: only /v1/responses is supported")
|
||||
if info.RelayMode != relayconstant.RelayModeResponses && info.RelayMode != relayconstant.RelayModeResponsesCompact {
|
||||
return "", errors.New("codex channel: only /v1/responses and /v1/responses/compact are supported")
|
||||
}
|
||||
return relaycommon.GetFullRequestURL(info.ChannelBaseUrl, "/backend-api/codex/responses", info.ChannelType), nil
|
||||
path := "/backend-api/codex/responses"
|
||||
if info.RelayMode == relayconstant.RelayModeResponsesCompact {
|
||||
path = "/backend-api/codex/responses/compact"
|
||||
}
|
||||
return relaycommon.GetFullRequestURL(info.ChannelBaseUrl, path, info.ChannelType), nil
|
||||
}
|
||||
|
||||
func (a *Adaptor) SetupRequestHeader(c *gin.Context, req *http.Header, info *relaycommon.RelayInfo) error {
|
||||
|
||||
@@ -1,9 +1,25 @@
|
||||
package codex
|
||||
|
||||
var ModelList = []string{
|
||||
import (
|
||||
"github.com/QuantumNous/new-api/setting/ratio_setting"
|
||||
"github.com/samber/lo"
|
||||
)
|
||||
|
||||
var baseModelList = []string{
|
||||
"gpt-5", "gpt-5-codex", "gpt-5-codex-mini",
|
||||
"gpt-5.1", "gpt-5.1-codex", "gpt-5.1-codex-max", "gpt-5.1-codex-mini",
|
||||
"gpt-5.2", "gpt-5.2-codex",
|
||||
}
|
||||
|
||||
var ModelList = withCompactModelSuffix(baseModelList)
|
||||
|
||||
const ChannelName = "codex"
|
||||
|
||||
func withCompactModelSuffix(models []string) []string {
|
||||
out := make([]string, 0, len(models)*2)
|
||||
out = append(out, models...)
|
||||
out = append(out, lo.Map(models, func(model string, _ int) string {
|
||||
return ratio_setting.WithCompactModelSuffix(model)
|
||||
})...)
|
||||
return lo.Uniq(out)
|
||||
}
|
||||
|
||||
@@ -620,6 +620,8 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycom
|
||||
} else {
|
||||
usage, err = OaiResponsesHandler(c, info, resp)
|
||||
}
|
||||
case relayconstant.RelayModeResponsesCompact:
|
||||
usage, err = OaiResponsesCompactionHandler(c, resp)
|
||||
default:
|
||||
if info.IsStream {
|
||||
usage, err = OaiStreamHandler(c, info, resp)
|
||||
|
||||
44
relay/channel/openai/relay_responses_compact.go
Normal file
44
relay/channel/openai/relay_responses_compact.go
Normal file
@@ -0,0 +1,44 @@
|
||||
package openai
|
||||
|
||||
import (
|
||||
"io"
|
||||
"net/http"
|
||||
|
||||
"github.com/QuantumNous/new-api/common"
|
||||
"github.com/QuantumNous/new-api/dto"
|
||||
"github.com/QuantumNous/new-api/service"
|
||||
"github.com/QuantumNous/new-api/types"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
func OaiResponsesCompactionHandler(c *gin.Context, resp *http.Response) (*dto.Usage, *types.NewAPIError) {
|
||||
defer service.CloseResponseBodyGracefully(resp)
|
||||
|
||||
responseBody, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return nil, types.NewOpenAIError(err, types.ErrorCodeReadResponseBodyFailed, http.StatusInternalServerError)
|
||||
}
|
||||
|
||||
var compactResp dto.OpenAIResponsesCompactionResponse
|
||||
if err := common.Unmarshal(responseBody, &compactResp); err != nil {
|
||||
return nil, types.NewOpenAIError(err, types.ErrorCodeBadResponseBody, http.StatusInternalServerError)
|
||||
}
|
||||
if oaiError := compactResp.GetOpenAIError(); oaiError != nil && oaiError.Type != "" {
|
||||
return nil, types.WithOpenAIError(*oaiError, resp.StatusCode)
|
||||
}
|
||||
|
||||
service.IOCopyBytesGracefully(c, resp, responseBody)
|
||||
|
||||
usage := dto.Usage{}
|
||||
if compactResp.Usage != nil {
|
||||
usage.PromptTokens = compactResp.Usage.InputTokens
|
||||
usage.CompletionTokens = compactResp.Usage.OutputTokens
|
||||
usage.TotalTokens = compactResp.Usage.TotalTokens
|
||||
if compactResp.Usage.InputTokensDetails != nil {
|
||||
usage.PromptTokensDetails.CachedTokens = compactResp.Usage.InputTokensDetails.CachedTokens
|
||||
}
|
||||
}
|
||||
|
||||
return &usage, nil
|
||||
}
|
||||
@@ -481,6 +481,11 @@ func GenRelayInfo(c *gin.Context, relayFormat types.RelayFormat, request dto.Req
|
||||
break
|
||||
}
|
||||
err = errors.New("request is not a OpenAIResponsesRequest")
|
||||
case types.RelayFormatOpenAIResponsesCompaction:
|
||||
if request, ok := request.(*dto.OpenAIResponsesCompactionRequest); ok {
|
||||
return GenRelayInfoResponsesCompaction(c, request), nil
|
||||
}
|
||||
return nil, errors.New("request is not a OpenAIResponsesCompactionRequest")
|
||||
case types.RelayFormatTask:
|
||||
info = genBaseRelayInfo(c, nil)
|
||||
case types.RelayFormatMjProxy:
|
||||
@@ -531,6 +536,15 @@ func (info *RelayInfo) AppendRequestConversion(format types.RelayFormat) {
|
||||
info.RequestConversionChain = append(info.RequestConversionChain, format)
|
||||
}
|
||||
|
||||
func GenRelayInfoResponsesCompaction(c *gin.Context, request *dto.OpenAIResponsesCompactionRequest) *RelayInfo {
|
||||
info := genBaseRelayInfo(c, request)
|
||||
if info.RelayMode == relayconstant.RelayModeUnknown {
|
||||
info.RelayMode = relayconstant.RelayModeResponsesCompact
|
||||
}
|
||||
info.RelayFormat = types.RelayFormatOpenAIResponsesCompaction
|
||||
return info
|
||||
}
|
||||
|
||||
//func (info *RelayInfo) SetPromptTokens(promptTokens int) {
|
||||
// info.promptTokens = promptTokens
|
||||
//}
|
||||
|
||||
@@ -50,6 +50,8 @@ const (
|
||||
RelayModeRealtime
|
||||
|
||||
RelayModeGemini
|
||||
|
||||
RelayModeResponsesCompact
|
||||
)
|
||||
|
||||
func Path2RelayMode(path string) int {
|
||||
@@ -70,6 +72,8 @@ func Path2RelayMode(path string) int {
|
||||
relayMode = RelayModeImagesEdits
|
||||
} else if strings.HasPrefix(path, "/v1/edits") {
|
||||
relayMode = RelayModeEdits
|
||||
} else if strings.HasPrefix(path, "/v1/responses/compact") {
|
||||
relayMode = RelayModeResponsesCompact
|
||||
} else if strings.HasPrefix(path, "/v1/responses") {
|
||||
relayMode = RelayModeResponses
|
||||
} else if strings.HasPrefix(path, "/v1/audio/speech") {
|
||||
|
||||
@@ -4,13 +4,27 @@ import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/QuantumNous/new-api/dto"
|
||||
"github.com/QuantumNous/new-api/relay/common"
|
||||
relayconstant "github.com/QuantumNous/new-api/relay/constant"
|
||||
"github.com/QuantumNous/new-api/setting/ratio_setting"
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
func ModelMappedHelper(c *gin.Context, info *common.RelayInfo, request dto.Request) error {
|
||||
if info.ChannelMeta == nil {
|
||||
info.ChannelMeta = &common.ChannelMeta{}
|
||||
}
|
||||
|
||||
isResponsesCompact := info.RelayMode == relayconstant.RelayModeResponsesCompact
|
||||
originModelName := info.OriginModelName
|
||||
mappingModelName := originModelName
|
||||
if isResponsesCompact && strings.HasSuffix(originModelName, ratio_setting.CompactModelSuffix) {
|
||||
mappingModelName = strings.TrimSuffix(originModelName, ratio_setting.CompactModelSuffix)
|
||||
}
|
||||
|
||||
// map model name
|
||||
modelMapping := c.GetString("model_mapping")
|
||||
if modelMapping != "" && modelMapping != "{}" {
|
||||
@@ -21,7 +35,7 @@ func ModelMappedHelper(c *gin.Context, info *common.RelayInfo, request dto.Reque
|
||||
}
|
||||
|
||||
// 支持链式模型重定向,最终使用链尾的模型
|
||||
currentModel := info.OriginModelName
|
||||
currentModel := mappingModelName
|
||||
visitedModels := map[string]bool{
|
||||
currentModel: true,
|
||||
}
|
||||
@@ -51,6 +65,15 @@ func ModelMappedHelper(c *gin.Context, info *common.RelayInfo, request dto.Reque
|
||||
info.UpstreamModelName = currentModel
|
||||
}
|
||||
}
|
||||
|
||||
if isResponsesCompact {
|
||||
finalUpstreamModelName := mappingModelName
|
||||
if info.IsModelMapped && info.UpstreamModelName != "" {
|
||||
finalUpstreamModelName = info.UpstreamModelName
|
||||
}
|
||||
info.UpstreamModelName = finalUpstreamModelName
|
||||
info.OriginModelName = ratio_setting.WithCompactModelSuffix(finalUpstreamModelName)
|
||||
}
|
||||
if request != nil {
|
||||
request.SetModelName(info.UpstreamModelName)
|
||||
}
|
||||
|
||||
@@ -34,6 +34,8 @@ func GetAndValidateRequest(c *gin.Context, format types.RelayFormat) (request dt
|
||||
request, err = GetAndValidateClaudeRequest(c)
|
||||
case types.RelayFormatOpenAIResponses:
|
||||
request, err = GetAndValidateResponsesRequest(c)
|
||||
case types.RelayFormatOpenAIResponsesCompaction:
|
||||
request, err = GetAndValidateResponsesCompactionRequest(c)
|
||||
|
||||
case types.RelayFormatOpenAIImage:
|
||||
request, err = GetAndValidOpenAIImageRequest(c, relayMode)
|
||||
@@ -125,6 +127,17 @@ func GetAndValidateResponsesRequest(c *gin.Context) (*dto.OpenAIResponsesRequest
|
||||
return request, nil
|
||||
}
|
||||
|
||||
func GetAndValidateResponsesCompactionRequest(c *gin.Context) (*dto.OpenAIResponsesCompactionRequest, error) {
|
||||
request := &dto.OpenAIResponsesCompactionRequest{}
|
||||
if err := common.UnmarshalBodyReusable(c, request); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if request.Model == "" {
|
||||
return nil, errors.New("model is required")
|
||||
}
|
||||
return request, nil
|
||||
}
|
||||
|
||||
func GetAndValidOpenAIImageRequest(c *gin.Context, relayMode int) (*dto.ImageRequest, error) {
|
||||
imageRequest := &dto.ImageRequest{}
|
||||
|
||||
|
||||
@@ -8,8 +8,10 @@ import (
|
||||
"strings"
|
||||
|
||||
"github.com/QuantumNous/new-api/common"
|
||||
appconstant "github.com/QuantumNous/new-api/constant"
|
||||
"github.com/QuantumNous/new-api/dto"
|
||||
relaycommon "github.com/QuantumNous/new-api/relay/common"
|
||||
relayconstant "github.com/QuantumNous/new-api/relay/constant"
|
||||
"github.com/QuantumNous/new-api/relay/helper"
|
||||
"github.com/QuantumNous/new-api/service"
|
||||
"github.com/QuantumNous/new-api/setting/model_setting"
|
||||
@@ -20,10 +22,37 @@ import (
|
||||
|
||||
func ResponsesHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *types.NewAPIError) {
|
||||
info.InitChannelMeta(c)
|
||||
if info.RelayMode == relayconstant.RelayModeResponsesCompact {
|
||||
switch info.ApiType {
|
||||
case appconstant.APITypeOpenAI, appconstant.APITypeCodex:
|
||||
default:
|
||||
return types.NewErrorWithStatusCode(
|
||||
fmt.Errorf("unsupported endpoint %q for api type %d", "/v1/responses/compact", info.ApiType),
|
||||
types.ErrorCodeInvalidRequest,
|
||||
http.StatusBadRequest,
|
||||
types.ErrOptionWithSkipRetry(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
responsesReq, ok := info.Request.(*dto.OpenAIResponsesRequest)
|
||||
if !ok {
|
||||
return types.NewErrorWithStatusCode(fmt.Errorf("invalid request type, expected dto.OpenAIResponsesRequest, got %T", info.Request), types.ErrorCodeInvalidRequest, http.StatusBadRequest, types.ErrOptionWithSkipRetry())
|
||||
var responsesReq *dto.OpenAIResponsesRequest
|
||||
switch req := info.Request.(type) {
|
||||
case *dto.OpenAIResponsesRequest:
|
||||
responsesReq = req
|
||||
case *dto.OpenAIResponsesCompactionRequest:
|
||||
responsesReq = &dto.OpenAIResponsesRequest{
|
||||
Model: req.Model,
|
||||
Input: req.Input,
|
||||
Instructions: req.Instructions,
|
||||
PreviousResponseID: req.PreviousResponseID,
|
||||
}
|
||||
default:
|
||||
return types.NewErrorWithStatusCode(
|
||||
fmt.Errorf("invalid request type, expected dto.OpenAIResponsesRequest or dto.OpenAIResponsesCompactionRequest, got %T", info.Request),
|
||||
types.ErrorCodeInvalidRequest,
|
||||
http.StatusBadRequest,
|
||||
types.ErrOptionWithSkipRetry(),
|
||||
)
|
||||
}
|
||||
|
||||
request, err := common.DeepCopy(responsesReq)
|
||||
@@ -105,10 +134,28 @@ func ResponsesHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *
|
||||
return newAPIError
|
||||
}
|
||||
|
||||
usageDto := usage.(*dto.Usage)
|
||||
if info.RelayMode == relayconstant.RelayModeResponsesCompact {
|
||||
originModelName := info.OriginModelName
|
||||
originPriceData := info.PriceData
|
||||
|
||||
_, err := helper.ModelPriceHelper(c, info, info.GetEstimatePromptTokens(), &types.TokenCountMeta{})
|
||||
if err != nil {
|
||||
info.OriginModelName = originModelName
|
||||
info.PriceData = originPriceData
|
||||
return types.NewError(err, types.ErrorCodeModelPriceError, types.ErrOptionWithSkipRetry())
|
||||
}
|
||||
postConsumeQuota(c, info, usageDto)
|
||||
|
||||
info.OriginModelName = originModelName
|
||||
info.PriceData = originPriceData
|
||||
return nil
|
||||
}
|
||||
|
||||
if strings.HasPrefix(info.OriginModelName, "gpt-4o-audio") {
|
||||
service.PostAudioConsumeQuota(c, info, usage.(*dto.Usage), "")
|
||||
service.PostAudioConsumeQuota(c, info, usageDto, "")
|
||||
} else {
|
||||
postConsumeQuota(c, info, usage.(*dto.Usage))
|
||||
postConsumeQuota(c, info, usageDto)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user