mirror of
https://github.com/QuantumNous/new-api.git
synced 2026-03-30 04:40:59 +00:00
feat: refactor token estimation logic
- Introduced new OpenAI text models in `common/model.go`.
- Added the `IsOpenAITextModel` function to check for OpenAI text models.
- Refactored token estimation methods across various channels to use estimated prompt tokens instead of direct prompt token counts.
- Updated related functions and structures to accommodate the new token estimation approach, enhancing overall token management.
This commit is contained in:
@@ -73,6 +73,11 @@ type ChannelMeta struct {
|
||||
SupportStreamOptions bool // 是否支持流式选项
|
||||
}
|
||||
|
||||
// TokenCountMeta holds token-count bookkeeping for a relay request.
// It is embedded in RelayInfo, so its fields are reachable directly on a
// *RelayInfo value.
type TokenCountMeta struct {
|
||||
//promptTokens int
|
||||
// estimatePromptTokens is the estimated number of prompt tokens for the
// request. genBaseRelayInfo seeds it from constant.ContextKeyEstimatedTokens.
estimatePromptTokens int
|
||||
}
|
||||
|
||||
type RelayInfo struct {
|
||||
TokenId int
|
||||
TokenKey string
|
||||
@@ -91,7 +96,6 @@ type RelayInfo struct {
|
||||
RelayMode int
|
||||
OriginModelName string
|
||||
RequestURLPath string
|
||||
PromptTokens int
|
||||
ShouldIncludeUsage bool
|
||||
DisablePing bool // 是否禁止向下游发送自定义 Ping
|
||||
ClientWs *websocket.Conn
|
||||
@@ -115,6 +119,7 @@ type RelayInfo struct {
|
||||
Request dto.Request
|
||||
|
||||
ThinkingContentInfo
|
||||
TokenCountMeta
|
||||
*ClaudeConvertInfo
|
||||
*RerankerInfo
|
||||
*ResponsesUsageInfo
|
||||
@@ -189,7 +194,7 @@ func (info *RelayInfo) ToString() string {
|
||||
fmt.Fprintf(b, "IsPlayground: %t, ", info.IsPlayground)
|
||||
fmt.Fprintf(b, "RequestURLPath: %q, ", info.RequestURLPath)
|
||||
fmt.Fprintf(b, "OriginModelName: %q, ", info.OriginModelName)
|
||||
fmt.Fprintf(b, "PromptTokens: %d, ", info.PromptTokens)
|
||||
fmt.Fprintf(b, "EstimatePromptTokens: %d, ", info.estimatePromptTokens)
|
||||
fmt.Fprintf(b, "ShouldIncludeUsage: %t, ", info.ShouldIncludeUsage)
|
||||
fmt.Fprintf(b, "DisablePing: %t, ", info.DisablePing)
|
||||
fmt.Fprintf(b, "SendResponseCount: %d, ", info.SendResponseCount)
|
||||
@@ -391,7 +396,6 @@ func genBaseRelayInfo(c *gin.Context, request dto.Request) *RelayInfo {
|
||||
UserEmail: common.GetContextKeyString(c, constant.ContextKeyUserEmail),
|
||||
|
||||
OriginModelName: common.GetContextKeyString(c, constant.ContextKeyOriginalModel),
|
||||
PromptTokens: common.GetContextKeyInt(c, constant.ContextKeyPromptTokens),
|
||||
|
||||
TokenId: common.GetContextKeyInt(c, constant.ContextKeyTokenId),
|
||||
TokenKey: common.GetContextKeyString(c, constant.ContextKeyTokenKey),
|
||||
@@ -408,6 +412,10 @@ func genBaseRelayInfo(c *gin.Context, request dto.Request) *RelayInfo {
|
||||
IsFirstThinkingContent: true,
|
||||
SendLastThinkingContent: false,
|
||||
},
|
||||
TokenCountMeta: TokenCountMeta{
|
||||
//promptTokens: common.GetContextKeyInt(c, constant.ContextKeyPromptTokens),
|
||||
estimatePromptTokens: common.GetContextKeyInt(c, constant.ContextKeyEstimatedTokens),
|
||||
},
|
||||
}
|
||||
|
||||
if info.RelayMode == relayconstant.RelayModeUnknown {
|
||||
@@ -463,8 +471,16 @@ func GenRelayInfo(c *gin.Context, relayFormat types.RelayFormat, request dto.Req
|
||||
}
|
||||
}
|
||||
|
||||
func (info *RelayInfo) SetPromptTokens(promptTokens int) {
|
||||
info.PromptTokens = promptTokens
|
||||
//func (info *RelayInfo) SetPromptTokens(promptTokens int) {
|
||||
// info.promptTokens = promptTokens
|
||||
//}
|
||||
|
||||
// SetEstimatePromptTokens overwrites the estimated prompt token count stored
// on info with promptTokens.
func (info *RelayInfo) SetEstimatePromptTokens(promptTokens int) {
|
||||
info.estimatePromptTokens = promptTokens
|
||||
}
|
||||
|
||||
// GetEstimatePromptTokens returns the estimated prompt token count currently
// stored on info.
func (info *RelayInfo) GetEstimatePromptTokens() int {
|
||||
return info.estimatePromptTokens
|
||||
}
|
||||
|
||||
func (info *RelayInfo) SetFirstResponseTime() {
|
||||
|
||||
Reference in New Issue
Block a user