perf: use a goroutine to handle quota post consumption (#364)

This commit is contained in:
JustSong 2023-08-12 19:36:31 +08:00
parent e0b4f96b5b
commit 0e9ff8825e

View File

@@ -305,51 +305,54 @@ func relayTextHelper(c *gin.Context, relayMode int) *OpenAIErrorWithStatusCode {
} }
var textResponse TextResponse var textResponse TextResponse
tokenName := c.GetString("token_name")
channelId := c.GetInt("channel_id")
defer func() { defer func() {
c.Writer.Flush() // c.Writer.Flush()
if consumeQuota { go func() {
quota := 0 if consumeQuota {
completionRatio := 1.0 quota := 0
if strings.HasPrefix(textRequest.Model, "gpt-3.5") { completionRatio := 1.0
completionRatio = 1.333333 if strings.HasPrefix(textRequest.Model, "gpt-3.5") {
} completionRatio = 1.333333
if strings.HasPrefix(textRequest.Model, "gpt-4") { }
completionRatio = 2 if strings.HasPrefix(textRequest.Model, "gpt-4") {
} completionRatio = 2
}
promptTokens = textResponse.Usage.PromptTokens promptTokens = textResponse.Usage.PromptTokens
completionTokens = textResponse.Usage.CompletionTokens completionTokens = textResponse.Usage.CompletionTokens
quota = promptTokens + int(float64(completionTokens)*completionRatio) quota = promptTokens + int(float64(completionTokens)*completionRatio)
quota = int(float64(quota) * ratio) quota = int(float64(quota) * ratio)
if ratio != 0 && quota <= 0 { if ratio != 0 && quota <= 0 {
quota = 1 quota = 1
}
totalTokens := promptTokens + completionTokens
if totalTokens == 0 {
// in this case, must be some error happened
// we cannot just return, because we may have to return the pre-consumed quota
quota = 0
}
quotaDelta := quota - preConsumedQuota
err := model.PostConsumeTokenQuota(tokenId, quotaDelta)
if err != nil {
common.SysError("error consuming token remain quota: " + err.Error())
}
err = model.CacheUpdateUserQuota(userId)
if err != nil {
common.SysError("error update user quota cache: " + err.Error())
}
if quota != 0 {
logContent := fmt.Sprintf("模型倍率 %.2f,分组倍率 %.2f", modelRatio, groupRatio)
model.RecordConsumeLog(userId, promptTokens, completionTokens, textRequest.Model, tokenName, quota, logContent)
model.UpdateUserUsedQuotaAndRequestCount(userId, quota)
model.UpdateChannelUsedQuota(channelId, quota)
}
} }
totalTokens := promptTokens + completionTokens }()
if totalTokens == 0 {
// in this case, must be some error happened
// we cannot just return, because we may have to return the pre-consumed quota
quota = 0
}
quotaDelta := quota - preConsumedQuota
err := model.PostConsumeTokenQuota(tokenId, quotaDelta)
if err != nil {
common.SysError("error consuming token remain quota: " + err.Error())
}
err = model.CacheUpdateUserQuota(userId)
if err != nil {
common.SysError("error update user quota cache: " + err.Error())
}
if quota != 0 {
tokenName := c.GetString("token_name")
logContent := fmt.Sprintf("模型倍率 %.2f,分组倍率 %.2f", modelRatio, groupRatio)
model.RecordConsumeLog(userId, promptTokens, completionTokens, textRequest.Model, tokenName, quota, logContent)
model.UpdateUserUsedQuotaAndRequestCount(userId, quota)
channelId := c.GetInt("channel_id")
model.UpdateChannelUsedQuota(channelId, quota)
}
}
}() }()
switch apiType { switch apiType {
case APITypeOpenAI: case APITypeOpenAI: