diff --git a/controller/relay-text.go b/controller/relay-text.go
index a6a276f3..1bb463fa 100644
--- a/controller/relay-text.go
+++ b/controller/relay-text.go
@@ -305,51 +305,54 @@ func relayTextHelper(c *gin.Context, relayMode int) *OpenAIErrorWithStatusCode {
 	}
 
 	var textResponse TextResponse
+	tokenName := c.GetString("token_name")
+	channelId := c.GetInt("channel_id")
 	defer func() {
-		c.Writer.Flush()
-		if consumeQuota {
-			quota := 0
-			completionRatio := 1.0
-			if strings.HasPrefix(textRequest.Model, "gpt-3.5") {
-				completionRatio = 1.333333
-			}
-			if strings.HasPrefix(textRequest.Model, "gpt-4") {
-				completionRatio = 2
-			}
+		// c.Writer.Flush()
+		go func() {
+			if consumeQuota {
+				quota := 0
+				completionRatio := 1.0
+				if strings.HasPrefix(textRequest.Model, "gpt-3.5") {
+					completionRatio = 1.333333
+				}
+				if strings.HasPrefix(textRequest.Model, "gpt-4") {
+					completionRatio = 2
+				}
 
-			promptTokens = textResponse.Usage.PromptTokens
-			completionTokens = textResponse.Usage.CompletionTokens
+				promptTokens = textResponse.Usage.PromptTokens
+				completionTokens = textResponse.Usage.CompletionTokens
 
-			quota = promptTokens + int(float64(completionTokens)*completionRatio)
-			quota = int(float64(quota) * ratio)
-			if ratio != 0 && quota <= 0 {
-				quota = 1
+				quota = promptTokens + int(float64(completionTokens)*completionRatio)
+				quota = int(float64(quota) * ratio)
+				if ratio != 0 && quota <= 0 {
+					quota = 1
+				}
+				totalTokens := promptTokens + completionTokens
+				if totalTokens == 0 {
+					// in this case, must be some error happened
+					// we cannot just return, because we may have to return the pre-consumed quota
+					quota = 0
+				}
+				quotaDelta := quota - preConsumedQuota
+				err := model.PostConsumeTokenQuota(tokenId, quotaDelta)
+				if err != nil {
+					common.SysError("error consuming token remain quota: " + err.Error())
+				}
+				err = model.CacheUpdateUserQuota(userId)
+				if err != nil {
+					common.SysError("error update user quota cache: " + err.Error())
+				}
+				if quota != 0 {
+					logContent := fmt.Sprintf("模型倍率 %.2f,分组倍率 %.2f", modelRatio, groupRatio)
+					model.RecordConsumeLog(userId, promptTokens, completionTokens, textRequest.Model, tokenName, quota, logContent)
+					model.UpdateUserUsedQuotaAndRequestCount(userId, quota)
+
+					model.UpdateChannelUsedQuota(channelId, quota)
+				}
 			}
-			totalTokens := promptTokens + completionTokens
-			if totalTokens == 0 {
-				// in this case, must be some error happened
-				// we cannot just return, because we may have to return the pre-consumed quota
-				quota = 0
-			}
-			quotaDelta := quota - preConsumedQuota
-			err := model.PostConsumeTokenQuota(tokenId, quotaDelta)
-			if err != nil {
-				common.SysError("error consuming token remain quota: " + err.Error())
-			}
-			err = model.CacheUpdateUserQuota(userId)
-			if err != nil {
-				common.SysError("error update user quota cache: " + err.Error())
-			}
-			if quota != 0 {
-				tokenName := c.GetString("token_name")
-				logContent := fmt.Sprintf("模型倍率 %.2f,分组倍率 %.2f", modelRatio, groupRatio)
-				model.RecordConsumeLog(userId, promptTokens, completionTokens, textRequest.Model, tokenName, quota, logContent)
-				model.UpdateUserUsedQuotaAndRequestCount(userId, quota)
-				channelId := c.GetInt("channel_id")
-				model.UpdateChannelUsedQuota(channelId, quota)
-			}
-		}
+		}()
 	}()
 
 	switch apiType {
 	case APITypeOpenAI:
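Note on the change: the patch moves the post-request quota accounting into a goroutine fired from the handler's defer, so the HTTP response is no longer blocked on the bookkeeping writes; the explicit c.Writer.Flush() is commented out, and token_name / channel_id are now read from the gin context before the goroutine starts rather than inside it. That reordering matters because gin pools and reuses *gin.Context objects once the handler chain returns, so touching the context from a late goroutine is a data race. (The Chinese log string 模型倍率 / 分组倍率 translates to "model ratio" / "group ratio".)

Below is a minimal, self-contained sketch of the capture-before-goroutine pattern the diff relies on. The handler, middleware, and recordUsage function are hypothetical demo names, not the one-api code:

	package main

	import (
		"log"
		"net/http"
		"time"

		"github.com/gin-gonic/gin"
	)

	// recordUsage stands in for the quota bookkeeping done in the diff's
	// goroutine; it receives plain values, never the *gin.Context.
	func recordUsage(tokenName string, channelId int) {
		time.Sleep(10 * time.Millisecond) // pretend this hits the database
		log.Printf("post-consume: token=%q channel=%d", tokenName, channelId)
	}

	func relay(c *gin.Context) {
		// Capture request-scoped values BEFORE the goroutine is spawned:
		// gin recycles the context once the handler chain returns, so
		// reading c inside the goroutine would be a data race.
		tokenName := c.GetString("token_name")
		channelId := c.GetInt("channel_id")

		defer func() {
			go recordUsage(tokenName, channelId) // closes over copies, not c
		}()

		c.JSON(http.StatusOK, gin.H{"ok": true})
	}

	func main() {
		r := gin.Default()
		// Hypothetical middleware standing in for one-api's auth layer,
		// which stores token_name/channel_id on the context.
		r.Use(func(c *gin.Context) {
			c.Set("token_name", "demo-token")
			c.Set("channel_id", 42)
		})
		r.GET("/relay", relay)
		log.Fatal(r.Run(":8080"))
	}

Design note: gin's documentation recommends c.Copy() when a goroutine needs the context itself; the patch instead copies out just the two scalar values it uses, which is lighter and makes the goroutine's inputs explicit.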