perf: use a goroutine to handle quota post consumption (#364)
commit 0e9ff8825e
parent e0b4f96b5b
@@ -305,51 +305,54 @@ func relayTextHelper(c *gin.Context, relayMode int) *OpenAIErrorWithStatusCode {
 	}
 
 	var textResponse TextResponse
+	tokenName := c.GetString("token_name")
+	channelId := c.GetInt("channel_id")
 
 	defer func() {
-		c.Writer.Flush()
-		if consumeQuota {
-			quota := 0
-			completionRatio := 1.0
-			if strings.HasPrefix(textRequest.Model, "gpt-3.5") {
-				completionRatio = 1.333333
-			}
-			if strings.HasPrefix(textRequest.Model, "gpt-4") {
-				completionRatio = 2
-			}
+		// c.Writer.Flush()
+		go func() {
+			if consumeQuota {
+				quota := 0
+				completionRatio := 1.0
+				if strings.HasPrefix(textRequest.Model, "gpt-3.5") {
+					completionRatio = 1.333333
+				}
+				if strings.HasPrefix(textRequest.Model, "gpt-4") {
+					completionRatio = 2
+				}
 
-			promptTokens = textResponse.Usage.PromptTokens
-			completionTokens = textResponse.Usage.CompletionTokens
+				promptTokens = textResponse.Usage.PromptTokens
+				completionTokens = textResponse.Usage.CompletionTokens
 
-			quota = promptTokens + int(float64(completionTokens)*completionRatio)
-			quota = int(float64(quota) * ratio)
-			if ratio != 0 && quota <= 0 {
-				quota = 1
-			}
-			totalTokens := promptTokens + completionTokens
-			if totalTokens == 0 {
-				// in this case, must be some error happened
-				// we cannot just return, because we may have to return the pre-consumed quota
-				quota = 0
-			}
-			quotaDelta := quota - preConsumedQuota
-			err := model.PostConsumeTokenQuota(tokenId, quotaDelta)
-			if err != nil {
-				common.SysError("error consuming token remain quota: " + err.Error())
-			}
-			err = model.CacheUpdateUserQuota(userId)
-			if err != nil {
-				common.SysError("error update user quota cache: " + err.Error())
-			}
-			if quota != 0 {
-				tokenName := c.GetString("token_name")
-				logContent := fmt.Sprintf("模型倍率 %.2f,分组倍率 %.2f", modelRatio, groupRatio)
-				model.RecordConsumeLog(userId, promptTokens, completionTokens, textRequest.Model, tokenName, quota, logContent)
-				model.UpdateUserUsedQuotaAndRequestCount(userId, quota)
-				channelId := c.GetInt("channel_id")
-				model.UpdateChannelUsedQuota(channelId, quota)
-			}
-		}
+				quota = promptTokens + int(float64(completionTokens)*completionRatio)
+				quota = int(float64(quota) * ratio)
+				if ratio != 0 && quota <= 0 {
+					quota = 1
+				}
+				totalTokens := promptTokens + completionTokens
+				if totalTokens == 0 {
+					// in this case, must be some error happened
+					// we cannot just return, because we may have to return the pre-consumed quota
+					quota = 0
+				}
+				quotaDelta := quota - preConsumedQuota
+				err := model.PostConsumeTokenQuota(tokenId, quotaDelta)
+				if err != nil {
+					common.SysError("error consuming token remain quota: " + err.Error())
+				}
+				err = model.CacheUpdateUserQuota(userId)
+				if err != nil {
+					common.SysError("error update user quota cache: " + err.Error())
+				}
+				if quota != 0 {
+					logContent := fmt.Sprintf("模型倍率 %.2f,分组倍率 %.2f", modelRatio, groupRatio)
+					model.RecordConsumeLog(userId, promptTokens, completionTokens, textRequest.Model, tokenName, quota, logContent)
+					model.UpdateUserUsedQuotaAndRequestCount(userId, quota)
+					model.UpdateChannelUsedQuota(channelId, quota)
+				}
+			}
+		}()
 	}()
 	switch apiType {
 	case APITypeOpenAI:
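The billing arithmetic consolidated inside the goroutine reads: weight completion tokens by a model-dependent completion ratio (1.333333 for gpt-3.5, 2 for gpt-4), add prompt tokens, then scale by the combined ratio, flooring any non-free request at a quota of 1. A minimal worked example — the two completion ratios are the ones in the diff, while the token counts and the combined ratio are made-up illustrative values:

package main

import "fmt"

func main() {
	// Illustrative values only: a hypothetical gpt-3.5 response.
	promptTokens := 100
	completionTokens := 300
	completionRatio := 1.333333 // gpt-3.5 per the diff; gpt-4 would use 2
	ratio := 0.5                // stands in for the model/group ratio product

	// Same arithmetic as the diff: weight completions, add prompts, scale.
	quota := promptTokens + int(float64(completionTokens)*completionRatio) // 100 + 399 = 499
	quota = int(float64(quota) * ratio)                                    // 249
	if ratio != 0 && quota <= 0 {
		quota = 1 // a non-free request never rounds down to zero
	}
	fmt.Println(quota) // 249
}

Note that quotaDelta := quota - preConsumedQuota can come out negative, which is how an over-estimated pre-consumed quota is returned to the token — and the reason the code zeroes quota instead of returning early when totalTokens == 0. The logged message records the model ratio and group ratio ("模型倍率 … 分组倍率 …") used for the charge.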
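The structural change, reduced to a sketch: the deferred cleanup now spawns a goroutine so the response path no longer blocks on quota bookkeeping, and the request-scoped values it needs (token_name, channel_id) are read from the gin context before the handler returns, since the context should not be touched afterwards. The handler and function names below are hypothetical stand-ins, not the project's API:

package main

import (
	"fmt"
	"time"
)

// postConsume stands in for the database writes the commit moves off the hot path.
func postConsume(tokenName string, channelId int, quota int) {
	time.Sleep(50 * time.Millisecond) // simulate slow DB work
	fmt.Printf("billed %d to token %q on channel %d\n", quota, tokenName, channelId)
}

func handleRequest(done chan<- struct{}) {
	// Read request-scoped values up front, while they are still safe to use,
	// mirroring how the diff hoists token_name and channel_id above the defer.
	tokenName := "demo-token"
	channelId := 42

	defer func() {
		// Fire-and-forget: the handler returns without waiting on billing.
		go func() {
			postConsume(tokenName, channelId, 249)
			done <- struct{}{}
		}()
	}()

	fmt.Println("response sent to client")
}

func main() {
	done := make(chan struct{})
	handleRequest(done)
	<-done // block only so the demo does not exit before the goroutine finishes
}

One trade-off worth noting: a bare go func() is unbounded, so under heavy load every request spawns a bookkeeping goroutine, and a failure after the response has been sent can only be logged, never surfaced to the client.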