diff --git a/controller/relay-text.go b/controller/relay-text.go
index a6a276f3..1bb463fa 100644
--- a/controller/relay-text.go
+++ b/controller/relay-text.go
@@ -305,51 +305,54 @@ func relayTextHelper(c *gin.Context, relayMode int) *OpenAIErrorWithStatusCode {
 	}
 
 	var textResponse TextResponse
+	tokenName := c.GetString("token_name")
+	channelId := c.GetInt("channel_id")
 	defer func() {
-		c.Writer.Flush()
-		if consumeQuota {
-			quota := 0
-			completionRatio := 1.0
-			if strings.HasPrefix(textRequest.Model, "gpt-3.5") {
-				completionRatio = 1.333333
-			}
-			if strings.HasPrefix(textRequest.Model, "gpt-4") {
-				completionRatio = 2
-			}
+		// c.Writer.Flush()
+		go func() {
+			if consumeQuota {
+				quota := 0
+				completionRatio := 1.0
+				if strings.HasPrefix(textRequest.Model, "gpt-3.5") {
+					completionRatio = 1.333333
+				}
+				if strings.HasPrefix(textRequest.Model, "gpt-4") {
+					completionRatio = 2
+				}
 
-			promptTokens = textResponse.Usage.PromptTokens
-			completionTokens = textResponse.Usage.CompletionTokens
+				promptTokens = textResponse.Usage.PromptTokens
+				completionTokens = textResponse.Usage.CompletionTokens
 
-			quota = promptTokens + int(float64(completionTokens)*completionRatio)
-			quota = int(float64(quota) * ratio)
-			if ratio != 0 && quota <= 0 {
-				quota = 1
+				quota = promptTokens + int(float64(completionTokens)*completionRatio)
+				quota = int(float64(quota) * ratio)
+				if ratio != 0 && quota <= 0 {
+					quota = 1
+				}
+				totalTokens := promptTokens + completionTokens
+				if totalTokens == 0 {
+					// in this case, must be some error happened
+					// we cannot just return, because we may have to return the pre-consumed quota
+					quota = 0
+				}
+				quotaDelta := quota - preConsumedQuota
+				err := model.PostConsumeTokenQuota(tokenId, quotaDelta)
+				if err != nil {
+					common.SysError("error consuming token remain quota: " + err.Error())
+				}
+				err = model.CacheUpdateUserQuota(userId)
+				if err != nil {
+					common.SysError("error update user quota cache: " + err.Error())
+				}
+				if quota != 0 {
+					logContent := fmt.Sprintf("模型倍率 %.2f,分组倍率 %.2f", modelRatio, groupRatio)
+					model.RecordConsumeLog(userId, promptTokens, completionTokens, textRequest.Model, tokenName, quota, logContent)
+					model.UpdateUserUsedQuotaAndRequestCount(userId, quota)
+
+					model.UpdateChannelUsedQuota(channelId, quota)
+				}
 			}
-			totalTokens := promptTokens + completionTokens
-			if totalTokens == 0 {
-				// in this case, must be some error happened
-				// we cannot just return, because we may have to return the pre-consumed quota
-				quota = 0
-			}
-			quotaDelta := quota - preConsumedQuota
-			err := model.PostConsumeTokenQuota(tokenId, quotaDelta)
-			if err != nil {
-				common.SysError("error consuming token remain quota: " + err.Error())
-			}
-			err = model.CacheUpdateUserQuota(userId)
-			if err != nil {
-				common.SysError("error update user quota cache: " + err.Error())
-			}
-			if quota != 0 {
-				tokenName := c.GetString("token_name")
-				logContent := fmt.Sprintf("模型倍率 %.2f,分组倍率 %.2f", modelRatio, groupRatio)
-				model.RecordConsumeLog(userId, promptTokens, completionTokens, textRequest.Model, tokenName, quota, logContent)
-				model.UpdateUserUsedQuotaAndRequestCount(userId, quota)
-				channelId := c.GetInt("channel_id")
-				model.UpdateChannelUsedQuota(channelId, quota)
-			}
-		}
+		}()
 	}()
 
 	switch apiType {
 	case APITypeOpenAI:
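Note on the change: the patch moves the post-request quota accounting into a goroutine fired from the handler's defer, so the HTTP response is no longer blocked on the bookkeeping writes; the explicit c.Writer.Flush() is commented out, and token_name / channel_id are now read from the gin context before the goroutine starts rather than inside it. That reordering matters because gin pools and reuses *gin.Context objects once the handler chain returns, so touching the context from a late goroutine is a data race. (The Chinese log string 模型倍率 / 分组倍率 translates to "model ratio" / "group ratio".)

Below is a minimal, self-contained sketch of the capture-before-goroutine pattern the diff relies on. The handler, middleware, and recordUsage function are hypothetical demo names, not the one-api code:

	package main

	import (
		"log"
		"net/http"
		"time"

		"github.com/gin-gonic/gin"
	)

	// recordUsage stands in for the quota bookkeeping done in the diff's
	// goroutine; it receives plain values, never the *gin.Context.
	func recordUsage(tokenName string, channelId int) {
		time.Sleep(10 * time.Millisecond) // pretend this hits the database
		log.Printf("post-consume: token=%q channel=%d", tokenName, channelId)
	}

	func relay(c *gin.Context) {
		// Capture request-scoped values BEFORE the goroutine is spawned:
		// gin recycles the context once the handler chain returns, so
		// reading c inside the goroutine would be a data race.
		tokenName := c.GetString("token_name")
		channelId := c.GetInt("channel_id")

		defer func() {
			go recordUsage(tokenName, channelId) // closes over copies, not c
		}()

		c.JSON(http.StatusOK, gin.H{"ok": true})
	}

	func main() {
		r := gin.Default()
		// Hypothetical middleware standing in for one-api's auth layer,
		// which stores token_name/channel_id on the context.
		r.Use(func(c *gin.Context) {
			c.Set("token_name", "demo-token")
			c.Set("channel_id", 42)
		})
		r.GET("/relay", relay)
		log.Fatal(r.Run(":8080"))
	}

Design note: gin's documentation recommends c.Copy() when a goroutine needs the context itself; the patch instead copies out just the two scalar values it uses, which is lighter and makes the goroutine's inputs explicit.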