From 38668e73311f0bb85a1080b7dad0565eac56f27e Mon Sep 17 00:00:00 2001 From: JustSong Date: Wed, 14 Jun 2023 09:41:06 +0800 Subject: [PATCH] chore: update gpt3.5 completion ratio --- common/model-ratio.go | 12 +++++++----- controller/relay.go | 11 +++++------ 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/common/model-ratio.go b/common/model-ratio.go index 4bdb6fec..5aa8f2d8 100644 --- a/common/model-ratio.go +++ b/common/model-ratio.go @@ -2,9 +2,11 @@ package common import "encoding/json" +// ModelRatio // https://platform.openai.com/docs/models/model-endpoint-compatibility // https://openai.com/pricing // TODO: when a new api is enabled, check the pricing here +// 1 === $0.002 / 1K tokens var ModelRatio = map[string]float64{ "gpt-4": 15, "gpt-4-0314": 15, @@ -12,11 +14,11 @@ var ModelRatio = map[string]float64{ "gpt-4-32k": 30, "gpt-4-32k-0314": 30, "gpt-4-32k-0613": 30, - "gpt-3.5-turbo": 1, // $0.002 / 1K tokens - "gpt-3.5-turbo-0301": 1, - "gpt-3.5-turbo-0613": 1, - "gpt-3.5-turbo-16k": 2, // $0.004 / 1K tokens - "gpt-3.5-turbo-16k-0613": 2, + "gpt-3.5-turbo": 0.75, // $0.0015 / 1K tokens + "gpt-3.5-turbo-0301": 0.75, + "gpt-3.5-turbo-0613": 0.75, + "gpt-3.5-turbo-16k": 1.5, // $0.003 / 1K tokens + "gpt-3.5-turbo-16k-0613": 1.5, "text-ada-001": 0.2, "text-babbage-001": 0.25, "text-curie-001": 1, diff --git a/controller/relay.go b/controller/relay.go index 35897909..cf357104 100644 --- a/controller/relay.go +++ b/controller/relay.go @@ -239,16 +239,15 @@ func relayHelper(c *gin.Context, relayMode int) *OpenAIErrorWithStatusCode { defer func() { if consumeQuota { quota := 0 - usingGPT4 := strings.HasPrefix(textRequest.Model, "gpt-4") - completionRatio := 1 - if usingGPT4 { + completionRatio := 1.34 // default for gpt-3 + if strings.HasPrefix(textRequest.Model, "gpt-4") { completionRatio = 2 } if isStream { responseTokens := countTokenText(streamResponseText, textRequest.Model) - quota = promptTokens + responseTokens*completionRatio + quota = promptTokens + int(float64(responseTokens)*completionRatio) } else { - quota = textResponse.Usage.PromptTokens + textResponse.Usage.CompletionTokens*completionRatio + quota = textResponse.Usage.PromptTokens + int(float64(textResponse.Usage.CompletionTokens)*completionRatio) } quota = int(float64(quota) * ratio) if ratio != 0 && quota <= 0 { @@ -260,7 +259,7 @@ func relayHelper(c *gin.Context, relayMode int) *OpenAIErrorWithStatusCode { common.SysError("Error consuming token remain quota: " + err.Error()) } userId := c.GetInt("id") - model.RecordLog(userId, model.LogTypeConsume, fmt.Sprintf("使用模型 %s 消耗 %d 点额度(模型倍率 %.2f,分组倍率 %.2f)", textRequest.Model, quota, modelRatio, groupRatio)) + model.RecordLog(userId, model.LogTypeConsume, fmt.Sprintf("使用模型 %s 消耗 %d 点额度(模型倍率 %.2f,分组倍率 %.2f,补全倍率 %.2f)", textRequest.Model, quota, modelRatio, groupRatio, completionRatio)) } }()