From 23b1c63538bdba2cf6b8532dcf3721cd97816209 Mon Sep 17 00:00:00 2001 From: Benny Date: Sat, 19 Aug 2023 16:58:34 +0800 Subject: [PATCH] fix: claude model ratio (#449) * fix: Claude model ratio * chore: update implementation --------- Co-authored-by: JustSong --- common/model-ratio.go | 25 ++++++++++++++++++++++--- controller/relay-text.go | 9 +-------- 2 files changed, 23 insertions(+), 11 deletions(-) diff --git a/common/model-ratio.go b/common/model-ratio.go index 5865b4dc..e658cdc1 100644 --- a/common/model-ratio.go +++ b/common/model-ratio.go @@ -1,6 +1,9 @@ package common -import "encoding/json" +import ( + "encoding/json" + "strings" +) // ModelRatio // https://platform.openai.com/docs/models/model-endpoint-compatibility @@ -38,8 +41,8 @@ var ModelRatio = map[string]float64{ "text-moderation-stable": 0.1, "text-moderation-latest": 0.1, "dall-e": 8, - "claude-instant-1": 0.75, - "claude-2": 30, + "claude-instant-1": 0.815, // $1.63 / 1M tokens + "claude-2": 5.51, // $11.02 / 1M tokens "ERNIE-Bot": 0.8572, // ¥0.012 / 1k tokens "ERNIE-Bot-turbo": 0.5715, // ¥0.008 / 1k tokens "Embedding-V1": 0.1429, // ¥0.002 / 1k tokens @@ -73,3 +76,19 @@ func GetModelRatio(name string) float64 { } return ratio } + +func GetCompletionRatio(name string) float64 { + if strings.HasPrefix(name, "gpt-3.5") { + return 1.333333 + } + if strings.HasPrefix(name, "gpt-4") { + return 2 + } + if strings.HasPrefix(name, "claude-instant-1") { + return 3.38 + } + if strings.HasPrefix(name, "claude-2") { + return 2.965517 + } + return 1 +} diff --git a/controller/relay-text.go b/controller/relay-text.go index 761ca86f..e061d387 100644 --- a/controller/relay-text.go +++ b/controller/relay-text.go @@ -326,14 +326,7 @@ func relayTextHelper(c *gin.Context, relayMode int) *OpenAIErrorWithStatusCode { go func() { if consumeQuota { quota := 0 - completionRatio := 1.0 - if strings.HasPrefix(textRequest.Model, "gpt-3.5") { - completionRatio = 1.333333 - } - if strings.HasPrefix(textRequest.Model, "gpt-4") { - completionRatio = 2 - } - + completionRatio := common.GetCompletionRatio(textRequest.Model) promptTokens = textResponse.Usage.PromptTokens completionTokens = textResponse.Usage.CompletionTokens