diff --git a/common/model-ratio.go b/common/model-ratio.go index f52e1101..8f4be8c3 100644 --- a/common/model-ratio.go +++ b/common/model-ratio.go @@ -49,6 +49,7 @@ var ModelRatio = map[string]float64{ "ERNIE-Bot-4": 8.572, // ¥0.12 / 1k tokens "Embedding-V1": 0.1429, // ¥0.002 / 1k tokens "PaLM-2": 1, + "chatglm_turbo": 0.3572, // ¥0.005 / 1k tokens "chatglm_pro": 0.7143, // ¥0.01 / 1k tokens "chatglm_std": 0.3572, // ¥0.005 / 1k tokens "chatglm_lite": 0.1429, // ¥0.002 / 1k tokens diff --git a/controller/channel-test.go b/controller/channel-test.go index 0d8892b2..af84f089 100644 --- a/controller/channel-test.go +++ b/controller/channel-test.go @@ -7,6 +7,7 @@ import ( "errors" "fmt" "log" + "github.com/gin-gonic/gin" "net/http" "one-api/common" "one-api/model" @@ -14,8 +15,6 @@ import ( "strings" "sync" "time" - - "github.com/gin-gonic/gin" ) func testChannel(channel *model.Channel, request ChatRequest) (err error, openaiErr *OpenAIError) { @@ -53,6 +52,8 @@ func testChannel(channel *model.Channel, request ChatRequest) (err error, openai } requestURL += "/v1/chat/completions" } + // for Cloudflare AI gateway: https://github.com/songquanpeng/one-api/pull/639 + requestURL = strings.Replace(requestURL, "/v1/v1", "/v1", 1) jsonData, err := json.Marshal(request) if err != nil { diff --git a/controller/model.go b/controller/model.go index ed35b7a6..2a7dc538 100644 --- a/controller/model.go +++ b/controller/model.go @@ -333,6 +333,15 @@ func init() { Root: "PaLM-2", Parent: nil, }, + { + Id: "chatglm_turbo", + Object: "model", + Created: 1677649963, + OwnedBy: "zhipu", + Permission: permission, + Root: "chatglm_turbo", + Parent: nil, + }, { Id: "chatglm_pro", Object: "model", diff --git a/controller/relay-text.go b/controller/relay-text.go index 25f9e081..b7a9d9d1 100644 --- a/controller/relay-text.go +++ b/controller/relay-text.go @@ -7,6 +7,7 @@ import ( "errors" "fmt" "io" + "math" "net/http" "one-api/common" "one-api/model" @@ -433,9 +434,7 @@ func relayTextHelper(c *gin.Context, relayMode int) *OpenAIErrorWithStatusCode { completionRatio := common.GetCompletionRatio(textRequest.Model) promptTokens = textResponse.Usage.PromptTokens completionTokens = textResponse.Usage.CompletionTokens - - quota = promptTokens + int(float64(completionTokens)*completionRatio) - quota = int(float64(quota) * ratio) + quota = int(math.Ceil((float64(promptTokens) + float64(completionTokens)*completionRatio) * ratio)) if ratio != 0 && quota <= 0 { quota = 1 } diff --git a/web/src/pages/Channel/EditChannel.js b/web/src/pages/Channel/EditChannel.js index 0e5ddb6c..66003658 100644 --- a/web/src/pages/Channel/EditChannel.js +++ b/web/src/pages/Channel/EditChannel.js @@ -74,7 +74,7 @@ const EditChannel = () => { localModels = ['qwen-turbo', 'qwen-plus', 'text-embedding-v1']; break; case 16: - localModels = ['chatglm_pro', 'chatglm_std', 'chatglm_lite']; + localModels = ['chatglm_turbo', 'chatglm_pro', 'chatglm_std', 'chatglm_lite']; break; case 18: localModels = ['SparkDesk'];