From 1d0b7fb5ae73a9eefe28da5b2cf5f6b6af335c02 Mon Sep 17 00:00:00 2001
From: JustSong
Date: Sat, 2 Mar 2024 03:05:25 +0800
Subject: [PATCH] feat: support chatglm-4 (close #1045, close #952, close #943)

---
 common/model-ratio.go                        | 20 +++++++-----
 relay/channel/openai/adaptor.go              |  2 +-
 relay/channel/openai/main.go                 | 10 ++++--
 relay/channel/openai/model.go                |  1 +
 relay/channel/tencent/main.go                |  1 +
 relay/channel/zhipu/adaptor.go               | 34 ++++++++++++++++++++
 relay/channel/zhipu/constants.go             |  1 +
 web/berry/src/views/Channel/type/Config.js   |  2 +-
 web/default/src/pages/Channel/EditChannel.js |  2 +-
 9 files changed, 59 insertions(+), 14 deletions(-)

diff --git a/common/model-ratio.go b/common/model-ratio.go
index 3be9118d..1594b534 100644
--- a/common/model-ratio.go
+++ b/common/model-ratio.go
@@ -94,14 +94,18 @@ var ModelRatio = map[string]float64{
 	"claude-2.0": 5.51, // $11.02 / 1M tokens
 	"claude-2.1": 5.51, // $11.02 / 1M tokens
 	// https://cloud.baidu.com/doc/WENXINWORKSHOP/s/hlrk4akp7
-	"ERNIE-Bot":       0.8572,     // ¥0.012 / 1k tokens
-	"ERNIE-Bot-turbo": 0.5715,     // ¥0.008 / 1k tokens
-	"ERNIE-Bot-4":     0.12 * RMB, // ¥0.12 / 1k tokens
-	"ERNIE-Bot-8k":    0.024 * RMB,
-	"Embedding-V1":    0.1429, // ¥0.002 / 1k tokens
-	"PaLM-2":          1,
-	"gemini-pro":      1, // $0.00025 / 1k characters -> $0.001 / 1k tokens
-	"gemini-pro-vision": 1, // $0.00025 / 1k characters -> $0.001 / 1k tokens
+	"ERNIE-Bot":         0.8572,     // ¥0.012 / 1k tokens
+	"ERNIE-Bot-turbo":   0.5715,     // ¥0.008 / 1k tokens
+	"ERNIE-Bot-4":       0.12 * RMB, // ¥0.12 / 1k tokens
+	"ERNIE-Bot-8k":      0.024 * RMB,
+	"Embedding-V1":      0.1429, // ¥0.002 / 1k tokens
+	"PaLM-2":            1,
+	"gemini-pro":        1, // $0.00025 / 1k characters -> $0.001 / 1k tokens
+	"gemini-pro-vision": 1, // $0.00025 / 1k characters -> $0.001 / 1k tokens
+	// https://open.bigmodel.cn/pricing
+	"glm-4":       0.1 * RMB,
+	"glm-4v":      0.1 * RMB,
+	"glm-3-turbo": 0.005 * RMB,
 	"chatglm_turbo": 0.3572, // ¥0.005 / 1k tokens
 	"chatglm_pro":   0.7143, // ¥0.01 / 1k tokens
 	"chatglm_std":   0.3572, // ¥0.005 / 1k tokens
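
Review note (not part of the patch): the new glm-* entries follow this file's existing convention, where a ratio of 1 means $0.002 per 1K tokens, so a ¥-priced model is entered as its per-1K price times the RMB constant. The sketch below works through the arithmetic; it assumes RMB = 500/7, which is what the surrounding entries imply (0.012 * 500/7 ≈ 0.8572, matching the ERNIE-Bot line).

package main

import "fmt"

// Pricing convention implied by common/model-ratio.go: a ratio of 1 is
// $0.002 per 1K tokens, so $1 per 1K tokens is a ratio of 500, and the
// RMB constant works out to 500/7 (assumed rate of 7 RMB per USD).
const (
	USD = 500.0
	RMB = USD / 7.0
)

func main() {
	fmt.Println("glm-4:", 0.1*RMB)         // ¥0.1 / 1k tokens   -> ratio ~7.14
	fmt.Println("glm-3-turbo:", 0.005*RMB) // ¥0.005 / 1k tokens -> ratio ~0.36
	fmt.Println("ERNIE-Bot:", 0.012*RMB)   // ¥0.012 / 1k tokens -> ~0.857, matches 0.8572 above
}
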
diff --git a/relay/channel/openai/adaptor.go b/relay/channel/openai/adaptor.go
index 6afe2b2f..27d0fc27 100644
--- a/relay/channel/openai/adaptor.go
+++ b/relay/channel/openai/adaptor.go
@@ -76,7 +76,7 @@ func (a *Adaptor) DoRequest(c *gin.Context, meta *util.RelayMeta, requestBody io
 func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *util.RelayMeta) (usage *model.Usage, err *model.ErrorWithStatusCode) {
 	if meta.IsStream {
 		var responseText string
-		err, responseText = StreamHandler(c, resp, meta.Mode)
+		err, responseText, _ = StreamHandler(c, resp, meta.Mode)
 		usage = ResponseText2Usage(responseText, meta.ActualModelName, meta.PromptTokens)
 	} else {
 		err, usage = Handler(c, resp, meta.PromptTokens, meta.ActualModelName)
diff --git a/relay/channel/openai/main.go b/relay/channel/openai/main.go
index fbe55cf9..d47cd164 100644
--- a/relay/channel/openai/main.go
+++ b/relay/channel/openai/main.go
@@ -14,7 +14,7 @@ import (
 	"strings"
 )
 
-func StreamHandler(c *gin.Context, resp *http.Response, relayMode int) (*model.ErrorWithStatusCode, string) {
+func StreamHandler(c *gin.Context, resp *http.Response, relayMode int) (*model.ErrorWithStatusCode, string, *model.Usage) {
 	responseText := ""
 	scanner := bufio.NewScanner(resp.Body)
 	scanner.Split(func(data []byte, atEOF bool) (advance int, token []byte, err error) {
@@ -31,6 +31,7 @@ func StreamHandler(c *gin.Context, resp *http.Response, relayMode int) (*model.E
 	})
 	dataChan := make(chan string)
 	stopChan := make(chan bool)
+	var usage *model.Usage
 	go func() {
 		for scanner.Scan() {
 			data := scanner.Text()
@@ -54,6 +55,9 @@ func StreamHandler(c *gin.Context, resp *http.Response, relayMode int) (*model.E
 			for _, choice := range streamResponse.Choices {
 				responseText += choice.Delta.Content
 			}
+			if streamResponse.Usage != nil {
+				usage = streamResponse.Usage
+			}
 		case constant.RelayModeCompletions:
 			var streamResponse CompletionsStreamResponse
 			err := json.Unmarshal([]byte(data), &streamResponse)
@@ -86,9 +90,9 @@ func StreamHandler(c *gin.Context, resp *http.Response, relayMode int) (*model.E
 	})
 	err := resp.Body.Close()
 	if err != nil {
-		return ErrorWrapper(err, "close_response_body_failed", http.StatusInternalServerError), ""
+		return ErrorWrapper(err, "close_response_body_failed", http.StatusInternalServerError), "", nil
 	}
-	return nil, responseText
+	return nil, responseText, usage
 }
 
 func Handler(c *gin.Context, resp *http.Response, promptTokens int, modelName string) (*model.ErrorWithStatusCode, *model.Usage) {
diff --git a/relay/channel/openai/model.go b/relay/channel/openai/model.go
index b24485a8..6c0b2c53 100644
--- a/relay/channel/openai/model.go
+++ b/relay/channel/openai/model.go
@@ -132,6 +132,7 @@ type ChatCompletionsStreamResponse struct {
 	Created int64                                 `json:"created"`
 	Model   string                                `json:"model"`
 	Choices []ChatCompletionsStreamResponseChoice `json:"choices"`
+	Usage   *model.Usage                          `json:"usage,omitempty"`
 }
 
 type CompletionsStreamResponse struct {
diff --git a/relay/channel/tencent/main.go b/relay/channel/tencent/main.go
index 05edac20..fa26651b 100644
--- a/relay/channel/tencent/main.go
+++ b/relay/channel/tencent/main.go
@@ -81,6 +81,7 @@ func responseTencent2OpenAI(response *ChatResponse) *openai.TextResponse {
 
 func streamResponseTencent2OpenAI(TencentResponse *ChatResponse) *openai.ChatCompletionsStreamResponse {
 	response := openai.ChatCompletionsStreamResponse{
+		Id:      fmt.Sprintf("chatcmpl-%s", helper.GetUUID()),
 		Object:  "chat.completion.chunk",
 		Created: helper.GetTimestamp(),
 		Model:   "tencent-hunyuan",
diff --git a/relay/channel/zhipu/adaptor.go b/relay/channel/zhipu/adaptor.go
index 7a822853..90cc79d3 100644
--- a/relay/channel/zhipu/adaptor.go
+++ b/relay/channel/zhipu/adaptor.go
@@ -5,20 +5,35 @@ import (
 	"fmt"
 	"github.com/gin-gonic/gin"
 	"github.com/songquanpeng/one-api/relay/channel"
+	"github.com/songquanpeng/one-api/relay/channel/openai"
 	"github.com/songquanpeng/one-api/relay/model"
 	"github.com/songquanpeng/one-api/relay/util"
 	"io"
 	"net/http"
+	"strings"
 )
 
 type Adaptor struct {
+	APIVersion string
 }
 
 func (a *Adaptor) Init(meta *util.RelayMeta) {
 }
 
+func (a *Adaptor) SetVersionByModelName(modelName string) {
+	if strings.HasPrefix(modelName, "glm-") {
+		a.APIVersion = "v4"
+	} else {
+		a.APIVersion = "v3"
+	}
+}
+
 func (a *Adaptor) GetRequestURL(meta *util.RelayMeta) (string, error) {
+	a.SetVersionByModelName(meta.ActualModelName)
+	if a.APIVersion == "v4" {
+		return fmt.Sprintf("%s/api/paas/v4/chat/completions", meta.BaseURL), nil
+	}
 	method := "invoke"
 	if meta.IsStream {
 		method = "sse-invoke"
 	}
@@ -37,6 +52,13 @@ func (a *Adaptor) ConvertRequest(c *gin.Context, relayMode int, request *model.G
 	if request == nil {
 		return nil, errors.New("request is nil")
 	}
+	if request.TopP >= 1 {
+		request.TopP = 0.99
+	}
+	a.SetVersionByModelName(request.Model)
+	if a.APIVersion == "v4" {
+		return request, nil
+	}
 	return ConvertRequest(*request), nil
 }
 
@@ -44,7 +66,19 @@ func (a *Adaptor) DoRequest(c *gin.Context, meta *util.RelayMeta, requestBody io
 	return channel.DoRequestHelper(a, c, meta, requestBody)
 }
 
+func (a *Adaptor) DoResponseV4(c *gin.Context, resp *http.Response, meta *util.RelayMeta) (usage *model.Usage, err *model.ErrorWithStatusCode) {
+	if meta.IsStream {
+		err, _, usage = openai.StreamHandler(c, resp, meta.Mode)
+	} else {
+		err, usage = openai.Handler(c, resp, meta.PromptTokens, meta.ActualModelName)
+	}
+	return
+}
+
 func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *util.RelayMeta) (usage *model.Usage, err *model.ErrorWithStatusCode) {
+	if a.APIVersion == "v4" {
+		return a.DoResponseV4(c, resp, meta)
+	}
 	if meta.IsStream {
 		err, usage = StreamHandler(c, resp)
 	} else {
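
Review note (not part of the patch): every entry point of the zhipu adaptor (GetRequestURL, ConvertRequest, DoResponse) now branches on APIVersion, which SetVersionByModelName derives from the "glm-" model-name prefix. A minimal table-driven test along the following lines, placed in relay/channel/zhipu, would pin that routing down; this is a sketch, not code this PR adds.

package zhipu

import "testing"

// Sketch: the glm-* models added by this patch should select the
// OpenAI-compatible v4 endpoint; legacy chatglm_* models stay on v3.
func TestSetVersionByModelName(t *testing.T) {
	cases := map[string]string{
		"glm-4":         "v4",
		"glm-4v":        "v4",
		"glm-3-turbo":   "v4",
		"chatglm_turbo": "v3",
		"chatglm_lite":  "v3",
	}
	for modelName, want := range cases {
		a := &Adaptor{}
		a.SetVersionByModelName(modelName)
		if a.APIVersion != want {
			t.Errorf("%s: APIVersion = %q, want %q", modelName, a.APIVersion, want)
		}
	}
}
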
diff --git a/relay/channel/zhipu/constants.go b/relay/channel/zhipu/constants.go
index f0367b82..1655a59d 100644
--- a/relay/channel/zhipu/constants.go
+++ b/relay/channel/zhipu/constants.go
@@ -2,4 +2,5 @@ package zhipu
 
 var ModelList = []string{
 	"chatglm_turbo", "chatglm_pro", "chatglm_std", "chatglm_lite",
+	"glm-4", "glm-4v", "glm-3-turbo",
 }
diff --git a/web/berry/src/views/Channel/type/Config.js b/web/berry/src/views/Channel/type/Config.js
index 0e89868b..4dec33de 100644
--- a/web/berry/src/views/Channel/type/Config.js
+++ b/web/berry/src/views/Channel/type/Config.js
@@ -67,7 +67,7 @@ const typeConfig = {
   },
   16: {
     input: {
-      models: ["chatglm_turbo", "chatglm_pro", "chatglm_std", "chatglm_lite"],
+      models: ["glm-4", "glm-4v", "glm-3-turbo", "chatglm_turbo", "chatglm_pro", "chatglm_std", "chatglm_lite"],
     },
     modelGroup: "zhipu",
   },
diff --git a/web/default/src/pages/Channel/EditChannel.js b/web/default/src/pages/Channel/EditChannel.js
index b9214fd8..693242f9 100644
--- a/web/default/src/pages/Channel/EditChannel.js
+++ b/web/default/src/pages/Channel/EditChannel.js
@@ -79,7 +79,7 @@ const EditChannel = () => {
         localModels = [...localModels, ...withInternetVersion];
         break;
       case 16:
-        localModels = ['chatglm_turbo', 'chatglm_pro', 'chatglm_std', 'chatglm_lite'];
+        localModels = ['glm-4', 'glm-4v', 'glm-3-turbo', 'chatglm_turbo', 'chatglm_pro', 'chatglm_std', 'chatglm_lite'];
         break;
       case 18:
         localModels = [
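
Review note (not part of the patch): StreamHandler now also returns the usage block that ZhipuAI's v4 API appends to the final stream chunk, and DoResponseV4 consumes it directly, while the OpenAI adaptor still discards it (the blank identifier in adaptor.go) and recomputes usage from the concatenated text. If a follow-up wants to prefer the upstream figure whenever one is reported, the helper below sketches that fallback. usageFromStream is a hypothetical name; only ResponseText2Usage, model.Usage, and their signatures come from the patch context.

package openai

import "github.com/songquanpeng/one-api/relay/model"

// usageFromStream (hypothetical helper): prefer the usage reported in
// the stream's final chunk, as glm-4's v4 endpoint provides, and fall
// back to recomputing it from the response text when none was sent.
func usageFromStream(reported *model.Usage, responseText, modelName string, promptTokens int) *model.Usage {
	if reported != nil && reported.TotalTokens > 0 {
		return reported
	}
	return ResponseText2Usage(responseText, modelName, promptTokens)
}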