From a2a00dfbc3c99573d97e21d64a1f896a7588c00f Mon Sep 17 00:00:00 2001
From: tylinux
Date: Sun, 21 Apr 2024 14:53:03 +0800
Subject: [PATCH] feat: groq support Llama3 now (#1333)

* feat: groq support Llama3 now

* fix: update model ratio

---------

Co-authored-by: JustSong
---
 relay/adaptor/groq/constants.go |  2 ++
 relay/billing/ratio/model.go    | 14 ++++++++++----
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/relay/adaptor/groq/constants.go b/relay/adaptor/groq/constants.go
index fc9a9ebd..1aa2574b 100644
--- a/relay/adaptor/groq/constants.go
+++ b/relay/adaptor/groq/constants.go
@@ -7,4 +7,6 @@ var ModelList = []string{
 	"llama2-7b-2048",
 	"llama2-70b-4096",
 	"mixtral-8x7b-32768",
+	"llama3-8b-8192",
+	"llama3-70b-8192",
 }
diff --git a/relay/billing/ratio/model.go b/relay/billing/ratio/model.go
index 24d7615d..d86881cf 100644
--- a/relay/billing/ratio/model.go
+++ b/relay/billing/ratio/model.go
@@ -147,11 +147,13 @@ var ModelRatio = map[string]float64{
 	"mistral-medium-latest": 2.7 / 1000 * USD,
 	"mistral-large-latest": 8.0 / 1000 * USD,
 	"mistral-embed": 0.1 / 1000 * USD,
-	// https://wow.groq.com/
-	"llama2-70b-4096": 0.7 / 1000 * USD,
-	"llama2-7b-2048": 0.1 / 1000 * USD,
+	// https://wow.groq.com/#:~:text=inquiries%C2%A0here.-,Model,-Current%20Speed
+	"llama3-70b-8192": 0.59 / 1000 * USD,
 	"mixtral-8x7b-32768": 0.27 / 1000 * USD,
+	"llama3-8b-8192": 0.05 / 1000 * USD,
 	"gemma-7b-it": 0.1 / 1000 * USD,
+	"llama2-70b-4096": 0.64 / 1000 * USD,
+	"llama2-7b-2048": 0.1 / 1000 * USD,
 	// https://platform.lingyiwanwu.com/docs#-计费单元
 	"yi-34b-chat-0205": 2.5 / 1000 * RMB,
 	"yi-34b-chat-200k": 12.0 / 1000 * RMB,
@@ -277,7 +279,11 @@ func GetCompletionRatio(name string) float64 {
 	}
 	switch name {
 	case "llama2-70b-4096":
-		return 0.8 / 0.7
+		return 0.8 / 0.64
+	case "llama3-8b-8192":
+		return 2
+	case "llama3-70b-8192":
+		return 0.79 / 0.59
 	}
 	return 1
 }