diff --git a/common/model-ratio.go b/common/model-ratio.go
index 91d8043e..d2824bce 100644
--- a/common/model-ratio.go
+++ b/common/model-ratio.go
@@ -7,7 +7,7 @@ import (
)
+// ModelType describes the built-in pricing entry for one model.
type ModelType struct {
- Ratio float64
+ // Ratio holds the pricing multipliers for the model: index 0 is applied to
+ // prompt (input) tokens and index 1 to completion (output) tokens.
+ Ratio []float64
+ // Type is the channel type constant (ChannelTypeOpenAI, ChannelTypeBaidu, ...)
+ // the model is registered under.
Type int
}
@@ -20,102 +20,156 @@ var ModelTypes map[string]ModelType
// TODO: when a new api is enabled, check the pricing here
// 1 === $0.002 / 1K tokens
// 1 === ¥0.014 / 1k tokens
-var ModelRatio map[string]float64
+// ModelRatio maps a model name to its [input ratio, completion ratio] pair.
+var ModelRatio map[string][]float64
func init() {
ModelTypes = map[string]ModelType{
- "gpt-4": {15, ChannelTypeOpenAI},
- "gpt-4-0314": {15, ChannelTypeOpenAI},
- "gpt-4-0613": {15, ChannelTypeOpenAI},
- "gpt-4-32k": {30, ChannelTypeOpenAI},
- "gpt-4-32k-0314": {30, ChannelTypeOpenAI},
- "gpt-4-32k-0613": {30, ChannelTypeOpenAI},
- "gpt-4-preview": {5, ChannelTypeOpenAI}, // $0.01 / 1K tokens
- "gpt-4-1106-preview": {5, ChannelTypeOpenAI}, // $0.01 / 1K tokens
- "gpt-4-0125-preview": {5, ChannelTypeOpenAI}, // $0.01 / 1K tokens
- "gpt-4-vision-preview": {5, ChannelTypeOpenAI}, // $0.01 / 1K tokens
- "gpt-3.5-turbo": {0.75, ChannelTypeOpenAI}, // $0.0015 / 1K tokens
- "gpt-3.5-turbo-0301": {0.75, ChannelTypeOpenAI},
- "gpt-3.5-turbo-0613": {0.75, ChannelTypeOpenAI},
- "gpt-3.5-turbo-16k": {1.5, ChannelTypeOpenAI}, // $0.003 / 1K tokens
- "gpt-3.5-turbo-16k-0613": {1.5, ChannelTypeOpenAI},
- "gpt-3.5-turbo-instruct": {0.75, ChannelTypeOpenAI}, // $0.0015 / 1K tokens
- "gpt-3.5-turbo-1106": {0.5, ChannelTypeOpenAI}, // $0.001 / 1K tokens
- "gpt-3.5-turbo-0125": {0.25, ChannelTypeOpenAI}, // $0.0005 / $0.0015 / 1K tokens
- "davinci-002": {1, ChannelTypeOpenAI}, // $0.002 / 1K tokens
- "babbage-002": {0.2, ChannelTypeOpenAI}, // $0.002 / 1K tokens
- "text-ada-001": {0.2, ChannelTypeOpenAI},
- "text-babbage-001": {0.25, ChannelTypeOpenAI},
- "text-curie-001": {1, ChannelTypeOpenAI},
- "text-davinci-002": {10, ChannelTypeOpenAI},
- "text-davinci-003": {10, ChannelTypeOpenAI},
- "text-davinci-edit-001": {10, ChannelTypeOpenAI},
- "code-davinci-edit-001": {10, ChannelTypeOpenAI},
- "whisper-1": {15, ChannelTypeOpenAI}, // $0.006 / minute -> $0.006 / 150 words -> $0.006 / 200 tokens -> $0.03 / 1k tokens
- "tts-1": {7.5, ChannelTypeOpenAI}, // $0.015 / 1K characters
- "tts-1-1106": {7.5, ChannelTypeOpenAI},
- "tts-1-hd": {15, ChannelTypeOpenAI}, // $0.030 / 1K characters
- "tts-1-hd-1106": {15, ChannelTypeOpenAI},
- "davinci": {10, ChannelTypeOpenAI},
- "curie": {10, ChannelTypeOpenAI},
- "babbage": {10, ChannelTypeOpenAI},
- "ada": {10, ChannelTypeOpenAI},
- "text-embedding-ada-002": {0.05, ChannelTypeOpenAI},
- "text-embedding-3-small": {0.01, ChannelTypeOpenAI}, // $0.00002 / 1K tokens
- "text-embedding-3-large": {0.065, ChannelTypeOpenAI}, // $0.00013 / 1K tokens
- "text-search-ada-doc-001": {10, ChannelTypeOpenAI},
- "text-moderation-stable": {0.1, ChannelTypeOpenAI},
- "text-moderation-latest": {0.1, ChannelTypeOpenAI},
- "dall-e-2": {8, ChannelTypeOpenAI}, // $0.016 - $0.020 / image
- "dall-e-3": {20, ChannelTypeOpenAI}, // $0.040 - $0.120 / image
- "claude-instant-1": {0.815, ChannelTypeAnthropic}, // $1.63 / 1M tokens
- "claude-2": {5.51, ChannelTypeAnthropic}, // $11.02 / 1M tokens
- "claude-2.0": {5.51, ChannelTypeAnthropic}, // $11.02 / 1M tokens
- "claude-2.1": {5.51, ChannelTypeAnthropic}, // $11.02 / 1M tokens
- "ERNIE-Bot": {0.8572, ChannelTypeBaidu}, // ¥0.012 / 1k tokens
- "ERNIE-Bot-turbo": {0.5715, ChannelTypeBaidu}, // ¥0.008 / 1k tokens
- "ERNIE-Bot-4": {8.572, ChannelTypeBaidu}, // ¥0.12 / 1k tokens
- "Embedding-V1": {0.1429, ChannelTypeBaidu}, // ¥0.002 / 1k tokens
- "PaLM-2": {1, ChannelTypePaLM},
- "gemini-pro": {1, ChannelTypeGemini}, // $0.00025 / 1k characters -> $0.001 / 1k tokens
- "gemini-pro-vision": {1, ChannelTypeGemini}, // $0.00025 / 1k characters -> $0.001 / 1k tokens
- "chatglm_turbo": {0.3572, ChannelTypeZhipu}, // ¥0.005 / 1k tokens
- "chatglm_pro": {0.7143, ChannelTypeZhipu}, // ¥0.01 / 1k tokens
- "chatglm_std": {0.3572, ChannelTypeZhipu}, // ¥0.005 / 1k tokens
- "chatglm_lite": {0.1429, ChannelTypeZhipu}, // ¥0.002 / 1k tokens
- "glm-3-turbo": {0.3572, ChannelTypeZhipu}, // ¥0.005 / 1k tokens
- "glm-4": {7.143, ChannelTypeZhipu}, // ¥0.1 / 1k tokens
- "glm-4v": {7.143, ChannelTypeZhipu}, // ¥0.1 / 1k tokens
- "embedding-2": {0.0357, ChannelTypeZhipu}, // ¥0.0005 / 1k tokens
- "cogview-3": {17.8571, ChannelTypeZhipu}, // ¥0.25 / 1张图片
- "qwen-turbo": {0.5715, ChannelTypeAli}, // ¥0.008 / 1k tokens // https://help.aliyun.com/zh/dashscope/developer-reference/tongyi-thousand-questions-metering-and-billing
- "qwen-plus": {1.4286, ChannelTypeAli}, // ¥0.02 / 1k tokens
- "qwen-max": {1.4286, ChannelTypeAli}, // ¥0.02 / 1k tokens
- "qwen-max-longcontext": {1.4286, ChannelTypeAli}, // ¥0.02 / 1k tokens
- "qwen-vl-plus": {0.5715, ChannelTypeAli}, // ¥0.008 / 1k tokens
- "text-embedding-v1": {0.05, ChannelTypeAli}, // ¥0.0007 / 1k tokens
- "SparkDesk": {1.2858, ChannelTypeXunfei}, // ¥0.018 / 1k tokens
- "360GPT_S2_V9": {0.8572, ChannelType360}, // ¥0.012 / 1k tokens
- "embedding-bert-512-v1": {0.0715, ChannelType360}, // ¥0.001 / 1k tokens
- "embedding_s1_v1": {0.0715, ChannelType360}, // ¥0.001 / 1k tokens
- "semantic_similarity_s1_v1": {0.0715, ChannelType360}, // ¥0.001 / 1k tokens
- "hunyuan": {7.143, ChannelTypeTencent}, // ¥0.1 / 1k tokens // https://cloud.tencent.com/document/product/1729/97731#e0e6be58-60c8-469f-bdeb-6c264ce3b4d0
- "Baichuan2-Turbo": {0.5715, ChannelTypeBaichuan}, // ¥0.008 / 1k tokens
- "Baichuan2-Turbo-192k": {1.143, ChannelTypeBaichuan}, // ¥0.016 / 1k tokens
- "Baichuan2-53B": {1.4286, ChannelTypeBaichuan}, // ¥0.02 / 1k tokens
- "Baichuan-Text-Embedding": {0.0357, ChannelTypeBaichuan}, // ¥0.0005 / 1k tokens
- "abab5.5s-chat": {0.3572, ChannelTypeMiniMax}, // ¥0.005 / 1k tokens
- "abab5.5-chat": {1.0714, ChannelTypeMiniMax}, // ¥0.015 / 1k tokens
- "abab6-chat": {14.2857, ChannelTypeMiniMax}, // ¥0.2 / 1k tokens
- "embo-01": {0.0357, ChannelTypeMiniMax}, // ¥0.0005 / 1k tokens
- "deepseek-coder": {0.75, ChannelTypeDeepseek}, // 暂定 $0.0015 / 1K tokens
- "deepseek-chat": {0.75, ChannelTypeDeepseek}, // 暂定 $0.0015 / 1K tokens
- "moonshot-v1-8k": {0.8572, ChannelTypeMoonshot}, // ¥0.012 / 1K tokens
- "moonshot-v1-32k": {1.7143, ChannelTypeMoonshot}, // ¥0.024 / 1K tokens
- "moonshot-v1-128k": {4.2857, ChannelTypeMoonshot}, // ¥0.06 / 1K tokens
+ // $0.03 / 1K tokens $0.06 / 1K tokens
+ "gpt-4": {[]float64{15, 30}, ChannelTypeOpenAI},
+ "gpt-4-0314": {[]float64{15, 30}, ChannelTypeOpenAI},
+ "gpt-4-0613": {[]float64{15, 30}, ChannelTypeOpenAI},
+ // $0.06 / 1K tokens $0.12 / 1K tokens
+ "gpt-4-32k": {[]float64{30, 60}, ChannelTypeOpenAI},
+ "gpt-4-32k-0314": {[]float64{30, 60}, ChannelTypeOpenAI},
+ "gpt-4-32k-0613": {[]float64{30, 60}, ChannelTypeOpenAI},
+ // $0.01 / 1K tokens $0.03 / 1K tokens
+ "gpt-4-preview": {[]float64{5, 15}, ChannelTypeOpenAI},
+ "gpt-4-1106-preview": {[]float64{5, 15}, ChannelTypeOpenAI},
+ "gpt-4-0125-preview": {[]float64{5, 15}, ChannelTypeOpenAI},
+ "gpt-4-vision-preview": {[]float64{5, 15}, ChannelTypeOpenAI},
+ // $0.0005 / 1K tokens $0.0015 / 1K tokens
+ "gpt-3.5-turbo": {[]float64{0.25, 0.75}, ChannelTypeOpenAI},
+ "gpt-3.5-turbo-0125": {[]float64{0.25, 0.75}, ChannelTypeOpenAI},
+ // $0.0015 / 1K tokens $0.002 / 1K tokens
+ "gpt-3.5-turbo-0301": {[]float64{0.75, 1}, ChannelTypeOpenAI},
+ "gpt-3.5-turbo-0613": {[]float64{0.75, 1}, ChannelTypeOpenAI},
+ "gpt-3.5-turbo-instruct": {[]float64{0.75, 1}, ChannelTypeOpenAI},
+ // $0.003 / 1K tokens $0.004 / 1K tokens
+ "gpt-3.5-turbo-16k": {[]float64{1.5, 2}, ChannelTypeOpenAI},
+ "gpt-3.5-turbo-16k-0613": {[]float64{1.5, 2}, ChannelTypeOpenAI},
+ // $0.001 / 1K tokens $0.002 / 1K tokens
+ "gpt-3.5-turbo-1106": {[]float64{0.5, 1}, ChannelTypeOpenAI},
+ // $0.0020 / 1K tokens
+ "davinci-002": {[]float64{1, 1}, ChannelTypeOpenAI},
+ // $0.0004 / 1K tokens
+ "babbage-002": {[]float64{0.2, 0.2}, ChannelTypeOpenAI},
+ "text-ada-001": {[]float64{0.2, 0.2}, ChannelTypeOpenAI},
+ "text-babbage-001": {[]float64{0.25, 0.25}, ChannelTypeOpenAI},
+ "text-curie-001": {[]float64{1, 1}, ChannelTypeOpenAI},
+ "text-davinci-002": {[]float64{10, 10}, ChannelTypeOpenAI},
+ "text-davinci-003": {[]float64{10, 10}, ChannelTypeOpenAI},
+ "text-davinci-edit-001": {[]float64{10, 10}, ChannelTypeOpenAI},
+ "code-davinci-edit-001": {[]float64{10, 10}, ChannelTypeOpenAI},
+ // $0.006 / minute -> $0.006 / 150 words -> $0.006 / 200 tokens -> $0.03 / 1k tokens
+ "whisper-1": {[]float64{15, 15}, ChannelTypeOpenAI},
+ // $0.015 / 1K characters
+ "tts-1": {[]float64{7.5, 7.5}, ChannelTypeOpenAI},
+ "tts-1-1106": {[]float64{7.5, 7.5}, ChannelTypeOpenAI},
+ // $0.030 / 1K characters
+ "tts-1-hd": {[]float64{15, 15}, ChannelTypeOpenAI},
+ "tts-1-hd-1106": {[]float64{15, 15}, ChannelTypeOpenAI},
+ "davinci": {[]float64{10, 10}, ChannelTypeOpenAI},
+ "curie": {[]float64{10, 10}, ChannelTypeOpenAI},
+ "babbage": {[]float64{10, 10}, ChannelTypeOpenAI},
+ "ada": {[]float64{10, 10}, ChannelTypeOpenAI},
+ "text-embedding-ada-002": {[]float64{0.05, 0.05}, ChannelTypeOpenAI},
+ // $0.00002 / 1K tokens
+ "text-embedding-3-small": {[]float64{0.01, 0.01}, ChannelTypeOpenAI},
+ // $0.00013 / 1K tokens
+ "text-embedding-3-large": {[]float64{0.065, 0.065}, ChannelTypeOpenAI},
+ "text-search-ada-doc-001": {[]float64{10, 10}, ChannelTypeOpenAI},
+ "text-moderation-stable": {[]float64{0.1, 0.1}, ChannelTypeOpenAI},
+ "text-moderation-latest": {[]float64{0.1, 0.1}, ChannelTypeOpenAI},
+ // $0.016 - $0.020 / image
+ "dall-e-2": {[]float64{8, 8}, ChannelTypeOpenAI},
+ // $0.040 - $0.120 / image
+ "dall-e-3": {[]float64{20, 20}, ChannelTypeOpenAI},
+
+ // $0.80/million tokens $2.40/million tokens
+ "claude-instant-1": {[]float64{0.4, 1.2}, ChannelTypeAnthropic},
+ // $8.00/million tokens $24.00/million tokens
+ "claude-2": {[]float64{4, 12}, ChannelTypeAnthropic},
+ "claude-2.0": {[]float64{4, 12}, ChannelTypeAnthropic},
+ "claude-2.1": {[]float64{4, 12}, ChannelTypeAnthropic},
+
+ // ¥0.012 / 1k tokens ¥0.012 / 1k tokens
+ "ERNIE-Bot": {[]float64{0.8572, 0.8572}, ChannelTypeBaidu},
+ // 0.024元/千tokens 0.048元/千tokens
+ "ERNIE-Bot-8k": {[]float64{1.7143, 3.4286}, ChannelTypeBaidu},
+ // ¥0.008 / 1k tokens ¥0.008 / 1k tokens
+ "ERNIE-Bot-turbo": {[]float64{0.5715, 0.5715}, ChannelTypeBaidu},
+ // ¥0.12 / 1k tokens ¥0.12 / 1k tokens
+ "ERNIE-Bot-4": {[]float64{8.572, 8.572}, ChannelTypeBaidu},
+ // ¥0.002 / 1k tokens
+ "Embedding-V1": {[]float64{0.1429, 0.1429}, ChannelTypeBaidu},
+
+ "PaLM-2": {[]float64{1, 1}, ChannelTypePaLM},
+ "gemini-pro": {[]float64{1, 1}, ChannelTypeGemini},
+ "gemini-pro-vision": {[]float64{1, 1}, ChannelTypeGemini},
+
+ // ¥0.005 / 1k tokens
+ "chatglm_turbo": {[]float64{0.3572, 0.3572}, ChannelTypeZhipu},
+ "chatglm_std": {[]float64{0.3572, 0.3572}, ChannelTypeZhipu},
+ "glm-3-turbo": {[]float64{0.3572, 0.3572}, ChannelTypeZhipu},
+ // ¥0.01 / 1k tokens
+ "chatglm_pro": {[]float64{0.7143, 0.7143}, ChannelTypeZhipu},
+ // ¥0.002 / 1k tokens
+ "chatglm_lite": {[]float64{0.1429, 0.1429}, ChannelTypeZhipu},
+ // ¥0.1 / 1k tokens
+ "glm-4": {[]float64{7.143, 7.143}, ChannelTypeZhipu},
+ "glm-4v": {[]float64{7.143, 7.143}, ChannelTypeZhipu},
+ // ¥0.0005 / 1k tokens
+ "embedding-2": {[]float64{0.0357, 0.0357}, ChannelTypeZhipu},
+ // ¥0.25 / 1张图片
+ "cogview-3": {[]float64{17.8571, 17.8571}, ChannelTypeZhipu},
+
+ // ¥0.008 / 1k tokens
+ "qwen-turbo": {[]float64{0.5715, 0.5715}, ChannelTypeAli},
+ // ¥0.02 / 1k tokens
+ "qwen-plus": {[]float64{1.4286, 1.4286}, ChannelTypeAli},
+ "qwen-max": {[]float64{1.4286, 1.4286}, ChannelTypeAli},
+ "qwen-max-longcontext": {[]float64{1.4286, 1.4286}, ChannelTypeAli},
+ "qwen-vl": {[]float64{0.5715, 0.5715}, ChannelTypeAli},
+ "qwen-vl-plus": {[]float64{0.5715, 0.5715}, ChannelTypeAli},
+ // ¥0.0007 / 1k tokens
+ "text-embedding-v1": {[]float64{0.05, 0.05}, ChannelTypeAli},
+
+ // ¥0.018 / 1k tokens
+ "SparkDesk": {[]float64{1.2858, 1.2858}, ChannelTypeXunfei},
+ "SparkDesk-v1.1": {[]float64{1.2858, 1.2858}, ChannelTypeXunfei},
+ "SparkDesk-v2.1": {[]float64{1.2858, 1.2858}, ChannelTypeXunfei},
+ "SparkDesk-v3.1": {[]float64{1.2858, 1.2858}, ChannelTypeXunfei},
+ "SparkDesk-v3.5": {[]float64{1.2858, 1.2858}, ChannelTypeXunfei},
+
+ // ¥0.012 / 1k tokens
+ "360GPT_S2_V9": {[]float64{0.8572, 0.8572}, ChannelType360},
+ // ¥0.001 / 1k tokens
+ "embedding-bert-512-v1": {[]float64{0.0715, 0.0715}, ChannelType360},
+ "embedding_s1_v1": {[]float64{0.0715, 0.0715}, ChannelType360},
+ "semantic_similarity_s1_v1": {[]float64{0.0715, 0.0715}, ChannelType360},
+
+ // ¥0.1 / 1k tokens // https://cloud.tencent.com/document/product/1729/97731#e0e6be58-60c8-469f-bdeb-6c264ce3b4d0
+ "hunyuan": {[]float64{7.143, 7.143}, ChannelTypeTencent},
+
+ "Baichuan2-Turbo": {[]float64{0.5715, 0.5715}, ChannelTypeBaichuan}, // ¥0.008 / 1k tokens
+ "Baichuan2-Turbo-192k": {[]float64{1.143, 1.143}, ChannelTypeBaichuan}, // ¥0.016 / 1k tokens
+ "Baichuan2-53B": {[]float64{1.4286, 1.4286}, ChannelTypeBaichuan}, // ¥0.02 / 1k tokens
+ "Baichuan-Text-Embedding": {[]float64{0.0357, 0.0357}, ChannelTypeBaichuan}, // ¥0.0005 / 1k tokens
+
+ "abab5.5s-chat": {[]float64{0.3572, 0.3572}, ChannelTypeMiniMax}, // ¥0.005 / 1k tokens
+ "abab5.5-chat": {[]float64{1.0714, 1.0714}, ChannelTypeMiniMax}, // ¥0.015 / 1k tokens
+ "abab6-chat": {[]float64{14.2857, 14.2857}, ChannelTypeMiniMax}, // ¥0.2 / 1k tokens
+ "embo-01": {[]float64{0.0357, 0.0357}, ChannelTypeMiniMax}, // ¥0.0005 / 1k tokens
+
+ "deepseek-coder": {[]float64{0.75, 0.75}, ChannelTypeDeepseek}, // 暂定 $0.0015 / 1K tokens
+ "deepseek-chat": {[]float64{0.75, 0.75}, ChannelTypeDeepseek}, // 暂定 $0.0015 / 1K tokens
+
+ "moonshot-v1-8k": {[]float64{0.8572, 0.8572}, ChannelTypeMoonshot}, // ¥0.012 / 1K tokens
+ "moonshot-v1-32k": {[]float64{1.7143, 1.7143}, ChannelTypeMoonshot}, // ¥0.024 / 1K tokens
+ "moonshot-v1-128k": {[]float64{4.2857, 4.2857}, ChannelTypeMoonshot}, // ¥0.06 / 1K tokens
}
- ModelRatio = make(map[string]float64)
+ ModelRatio = make(map[string][]float64)
for name, modelType := range ModelTypes {
ModelRatio[name] = modelType.Ratio
}
@@ -153,18 +207,25 @@ func ModelRatio2JSONString() string {
}
+// UpdateModelRatioByJSONString replaces ModelRatio with the table encoded in
+// jsonStr. The current layout is {"model": [input, completion]}; the legacy
+// {"model": ratio} layout is still accepted and converted with
+// UpdateModeRatioFormat. On a parse error ModelRatio is left untouched and the
+// error from decoding the current layout is returned.
func UpdateModelRatioByJSONString(jsonStr string) error {
- ModelRatio = make(map[string]float64)
- return json.Unmarshal([]byte(jsonStr), &ModelRatio)
+	parsed := make(map[string][]float64)
+	if err := json.Unmarshal([]byte(jsonStr), &parsed); err != nil {
+		// Stored options written before the two-ratio format hold plain numbers.
+		legacy := make(map[string]float64)
+		if legacyErr := json.Unmarshal([]byte(jsonStr), &legacy); legacyErr != nil {
+			return err
+		}
+		parsed = UpdateModeRatioFormat(legacy)
+	}
+	// Publish only after a successful parse so a malformed payload cannot leave
+	// ModelRatio empty or half-populated.
+	ModelRatio = parsed
+	return nil
}
func MergeModelRatioByJSONString(jsonStr string) (newJsonStr string, err error) {
- inputModelRatio := make(map[string]float64)
+ isNew := false
+ inputModelRatio := make(map[string][]float64)
err = json.Unmarshal([]byte(jsonStr), &inputModelRatio)
if err != nil {
- return
+ inputModelRatioOld := make(map[string]float64)
+ err = json.Unmarshal([]byte(jsonStr), &inputModelRatioOld)
+ if err != nil {
+ return
+ }
+
+ inputModelRatio = UpdateModeRatioFormat(inputModelRatioOld)
+ isNew = true
}
- isNew := false
// 与现有的ModelRatio进行比较,如果有新增的模型,需要添加
for key, value := range ModelRatio {
if _, ok := inputModelRatio[key]; !ok {
@@ -186,14 +247,23 @@ func MergeModelRatioByJSONString(jsonStr string) (newJsonStr string, err error)
return
}
-func GetModelRatio(name string) float64 {
+// UpdateModeRatioFormat converts a legacy single-ratio table into the
+// two-element layout. For each model the old ratio becomes the input ratio and
+// the completion ratio is that value scaled by GetCompletionRatio(model).
+func UpdateModeRatioFormat(modelRatioOld map[string]float64) map[string][]float64 {
+	converted := make(map[string][]float64, len(modelRatioOld))
+	for model, inputRatio := range modelRatioOld {
+		converted[model] = []float64{inputRatio, GetCompletionRatio(model) * inputRatio}
+	}
+	return converted
+}
+
+// GetModelRatio returns the [input, completion] ratio pair for name. A
+// "qwen-*-internet" name is looked up without the "-internet" suffix. Unknown
+// models are logged and priced at {30, 30}. The result always has at least two
+// elements, so callers may index [0] and [1] without a length check.
+func GetModelRatio(name string) []float64 {
if strings.HasPrefix(name, "qwen-") && strings.HasSuffix(name, "-internet") {
name = strings.TrimSuffix(name, "-internet")
}
ratio, ok := ModelRatio[name]
if !ok {
SysError("model ratio not found: " + name)
- return 30
+ return []float64{30, 30}
}
+	// ModelRatio can be replaced from operator-supplied JSON, so an entry is not
+	// guaranteed to carry both elements.
+	switch len(ratio) {
+	case 0:
+		SysError("model ratio is empty: " + name)
+		return []float64{30, 30}
+	case 1:
+		// A single value means the same ratio applies to input and completion.
+		return []float64{ratio[0], ratio[0]}
+	}
return ratio
}
diff --git a/controller/quota.go b/controller/quota.go
index e8a083a1..5bb9f6d4 100644
--- a/controller/quota.go
+++ b/controller/quota.go
@@ -18,7 +18,7 @@ type QuotaInfo struct {
modelName string
promptTokens int
preConsumedTokens int
- modelRatio float64
+ modelRatio []float64
groupRatio float64
ratio float64
preConsumedQuota int
@@ -51,7 +51,7 @@ func (q *QuotaInfo) initQuotaInfo(groupName string) {
modelRatio := common.GetModelRatio(q.modelName)
groupRatio := common.GetGroupRatio(groupName)
preConsumedTokens := common.PreConsumedQuota
- ratio := modelRatio * groupRatio
+ ratio := modelRatio[0] * groupRatio
preConsumedQuota := int(float64(q.promptTokens+preConsumedTokens) * ratio)
q.preConsumedTokens = preConsumedTokens
@@ -97,10 +97,10 @@ func (q *QuotaInfo) preQuotaConsumption() *types.OpenAIErrorWithStatusCode {
func (q *QuotaInfo) completedQuotaConsumption(usage *types.Usage, tokenName string, ctx context.Context) error {
quota := 0
- completionRatio := common.GetCompletionRatio(q.modelName)
+	// Prompt and completion tokens are priced separately: index 0 of the model
+	// ratio applies to the prompt, index 1 to the completion, and both are
+	// scaled by the group ratio. Leaving the prompt term unscaled would bill
+	// input tokens at 1x regardless of model or group.
+	inputRatio := q.modelRatio[0] * q.groupRatio
+	completionRatio := q.modelRatio[1] * q.groupRatio
promptTokens := usage.PromptTokens
completionTokens := usage.CompletionTokens
- quota = int(math.Ceil((float64(promptTokens) + float64(completionTokens)*completionRatio) * q.ratio))
+	quota = int(math.Ceil(float64(promptTokens)*inputRatio + float64(completionTokens)*completionRatio))
if q.ratio != 0 && quota <= 0 {
quota = 1
}
@@ -128,8 +128,14 @@ func (q *QuotaInfo) completedQuotaConsumption(usage *types.Usage, tokenName stri
requestTime = int(time.Since(requestStartTime).Milliseconds())
}
}
+ var modelRatioStr string
+ if q.modelRatio[0] == q.modelRatio[1] {
+ modelRatioStr = fmt.Sprintf("%.2f", q.modelRatio[0])
+ } else {
+ modelRatioStr = fmt.Sprintf("%.2f (输入)/%.2f (输出)", q.modelRatio[0], q.modelRatio[1])
+ }
- logContent := fmt.Sprintf("模型倍率 %.2f,分组倍率 %.2f", q.modelRatio, q.groupRatio)
+ logContent := fmt.Sprintf("模型倍率 %s,分组倍率 %.2f", modelRatioStr, q.groupRatio)
model.RecordConsumeLog(ctx, q.userId, q.channelId, promptTokens, completionTokens, q.modelName, tokenName, quota, logContent, requestTime)
model.UpdateUserUsedQuotaAndRequestCount(q.userId, quota)
model.UpdateChannelUsedQuota(q.channelId, quota)
diff --git a/modelRatio.json b/modelRatio.json
new file mode 100644
index 00000000..b509e58b
--- /dev/null
+++ b/modelRatio.json
@@ -0,0 +1,97 @@
+{
+ "gpt-4": [15, 30],
+ "gpt-4-0314": [15, 30],
+ "gpt-4-0613": [15, 30],
+ "gpt-4-32k": [30, 60],
+ "gpt-4-32k-0314": [30, 60],
+ "gpt-4-32k-0613": [30, 60],
+ "gpt-4-preview": [5, 15],
+ "gpt-4-1106-preview": [5, 15],
+ "gpt-4-0125-preview": [5, 15],
+ "gpt-4-vision-preview": [5, 15],
+ "gpt-3.5-turbo": [0.25, 0.75],
+ "gpt-3.5-turbo-0125": [0.25, 0.75],
+ "gpt-3.5-turbo-0301": [0.75, 1],
+ "gpt-3.5-turbo-0613": [0.75, 1],
+ "gpt-3.5-turbo-instruct": [0.75, 1],
+ "gpt-3.5-turbo-16k": [1.5, 2],
+ "gpt-3.5-turbo-16k-0613": [1.5, 2],
+ "gpt-3.5-turbo-1106": [0.5, 1],
+ "davinci-002": [1, 1],
+ "babbage-002": [0.2, 0.2],
+ "text-ada-001": [0.2, 0.2],
+ "text-babbage-001": [0.25, 0.25],
+ "text-curie-001": [1, 1],
+ "text-davinci-002": [10, 10],
+ "text-davinci-003": [10, 10],
+ "text-davinci-edit-001": [10, 10],
+ "code-davinci-edit-001": [10, 10],
+ "whisper-1": [15, 15],
+ "tts-1": [7.5, 7.5],
+ "tts-1-1106": [7.5, 7.5],
+ "tts-1-hd": [15, 15],
+ "tts-1-hd-1106": [15, 15],
+ "davinci": [10, 10],
+ "curie": [10, 10],
+ "babbage": [10, 10],
+ "ada": [10, 10],
+ "text-embedding-ada-002": [0.05, 0.05],
+ "text-embedding-3-small": [0.01, 0.01],
+ "text-embedding-3-large": [0.065, 0.065],
+ "text-search-ada-doc-001": [10, 10],
+ "text-moderation-stable": [0.1, 0.1],
+ "text-moderation-latest": [0.1, 0.1],
+ "dall-e-2": [8, 8],
+ "dall-e-3": [20, 20],
+ "claude-instant-1": [0.4, 1.2],
+ "claude-2": [4, 12],
+ "claude-2.0": [4, 12],
+ "claude-2.1": [4, 12],
+ "ERNIE-Bot": [0.8572, 0.8572],
+ "ERNIE-Bot-8k": [1.7143, 3.4286],
+ "ERNIE-Bot-turbo": [0.5715, 0.5715],
+ "ERNIE-Bot-4": [8.572, 8.572],
+ "Embedding-V1": [0.1429, 0.1429],
+ "PaLM-2": [1, 1],
+ "gemini-pro": [1, 1],
+ "gemini-pro-vision": [1, 1],
+ "chatglm_turbo": [0.3572, 0.3572],
+ "chatglm_std": [0.3572, 0.3572],
+ "glm-3-turbo": [0.3572, 0.3572],
+ "chatglm_pro": [0.7143, 0.7143],
+ "chatglm_lite": [0.1429, 0.1429],
+ "glm-4": [7.143, 7.143],
+ "glm-4v": [7.143, 7.143],
+ "embedding-2": [0.0357, 0.0357],
+ "cogview-3": [17.8571, 17.8571],
+ "qwen-turbo": [0.5715, 0.5715],
+ "qwen-plus": [1.4286, 1.4286],
+ "qwen-max": [1.4286, 1.4286],
+ "qwen-max-longcontext": [1.4286, 1.4286],
+ "qwen-vl": [0.5715, 0.5715],
+ "qwen-vl-plus": [0.5715, 0.5715],
+ "text-embedding-v1": [0.05, 0.05],
+ "SparkDesk": [1.2858, 1.2858],
+ "SparkDesk-v1.1": [1.2858, 1.2858],
+ "SparkDesk-v2.1": [1.2858, 1.2858],
+ "SparkDesk-v3.1": [1.2858, 1.2858],
+ "SparkDesk-v3.5": [1.2858, 1.2858],
+ "360GPT_S2_V9": [0.8572, 0.8572],
+ "embedding-bert-512-v1": [0.0715, 0.0715],
+ "embedding_s1_v1": [0.0715, 0.0715],
+ "semantic_similarity_s1_v1": [0.0715, 0.0715],
+ "hunyuan": [7.143, 7.143],
+ "Baichuan2-Turbo": [0.5715, 0.5715],
+ "Baichuan2-Turbo-192k": [1.143, 1.143],
+ "Baichuan2-53B": [1.4286, 1.4286],
+ "Baichuan-Text-Embedding": [0.0357, 0.0357],
+ "abab5.5s-chat": [0.3572, 0.3572],
+ "abab5.5-chat": [1.0714, 1.0714],
+ "abab6-chat": [14.2857, 14.2857],
+ "embo-01": [0.0357, 0.0357],
+ "deepseek-coder": [0.75, 0.75],
+ "deepseek-chat": [0.75, 0.75],
+ "moonshot-v1-8k": [0.8572, 0.8572],
+ "moonshot-v1-32k": [1.7143, 1.7143],
+ "moonshot-v1-128k": [4.2857, 4.2857]
+}
diff --git a/providers/openai/base.go b/providers/openai/base.go
index eac3b502..3f11e977 100644
--- a/providers/openai/base.go
+++ b/providers/openai/base.go
@@ -85,13 +85,6 @@ func (p *OpenAIProvider) GetFullRequestURL(requestURL string, modelName string)
if p.IsAzure {
apiVersion := p.Channel.Other
- // 以-分割,检测modelName 最后一个元素是否为4位数字,必须是数字,如果是则删除modelName最后一个元素
- modelNameSlice := strings.Split(modelName, "-")
- lastModelNameSlice := modelNameSlice[len(modelNameSlice)-1]
- modelNum := common.String2Int(lastModelNameSlice)
- if modelNum > 999 && modelNum < 10000 {
- modelName = strings.TrimSuffix(modelName, "-"+lastModelNameSlice)
- }
// 检测模型是是否包含 . 如果有则直接去掉
modelName = strings.Replace(modelName, ".", "", -1)
diff --git a/providers/xunfei/base.go b/providers/xunfei/base.go
index fdaf8647..e6a7975d 100644
--- a/providers/xunfei/base.go
+++ b/providers/xunfei/base.go
@@ -68,7 +68,7 @@ func (p *XunfeiProvider) GetFullRequestURL(requestURL string, modelName string)
if len(splits) != 3 {
return ""
}
- domain, authUrl := p.getXunfeiAuthUrl(splits[2], splits[1])
+ domain, authUrl := p.getXunfeiAuthUrl(splits[2], splits[1], modelName)
p.domain = domain
p.apiId = splits[0]
@@ -76,20 +76,47 @@ func (p *XunfeiProvider) GetFullRequestURL(requestURL string, modelName string)
return authUrl
}
-func (p *XunfeiProvider) getXunfeiAuthUrl(apiKey string, apiSecret string) (string, string) {
+// getAPIVersion resolves the Spark API version for a request. Precedence: the
+// "api-version" query parameter, then the suffix of a model name that splits
+// into exactly two parts on "-" (e.g. "SparkDesk-v3.5" yields "v3.5"), then
+// the channel's Other field, and finally the default "v1.1", which is logged.
+func (p *XunfeiProvider) getAPIVersion(modelName string) string {
query := p.Context.Request.URL.Query()
apiVersion := query.Get("api-version")
- if apiVersion == "" {
- apiVersion = p.Channel.Other
+ if apiVersion != "" {
+ return apiVersion
}
- if apiVersion == "" {
- apiVersion = "v1.1"
- common.SysLog("api_version not found, use default: " + apiVersion)
+ // NOTE(review): any model name containing exactly one "-" is accepted here,
+ // not only "SparkDesk-<version>" — confirm this is intended.
+ parts := strings.Split(modelName, "-")
+ if len(parts) == 2 {
+ apiVersion = parts[1]
+ return apiVersion
}
- domain := "general"
- if apiVersion != "v1.1" {
- domain += strings.Split(apiVersion, ".")[0]
+
+ apiVersion = p.Channel.Other
+ if apiVersion != "" {
+ return apiVersion
}
+ apiVersion = "v1.1"
+
+ common.SysLog("api_version not found, use default: " + apiVersion)
+ return apiVersion
+}
+
+// apiVersion2domain maps a Spark API version to the matching "domain" value.
+// The four listed versions have fixed names; any other version falls back to
+// "general" followed by the version string itself.
+//
+// https://www.xfyun.cn/doc/spark/Web.html#_1-%E6%8E%A5%E5%8F%A3%E8%AF%B4%E6%98%8E
+func apiVersion2domain(apiVersion string) string {
+	var domain string
+	switch apiVersion {
+	case "v1.1":
+		domain = "general"
+	case "v2.1":
+		domain = "generalv2"
+	case "v3.1":
+		domain = "generalv3"
+	case "v3.5":
+		domain = "generalv3.5"
+	default:
+		domain = "general" + apiVersion
+	}
+	return domain
+}
+
+// getXunfeiAuthUrl returns the Spark domain and the auth URL for modelName.
+// The API version chosen by getAPIVersion selects both the domain (via
+// apiVersion2domain) and the "<BaseURL>/<version>/chat" endpoint that is
+// handed to buildXunfeiAuthUrl together with the key and secret.
+func (p *XunfeiProvider) getXunfeiAuthUrl(apiKey string, apiSecret string, modelName string) (string, string) {
+ apiVersion := p.getAPIVersion(modelName)
+ domain := apiVersion2domain(apiVersion)
+
authUrl := p.buildXunfeiAuthUrl(fmt.Sprintf("%s/%s/chat", p.Config.BaseURL, apiVersion), apiKey, apiSecret)
return domain, authUrl
}
diff --git a/web/package.json b/web/package.json
index 8918ddbc..e376b4b8 100644
--- a/web/package.json
+++ b/web/package.json
@@ -13,6 +13,7 @@
"@mui/material": "^5.8.6",
"@mui/system": "^5.8.6",
"@mui/utils": "^5.8.6",
+ "@mui/x-data-grid": "^6.19.4",
"@mui/x-date-pickers": "^6.18.5",
"@tabler/icons-react": "^2.44.0",
"apexcharts": "^3.35.3",
diff --git a/web/src/views/Channel/component/NameLabel.js b/web/src/views/Channel/component/NameLabel.js
index 061ed617..612a9356 100644
--- a/web/src/views/Channel/component/NameLabel.js
+++ b/web/src/views/Channel/component/NameLabel.js
@@ -30,6 +30,9 @@ const NameLabel = ({ name, models }) => {
onClick={() => {
copy(item, '模型名称');
}}
+ onTouchEnd={() => {
+ copy(item, '模型名称');
+ }}
>
{item}
@@ -39,6 +42,7 @@ const NameLabel = ({ name, models }) => {
}
placement="top"
+ enterTouchDelay={0}
>
{name}
diff --git a/web/src/views/Channel/type/Config.js b/web/src/views/Channel/type/Config.js
index c9facb8d..f97b6f98 100644
--- a/web/src/views/Channel/type/Config.js
+++ b/web/src/views/Channel/type/Config.js
@@ -109,7 +109,7 @@ const typeConfig = {
other: '版本号'
},
input: {
- models: ['SparkDesk']
+ models: ['SparkDesk', 'SparkDesk-v1.1', 'SparkDesk-v2.1', 'SparkDesk-v3.1', 'SparkDesk-v3.5']
},
prompt: {
key: '按照如下格式输入:APPID|APISecret|APIKey',
diff --git a/web/src/views/Setting/component/ModelRationDataGrid.js b/web/src/views/Setting/component/ModelRationDataGrid.js
new file mode 100644
index 00000000..edac3ff0
--- /dev/null
+++ b/web/src/views/Setting/component/ModelRationDataGrid.js
@@ -0,0 +1,275 @@
+import PropTypes from 'prop-types';
+import { useState, useEffect, useMemo, useCallback } from 'react';
+import { GridRowModes, DataGrid, GridToolbarContainer, GridActionsCellItem } from '@mui/x-data-grid';
+import { Box, Button } from '@mui/material';
+import AddIcon from '@mui/icons-material/Add';
+import EditIcon from '@mui/icons-material/Edit';
+import DeleteIcon from '@mui/icons-material/DeleteOutlined';
+import SaveIcon from '@mui/icons-material/Save';
+import CancelIcon from '@mui/icons-material/Close';
+import { showError } from 'utils/common';
+
+// isInvalidRatio reports whether value cannot be used as a ratio: blank,
+// null/undefined, not numeric, or negative.
+function isInvalidRatio(value) {
+  if (value === '' || value === null || value === undefined) {
+    return true;
+  }
+  const num = Number(value);
+  return Number.isNaN(num) || num < 0;
+}
+
+// validation checks an edited row against the current rows and returns an
+// error message, or false when the row is valid.
+function validation(row, rows) {
+  if (typeof row.model !== 'string' || row.model.trim() === '') {
+    return '模型名称不能为空';
+  }
+
+  // The model name must be unique. The row being edited is itself part of
+  // `rows` (new rows included), so only rows with a different id can conflict.
+  if (rows.some((r) => r.model === row.model && r.id !== row.id)) {
+    return '模型名称不能重复';
+  }
+
+  if (isInvalidRatio(row.input)) {
+    return '输入倍率必须大于等于0';
+  }
+  if (isInvalidRatio(row.complete)) {
+    return '完成倍率必须大于等于0';
+  }
+  return false;
+}
+
+// randomId returns a short base-36 string used as a temporary row id.
+function randomId() {
+  // slice(2, 11) drops the leading "0." and keeps at most nine characters,
+  // the same range the deprecated substr(2, 9) selected.
+  return Math.random().toString(36).slice(2, 11);
+}
+
+// EditToolbar renders the grid toolbar whose button prepends an empty row and
+// opens it in edit mode.
+function EditToolbar({ setRows, setRowModesModel }) {
+ const handleClick = () => {
+ const id = randomId();
+ setRows((oldRows) => [{ id, model: '', input: 0, complete: 0, isNew: true }, ...oldRows]);
+ setRowModesModel((oldModel) => ({
+ // fieldToFocus must name an existing column; this grid's name column is 'model'.
+ [id]: { mode: GridRowModes.Edit, fieldToFocus: 'model' },
+ ...oldModel
+ }));
+ };
+
+ return (
+
+ } onClick={handleClick}>
+ 新增
+
+
+ );
+}
+
+EditToolbar.propTypes = {
+ setRows: PropTypes.func.isRequired,
+ setRowModesModel: PropTypes.func.isRequired
+};
+
+// ModelRationDataGrid edits the model ratio table as rows of
+// { model, input, complete }. `ratio` is the JSON text of the table; every
+// committed change is serialized back to JSON and reported through `onChange`
+// under the field name 'ModelRatio'.
+const ModelRationDataGrid = ({ ratio, onChange }) => {
+ const [rows, setRows] = useState([]);
+ const [rowModesModel, setRowModesModel] = useState({});
+
+ // Serialize the given rows to {"model": [input, complete]} and hand the text to the parent.
+ const setRatio = useCallback(
+ (ratioRow) => {
+ let ratioJson = {};
+ ratioRow.forEach((row) => {
+ ratioJson[row.model] = [row.input, row.complete];
+ });
+ onChange({ target: { name: 'ModelRatio', value: JSON.stringify(ratioJson, null, 2) } });
+ },
+ [onChange]
+ );
+
+ const handleEditClick = useCallback(
+ (id) => () => {
+ setRowModesModel({ ...rowModesModel, [id]: { mode: GridRowModes.Edit } });
+ },
+ [rowModesModel]
+ );
+
+ const handleSaveClick = useCallback(
+ (id) => () => {
+ setRowModesModel({ ...rowModesModel, [id]: { mode: GridRowModes.View } });
+ },
+ [rowModesModel]
+ );
+
+ const handleDeleteClick = useCallback(
+ (id) => () => {
+ setRatio(rows.filter((row) => row.id !== id));
+ },
+ [rows, setRatio]
+ );
+
+ const handleCancelClick = useCallback(
+ (id) => () => {
+ setRowModesModel({
+ ...rowModesModel,
+ [id]: { mode: GridRowModes.View, ignoreModifications: true }
+ });
+
+ // A row that was never saved is dropped when its edit is cancelled.
+ const editedRow = rows.find((row) => row.id === id);
+ if (editedRow.isNew) {
+ setRows(rows.filter((row) => row.id !== id));
+ }
+ },
+ [rowModesModel, rows]
+ );
+
+ // The second argument is the single row as it was before the edit.
+ const processRowUpdate = (newRow, oldRow) => {
+ if (!newRow.isNew && newRow.model === oldRow.model && newRow.input === oldRow.input && newRow.complete === oldRow.complete) {
+ return oldRow;
+ }
+ const updatedRow = { ...newRow, isNew: false };
+ const error = validation(updatedRow, rows);
+ if (error) {
+ return Promise.reject(new Error(error));
+ }
+ setRatio(rows.map((row) => (row.id === newRow.id ? updatedRow : row)));
+ return updatedRow;
+ };
+
+ const handleProcessRowUpdateError = useCallback((error) => {
+ showError(error.message);
+ }, []);
+
+ const handleRowModesModelChange = (newRowModesModel) => {
+ setRowModesModel(newRowModesModel);
+ };
+
+ const modelRatioColumns = useMemo(
+ () => [
+ {
+ field: 'model',
+ sortable: true,
+ headerName: '模型名称',
+ width: 220,
+ editable: true,
+ hideable: false
+ },
+ {
+ field: 'input',
+ sortable: false,
+ headerName: '输入倍率',
+ width: 150,
+ type: 'number',
+ editable: true,
+ valueFormatter: (params) => {
+ if (params.value == null) {
+ return '';
+ }
+ return `$${parseFloat(params.value * 0.002).toFixed(4)} / ¥${parseFloat(params.value * 0.014).toFixed(4)}`;
+ },
+ hideable: false
+ },
+ {
+ field: 'complete',
+ sortable: false,
+ headerName: '完成倍率',
+ width: 150,
+ type: 'number',
+ editable: true,
+ valueFormatter: (params) => {
+ if (params.value == null) {
+ return '';
+ }
+ return `$${parseFloat(params.value * 0.002).toFixed(4)} / ¥${parseFloat(params.value * 0.014).toFixed(4)}`;
+ },
+ hideable: false
+ },
+ {
+ field: 'actions',
+ type: 'actions',
+ headerName: '操作',
+ width: 100,
+ cellClassName: 'actions',
+ hideable: false,
+ getActions: ({ id }) => {
+ const isInEditMode = rowModesModel[id]?.mode === GridRowModes.Edit;
+
+ if (isInEditMode) {
+ return [
+ }
+ key={'Save-' + id}
+ label="Save"
+ sx={{
+ color: 'primary.main'
+ }}
+ onClick={handleSaveClick(id)}
+ />,
+ }
+ key={'Cancel-' + id}
+ label="Cancel"
+ className="textPrimary"
+ onClick={handleCancelClick(id)}
+ color="inherit"
+ />
+ ];
+ }
+
+ return [
+ }
+ label="Edit"
+ className="textPrimary"
+ onClick={handleEditClick(id)}
+ color="inherit"
+ />,
+ }
+ label="Delete"
+ onClick={handleDeleteClick(id)}
+ color="inherit"
+ />
+ ];
+ }
+ }
+ ],
+ [handleEditClick, handleSaveClick, handleDeleteClick, handleCancelClick, rowModesModel]
+ );
+
+  useEffect(() => {
+    // `ratio` is operator-edited text: it may be invalid JSON or still use the
+    // legacy {"model": ratio} layout, so parse defensively instead of throwing
+    // inside the effect.
+    let itemJson;
+    try {
+      itemJson = JSON.parse(ratio);
+    } catch (e) {
+      showError('模型倍率不是合法的 JSON 字符串');
+      return;
+    }
+    if (itemJson === null || typeof itemJson !== 'object' || Array.isArray(itemJson)) {
+      showError('模型倍率不是合法的 JSON 字符串');
+      return;
+    }
+
+    const modelRatioList = Object.keys(itemJson).map((key, index) => {
+      const value = itemJson[key];
+      // A bare number (or a one-element array) is a single ratio: use it for
+      // both the input and the completion column.
+      const [input, complete = input] = Array.isArray(value) ? value : [value];
+      return { id: index, model: key, input, complete };
+    });
+    setRows(modelRatioList);
+  }, [ratio]);
+
+ return (
+
+
+
+ );
+};
+
+ModelRationDataGrid.propTypes = {
+ ratio: PropTypes.string.isRequired,
+ onChange: PropTypes.func.isRequired
+};
+
+export default ModelRationDataGrid;
diff --git a/web/src/views/Setting/component/OperationSetting.js b/web/src/views/Setting/component/OperationSetting.js
index 27af978c..ec2bc715 100644
--- a/web/src/views/Setting/component/OperationSetting.js
+++ b/web/src/views/Setting/component/OperationSetting.js
@@ -1,11 +1,12 @@
import { useState, useEffect } from 'react';
import SubCard from 'ui-component/cards/SubCard';
-import { Stack, FormControl, InputLabel, OutlinedInput, Checkbox, Button, FormControlLabel, TextField } from '@mui/material';
+import { Stack, FormControl, InputLabel, OutlinedInput, Checkbox, Button, FormControlLabel, TextField, Alert, Switch } from '@mui/material';
import { showSuccess, showError, verifyJSON } from 'utils/common';
import { API } from 'utils/api';
import { AdapterDayjs } from '@mui/x-date-pickers/AdapterDayjs';
import { LocalizationProvider } from '@mui/x-date-pickers/LocalizationProvider';
import { DateTimePicker } from '@mui/x-date-pickers/DateTimePicker';
+import ModelRationDataGrid from './ModelRationDataGrid';
import dayjs from 'dayjs';
require('dayjs/locale/zh-cn');
@@ -32,6 +33,7 @@ const OperationSetting = () => {
RetryTimes: 0
});
const [originInputs, setOriginInputs] = useState({});
+ const [newModelRatioView, setNewModelRatioView] = useState(false);
let [loading, setLoading] = useState(false);
let [historyTimestamp, setHistoryTimestamp] = useState(now.getTime() / 1000 - 30 * 24 * 3600); // a month ago new Date().getTime() / 1000 + 3600
@@ -431,21 +433,6 @@ const OperationSetting = () => {
-
-
-
-
{
placeholder="为一个 JSON 文本,键为分组名称,值为倍率"
/>
+
+
+
+ 配置格式为 JSON 文本,键为模型名称;值第一位为输入倍率,第二位为完成倍率,如果只有单一倍率则两者值相同。
+
美元:1 === $0.002 / 1K tokens 人民币: 1 === ¥0.014 / 1k tokens
+
例如:
gpt-4 输入: $0.03 / 1K tokens 完成:$0.06 / 1K tokens
+ 0.03 / 0.002 = 15, 0.06 / 0.002 = 30,即输入倍率为 15,完成倍率为 30
+
+ {
+ setNewModelRatioView(!newModelRatioView);
+ }}
+ />
+ }
+ label="使用新编辑器"
+ />
+
+
+ {newModelRatioView ? (
+
+ ) : (
+
+
+
+ )}