diff --git a/common/model-ratio.go b/common/model-ratio.go index 91d8043e..d2824bce 100644 --- a/common/model-ratio.go +++ b/common/model-ratio.go @@ -7,7 +7,7 @@ import ( ) type ModelType struct { - Ratio float64 + Ratio []float64 Type int } @@ -20,102 +20,156 @@ var ModelTypes map[string]ModelType // TODO: when a new api is enabled, check the pricing here // 1 === $0.002 / 1K tokens // 1 === ¥0.014 / 1k tokens -var ModelRatio map[string]float64 +var ModelRatio map[string][]float64 func init() { ModelTypes = map[string]ModelType{ - "gpt-4": {15, ChannelTypeOpenAI}, - "gpt-4-0314": {15, ChannelTypeOpenAI}, - "gpt-4-0613": {15, ChannelTypeOpenAI}, - "gpt-4-32k": {30, ChannelTypeOpenAI}, - "gpt-4-32k-0314": {30, ChannelTypeOpenAI}, - "gpt-4-32k-0613": {30, ChannelTypeOpenAI}, - "gpt-4-preview": {5, ChannelTypeOpenAI}, // $0.01 / 1K tokens - "gpt-4-1106-preview": {5, ChannelTypeOpenAI}, // $0.01 / 1K tokens - "gpt-4-0125-preview": {5, ChannelTypeOpenAI}, // $0.01 / 1K tokens - "gpt-4-vision-preview": {5, ChannelTypeOpenAI}, // $0.01 / 1K tokens - "gpt-3.5-turbo": {0.75, ChannelTypeOpenAI}, // $0.0015 / 1K tokens - "gpt-3.5-turbo-0301": {0.75, ChannelTypeOpenAI}, - "gpt-3.5-turbo-0613": {0.75, ChannelTypeOpenAI}, - "gpt-3.5-turbo-16k": {1.5, ChannelTypeOpenAI}, // $0.003 / 1K tokens - "gpt-3.5-turbo-16k-0613": {1.5, ChannelTypeOpenAI}, - "gpt-3.5-turbo-instruct": {0.75, ChannelTypeOpenAI}, // $0.0015 / 1K tokens - "gpt-3.5-turbo-1106": {0.5, ChannelTypeOpenAI}, // $0.001 / 1K tokens - "gpt-3.5-turbo-0125": {0.25, ChannelTypeOpenAI}, // $0.0005 / $0.0015 / 1K tokens - "davinci-002": {1, ChannelTypeOpenAI}, // $0.002 / 1K tokens - "babbage-002": {0.2, ChannelTypeOpenAI}, // $0.002 / 1K tokens - "text-ada-001": {0.2, ChannelTypeOpenAI}, - "text-babbage-001": {0.25, ChannelTypeOpenAI}, - "text-curie-001": {1, ChannelTypeOpenAI}, - "text-davinci-002": {10, ChannelTypeOpenAI}, - "text-davinci-003": {10, ChannelTypeOpenAI}, - "text-davinci-edit-001": {10, ChannelTypeOpenAI}, - "code-davinci-edit-001": {10, ChannelTypeOpenAI}, - "whisper-1": {15, ChannelTypeOpenAI}, // $0.006 / minute -> $0.006 / 150 words -> $0.006 / 200 tokens -> $0.03 / 1k tokens - "tts-1": {7.5, ChannelTypeOpenAI}, // $0.015 / 1K characters - "tts-1-1106": {7.5, ChannelTypeOpenAI}, - "tts-1-hd": {15, ChannelTypeOpenAI}, // $0.030 / 1K characters - "tts-1-hd-1106": {15, ChannelTypeOpenAI}, - "davinci": {10, ChannelTypeOpenAI}, - "curie": {10, ChannelTypeOpenAI}, - "babbage": {10, ChannelTypeOpenAI}, - "ada": {10, ChannelTypeOpenAI}, - "text-embedding-ada-002": {0.05, ChannelTypeOpenAI}, - "text-embedding-3-small": {0.01, ChannelTypeOpenAI}, // $0.00002 / 1K tokens - "text-embedding-3-large": {0.065, ChannelTypeOpenAI}, // $0.00013 / 1K tokens - "text-search-ada-doc-001": {10, ChannelTypeOpenAI}, - "text-moderation-stable": {0.1, ChannelTypeOpenAI}, - "text-moderation-latest": {0.1, ChannelTypeOpenAI}, - "dall-e-2": {8, ChannelTypeOpenAI}, // $0.016 - $0.020 / image - "dall-e-3": {20, ChannelTypeOpenAI}, // $0.040 - $0.120 / image - "claude-instant-1": {0.815, ChannelTypeAnthropic}, // $1.63 / 1M tokens - "claude-2": {5.51, ChannelTypeAnthropic}, // $11.02 / 1M tokens - "claude-2.0": {5.51, ChannelTypeAnthropic}, // $11.02 / 1M tokens - "claude-2.1": {5.51, ChannelTypeAnthropic}, // $11.02 / 1M tokens - "ERNIE-Bot": {0.8572, ChannelTypeBaidu}, // ¥0.012 / 1k tokens - "ERNIE-Bot-turbo": {0.5715, ChannelTypeBaidu}, // ¥0.008 / 1k tokens - "ERNIE-Bot-4": {8.572, ChannelTypeBaidu}, // ¥0.12 / 1k tokens - "Embedding-V1": {0.1429, ChannelTypeBaidu}, // ¥0.002 / 1k tokens - "PaLM-2": {1, ChannelTypePaLM}, - "gemini-pro": {1, ChannelTypeGemini}, // $0.00025 / 1k characters -> $0.001 / 1k tokens - "gemini-pro-vision": {1, ChannelTypeGemini}, // $0.00025 / 1k characters -> $0.001 / 1k tokens - "chatglm_turbo": {0.3572, ChannelTypeZhipu}, // ¥0.005 / 1k tokens - "chatglm_pro": {0.7143, ChannelTypeZhipu}, // ¥0.01 / 1k tokens - "chatglm_std": {0.3572, ChannelTypeZhipu}, // ¥0.005 / 1k tokens - "chatglm_lite": {0.1429, ChannelTypeZhipu}, // ¥0.002 / 1k tokens - "glm-3-turbo": {0.3572, ChannelTypeZhipu}, // ¥0.005 / 1k tokens - "glm-4": {7.143, ChannelTypeZhipu}, // ¥0.1 / 1k tokens - "glm-4v": {7.143, ChannelTypeZhipu}, // ¥0.1 / 1k tokens - "embedding-2": {0.0357, ChannelTypeZhipu}, // ¥0.0005 / 1k tokens - "cogview-3": {17.8571, ChannelTypeZhipu}, // ¥0.25 / 1张图片 - "qwen-turbo": {0.5715, ChannelTypeAli}, // ¥0.008 / 1k tokens // https://help.aliyun.com/zh/dashscope/developer-reference/tongyi-thousand-questions-metering-and-billing - "qwen-plus": {1.4286, ChannelTypeAli}, // ¥0.02 / 1k tokens - "qwen-max": {1.4286, ChannelTypeAli}, // ¥0.02 / 1k tokens - "qwen-max-longcontext": {1.4286, ChannelTypeAli}, // ¥0.02 / 1k tokens - "qwen-vl-plus": {0.5715, ChannelTypeAli}, // ¥0.008 / 1k tokens - "text-embedding-v1": {0.05, ChannelTypeAli}, // ¥0.0007 / 1k tokens - "SparkDesk": {1.2858, ChannelTypeXunfei}, // ¥0.018 / 1k tokens - "360GPT_S2_V9": {0.8572, ChannelType360}, // ¥0.012 / 1k tokens - "embedding-bert-512-v1": {0.0715, ChannelType360}, // ¥0.001 / 1k tokens - "embedding_s1_v1": {0.0715, ChannelType360}, // ¥0.001 / 1k tokens - "semantic_similarity_s1_v1": {0.0715, ChannelType360}, // ¥0.001 / 1k tokens - "hunyuan": {7.143, ChannelTypeTencent}, // ¥0.1 / 1k tokens // https://cloud.tencent.com/document/product/1729/97731#e0e6be58-60c8-469f-bdeb-6c264ce3b4d0 - "Baichuan2-Turbo": {0.5715, ChannelTypeBaichuan}, // ¥0.008 / 1k tokens - "Baichuan2-Turbo-192k": {1.143, ChannelTypeBaichuan}, // ¥0.016 / 1k tokens - "Baichuan2-53B": {1.4286, ChannelTypeBaichuan}, // ¥0.02 / 1k tokens - "Baichuan-Text-Embedding": {0.0357, ChannelTypeBaichuan}, // ¥0.0005 / 1k tokens - "abab5.5s-chat": {0.3572, ChannelTypeMiniMax}, // ¥0.005 / 1k tokens - "abab5.5-chat": {1.0714, ChannelTypeMiniMax}, // ¥0.015 / 1k tokens - "abab6-chat": {14.2857, ChannelTypeMiniMax}, // ¥0.2 / 1k tokens - "embo-01": {0.0357, ChannelTypeMiniMax}, // ¥0.0005 / 1k tokens - "deepseek-coder": {0.75, ChannelTypeDeepseek}, // 暂定 $0.0015 / 1K tokens - "deepseek-chat": {0.75, ChannelTypeDeepseek}, // 暂定 $0.0015 / 1K tokens - "moonshot-v1-8k": {0.8572, ChannelTypeMoonshot}, // ¥0.012 / 1K tokens - "moonshot-v1-32k": {1.7143, ChannelTypeMoonshot}, // ¥0.024 / 1K tokens - "moonshot-v1-128k": {4.2857, ChannelTypeMoonshot}, // ¥0.06 / 1K tokens + // $0.03 / 1K tokens $0.06 / 1K tokens + "gpt-4": {[]float64{15, 30}, ChannelTypeOpenAI}, + "gpt-4-0314": {[]float64{15, 30}, ChannelTypeOpenAI}, + "gpt-4-0613": {[]float64{15, 30}, ChannelTypeOpenAI}, + // $0.06 / 1K tokens $0.12 / 1K tokens + "gpt-4-32k": {[]float64{30, 60}, ChannelTypeOpenAI}, + "gpt-4-32k-0314": {[]float64{30, 60}, ChannelTypeOpenAI}, + "gpt-4-32k-0613": {[]float64{30, 60}, ChannelTypeOpenAI}, + // $0.01 / 1K tokens $0.03 / 1K tokens + "gpt-4-preview": {[]float64{5, 15}, ChannelTypeOpenAI}, + "gpt-4-1106-preview": {[]float64{5, 15}, ChannelTypeOpenAI}, + "gpt-4-0125-preview": {[]float64{5, 15}, ChannelTypeOpenAI}, + "gpt-4-vision-preview": {[]float64{5, 15}, ChannelTypeOpenAI}, + // $0.0005 / 1K tokens $0.0015 / 1K tokens + "gpt-3.5-turbo": {[]float64{0.25, 0.75}, ChannelTypeOpenAI}, + "gpt-3.5-turbo-0125": {[]float64{0.25, 0.75}, ChannelTypeOpenAI}, + // $0.0015 / 1K tokens $0.002 / 1K tokens + "gpt-3.5-turbo-0301": {[]float64{0.75, 1}, ChannelTypeOpenAI}, + "gpt-3.5-turbo-0613": {[]float64{0.75, 1}, ChannelTypeOpenAI}, + "gpt-3.5-turbo-instruct": {[]float64{0.75, 1}, ChannelTypeOpenAI}, + // $0.003 / 1K tokens $0.004 / 1K tokens + "gpt-3.5-turbo-16k": {[]float64{1.5, 2}, ChannelTypeOpenAI}, + "gpt-3.5-turbo-16k-0613": {[]float64{1.5, 2}, ChannelTypeOpenAI}, + // $0.001 / 1K tokens $0.002 / 1K tokens + "gpt-3.5-turbo-1106": {[]float64{0.5, 1}, ChannelTypeOpenAI}, + // $0.0020 / 1K tokens + "davinci-002": {[]float64{1, 1}, ChannelTypeOpenAI}, + // $0.0004 / 1K tokens + "babbage-002": {[]float64{0.2, 0.2}, ChannelTypeOpenAI}, + "text-ada-001": {[]float64{0.2, 0.2}, ChannelTypeOpenAI}, + "text-babbage-001": {[]float64{0.25, 0.25}, ChannelTypeOpenAI}, + "text-curie-001": {[]float64{1, 1}, ChannelTypeOpenAI}, + "text-davinci-002": {[]float64{10, 10}, ChannelTypeOpenAI}, + "text-davinci-003": {[]float64{10, 10}, ChannelTypeOpenAI}, + "text-davinci-edit-001": {[]float64{10, 10}, ChannelTypeOpenAI}, + "code-davinci-edit-001": {[]float64{10, 10}, ChannelTypeOpenAI}, + // $0.006 / minute -> $0.006 / 150 words -> $0.006 / 200 tokens -> $0.03 / 1k tokens + "whisper-1": {[]float64{15, 15}, ChannelTypeOpenAI}, + // $0.015 / 1K characters + "tts-1": {[]float64{7.5, 7.5}, ChannelTypeOpenAI}, + "tts-1-1106": {[]float64{7.5, 7.5}, ChannelTypeOpenAI}, + // $0.030 / 1K characters + "tts-1-hd": {[]float64{15, 15}, ChannelTypeOpenAI}, + "tts-1-hd-1106": {[]float64{15, 15}, ChannelTypeOpenAI}, + "davinci": {[]float64{10, 10}, ChannelTypeOpenAI}, + "curie": {[]float64{10, 10}, ChannelTypeOpenAI}, + "babbage": {[]float64{10, 10}, ChannelTypeOpenAI}, + "ada": {[]float64{10, 10}, ChannelTypeOpenAI}, + "text-embedding-ada-002": {[]float64{0.05, 0.05}, ChannelTypeOpenAI}, + // $0.00002 / 1K tokens + "text-embedding-3-small": {[]float64{0.01, 0.01}, ChannelTypeOpenAI}, + // $0.00013 / 1K tokens + "text-embedding-3-large": {[]float64{0.065, 0.065}, ChannelTypeOpenAI}, + "text-search-ada-doc-001": {[]float64{10, 10}, ChannelTypeOpenAI}, + "text-moderation-stable": {[]float64{0.1, 0.1}, ChannelTypeOpenAI}, + "text-moderation-latest": {[]float64{0.1, 0.1}, ChannelTypeOpenAI}, + // $0.016 - $0.020 / image + "dall-e-2": {[]float64{8, 8}, ChannelTypeOpenAI}, + // $0.040 - $0.120 / image + "dall-e-3": {[]float64{20, 20}, ChannelTypeOpenAI}, + + // $0.80/million tokens $2.40/million tokens + "claude-instant-1": {[]float64{0.4, 1.2}, ChannelTypeAnthropic}, + // $8.00/million tokens $24.00/million tokens + "claude-2": {[]float64{4, 12}, ChannelTypeAnthropic}, + "claude-2.0": {[]float64{4, 12}, ChannelTypeAnthropic}, + "claude-2.1": {[]float64{4, 12}, ChannelTypeAnthropic}, + + // ¥0.012 / 1k tokens ¥0.012 / 1k tokens + "ERNIE-Bot": {[]float64{0.8572, 0.8572}, ChannelTypeBaidu}, + // 0.024元/千tokens 0.048元/千tokens + "ERNIE-Bot-8k": {[]float64{1.7143, 3.4286}, ChannelTypeBaidu}, + // ¥0.008 / 1k tokens ¥0.008 / 1k tokens + "ERNIE-Bot-turbo": {[]float64{0.5715, 0.5715}, ChannelTypeBaidu}, + // ¥0.12 / 1k tokens ¥0.12 / 1k tokens + "ERNIE-Bot-4": {[]float64{8.572, 8.572}, ChannelTypeBaidu}, + // ¥0.002 / 1k tokens + "Embedding-V1": {[]float64{0.1429, 0.1429}, ChannelTypeBaidu}, + + "PaLM-2": {[]float64{1, 1}, ChannelTypePaLM}, + "gemini-pro": {[]float64{1, 1}, ChannelTypeGemini}, + "gemini-pro-vision": {[]float64{1, 1}, ChannelTypeGemini}, + + // ¥0.005 / 1k tokens + "chatglm_turbo": {[]float64{0.3572, 0.3572}, ChannelTypeZhipu}, + "chatglm_std": {[]float64{0.3572, 0.3572}, ChannelTypeZhipu}, + "glm-3-turbo": {[]float64{0.3572, 0.3572}, ChannelTypeZhipu}, + // ¥0.01 / 1k tokens + "chatglm_pro": {[]float64{0.7143, 0.7143}, ChannelTypeZhipu}, + // ¥0.002 / 1k tokens + "chatglm_lite": {[]float64{0.1429, 0.1429}, ChannelTypeZhipu}, + // ¥0.1 / 1k tokens + "glm-4": {[]float64{7.143, 7.143}, ChannelTypeZhipu}, + "glm-4v": {[]float64{7.143, 7.143}, ChannelTypeZhipu}, + // ¥0.0005 / 1k tokens + "embedding-2": {[]float64{0.0357, 0.0357}, ChannelTypeZhipu}, + // ¥0.25 / 1张图片 + "cogview-3": {[]float64{17.8571, 17.8571}, ChannelTypeZhipu}, + + // ¥0.008 / 1k tokens + "qwen-turbo": {[]float64{0.5715, 0.5715}, ChannelTypeAli}, + // ¥0.02 / 1k tokens + "qwen-plus": {[]float64{1.4286, 1.4286}, ChannelTypeAli}, + "qwen-max": {[]float64{1.4286, 1.4286}, ChannelTypeAli}, + "qwen-max-longcontext": {[]float64{1.4286, 1.4286}, ChannelTypeAli}, + "qwen-vl": {[]float64{0.5715, 0.5715}, ChannelTypeAli}, + "qwen-vl-plus": {[]float64{0.5715, 0.5715}, ChannelTypeAli}, + // ¥0.0007 / 1k tokens + "text-embedding-v1": {[]float64{0.05, 0.05}, ChannelTypeAli}, + + // ¥0.018 / 1k tokens + "SparkDesk": {[]float64{1.2858, 1.2858}, ChannelTypeXunfei}, + "SparkDesk-v1.1": {[]float64{1.2858, 1.2858}, ChannelTypeXunfei}, + "SparkDesk-v2.1": {[]float64{1.2858, 1.2858}, ChannelTypeXunfei}, + "SparkDesk-v3.1": {[]float64{1.2858, 1.2858}, ChannelTypeXunfei}, + "SparkDesk-v3.5": {[]float64{1.2858, 1.2858}, ChannelTypeXunfei}, + + // ¥0.012 / 1k tokens + "360GPT_S2_V9": {[]float64{0.8572, 0.8572}, ChannelType360}, + // ¥0.001 / 1k tokens + "embedding-bert-512-v1": {[]float64{0.0715, 0.0715}, ChannelType360}, + "embedding_s1_v1": {[]float64{0.0715, 0.0715}, ChannelType360}, + "semantic_similarity_s1_v1": {[]float64{0.0715, 0.0715}, ChannelType360}, + + // ¥0.1 / 1k tokens // https://cloud.tencent.com/document/product/1729/97731#e0e6be58-60c8-469f-bdeb-6c264ce3b4d0 + "hunyuan": {[]float64{7.143, 7.143}, ChannelTypeTencent}, + + "Baichuan2-Turbo": {[]float64{0.5715, 0.5715}, ChannelTypeBaichuan}, // ¥0.008 / 1k tokens + "Baichuan2-Turbo-192k": {[]float64{1.143, 1.143}, ChannelTypeBaichuan}, // ¥0.016 / 1k tokens + "Baichuan2-53B": {[]float64{1.4286, 1.4286}, ChannelTypeBaichuan}, // ¥0.02 / 1k tokens + "Baichuan-Text-Embedding": {[]float64{0.0357, 0.0357}, ChannelTypeBaichuan}, // ¥0.0005 / 1k tokens + + "abab5.5s-chat": {[]float64{0.3572, 0.3572}, ChannelTypeMiniMax}, // ¥0.005 / 1k tokens + "abab5.5-chat": {[]float64{1.0714, 1.0714}, ChannelTypeMiniMax}, // ¥0.015 / 1k tokens + "abab6-chat": {[]float64{14.2857, 14.2857}, ChannelTypeMiniMax}, // ¥0.2 / 1k tokens + "embo-01": {[]float64{0.0357, 0.0357}, ChannelTypeMiniMax}, // ¥0.0005 / 1k tokens + + "deepseek-coder": {[]float64{0.75, 0.75}, ChannelTypeDeepseek}, // 暂定 $0.0015 / 1K tokens + "deepseek-chat": {[]float64{0.75, 0.75}, ChannelTypeDeepseek}, // 暂定 $0.0015 / 1K tokens + + "moonshot-v1-8k": {[]float64{0.8572, 0.8572}, ChannelTypeMoonshot}, // ¥0.012 / 1K tokens + "moonshot-v1-32k": {[]float64{1.7143, 1.7143}, ChannelTypeMoonshot}, // ¥0.024 / 1K tokens + "moonshot-v1-128k": {[]float64{4.2857, 4.2857}, ChannelTypeMoonshot}, // ¥0.06 / 1K tokens } - ModelRatio = make(map[string]float64) + ModelRatio = make(map[string][]float64) for name, modelType := range ModelTypes { ModelRatio[name] = modelType.Ratio } @@ -153,18 +207,25 @@ func ModelRatio2JSONString() string { } func UpdateModelRatioByJSONString(jsonStr string) error { - ModelRatio = make(map[string]float64) + ModelRatio = make(map[string][]float64) return json.Unmarshal([]byte(jsonStr), &ModelRatio) } func MergeModelRatioByJSONString(jsonStr string) (newJsonStr string, err error) { - inputModelRatio := make(map[string]float64) + isNew := false + inputModelRatio := make(map[string][]float64) err = json.Unmarshal([]byte(jsonStr), &inputModelRatio) if err != nil { - return + inputModelRatioOld := make(map[string]float64) + err = json.Unmarshal([]byte(jsonStr), &inputModelRatioOld) + if err != nil { + return + } + + inputModelRatio = UpdateModeRatioFormat(inputModelRatioOld) + isNew = true } - isNew := false // 与现有的ModelRatio进行比较,如果有新增的模型,需要添加 for key, value := range ModelRatio { if _, ok := inputModelRatio[key]; !ok { @@ -186,14 +247,23 @@ func MergeModelRatioByJSONString(jsonStr string) (newJsonStr string, err error) return } -func GetModelRatio(name string) float64 { +func UpdateModeRatioFormat(modelRatioOld map[string]float64) map[string][]float64 { + modelRatioNew := make(map[string][]float64) + for key, value := range modelRatioOld { + completionRatio := GetCompletionRatio(key) * value + modelRatioNew[key] = []float64{value, completionRatio} + } + return modelRatioNew +} + +func GetModelRatio(name string) []float64 { if strings.HasPrefix(name, "qwen-") && strings.HasSuffix(name, "-internet") { name = strings.TrimSuffix(name, "-internet") } ratio, ok := ModelRatio[name] if !ok { SysError("model ratio not found: " + name) - return 30 + return []float64{30, 30} } return ratio } diff --git a/controller/quota.go b/controller/quota.go index e8a083a1..5bb9f6d4 100644 --- a/controller/quota.go +++ b/controller/quota.go @@ -18,7 +18,7 @@ type QuotaInfo struct { modelName string promptTokens int preConsumedTokens int - modelRatio float64 + modelRatio []float64 groupRatio float64 ratio float64 preConsumedQuota int @@ -51,7 +51,7 @@ func (q *QuotaInfo) initQuotaInfo(groupName string) { modelRatio := common.GetModelRatio(q.modelName) groupRatio := common.GetGroupRatio(groupName) preConsumedTokens := common.PreConsumedQuota - ratio := modelRatio * groupRatio + ratio := modelRatio[0] * groupRatio preConsumedQuota := int(float64(q.promptTokens+preConsumedTokens) * ratio) q.preConsumedTokens = preConsumedTokens @@ -97,10 +97,10 @@ func (q *QuotaInfo) preQuotaConsumption() *types.OpenAIErrorWithStatusCode { func (q *QuotaInfo) completedQuotaConsumption(usage *types.Usage, tokenName string, ctx context.Context) error { quota := 0 - completionRatio := common.GetCompletionRatio(q.modelName) + completionRatio := q.modelRatio[1] * q.groupRatio promptTokens := usage.PromptTokens completionTokens := usage.CompletionTokens - quota = int(math.Ceil((float64(promptTokens) + float64(completionTokens)*completionRatio) * q.ratio)) + quota = int(math.Ceil((float64(promptTokens) + float64(completionTokens)*completionRatio))) if q.ratio != 0 && quota <= 0 { quota = 1 } @@ -128,8 +128,14 @@ func (q *QuotaInfo) completedQuotaConsumption(usage *types.Usage, tokenName stri requestTime = int(time.Since(requestStartTime).Milliseconds()) } } + var modelRatioStr string + if q.modelRatio[0] == q.modelRatio[1] { + modelRatioStr = fmt.Sprintf("%.2f", q.modelRatio[0]) + } else { + modelRatioStr = fmt.Sprintf("%.2f (输入)/%.2f (输出)", q.modelRatio[0], q.modelRatio[1]) + } - logContent := fmt.Sprintf("模型倍率 %.2f,分组倍率 %.2f", q.modelRatio, q.groupRatio) + logContent := fmt.Sprintf("模型倍率 %s,分组倍率 %.2f", modelRatioStr, q.groupRatio) model.RecordConsumeLog(ctx, q.userId, q.channelId, promptTokens, completionTokens, q.modelName, tokenName, quota, logContent, requestTime) model.UpdateUserUsedQuotaAndRequestCount(q.userId, quota) model.UpdateChannelUsedQuota(q.channelId, quota) diff --git a/modelRatio.json b/modelRatio.json new file mode 100644 index 00000000..b509e58b --- /dev/null +++ b/modelRatio.json @@ -0,0 +1,97 @@ +{ + "gpt-4": [15, 30], + "gpt-4-0314": [15, 30], + "gpt-4-0613": [15, 30], + "gpt-4-32k": [30, 60], + "gpt-4-32k-0314": [30, 60], + "gpt-4-32k-0613": [30, 60], + "gpt-4-preview": [5, 15], + "gpt-4-1106-preview": [5, 15], + "gpt-4-0125-preview": [5, 15], + "gpt-4-vision-preview": [5, 15], + "gpt-3.5-turbo": [0.25, 0.75], + "gpt-3.5-turbo-0125": [0.25, 0.75], + "gpt-3.5-turbo-0301": [0.75, 1], + "gpt-3.5-turbo-0613": [0.75, 1], + "gpt-3.5-turbo-instruct": [0.75, 1], + "gpt-3.5-turbo-16k": [1.5, 2], + "gpt-3.5-turbo-16k-0613": [1.5, 2], + "gpt-3.5-turbo-1106": [0.5, 1], + "davinci-002": [1, 1], + "babbage-002": [0.2, 0.2], + "text-ada-001": [0.2, 0.2], + "text-babbage-001": [0.25, 0.25], + "text-curie-001": [1, 1], + "text-davinci-002": [10, 10], + "text-davinci-003": [10, 10], + "text-davinci-edit-001": [10, 10], + "code-davinci-edit-001": [10, 10], + "whisper-1": [15, 15], + "tts-1": [7.5, 7.5], + "tts-1-1106": [7.5, 7.5], + "tts-1-hd": [15, 15], + "tts-1-hd-1106": [15, 15], + "davinci": [10, 10], + "curie": [10, 10], + "babbage": [10, 10], + "ada": [10, 10], + "text-embedding-ada-002": [0.05, 0.05], + "text-embedding-3-small": [0.01, 0.01], + "text-embedding-3-large": [0.065, 0.065], + "text-search-ada-doc-001": [10, 10], + "text-moderation-stable": [0.1, 0.1], + "text-moderation-latest": [0.1, 0.1], + "dall-e-2": [8, 8], + "dall-e-3": [20, 20], + "claude-instant-1": [0.4, 1.2], + "claude-2": [4, 12], + "claude-2.0": [4, 12], + "claude-2.1": [4, 12], + "ERNIE-Bot": [0.8572, 0.8572], + "ERNIE-Bot-8k": [1.7143, 3.4286], + "ERNIE-Bot-turbo": [0.5715, 0.5715], + "ERNIE-Bot-4": [8.572, 8.572], + "Embedding-V1": [0.1429, 0.1429], + "PaLM-2": [1, 1], + "gemini-pro": [1, 1], + "gemini-pro-vision": [1, 1], + "chatglm_turbo": [0.3572, 0.3572], + "chatglm_std": [0.3572, 0.3572], + "glm-3-turbo": [0.3572, 0.3572], + "chatglm_pro": [0.7143, 0.7143], + "chatglm_lite": [0.1429, 0.1429], + "glm-4": [7.143, 7.143], + "glm-4v": [7.143, 7.143], + "embedding-2": [0.0357, 0.0357], + "cogview-3": [17.8571, 17.8571], + "qwen-turbo": [0.5715, 0.5715], + "qwen-plus": [1.4286, 1.4286], + "qwen-max": [1.4286, 1.4286], + "qwen-max-longcontext": [1.4286, 1.4286], + "qwen-vl": [0.5715, 0.5715], + "qwen-vl-plus": [0.5715, 0.5715], + "text-embedding-v1": [0.05, 0.05], + "SparkDesk": [1.2858, 1.2858], + "SparkDesk-v1.1": [1.2858, 1.2858], + "SparkDesk-v2.1": [1.2858, 1.2858], + "SparkDesk-v3.1": [1.2858, 1.2858], + "SparkDesk-v3.5": [1.2858, 1.2858], + "360GPT_S2_V9": [0.8572, 0.8572], + "embedding-bert-512-v1": [0.0715, 0.0715], + "embedding_s1_v1": [0.0715, 0.0715], + "semantic_similarity_s1_v1": [0.0715, 0.0715], + "hunyuan": [7.143, 7.143], + "Baichuan2-Turbo": [0.5715, 0.5715], + "Baichuan2-Turbo-192k": [1.143, 1.143], + "Baichuan2-53B": [1.4286, 1.4286], + "Baichuan-Text-Embedding": [0.0357, 0.0357], + "abab5.5s-chat": [0.3572, 0.3572], + "abab5.5-chat": [1.0714, 1.0714], + "abab6-chat": [14.2857, 14.2857], + "embo-01": [0.0357, 0.0357], + "deepseek-coder": [0.75, 0.75], + "deepseek-chat": [0.75, 0.75], + "moonshot-v1-8k": [0.8572, 0.8572], + "moonshot-v1-32k": [1.7143, 1.7143], + "moonshot-v1-128k": [4.2857, 4.2857] +} diff --git a/providers/openai/base.go b/providers/openai/base.go index eac3b502..3f11e977 100644 --- a/providers/openai/base.go +++ b/providers/openai/base.go @@ -85,13 +85,6 @@ func (p *OpenAIProvider) GetFullRequestURL(requestURL string, modelName string) if p.IsAzure { apiVersion := p.Channel.Other - // 以-分割,检测modelName 最后一个元素是否为4位数字,必须是数字,如果是则删除modelName最后一个元素 - modelNameSlice := strings.Split(modelName, "-") - lastModelNameSlice := modelNameSlice[len(modelNameSlice)-1] - modelNum := common.String2Int(lastModelNameSlice) - if modelNum > 999 && modelNum < 10000 { - modelName = strings.TrimSuffix(modelName, "-"+lastModelNameSlice) - } // 检测模型是是否包含 . 如果有则直接去掉 modelName = strings.Replace(modelName, ".", "", -1) diff --git a/providers/xunfei/base.go b/providers/xunfei/base.go index fdaf8647..e6a7975d 100644 --- a/providers/xunfei/base.go +++ b/providers/xunfei/base.go @@ -68,7 +68,7 @@ func (p *XunfeiProvider) GetFullRequestURL(requestURL string, modelName string) if len(splits) != 3 { return "" } - domain, authUrl := p.getXunfeiAuthUrl(splits[2], splits[1]) + domain, authUrl := p.getXunfeiAuthUrl(splits[2], splits[1], modelName) p.domain = domain p.apiId = splits[0] @@ -76,20 +76,47 @@ func (p *XunfeiProvider) GetFullRequestURL(requestURL string, modelName string) return authUrl } -func (p *XunfeiProvider) getXunfeiAuthUrl(apiKey string, apiSecret string) (string, string) { +func (p *XunfeiProvider) getAPIVersion(modelName string) string { query := p.Context.Request.URL.Query() apiVersion := query.Get("api-version") - if apiVersion == "" { - apiVersion = p.Channel.Other + if apiVersion != "" { + return apiVersion } - if apiVersion == "" { - apiVersion = "v1.1" - common.SysLog("api_version not found, use default: " + apiVersion) + parts := strings.Split(modelName, "-") + if len(parts) == 2 { + apiVersion = parts[1] + return apiVersion } - domain := "general" - if apiVersion != "v1.1" { - domain += strings.Split(apiVersion, ".")[0] + + apiVersion = p.Channel.Other + if apiVersion != "" { + return apiVersion } + apiVersion = "v1.1" + + common.SysLog("api_version not found, use default: " + apiVersion) + return apiVersion +} + +// https://www.xfyun.cn/doc/spark/Web.html#_1-%E6%8E%A5%E5%8F%A3%E8%AF%B4%E6%98%8E +func apiVersion2domain(apiVersion string) string { + switch apiVersion { + case "v1.1": + return "general" + case "v2.1": + return "generalv2" + case "v3.1": + return "generalv3" + case "v3.5": + return "generalv3.5" + } + return "general" + apiVersion +} + +func (p *XunfeiProvider) getXunfeiAuthUrl(apiKey string, apiSecret string, modelName string) (string, string) { + apiVersion := p.getAPIVersion(modelName) + domain := apiVersion2domain(apiVersion) + authUrl := p.buildXunfeiAuthUrl(fmt.Sprintf("%s/%s/chat", p.Config.BaseURL, apiVersion), apiKey, apiSecret) return domain, authUrl } diff --git a/web/package.json b/web/package.json index 8918ddbc..e376b4b8 100644 --- a/web/package.json +++ b/web/package.json @@ -13,6 +13,7 @@ "@mui/material": "^5.8.6", "@mui/system": "^5.8.6", "@mui/utils": "^5.8.6", + "@mui/x-data-grid": "^6.19.4", "@mui/x-date-pickers": "^6.18.5", "@tabler/icons-react": "^2.44.0", "apexcharts": "^3.35.3", diff --git a/web/src/views/Channel/component/NameLabel.js b/web/src/views/Channel/component/NameLabel.js index 061ed617..612a9356 100644 --- a/web/src/views/Channel/component/NameLabel.js +++ b/web/src/views/Channel/component/NameLabel.js @@ -30,6 +30,9 @@ const NameLabel = ({ name, models }) => { onClick={() => { copy(item, '模型名称'); }} + onTouchEnd={() => { + copy(item, '模型名称'); + }} > {item} @@ -39,6 +42,7 @@ const NameLabel = ({ name, models }) => { } placement="top" + enterTouchDelay={0} > {name} diff --git a/web/src/views/Channel/type/Config.js b/web/src/views/Channel/type/Config.js index c9facb8d..f97b6f98 100644 --- a/web/src/views/Channel/type/Config.js +++ b/web/src/views/Channel/type/Config.js @@ -109,7 +109,7 @@ const typeConfig = { other: '版本号' }, input: { - models: ['SparkDesk'] + models: ['SparkDesk', 'SparkDesk-v1.1', 'SparkDesk-v2.1', 'SparkDesk-v3.1', 'SparkDesk-v3.5'] }, prompt: { key: '按照如下格式输入:APPID|APISecret|APIKey', diff --git a/web/src/views/Setting/component/ModelRationDataGrid.js b/web/src/views/Setting/component/ModelRationDataGrid.js new file mode 100644 index 00000000..edac3ff0 --- /dev/null +++ b/web/src/views/Setting/component/ModelRationDataGrid.js @@ -0,0 +1,275 @@ +import PropTypes from 'prop-types'; +import { useState, useEffect, useMemo, useCallback } from 'react'; +import { GridRowModes, DataGrid, GridToolbarContainer, GridActionsCellItem } from '@mui/x-data-grid'; +import { Box, Button } from '@mui/material'; +import AddIcon from '@mui/icons-material/Add'; +import EditIcon from '@mui/icons-material/Edit'; +import DeleteIcon from '@mui/icons-material/DeleteOutlined'; +import SaveIcon from '@mui/icons-material/Save'; +import CancelIcon from '@mui/icons-material/Close'; +import { showError } from 'utils/common'; + +function validation(row, rows) { + if (row.model === '') { + return '模型名称不能为空'; + } + + // 判断 model是否是唯一值 + if (rows.filter((r) => r.model === row.model && (row.isNew || r.id !== row.id)).length > 0) { + return '模型名称不能重复'; + } + + if (row.input === '' || row.input < 0) { + return '输入倍率必须大于等于0'; + } + if (row.complete === '' || row.complete < 0) { + return '完成倍率必须大于等于0'; + } + return false; +} + +function randomId() { + return Math.random().toString(36).substr(2, 9); +} + +function EditToolbar({ setRows, setRowModesModel }) { + const handleClick = () => { + const id = randomId(); + setRows((oldRows) => [{ id, model: '', input: 0, complete: 0, isNew: true }, ...oldRows]); + setRowModesModel((oldModel) => ({ + [id]: { mode: GridRowModes.Edit, fieldToFocus: 'name' }, + ...oldModel + })); + }; + + return ( + + + + ); +} + +EditToolbar.propTypes = { + setRows: PropTypes.func.isRequired, + setRowModesModel: PropTypes.func.isRequired +}; + +const ModelRationDataGrid = ({ ratio, onChange }) => { + const [rows, setRows] = useState([]); + const [rowModesModel, setRowModesModel] = useState({}); + + const setRatio = useCallback( + (ratioRow) => { + let ratioJson = {}; + ratioRow.forEach((row) => { + ratioJson[row.model] = [row.input, row.complete]; + }); + onChange({ target: { name: 'ModelRatio', value: JSON.stringify(ratioJson, null, 2) } }); + }, + [onChange] + ); + + const handleEditClick = useCallback( + (id) => () => { + setRowModesModel({ ...rowModesModel, [id]: { mode: GridRowModes.Edit } }); + }, + [rowModesModel] + ); + + const handleSaveClick = useCallback( + (id) => () => { + setRowModesModel({ ...rowModesModel, [id]: { mode: GridRowModes.View } }); + }, + [rowModesModel] + ); + + const handleDeleteClick = useCallback( + (id) => () => { + setRatio(rows.filter((row) => row.id !== id)); + }, + [rows, setRatio] + ); + + const handleCancelClick = useCallback( + (id) => () => { + setRowModesModel({ + ...rowModesModel, + [id]: { mode: GridRowModes.View, ignoreModifications: true } + }); + + const editedRow = rows.find((row) => row.id === id); + if (editedRow.isNew) { + setRows(rows.filter((row) => row.id !== id)); + } + }, + [rowModesModel, rows] + ); + + const processRowUpdate = (newRow, oldRows) => { + if (!newRow.isNew && newRow.model === oldRows.model && newRow.input === oldRows.input && newRow.complete === oldRows.complete) { + return oldRows; + } + const updatedRow = { ...newRow, isNew: false }; + const error = validation(updatedRow, rows); + if (error) { + return Promise.reject(new Error(error)); + } + setRatio(rows.map((row) => (row.id === newRow.id ? updatedRow : row))); + return updatedRow; + }; + + const handleProcessRowUpdateError = useCallback((error) => { + showError(error.message); + }, []); + + const handleRowModesModelChange = (newRowModesModel) => { + setRowModesModel(newRowModesModel); + }; + + const modelRatioColumns = useMemo( + () => [ + { + field: 'model', + sortable: true, + headerName: '模型名称', + width: 220, + editable: true, + hideable: false + }, + { + field: 'input', + sortable: false, + headerName: '输入倍率', + width: 150, + type: 'number', + editable: true, + valueFormatter: (params) => { + if (params.value == null) { + return ''; + } + return `$${parseFloat(params.value * 0.002).toFixed(4)} / ¥${parseFloat(params.value * 0.014).toFixed(4)}`; + }, + hideable: false + }, + { + field: 'complete', + sortable: false, + headerName: '完成倍率', + width: 150, + type: 'number', + editable: true, + valueFormatter: (params) => { + if (params.value == null) { + return ''; + } + return `$${parseFloat(params.value * 0.002).toFixed(4)} / ¥${parseFloat(params.value * 0.014).toFixed(4)}`; + }, + hideable: false + }, + { + field: 'actions', + type: 'actions', + headerName: '操作', + width: 100, + cellClassName: 'actions', + hideable: false, + getActions: ({ id }) => { + const isInEditMode = rowModesModel[id]?.mode === GridRowModes.Edit; + + if (isInEditMode) { + return [ + } + key={'Save-' + id} + label="Save" + sx={{ + color: 'primary.main' + }} + onClick={handleSaveClick(id)} + />, + } + key={'Cancel-' + id} + label="Cancel" + className="textPrimary" + onClick={handleCancelClick(id)} + color="inherit" + /> + ]; + } + + return [ + } + label="Edit" + className="textPrimary" + onClick={handleEditClick(id)} + color="inherit" + />, + } + label="Delete" + onClick={handleDeleteClick(id)} + color="inherit" + /> + ]; + } + } + ], + [handleEditClick, handleSaveClick, handleDeleteClick, handleCancelClick, rowModesModel] + ); + + useEffect(() => { + let modelRatioList = []; + let itemJson = JSON.parse(ratio); + let id = 0; + for (let key in itemJson) { + modelRatioList.push({ id: id++, model: key, input: itemJson[key][0], complete: itemJson[key][1] }); + } + setRows(modelRatioList); + }, [ratio]); + + return ( + + + + ); +}; + +ModelRationDataGrid.propTypes = { + ratio: PropTypes.string.isRequired, + onChange: PropTypes.func.isRequired +}; + +export default ModelRationDataGrid; diff --git a/web/src/views/Setting/component/OperationSetting.js b/web/src/views/Setting/component/OperationSetting.js index 27af978c..ec2bc715 100644 --- a/web/src/views/Setting/component/OperationSetting.js +++ b/web/src/views/Setting/component/OperationSetting.js @@ -1,11 +1,12 @@ import { useState, useEffect } from 'react'; import SubCard from 'ui-component/cards/SubCard'; -import { Stack, FormControl, InputLabel, OutlinedInput, Checkbox, Button, FormControlLabel, TextField } from '@mui/material'; +import { Stack, FormControl, InputLabel, OutlinedInput, Checkbox, Button, FormControlLabel, TextField, Alert, Switch } from '@mui/material'; import { showSuccess, showError, verifyJSON } from 'utils/common'; import { API } from 'utils/api'; import { AdapterDayjs } from '@mui/x-date-pickers/AdapterDayjs'; import { LocalizationProvider } from '@mui/x-date-pickers/LocalizationProvider'; import { DateTimePicker } from '@mui/x-date-pickers/DateTimePicker'; +import ModelRationDataGrid from './ModelRationDataGrid'; import dayjs from 'dayjs'; require('dayjs/locale/zh-cn'); @@ -32,6 +33,7 @@ const OperationSetting = () => { RetryTimes: 0 }); const [originInputs, setOriginInputs] = useState({}); + const [newModelRatioView, setNewModelRatioView] = useState(false); let [loading, setLoading] = useState(false); let [historyTimestamp, setHistoryTimestamp] = useState(now.getTime() / 1000 - 30 * 24 * 3600); // a month ago new Date().getTime() / 1000 + 3600 @@ -431,21 +433,6 @@ const OperationSetting = () => { - - - - { placeholder="为一个 JSON 文本,键为分组名称,值为倍率" /> + + + + 配置格式为 JSON 文本,键为模型名称;值第一位为输入倍率,第二位为完成倍率,如果只有单一倍率则两者值相同。 +
美元:1 === $0.002 / 1K tokens 人民币: 1 === ¥0.014 / 1k tokens +
例如
gpt-4 输入: $0.03 / 1K tokens 完成:$0.06 / 1K tokens
+ 0.03 / 0.002 = 15, 0.06 / 0.002 = 30,即输入倍率为 15,完成倍率为 30 +
+ { + setNewModelRatioView(!newModelRatioView); + }} + /> + } + label="使用新编辑器" + /> +
+ + {newModelRatioView ? ( + + ) : ( + + + + )}