fix: aws llama3 ratio
This commit is contained in:
parent
68d443cb3e
commit
a02a50d236
@ -2,6 +2,7 @@ package ratio
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/songquanpeng/one-api/common/logger"
|
"github.com/songquanpeng/one-api/common/logger"
|
||||||
@ -169,6 +170,9 @@ var ModelRatio = map[string]float64{
|
|||||||
"step-1v-32k": 0.024 * RMB,
|
"step-1v-32k": 0.024 * RMB,
|
||||||
"step-1-32k": 0.024 * RMB,
|
"step-1-32k": 0.024 * RMB,
|
||||||
"step-1-200k": 0.15 * RMB,
|
"step-1-200k": 0.15 * RMB,
|
||||||
|
// aws llama3 https://aws.amazon.com/cn/bedrock/pricing/
|
||||||
|
"llama3-8b-8192(33)": 0.0003 / 0.002, // $0.0003 / 1K tokens
|
||||||
|
"llama3-70b-8192(33)": 0.00265 / 0.002, // $0.00265 / 1K tokens
|
||||||
// https://cohere.com/pricing
|
// https://cohere.com/pricing
|
||||||
"command": 0.5,
|
"command": 0.5,
|
||||||
"command-nightly": 0.5,
|
"command-nightly": 0.5,
|
||||||
@ -185,7 +189,11 @@ var ModelRatio = map[string]float64{
|
|||||||
"deepl-ja": 25.0 / 1000 * USD,
|
"deepl-ja": 25.0 / 1000 * USD,
|
||||||
}
|
}
|
||||||
|
|
||||||
var CompletionRatio = map[string]float64{}
|
var CompletionRatio = map[string]float64{
|
||||||
|
// aws llama3
|
||||||
|
"llama3-8b-8192(33)": 0.0006 / 0.0003,
|
||||||
|
"llama3-70b-8192(33)": 0.0035 / 0.00265,
|
||||||
|
}
|
||||||
|
|
||||||
var DefaultModelRatio map[string]float64
|
var DefaultModelRatio map[string]float64
|
||||||
var DefaultCompletionRatio map[string]float64
|
var DefaultCompletionRatio map[string]float64
|
||||||
@ -234,23 +242,29 @@ func UpdateModelRatioByJSONString(jsonStr string) error {
|
|||||||
return json.Unmarshal([]byte(jsonStr), &ModelRatio)
|
return json.Unmarshal([]byte(jsonStr), &ModelRatio)
|
||||||
}
|
}
|
||||||
|
|
||||||
func GetModelRatio(name string) float64 {
|
func GetModelRatio(name string, channelType int) float64 {
|
||||||
if strings.HasPrefix(name, "qwen-") && strings.HasSuffix(name, "-internet") {
|
if strings.HasPrefix(name, "qwen-") && strings.HasSuffix(name, "-internet") {
|
||||||
name = strings.TrimSuffix(name, "-internet")
|
name = strings.TrimSuffix(name, "-internet")
|
||||||
}
|
}
|
||||||
if strings.HasPrefix(name, "command-") && strings.HasSuffix(name, "-internet") {
|
if strings.HasPrefix(name, "command-") && strings.HasSuffix(name, "-internet") {
|
||||||
name = strings.TrimSuffix(name, "-internet")
|
name = strings.TrimSuffix(name, "-internet")
|
||||||
}
|
}
|
||||||
ratio, ok := ModelRatio[name]
|
model := fmt.Sprintf("%s(%d)", name, channelType)
|
||||||
if !ok {
|
if ratio, ok := ModelRatio[model]; ok {
|
||||||
ratio, ok = DefaultModelRatio[name]
|
return ratio
|
||||||
|
}
|
||||||
|
if ratio, ok := DefaultModelRatio[model]; ok {
|
||||||
|
return ratio
|
||||||
|
}
|
||||||
|
if ratio, ok := ModelRatio[name]; ok {
|
||||||
|
return ratio
|
||||||
|
}
|
||||||
|
if ratio, ok := DefaultModelRatio[name]; ok {
|
||||||
|
return ratio
|
||||||
}
|
}
|
||||||
if !ok {
|
|
||||||
logger.SysError("model ratio not found: " + name)
|
logger.SysError("model ratio not found: " + name)
|
||||||
return 30
|
return 30
|
||||||
}
|
}
|
||||||
return ratio
|
|
||||||
}
|
|
||||||
|
|
||||||
func CompletionRatio2JSONString() string {
|
func CompletionRatio2JSONString() string {
|
||||||
jsonBytes, err := json.Marshal(CompletionRatio)
|
jsonBytes, err := json.Marshal(CompletionRatio)
|
||||||
@ -265,7 +279,17 @@ func UpdateCompletionRatioByJSONString(jsonStr string) error {
|
|||||||
return json.Unmarshal([]byte(jsonStr), &CompletionRatio)
|
return json.Unmarshal([]byte(jsonStr), &CompletionRatio)
|
||||||
}
|
}
|
||||||
|
|
||||||
func GetCompletionRatio(name string) float64 {
|
func GetCompletionRatio(name string, channelType int) float64 {
|
||||||
|
if strings.HasPrefix(name, "qwen-") && strings.HasSuffix(name, "-internet") {
|
||||||
|
name = strings.TrimSuffix(name, "-internet")
|
||||||
|
}
|
||||||
|
model := fmt.Sprintf("%s(%d)", name, channelType)
|
||||||
|
if ratio, ok := CompletionRatio[model]; ok {
|
||||||
|
return ratio
|
||||||
|
}
|
||||||
|
if ratio, ok := DefaultCompletionRatio[model]; ok {
|
||||||
|
return ratio
|
||||||
|
}
|
||||||
if ratio, ok := CompletionRatio[name]; ok {
|
if ratio, ok := CompletionRatio[name]; ok {
|
||||||
return ratio
|
return ratio
|
||||||
}
|
}
|
||||||
|
@ -7,6 +7,10 @@ import (
|
|||||||
"encoding/json"
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"strings"
|
||||||
|
|
||||||
"github.com/gin-gonic/gin"
|
"github.com/gin-gonic/gin"
|
||||||
"github.com/songquanpeng/one-api/common"
|
"github.com/songquanpeng/one-api/common"
|
||||||
"github.com/songquanpeng/one-api/common/client"
|
"github.com/songquanpeng/one-api/common/client"
|
||||||
@ -21,9 +25,6 @@ import (
|
|||||||
"github.com/songquanpeng/one-api/relay/meta"
|
"github.com/songquanpeng/one-api/relay/meta"
|
||||||
relaymodel "github.com/songquanpeng/one-api/relay/model"
|
relaymodel "github.com/songquanpeng/one-api/relay/model"
|
||||||
"github.com/songquanpeng/one-api/relay/relaymode"
|
"github.com/songquanpeng/one-api/relay/relaymode"
|
||||||
"io"
|
|
||||||
"net/http"
|
|
||||||
"strings"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func RelayAudioHelper(c *gin.Context, relayMode int) *relaymodel.ErrorWithStatusCode {
|
func RelayAudioHelper(c *gin.Context, relayMode int) *relaymodel.ErrorWithStatusCode {
|
||||||
@ -53,7 +54,7 @@ func RelayAudioHelper(c *gin.Context, relayMode int) *relaymodel.ErrorWithStatus
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
modelRatio := billingratio.GetModelRatio(audioModel)
|
modelRatio := billingratio.GetModelRatio(audioModel, channelType)
|
||||||
groupRatio := billingratio.GetGroupRatio(group)
|
groupRatio := billingratio.GetGroupRatio(group)
|
||||||
ratio := modelRatio * groupRatio
|
ratio := modelRatio * groupRatio
|
||||||
var quota int64
|
var quota int64
|
||||||
|
@ -4,6 +4,10 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"math"
|
||||||
|
"net/http"
|
||||||
|
"strings"
|
||||||
|
|
||||||
"github.com/gin-gonic/gin"
|
"github.com/gin-gonic/gin"
|
||||||
"github.com/songquanpeng/one-api/common"
|
"github.com/songquanpeng/one-api/common"
|
||||||
"github.com/songquanpeng/one-api/common/config"
|
"github.com/songquanpeng/one-api/common/config"
|
||||||
@ -16,9 +20,6 @@ import (
|
|||||||
"github.com/songquanpeng/one-api/relay/meta"
|
"github.com/songquanpeng/one-api/relay/meta"
|
||||||
relaymodel "github.com/songquanpeng/one-api/relay/model"
|
relaymodel "github.com/songquanpeng/one-api/relay/model"
|
||||||
"github.com/songquanpeng/one-api/relay/relaymode"
|
"github.com/songquanpeng/one-api/relay/relaymode"
|
||||||
"math"
|
|
||||||
"net/http"
|
|
||||||
"strings"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func getAndValidateTextRequest(c *gin.Context, relayMode int) (*relaymodel.GeneralOpenAIRequest, error) {
|
func getAndValidateTextRequest(c *gin.Context, relayMode int) (*relaymodel.GeneralOpenAIRequest, error) {
|
||||||
@ -95,7 +96,7 @@ func postConsumeQuota(ctx context.Context, usage *relaymodel.Usage, meta *meta.M
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
var quota int64
|
var quota int64
|
||||||
completionRatio := billingratio.GetCompletionRatio(textRequest.Model)
|
completionRatio := billingratio.GetCompletionRatio(textRequest.Model, meta.ChannelType)
|
||||||
promptTokens := usage.PromptTokens
|
promptTokens := usage.PromptTokens
|
||||||
completionTokens := usage.CompletionTokens
|
completionTokens := usage.CompletionTokens
|
||||||
quota = int64(math.Ceil((float64(promptTokens) + float64(completionTokens)*completionRatio) * ratio))
|
quota = int64(math.Ceil((float64(promptTokens) + float64(completionTokens)*completionRatio) * ratio))
|
||||||
|
@ -6,6 +6,9 @@ import (
|
|||||||
"encoding/json"
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
|
||||||
"github.com/gin-gonic/gin"
|
"github.com/gin-gonic/gin"
|
||||||
"github.com/songquanpeng/one-api/common"
|
"github.com/songquanpeng/one-api/common"
|
||||||
"github.com/songquanpeng/one-api/common/ctxkey"
|
"github.com/songquanpeng/one-api/common/ctxkey"
|
||||||
@ -17,8 +20,6 @@ import (
|
|||||||
"github.com/songquanpeng/one-api/relay/channeltype"
|
"github.com/songquanpeng/one-api/relay/channeltype"
|
||||||
"github.com/songquanpeng/one-api/relay/meta"
|
"github.com/songquanpeng/one-api/relay/meta"
|
||||||
relaymodel "github.com/songquanpeng/one-api/relay/model"
|
relaymodel "github.com/songquanpeng/one-api/relay/model"
|
||||||
"io"
|
|
||||||
"net/http"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func getImageRequest(c *gin.Context, relayMode int) (*relaymodel.ImageRequest, error) {
|
func getImageRequest(c *gin.Context, relayMode int) (*relaymodel.ImageRequest, error) {
|
||||||
@ -166,7 +167,7 @@ func RelayImageHelper(c *gin.Context, relayMode int) *relaymodel.ErrorWithStatus
|
|||||||
requestBody = bytes.NewBuffer(jsonStr)
|
requestBody = bytes.NewBuffer(jsonStr)
|
||||||
}
|
}
|
||||||
|
|
||||||
modelRatio := billingratio.GetModelRatio(imageModel)
|
modelRatio := billingratio.GetModelRatio(imageModel, meta.ChannelType)
|
||||||
groupRatio := billingratio.GetGroupRatio(meta.Group)
|
groupRatio := billingratio.GetGroupRatio(meta.Group)
|
||||||
ratio := modelRatio * groupRatio
|
ratio := modelRatio * groupRatio
|
||||||
userQuota, err := model.CacheGetUserQuota(ctx, meta.UserId)
|
userQuota, err := model.CacheGetUserQuota(ctx, meta.UserId)
|
||||||
|
@ -4,6 +4,9 @@ import (
|
|||||||
"bytes"
|
"bytes"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
|
||||||
"github.com/gin-gonic/gin"
|
"github.com/gin-gonic/gin"
|
||||||
"github.com/songquanpeng/one-api/common/logger"
|
"github.com/songquanpeng/one-api/common/logger"
|
||||||
"github.com/songquanpeng/one-api/relay"
|
"github.com/songquanpeng/one-api/relay"
|
||||||
@ -14,8 +17,6 @@ import (
|
|||||||
"github.com/songquanpeng/one-api/relay/channeltype"
|
"github.com/songquanpeng/one-api/relay/channeltype"
|
||||||
"github.com/songquanpeng/one-api/relay/meta"
|
"github.com/songquanpeng/one-api/relay/meta"
|
||||||
"github.com/songquanpeng/one-api/relay/model"
|
"github.com/songquanpeng/one-api/relay/model"
|
||||||
"io"
|
|
||||||
"net/http"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func RelayTextHelper(c *gin.Context) *model.ErrorWithStatusCode {
|
func RelayTextHelper(c *gin.Context) *model.ErrorWithStatusCode {
|
||||||
@ -35,7 +36,7 @@ func RelayTextHelper(c *gin.Context) *model.ErrorWithStatusCode {
|
|||||||
textRequest.Model, isModelMapped = getMappedModelName(textRequest.Model, meta.ModelMapping)
|
textRequest.Model, isModelMapped = getMappedModelName(textRequest.Model, meta.ModelMapping)
|
||||||
meta.ActualModelName = textRequest.Model
|
meta.ActualModelName = textRequest.Model
|
||||||
// get model ratio & group ratio
|
// get model ratio & group ratio
|
||||||
modelRatio := billingratio.GetModelRatio(textRequest.Model)
|
modelRatio := billingratio.GetModelRatio(textRequest.Model, meta.ChannelType)
|
||||||
groupRatio := billingratio.GetGroupRatio(meta.Group)
|
groupRatio := billingratio.GetGroupRatio(meta.Group)
|
||||||
ratio := modelRatio * groupRatio
|
ratio := modelRatio * groupRatio
|
||||||
// pre-consume quota
|
// pre-consume quota
|
||||||
|
Loading…
Reference in New Issue
Block a user