diff --git a/common/constants.go b/common/constants.go
index c9fed019..07214b0d 100644
--- a/common/constants.go
+++ b/common/constants.go
@@ -186,6 +186,7 @@ const (
 	ChannelTypeAIProxyLibrary = 21
 	ChannelTypeFastGPT        = 22
 	ChannelTypeTencent        = 23
+	ChannelTypeAzureSpeech    = 24
 )
 
 var ChannelBaseURLs = []string{
@@ -212,7 +213,8 @@ var ChannelBaseURLs = []string{
 	"https://openrouter.ai/api",         // 20
 	"https://api.aiproxy.io",            // 21
 	"https://fastgpt.run/api/openapi",   // 22
-	"https://hunyuan.cloud.tencent.com", //23
+	"https://hunyuan.cloud.tencent.com", // 23
+	"",                                  // 24
 }
 
 const (
diff --git a/providers/azureSpeech/base.go b/providers/azureSpeech/base.go
new file mode 100644
index 00000000..9d6dbdd6
--- /dev/null
+++ b/providers/azureSpeech/base.go
@@ -0,0 +1,42 @@
+package azureSpeech
+
+import (
+	"one-api/providers/base"
+
+	"github.com/gin-gonic/gin"
+)
+
+// AzureSpeechProviderFactory creates providers for the Azure Speech channel.
+type AzureSpeechProviderFactory struct{}
+
+// Create returns an AzureSpeechProvider bound to the gin context c.
+// BaseURL is left empty here and the speech endpoint path is fixed to
+// "/cognitiveservices/v1".
+func (f AzureSpeechProviderFactory) Create(c *gin.Context) base.ProviderInterface {
+	return &AzureSpeechProvider{
+		BaseProvider: base.BaseProvider{
+			BaseURL:     "",
+			AudioSpeech: "/cognitiveservices/v1",
+			Context:     c,
+		},
+	}
+}
+
+// AzureSpeechProvider is the provider for Azure text-to-speech requests; it
+// embeds base.BaseProvider.
+type AzureSpeechProvider struct {
+	base.BaseProvider
+}
+
+// GetRequestHeaders returns the headers for an Azure Speech request: the
+// subscription key read from the "api_key" context value, the SSML content
+// type and a fixed User-Agent.
+func (p *AzureSpeechProvider) GetRequestHeaders() (headers map[string]string) {
+	headers = make(map[string]string)
+	headers["Ocp-Apim-Subscription-Key"] = p.Context.GetString("api_key")
+	headers["Content-Type"] = "application/ssml+xml"
+	headers["User-Agent"] = "OneAPI"
+	// X-Microsoft-OutputFormat is set per request by SpeechAction.
+
+	return headers
+}
diff --git a/providers/azureSpeech/speech.go b/providers/azureSpeech/speech.go
new file mode 100644
index 00000000..8f215915
--- /dev/null
+++ b/providers/azureSpeech/speech.go
@@ -0,0 +1,108 @@
+package azureSpeech
+
+import (
+	"bytes"
+	"encoding/xml"
+	"fmt"
+	"net/http"
+	"one-api/common"
+	"one-api/types"
+)
+
+// outputFormatMap maps the request's response_format to the value sent in the
+// X-Microsoft-OutputFormat header.
+// NOTE(review): "aac" and "flac" map to MP3 encodings, so those requests get
+// MP3 audio back - confirm this fallback is intended.
+var outputFormatMap = map[string]string{
+	"mp3":  "audio-16khz-128kbitrate-mono-mp3",
+	"opus": "audio-16khz-128kbitrate-mono-opus",
+	"aac":  "audio-24khz-160kbitrate-mono-mp3",
+	"flac": "audio-48khz-192kbitrate-mono-mp3",
+}
+
+// voiceMap maps a requested voice name to an Azure voice name, optionally
+// followed by a role.
+var voiceMap = map[string][]string{
+	"alloy":   {"zh-CN-YunxiNeural"},
+	"echo":    {"zh-CN-YunyangNeural"},
+	"fable":   {"zh-CN-YunxiNeural", "Boy"},
+	"onyx":    {"zh-CN-YunyeNeural"},
+	"nova":    {"zh-CN-XiaochenNeural"},
+	"shimmer": {"zh-CN-XiaohanNeural"},
+}
+
+// ssmlTemplate is the SSML envelope; its verbs are the voice name and the
+// already-escaped voice content, in that order.
+// NOTE(review): xml:lang is fixed to zh-CN to match voiceMap - confirm.
+const ssmlTemplate = `<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xmlns:mstts='https://www.w3.org/2001/mstts' xml:lang='zh-CN'><voice name='%s'>%s</voice></speak>`
+
+// escapeSSML returns s with XML special characters escaped so it can be placed
+// in SSML text or inside a quoted attribute value.
+func escapeSSML(s string) string {
+	var buf bytes.Buffer
+	// EscapeText only returns the writer's error, and bytes.Buffer never fails.
+	_ = xml.EscapeText(&buf, []byte(s))
+	return buf.String()
+}
+
+// CreateSSML builds the SSML document that speaks text with the voice name.
+// When role is non-empty the text is wrapped in an mstts:express-as element
+// carrying that role. All three arguments are XML-escaped, so caller-supplied
+// text cannot break out of the document.
+func CreateSSML(text string, name string, role string) string {
+	content := escapeSSML(text)
+	if role != "" {
+		content = fmt.Sprintf("<mstts:express-as role='%s'>%s</mstts:express-as>", escapeSSML(role), content)
+	}
+
+	return fmt.Sprintf(ssmlTemplate, escapeSSML(name), content)
+}
+
+// getRequestBody renders the request as SSML. A voice missing from voiceMap
+// yields an empty voice name and no role.
+func (p *AzureSpeechProvider) getRequestBody(request *types.SpeechAudioRequest) *bytes.Buffer {
+	var voice, role string
+	if entry := voiceMap[request.Voice]; len(entry) > 0 {
+		voice = entry[0]
+		if len(entry) > 1 {
+			role = entry[1]
+		}
+	}
+
+	return bytes.NewBufferString(CreateSSML(request.Input, voice, role))
+}
+
+// SpeechAction sends the speech request upstream and reports usage.
+// The output format falls back to the "mp3" entry when ResponseFormat is not
+// in outputFormatMap. On success usage counts promptTokens as both prompt and
+// total tokens, with zero completion tokens.
+func (p *AzureSpeechProvider) SpeechAction(request *types.SpeechAudioRequest, isModelMapped bool, promptTokens int) (usage *types.Usage, errWithCode *types.OpenAIErrorWithStatusCode) {
+	fullRequestURL := p.GetFullRequestURL(p.AudioSpeech, request.Model)
+
+	headers := p.GetRequestHeaders()
+	outputFormat, ok := outputFormatMap[request.ResponseFormat]
+	if !ok {
+		outputFormat = outputFormatMap["mp3"]
+	}
+	headers["X-Microsoft-OutputFormat"] = outputFormat
+
+	requestBody := p.getRequestBody(request)
+
+	client := common.NewClient()
+	req, err := client.NewRequest(p.Context.Request.Method, fullRequestURL, common.WithBody(requestBody), common.WithHeader(headers))
+	if err != nil {
+		return nil, common.ErrorWrapper(err, "new_request_failed", http.StatusInternalServerError)
+	}
+
+	if errWithCode = p.SendRequestRaw(req); errWithCode != nil {
+		return nil, errWithCode
+	}
+
+	usage = &types.Usage{
+		PromptTokens:     promptTokens,
+		CompletionTokens: 0,
+		TotalTokens:      promptTokens,
+	}
+
+	return usage, nil
+}
diff --git a/providers/providers.go b/providers/providers.go
index ceccaeaf..e84cabeb 100644
--- a/providers/providers.go
+++ b/providers/providers.go
@@ -8,6 +8,7 @@ import (
 	"one-api/providers/api2d"
 	"one-api/providers/api2gpt"
 	"one-api/providers/azure"
+	azurespeech "one-api/providers/azureSpeech"
 	"one-api/providers/baidu"
 	"one-api/providers/base"
 	"one-api/providers/claude"
@@ -47,6 +48,7 @@ func init() {
 	providerFactories[common.ChannelTypeOpenAISB] = openaisb.OpenaiSBProviderFactory{}
 	providerFactories[common.ChannelTypeAIGC2D] = aigc2d.Aigc2dProviderFactory{}
 	providerFactories[common.ChannelTypeAPI2GPT] = api2gpt.Api2gptProviderFactory{}
+	providerFactories[common.ChannelTypeAzureSpeech] = azurespeech.AzureSpeechProviderFactory{}
 
 }
 
diff --git a/web/src/constants/channel.constants.js b/web/src/constants/channel.constants.js
index 76407745..10a73f4c 100644
--- a/web/src/constants/channel.constants.js
+++ b/web/src/constants/channel.constants.js
@@ -9,6 +9,7 @@ export const CHANNEL_OPTIONS = [
   { key: 16, text: '智谱 ChatGLM', value: 16, color: 'violet' },
   { key: 19, text: '360 智脑', value: 19, color: 'blue' },
   { key: 23, text: '腾讯混元', value: 23, color: 'teal' },
+  { key: 24, text: 'Azure Speech', value: 24, color: 'olive' },
   { key: 8, text: '自定义渠道', value: 8, color: 'pink' },
   { key: 22, text: '知识库:FastGPT', value: 22, color: 'blue' },
   { key: 21, text: '知识库:AI Proxy', value: 21, color: 'purple' },