Add Azure Speech support

This commit is contained in:
MartialBE 2023-12-02 22:13:47 +08:00
parent c97c8a0f65
commit a884c4b0bf
5 changed files with 130 additions and 1 deletion

View File

@@ -186,6 +186,7 @@ const (
ChannelTypeAIProxyLibrary = 21
ChannelTypeFastGPT = 22
ChannelTypeTencent = 23
ChannelTypeAzureSpeech = 24
)
var ChannelBaseURLs = []string{
@@ -212,7 +213,8 @@ var ChannelBaseURLs = []string{
"https://openrouter.ai/api", // 20
"https://api.aiproxy.io", // 21
"https://fastgpt.run/api/openapi", // 22
"https://hunyuan.cloud.tencent.com", //23
"https://hunyuan.cloud.tencent.com", // 23
"", // 24
}
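The new entry for index 24 is deliberately left empty: Azure Speech has no single public base URL because its TTS endpoint is region-specific, so the actual base URL is expected to come from the channel configuration. A minimal sketch of what that endpoint typically looks like (the region here is a hypothetical example, not part of this commit):

package main

import "fmt"

func main() {
	// Hypothetical region endpoint; Azure Speech endpoints are region-specific,
	// so the base URL comes from the channel configuration rather than this table.
	baseURL := "https://eastus.tts.speech.microsoft.com"
	// The provider appends its AudioSpeech path (see the factory below).
	fmt.Println(baseURL + "/cognitiveservices/v1")
}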
const (

View File

@@ -0,0 +1,36 @@
package azureSpeech
import (
"one-api/providers/base"
"github.com/gin-gonic/gin"
)
// Define the provider factory
type AzureSpeechProviderFactory struct{}
// Create AzureSpeechProvider
func (f AzureSpeechProviderFactory) Create(c *gin.Context) base.ProviderInterface {
return &AzureSpeechProvider{
BaseProvider: base.BaseProvider{
BaseURL: "",
AudioSpeech: "/cognitiveservices/v1",
Context: c,
},
}
}
type AzureSpeechProvider struct {
base.BaseProvider
}
// Get request headers
func (p *AzureSpeechProvider) GetRequestHeaders() (headers map[string]string) {
headers = make(map[string]string)
headers["Ocp-Apim-Subscription-Key"] = p.Context.GetString("api_key")
headers["Content-Type"] = "application/ssml+xml"
headers["User-Agent"] = "OneAPI"
// headers["X-Microsoft-OutputFormat"] = "audio-16khz-128kbitrate-mono-mp3"
return headers
}
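GetRequestHeaders reads the subscription key from the channel's api_key and sends it as Ocp-Apim-Subscription-Key; Content-Type is application/ssml+xml because the request body is SSML rather than JSON. A small sketch of the headers this produces (the key value is a placeholder):

package main

import "fmt"

func main() {
	// Sketch of the headers GetRequestHeaders builds; the key value is a
	// placeholder for whatever is stored as the channel's api_key.
	headers := map[string]string{
		"Ocp-Apim-Subscription-Key": "<azure-speech-key>",
		"Content-Type":              "application/ssml+xml",
		"User-Agent":                "OneAPI",
	}
	// X-Microsoft-OutputFormat is added later, in SpeechAction, based on the
	// requested response format.
	for key, value := range headers {
		fmt.Println(key + ": " + value)
	}
}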

View File

@@ -0,0 +1,88 @@
package azureSpeech
import (
"bytes"
"fmt"
"net/http"
"one-api/common"
"one-api/types"
)
// Map OpenAI response_format values to Azure output formats; aac and flac have
// no direct Azure equivalent here, so they fall back to mp3 variants.
var outputFormatMap = map[string]string{
"mp3": "audio-16khz-128kbitrate-mono-mp3",
"opus": "audio-16khz-128kbitrate-mono-opus",
"aac": "audio-24khz-160kbitrate-mono-mp3",
"flac": "audio-48khz-192kbitrate-mono-mp3",
}
func CreateSSML(text string, name string, role string) string {
ssmlTemplate := `<speak version='1.0' xml:lang='en-US'>
<voice xml:lang='en-US' %s name='%s'>
%s
</voice>
</speak>`
roleAttribute := ""
if role != "" {
roleAttribute = fmt.Sprintf("role='%s'", role)
}
return fmt.Sprintf(ssmlTemplate, roleAttribute, name, text)
}
func (p *AzureSpeechProvider) getRequestBody(request *types.SpeechAudioRequest) *bytes.Buffer {
voiceMap := map[string][]string{
"alloy": {"zh-CN-YunxiNeural"},
"echo": {"zh-CN-YunyangNeural"},
"fable": {"zh-CN-YunxiNeural", "Boy"},
"onyx": {"zh-CN-YunyeNeural"},
"nova": {"zh-CN-XiaochenNeural"},
"shimmer": {"zh-CN-XiaohanNeural"},
}
voice := ""
role := ""
if voiceMap[request.Voice] != nil {
voice = voiceMap[request.Voice][0]
if len(voiceMap[request.Voice]) > 1 {
role = voiceMap[request.Voice][1]
}
}
ssml := CreateSSML(request.Input, voice, role)
return bytes.NewBufferString(ssml)
}
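getRequestBody maps the OpenAI voice names onto Azure neural voices (with an optional role for "fable") and wraps the input text in SSML via CreateSSML. A standalone sketch of the document this produces, reusing the same template (the sample text is illustrative):

package main

import "fmt"

// Same template used by CreateSSML above, reproduced so the sketch runs standalone.
func createSSML(text, name, role string) string {
	ssmlTemplate := `<speak version='1.0' xml:lang='en-US'>
<voice xml:lang='en-US' %s name='%s'>
%s
</voice>
</speak>`
	roleAttribute := ""
	if role != "" {
		roleAttribute = fmt.Sprintf("role='%s'", role)
	}
	return fmt.Sprintf(ssmlTemplate, roleAttribute, name, text)
}

func main() {
	// The OpenAI voice "fable" maps to zh-CN-YunxiNeural with the "Boy" role,
	// so the request body sent to Azure looks like this:
	fmt.Println(createSSML("你好，世界", "zh-CN-YunxiNeural", "Boy"))
}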
func (p *AzureSpeechProvider) SpeechAction(request *types.SpeechAudioRequest, isModelMapped bool, promptTokens int) (usage *types.Usage, errWithCode *types.OpenAIErrorWithStatusCode) {
fullRequestURL := p.GetFullRequestURL(p.AudioSpeech, request.Model)
headers := p.GetRequestHeaders()
responseFormat := outputFormatMap[request.ResponseFormat]
if responseFormat == "" {
responseFormat = outputFormatMap["mp3"]
}
headers["X-Microsoft-OutputFormat"] = responseFormat
requestBody := p.getRequestBody(request)
client := common.NewClient()
req, err := client.NewRequest(p.Context.Request.Method, fullRequestURL, common.WithBody(requestBody), common.WithHeader(headers))
if err != nil {
return nil, common.ErrorWrapper(err, "new_request_failed", http.StatusInternalServerError)
}
errWithCode = p.SendRequestRaw(req)
if errWithCode != nil {
return
}
usage = &types.Usage{
PromptTokens: promptTokens,
CompletionTokens: 0,
TotalTokens: promptTokens,
}
return
}
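SpeechAction builds the full request URL from the channel's base URL and the /cognitiveservices/v1 path, picks an X-Microsoft-OutputFormat (falling back to mp3), sends the SSML body, and forwards the raw audio response; usage is billed with prompt tokens only. For reference, a standalone sketch of the equivalent call against Azure's TTS REST API without one-api's helpers (region, key, and voice are placeholders):

package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
	"os"
)

func main() {
	// Placeholders: in one-api the region endpoint and key come from the channel configuration.
	endpoint := "https://eastus.tts.speech.microsoft.com/cognitiveservices/v1"
	key := os.Getenv("AZURE_SPEECH_KEY")

	ssml := `<speak version='1.0' xml:lang='en-US'>
<voice xml:lang='en-US' name='zh-CN-YunxiNeural'>你好，世界</voice>
</speak>`

	req, err := http.NewRequest(http.MethodPost, endpoint, bytes.NewBufferString(ssml))
	if err != nil {
		panic(err)
	}
	req.Header.Set("Ocp-Apim-Subscription-Key", key)
	req.Header.Set("Content-Type", "application/ssml+xml")
	req.Header.Set("X-Microsoft-OutputFormat", "audio-16khz-128kbitrate-mono-mp3")
	req.Header.Set("User-Agent", "OneAPI")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	audio, _ := io.ReadAll(resp.Body)
	fmt.Println("status:", resp.Status, "bytes:", len(audio))
}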

View File

@@ -8,6 +8,7 @@ import (
"one-api/providers/api2d"
"one-api/providers/api2gpt"
"one-api/providers/azure"
azurespeech "one-api/providers/azureSpeech"
"one-api/providers/baidu"
"one-api/providers/base"
"one-api/providers/claude"
@@ -47,6 +48,7 @@ func init() {
providerFactories[common.ChannelTypeOpenAISB] = openaisb.OpenaiSBProviderFactory{}
providerFactories[common.ChannelTypeAIGC2D] = aigc2d.Aigc2dProviderFactory{}
providerFactories[common.ChannelTypeAPI2GPT] = api2gpt.Api2gptProviderFactory{}
providerFactories[common.ChannelTypeAzureSpeech] = azurespeech.AzureSpeechProviderFactory{}
}
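Registering the factory under common.ChannelTypeAzureSpeech lets the relay layer resolve the new provider from the channel type alone. A self-contained sketch of that registry pattern (the type and function names here are illustrative, not one-api's actual relay code):

package main

import "fmt"

// Illustrative stand-ins for the provider interface and factory (not one-api's types).
type Provider interface{ Name() string }

type Factory interface{ Create() Provider }

type azureSpeech struct{}

func (azureSpeech) Name() string { return "azure speech" }

type azureSpeechFactory struct{}

func (azureSpeechFactory) Create() Provider { return azureSpeech{} }

func main() {
	const channelTypeAzureSpeech = 24
	factories := map[int]Factory{
		channelTypeAzureSpeech: azureSpeechFactory{},
	}
	// A relay handler would look the factory up by the channel's type.
	fmt.Println(factories[channelTypeAzureSpeech].Create().Name())
}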

View File

@@ -9,6 +9,7 @@ export const CHANNEL_OPTIONS = [
{ key: 16, text: '智谱 ChatGLM', value: 16, color: 'violet' },
{ key: 19, text: '360 智脑', value: 19, color: 'blue' },
{ key: 23, text: '腾讯混元', value: 23, color: 'teal' },
{ key: 24, text: 'Azure Speech', value: 24, color: 'olive' },
{ key: 8, text: '自定义渠道', value: 8, color: 'pink' },
{ key: 22, text: '知识库FastGPT', value: 22, color: 'blue' },
{ key: 21, text: '知识库AI Proxy', value: 21, color: 'purple' },