feat: refactor response parsing logic to support multiple formats (#782)
* feat: Refactor response parsing logic to support multiple formats The parsing logic for responses in relay.go and relay-audio.go was refactored to support multiple response formats - 'json', 'text', 'srt', 'verbose_json', and 'vtt'. The existing `WhisperResponse` struct was renamed to `WhisperJSONResponse` and a new struct `WhisperVerboseJSONResponse` was added to support the 'verbose_json' format. Additional parsing functions were added to extract text from these new response types. This change was necessary to make the parsing logic more flexible and extensible for different types of responses. * chore: update name --------- Co-authored-by: JustSong <songquanpeng@foxmail.com>
This commit is contained in:
parent
379074f7d0
commit
9ba5388367
@ -1,6 +1,7 @@
|
|||||||
package controller
|
package controller
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bufio"
|
||||||
"bytes"
|
"bytes"
|
||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
@ -102,7 +103,13 @@ func relayAudioHelper(c *gin.Context, relayMode int) *OpenAIErrorWithStatusCode
|
|||||||
fullRequestURL = fmt.Sprintf("%s/openai/deployments/%s/audio/transcriptions?api-version=%s", baseURL, audioModel, apiVersion)
|
fullRequestURL = fmt.Sprintf("%s/openai/deployments/%s/audio/transcriptions?api-version=%s", baseURL, audioModel, apiVersion)
|
||||||
}
|
}
|
||||||
|
|
||||||
requestBody := c.Request.Body
|
requestBody := &bytes.Buffer{}
|
||||||
|
_, err = io.Copy(requestBody, c.Request.Body)
|
||||||
|
if err != nil {
|
||||||
|
return errorWrapper(err, "new_request_body_failed", http.StatusInternalServerError)
|
||||||
|
}
|
||||||
|
c.Request.Body = io.NopCloser(bytes.NewBuffer(requestBody.Bytes()))
|
||||||
|
responseFormat := c.DefaultPostForm("response_format", "json")
|
||||||
|
|
||||||
req, err := http.NewRequest(c.Request.Method, fullRequestURL, requestBody)
|
req, err := http.NewRequest(c.Request.Method, fullRequestURL, requestBody)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -144,12 +151,33 @@ func relayAudioHelper(c *gin.Context, relayMode int) *OpenAIErrorWithStatusCode
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return errorWrapper(err, "close_response_body_failed", http.StatusInternalServerError)
|
return errorWrapper(err, "close_response_body_failed", http.StatusInternalServerError)
|
||||||
}
|
}
|
||||||
var whisperResponse WhisperResponse
|
|
||||||
err = json.Unmarshal(responseBody, &whisperResponse)
|
var openAIErr TextResponse
|
||||||
if err != nil {
|
if err = json.Unmarshal(responseBody, &openAIErr); err == nil {
|
||||||
return errorWrapper(err, "unmarshal_response_body_failed", http.StatusInternalServerError)
|
if openAIErr.Error.Message != "" {
|
||||||
|
return errorWrapper(fmt.Errorf("type %s, code %v, message %s", openAIErr.Error.Type, openAIErr.Error.Code, openAIErr.Error.Message), "request_error", http.StatusInternalServerError)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
quota = countTokenText(whisperResponse.Text, audioModel)
|
|
||||||
|
var text string
|
||||||
|
switch responseFormat {
|
||||||
|
case "json":
|
||||||
|
text, err = getTextFromJSON(responseBody)
|
||||||
|
case "text":
|
||||||
|
text, err = getTextFromText(responseBody)
|
||||||
|
case "srt":
|
||||||
|
text, err = getTextFromSRT(responseBody)
|
||||||
|
case "verbose_json":
|
||||||
|
text, err = getTextFromVerboseJSON(responseBody)
|
||||||
|
case "vtt":
|
||||||
|
text, err = getTextFromVTT(responseBody)
|
||||||
|
default:
|
||||||
|
return errorWrapper(errors.New("unexpected_response_format"), "unexpected_response_format", http.StatusInternalServerError)
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
return errorWrapper(err, "get_text_from_body_err", http.StatusInternalServerError)
|
||||||
|
}
|
||||||
|
quota = countTokenText(text, audioModel)
|
||||||
resp.Body = io.NopCloser(bytes.NewBuffer(responseBody))
|
resp.Body = io.NopCloser(bytes.NewBuffer(responseBody))
|
||||||
}
|
}
|
||||||
if resp.StatusCode != http.StatusOK {
|
if resp.StatusCode != http.StatusOK {
|
||||||
@ -187,3 +215,48 @@ func relayAudioHelper(c *gin.Context, relayMode int) *OpenAIErrorWithStatusCode
|
|||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func getTextFromVTT(body []byte) (string, error) {
|
||||||
|
return getTextFromSRT(body)
|
||||||
|
}
|
||||||
|
|
||||||
|
func getTextFromVerboseJSON(body []byte) (string, error) {
|
||||||
|
var whisperResponse WhisperVerboseJSONResponse
|
||||||
|
if err := json.Unmarshal(body, &whisperResponse); err != nil {
|
||||||
|
return "", fmt.Errorf("unmarshal_response_body_failed err :%w", err)
|
||||||
|
}
|
||||||
|
return whisperResponse.Text, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// getTextFromSRT extracts the spoken text from a subtitle-style body (SRT, and
// by delegation VTT), discarding cue indices, timing lines and headers.
//
// A cue starts at a line containing "-->". Every following line belongs to
// that cue until a blank line or the next timing line is reached, so cues whose
// text spans several lines are captured in full (previously only the first
// text line of each cue was kept and the rest was silently dropped).
//
// All text lines are concatenated without a separator, which keeps the output
// for single-line cues identical to the earlier behaviour.
//
// It returns the concatenated text, or the scanner's error if reading body
// fails (for example on a line exceeding bufio.Scanner's token limit).
func getTextFromSRT(body []byte) (string, error) {
	const (
		outsideCue = iota // looking for the next timing line
		firstText         // the previous line was a timing line
		moreText          // inside a cue, after its first text line
	)

	// Scan the bytes directly; no need to copy them into a string first.
	scanner := bufio.NewScanner(bytes.NewReader(body))
	var builder strings.Builder
	state := outsideCue

	for scanner.Scan() {
		line := scanner.Text()
		switch state {
		case firstText:
			// The line right after a timing line is always taken verbatim.
			builder.WriteString(line)
			if strings.TrimSpace(line) == "" {
				// Empty cue: nothing further belongs to it.
				state = outsideCue
			} else {
				state = moreText
			}
		case moreText:
			switch {
			case strings.TrimSpace(line) == "":
				// Blank line terminates the cue.
				state = outsideCue
			case strings.Contains(line, "-->"):
				// Next cue started without a separating blank line.
				state = firstText
			default:
				builder.WriteString(line)
			}
		default:
			if strings.Contains(line, "-->") {
				state = firstText
			}
		}
	}
	if err := scanner.Err(); err != nil {
		return "", err
	}
	return builder.String(), nil
}
|
||||||
|
|
||||||
|
// getTextFromText returns a plain-text response body as a string with at most
// one trailing "\n" removed. The error result is always nil; it exists so the
// function has the same shape as the other getTextFrom* helpers.
func getTextFromText(body []byte) (string, error) {
	text := string(body)
	if strings.HasSuffix(text, "\n") {
		text = text[:len(text)-1]
	}
	return text, nil
}
|
||||||
|
|
||||||
|
func getTextFromJSON(body []byte) (string, error) {
|
||||||
|
var whisperResponse WhisperJSONResponse
|
||||||
|
if err := json.Unmarshal(body, &whisperResponse); err != nil {
|
||||||
|
return "", fmt.Errorf("unmarshal_response_body_failed err :%w", err)
|
||||||
|
}
|
||||||
|
return whisperResponse.Text, nil
|
||||||
|
}
|
||||||
|
@ -141,10 +141,31 @@ type ImageRequest struct {
|
|||||||
User string `json:"user,omitempty"`
|
User string `json:"user,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type WhisperResponse struct {
|
type WhisperJSONResponse struct {
|
||||||
Text string `json:"text,omitempty"`
|
Text string `json:"text,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type WhisperVerboseJSONResponse struct {
|
||||||
|
Task string `json:"task,omitempty"`
|
||||||
|
Language string `json:"language,omitempty"`
|
||||||
|
Duration float64 `json:"duration,omitempty"`
|
||||||
|
Text string `json:"text,omitempty"`
|
||||||
|
Segments []Segment `json:"segments,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// Segment is one element of WhisperVerboseJSONResponse.Segments. Field names
// map one-to-one onto the JSON keys given in the struct tags.
type Segment struct {
	Id   int `json:"id"`
	Seek int `json:"seek"`

	// Start and End bound the segment.
	// NOTE(review): unit not shown here, presumably seconds; confirm.
	Start float64 `json:"start"`
	End   float64 `json:"end"`

	Text   string `json:"text"`
	Tokens []int  `json:"tokens"`

	// Decoder statistics passed through from the upstream response.
	Temperature      float64 `json:"temperature"`
	AvgLogprob       float64 `json:"avg_logprob"`
	CompressionRatio float64 `json:"compression_ratio"`
	NoSpeechProb     float64 `json:"no_speech_prob"`
}
|
||||||
|
|
||||||
type TextToSpeechRequest struct {
|
type TextToSpeechRequest struct {
|
||||||
Model string `json:"model" binding:"required"`
|
Model string `json:"model" binding:"required"`
|
||||||
Input string `json:"input" binding:"required"`
|
Input string `json:"input" binding:"required"`
|
||||||
|
Loading…
Reference in New Issue
Block a user