diff --git a/router/src/lib.rs b/router/src/lib.rs index e7e1446e..2395e3e2 100644 --- a/router/src/lib.rs +++ b/router/src/lib.rs @@ -959,20 +959,6 @@ pub(crate) struct Details { pub top_tokens: Vec<Vec<Token>>, } -impl Default for Details { - fn default() -> Self { - Self { - finish_reason: FinishReason::Length, - generated_tokens: 0, - seed: None, - prefill: Vec::new(), - tokens: Vec::new(), - best_of_sequences: None, - top_tokens: Vec::new(), - } - } -} - #[derive(Serialize, ToSchema)] pub(crate) struct GenerateResponse { #[schema(example = "test")] diff --git a/router/src/server.rs b/router/src/server.rs index 3b7c8e2e..a9779420 100644 --- a/router/src/server.rs +++ b/router/src/server.rs @@ -551,7 +551,11 @@ async fn generate_stream_internal( path = "/v1/completions", request_body = CompletionRequest, responses( - (status = 200, description = "Generated Text", body = ChatCompletionChunk), + (status = 200, description = "Generated Chat Completion", + content( + ("application/json" = Completion), + ("text/event-stream" = CompletionCompleteChunk), + )), (status = 424, description = "Generation Error", body = ErrorResponse, example = json ! ({"error": "Request failed during generation"})), (status = 429, description = "Model is overloaded", body = ErrorResponse, @@ -934,7 +938,11 @@ async fn completions( path = "/v1/chat/completions", request_body = ChatRequest, responses( - (status = 200, description = "Generated Text", body = ChatCompletionChunk), + (status = 200, description = "Generated Chat Completion", + content( + ("application/json" = ChatCompletion), + ("text/event-stream" = ChatCompletionChunk), + )), (status = 424, description = "Generation Error", body = ErrorResponse, example = json ! ({"error": "Request failed during generation"})), (status = 429, description = "Model is overloaded", body = ErrorResponse,