From 70f485bf9f601b5450b00894f56e20b973d1c2e4 Mon Sep 17 00:00:00 2001 From: Robert Kimball Date: Wed, 28 Jun 2023 02:50:12 -0700 Subject: [PATCH] feat(router): add header option to disable buffering for the generate_stream response (#498) # This PR adds an HTTP header option to disable buffering for the generate_stream endpoint response stream. Problem: If a model is run behind a proxy server such as nginx that has buffering enabled, then the response stream from generate_stream gets aggregated into a single response, which effectively disables streaming. Instead of getting a chunked response where each token is presented over time, the response presents everything all at once. Solution: This change adds the `X-Accel-Buffering: no` HTTP header, which disables buffering for the generate_stream response, allowing the response to stream properly. --- router/src/server.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/router/src/server.rs b/router/src/server.rs index b8c67b2c..dd8bc874 100644 --- a/router/src/server.rs +++ b/router/src/server.rs @@ -351,6 +351,7 @@ async fn generate_stream( "x-compute-characters", compute_characters.to_string().parse().unwrap(), ); + headers.insert("X-Accel-Buffering", "no".parse().unwrap()); let stream = async_stream::stream! { // Inference