diff --git a/router/src/lib.rs b/router/src/lib.rs index d7cfa4c7..35c22763 100644 --- a/router/src/lib.rs +++ b/router/src/lib.rs @@ -40,13 +40,16 @@ pub(crate) struct GenerateParameters { example = 0.95 )] pub top_p: Option, - #[serde(default = "default_do_sample")] + #[serde(default)] #[schema(default = "false", example = true)] pub do_sample: bool, #[serde(default = "default_max_new_tokens")] #[schema(exclusive_minimum = 0, exclusive_maximum = 512, default = "20")] pub max_new_tokens: u32, #[serde(default)] + #[schema(default = "false", example = false)] + pub return_full_text: bool, + #[serde(default)] #[schema(inline, max_items = 4, example = json ! (["photographer"]))] pub stop: Vec, #[serde(default)] @@ -56,10 +59,6 @@ pub(crate) struct GenerateParameters { pub seed: Option, } -fn default_do_sample() -> bool { - false -} - fn default_max_new_tokens() -> u32 { 20 } @@ -70,8 +69,9 @@ fn default_parameters() -> GenerateParameters { repetition_penalty: None, top_k: None, top_p: None, - do_sample: default_do_sample(), + do_sample: false, max_new_tokens: default_max_new_tokens(), + return_full_text: false, stop: vec![], details: false, seed: None, diff --git a/router/src/server.rs b/router/src/server.rs index e33b0e2d..289d1899 100644 --- a/router/src/server.rs +++ b/router/src/server.rs @@ -75,6 +75,7 @@ async fn health(infer: Extension) -> Result<(), (StatusCode, Json { error = true; - yield Ok(Event::from(err)) + yield Ok(Event::from(err)); + break; } } } @@ -327,7 +352,7 @@ async fn generate_stream( // yield error Err(err) => { error = true; - yield Ok(Event::from(err)) + yield Ok(Event::from(err)); } } // Check if generation reached the end @@ -336,7 +361,7 @@ async fn generate_stream( let err = InferError::IncompleteGeneration; metrics::increment_counter!("tgi_request_failure", "err" => "incomplete"); tracing::error!("{err}"); - yield Ok(Event::from(err)) + yield Ok(Event::from(err)); } };