diff --git a/router/src/lib.rs b/router/src/lib.rs index d375eafb..efc4d3ae 100644 --- a/router/src/lib.rs +++ b/router/src/lib.rs @@ -56,12 +56,15 @@ pub(crate) struct GenerateParameters { #[schema(exclusive_minimum = 0, exclusive_maximum = 512, default = "20")] pub max_new_tokens: u32, #[serde(default)] - #[schema(default = "None", example = false)] + #[schema(default = "null", example = false)] pub return_full_text: Option, #[serde(default)] #[schema(inline, max_items = 4, example = json ! (["photographer"]))] pub stop: Vec, #[serde(default)] + #[schema(default = "null", example = "null")] + pub truncate: Option, + #[serde(default)] #[schema(default = "false", example = true)] pub watermark: bool, #[serde(default)] @@ -86,6 +89,7 @@ fn default_parameters() -> GenerateParameters { max_new_tokens: default_max_new_tokens(), return_full_text: None, stop: Vec::new(), + truncate: None, watermark: false, details: false, seed: None, diff --git a/router/src/server.rs b/router/src/server.rs index 2ce5699d..ef10b7b1 100644 --- a/router/src/server.rs +++ b/router/src/server.rs @@ -73,6 +73,7 @@ async fn health(infer: Extension) -> Result<(), (StatusCode, Json { + Ok(mut encoding) => { + encoding.truncate() + let input_length = encoding.len(); let total_tokens = input_length + max_new_tokens as usize;