diff --git a/router/src/server.rs b/router/src/server.rs index e609821c..fac4708c 100644 --- a/router/src/server.rs +++ b/router/src/server.rs @@ -128,6 +128,11 @@ example = json ! ({"error": "Model is overloaded"})), example = json ! ({"error": "Input validation error"})), (status = 500, description = "Incomplete generation", body = ErrorResponse, example = json ! ({"error": "Incomplete generation"})), +params(("max_input_length" = usize, Path, description = "This is the maximum allowed input +length (expressed in number of tokens) for users. The larger this value, the longer prompt +users can send which can impact the overall memory required to handle the load. +Please note that some models have a finite range of sequence they can handle."), +)), ) )] #[instrument(