From a1b3887846217352d4e608bb264625be207c39f5 Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Fri, 7 Mar 2025 13:25:24 +0100
Subject: [PATCH] Update docstring in `launcher/src/main.rs` instead

---
 launcher/src/main.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/launcher/src/main.rs b/launcher/src/main.rs
index cd4b2231..321d7c69 100644
--- a/launcher/src/main.rs
+++ b/launcher/src/main.rs
@@ -702,8 +702,8 @@ struct Args {
     /// Overall this number should be the largest possible amount that fits the
     /// remaining memory (after the model is loaded). Since the actual memory overhead
     /// depends on other parameters like if you're using quantization, flash attention
-    /// or the model implementation, text-generation-inference cannot infer this number
-    /// automatically.
+    /// or the model implementation, text-generation-inference infers this number automatically
+    /// if not provided, ensuring that the value is as large as possible.
     #[clap(long, env)]
     max_batch_total_tokens: Option<u32>,
 