From a1b3887846217352d4e608bb264625be207c39f5 Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Fri, 7 Mar 2025 13:25:24 +0100
Subject: [PATCH] Update docstring in `launcher/src/main.rs` instead

---
 launcher/src/main.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/launcher/src/main.rs b/launcher/src/main.rs
index cd4b2231..321d7c69 100644
--- a/launcher/src/main.rs
+++ b/launcher/src/main.rs
@@ -702,8 +702,8 @@ struct Args {
     /// Overall this number should be the largest possible amount that fits the
     /// remaining memory (after the model is loaded). Since the actual memory overhead
     /// depends on other parameters like if you're using quantization, flash attention
-    /// or the model implementation, text-generation-inference cannot infer this number
-    /// automatically.
+    /// or the model implementation, text-generation-inference infers this number automatically
+    /// if not provided, ensuring that the value is as large as possible.
     #[clap(long, env)]
     max_batch_total_tokens: Option<u32>,
 