From e5503eba781143f6f583c06cfcb6f0bf11349940 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Erik=20Kaunism=C3=A4ki?= Date: Thu, 20 Mar 2025 14:25:56 +0100 Subject: [PATCH] configurable termination timeout (#3126) * make shard and webserver termination timeouts configurable * Updating documentation. * Fmt. --------- Co-authored-by: Nicolas Patry --- docs/source/reference/launcher.md | 9 +++++++++ launcher/src/main.rs | 24 ++++++++++++++++++++++-- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/docs/source/reference/launcher.md b/docs/source/reference/launcher.md index 2d9b58d9b..6505a08dc 100644 --- a/docs/source/reference/launcher.md +++ b/docs/source/reference/launcher.md @@ -477,6 +477,15 @@ Options: [env: ENABLE_PREFILL_LOGPROBS=] +``` +## GRACEFUL_TERMINATION_TIMEOUT +```shell + -g, --graceful-termination-timeout <GRACEFUL_TERMINATION_TIMEOUT> + Timeout in seconds for graceful termination of the TGI server + + [env: GRACEFUL_TERMINATION_TIMEOUT=] + [default: 90] + ``` ## HELP ```shell diff --git a/launcher/src/main.rs b/launcher/src/main.rs index e3abb843d..250613812 100644 --- a/launcher/src/main.rs +++ b/launcher/src/main.rs @@ -892,6 +892,10 @@ struct Args { /// Using this flag reallows users to ask for them. 
#[clap(long, env)] enable_prefill_logprobs: bool, + + /// Timeout in seconds for graceful termination of the TGI server + #[clap(default_value = "90", long, short, env)] + graceful_termination_timeout: u64, } #[derive(Debug)] @@ -933,6 +937,7 @@ fn shard_manager( log_level: LevelFilter, status_sender: mpsc::Sender, shutdown: Arc, + graceful_termination_timeout: u64, _shutdown_sender: mpsc::Sender<()>, ) { // Enter shard-manager tracing span @@ -1206,7 +1211,12 @@ fn shard_manager( // We received a shutdown signal if shutdown.load(Ordering::SeqCst) { - terminate("shard", p, Duration::from_secs(90)).unwrap(); + terminate( + "shard", + p, + Duration::from_secs(graceful_termination_timeout), + ) + .unwrap(); return; } @@ -1545,6 +1555,7 @@ fn spawn_shards( status_receiver: &mpsc::Receiver, status_sender: mpsc::Sender, running: Arc, + graceful_termination_timeout: u64, ) -> Result<(), LauncherError> { // Start shard processes for rank in 0..num_shard { @@ -1612,6 +1623,7 @@ fn spawn_shards( max_log_level, status_sender, shutdown, + graceful_termination_timeout, shutdown_sender, ) }); @@ -1999,6 +2011,8 @@ fn main() -> Result<(), LauncherError> { // Pattern match configuration let args: Args = Args::parse(); + let graceful_termination_timeout = args.graceful_termination_timeout; + // Filter events with LOG_LEVEL let varname = "LOG_LEVEL"; let env_filter = if let Ok(log_level) = std::env::var(varname) { @@ -2263,6 +2277,7 @@ fn main() -> Result<(), LauncherError> { &status_receiver, status_sender, running.clone(), + graceful_termination_timeout, )?; // We might have received a termination signal @@ -2307,7 +2322,12 @@ fn main() -> Result<(), LauncherError> { } // Graceful termination - terminate("webserver", webserver, Duration::from_secs(90)).unwrap(); + terminate( + "webserver", + webserver, + Duration::from_secs(graceful_termination_timeout), + ) + .unwrap(); shutdown_shards(shutdown, &shutdown_receiver); exit_code