mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-19 22:02:06 +00:00
configurable termination timeout (#3126)
* make shard and webserver termination timeouts configurable * Updating documentation. * Fmt. --------- Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>
This commit is contained in:
parent
e497bc09f6
commit
e5503eba78
@ -477,6 +477,15 @@ Options:
|
|||||||
|
|
||||||
[env: ENABLE_PREFILL_LOGPROBS=]
|
[env: ENABLE_PREFILL_LOGPROBS=]
|
||||||
|
|
||||||
|
```
|
||||||
|
## GRACEFUL_TERMINATION_TIMEOUT
|
||||||
|
```shell
|
||||||
|
-g, --graceful-termination-timeout <GRACEFUL_TERMINATION_TIMEOUT>
|
||||||
|
Change timeout of graceful termination of the TGI server
|
||||||
|
|
||||||
|
[env: GRACEFUL_TERMINATION_TIMEOUT=]
|
||||||
|
[default: 90]
|
||||||
|
|
||||||
```
|
```
|
||||||
## HELP
|
## HELP
|
||||||
```shell
|
```shell
|
||||||
|
@ -892,6 +892,10 @@ struct Args {
|
|||||||
/// Using this flag reallows users to ask for them.
|
/// Using this flag reallows users to ask for them.
|
||||||
#[clap(long, env)]
|
#[clap(long, env)]
|
||||||
enable_prefill_logprobs: bool,
|
enable_prefill_logprobs: bool,
|
||||||
|
|
||||||
|
/// Change timeout of graceful termination of the TGI server
|
||||||
|
#[clap(default_value = "90", long, short, env)]
|
||||||
|
graceful_termination_timeout: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
@ -933,6 +937,7 @@ fn shard_manager(
|
|||||||
log_level: LevelFilter,
|
log_level: LevelFilter,
|
||||||
status_sender: mpsc::Sender<ShardStatus>,
|
status_sender: mpsc::Sender<ShardStatus>,
|
||||||
shutdown: Arc<AtomicBool>,
|
shutdown: Arc<AtomicBool>,
|
||||||
|
graceful_termination_timeout: u64,
|
||||||
_shutdown_sender: mpsc::Sender<()>,
|
_shutdown_sender: mpsc::Sender<()>,
|
||||||
) {
|
) {
|
||||||
// Enter shard-manager tracing span
|
// Enter shard-manager tracing span
|
||||||
@ -1206,7 +1211,12 @@ fn shard_manager(
|
|||||||
|
|
||||||
// We received a shutdown signal
|
// We received a shutdown signal
|
||||||
if shutdown.load(Ordering::SeqCst) {
|
if shutdown.load(Ordering::SeqCst) {
|
||||||
terminate("shard", p, Duration::from_secs(90)).unwrap();
|
terminate(
|
||||||
|
"shard",
|
||||||
|
p,
|
||||||
|
Duration::from_secs(graceful_termination_timeout),
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1545,6 +1555,7 @@ fn spawn_shards(
|
|||||||
status_receiver: &mpsc::Receiver<ShardStatus>,
|
status_receiver: &mpsc::Receiver<ShardStatus>,
|
||||||
status_sender: mpsc::Sender<ShardStatus>,
|
status_sender: mpsc::Sender<ShardStatus>,
|
||||||
running: Arc<AtomicBool>,
|
running: Arc<AtomicBool>,
|
||||||
|
graceful_termination_timeout: u64,
|
||||||
) -> Result<(), LauncherError> {
|
) -> Result<(), LauncherError> {
|
||||||
// Start shard processes
|
// Start shard processes
|
||||||
for rank in 0..num_shard {
|
for rank in 0..num_shard {
|
||||||
@ -1612,6 +1623,7 @@ fn spawn_shards(
|
|||||||
max_log_level,
|
max_log_level,
|
||||||
status_sender,
|
status_sender,
|
||||||
shutdown,
|
shutdown,
|
||||||
|
graceful_termination_timeout,
|
||||||
shutdown_sender,
|
shutdown_sender,
|
||||||
)
|
)
|
||||||
});
|
});
|
||||||
@ -1999,6 +2011,8 @@ fn main() -> Result<(), LauncherError> {
|
|||||||
// Pattern match configuration
|
// Pattern match configuration
|
||||||
let args: Args = Args::parse();
|
let args: Args = Args::parse();
|
||||||
|
|
||||||
|
let graceful_termination_timeout = args.graceful_termination_timeout;
|
||||||
|
|
||||||
// Filter events with LOG_LEVEL
|
// Filter events with LOG_LEVEL
|
||||||
let varname = "LOG_LEVEL";
|
let varname = "LOG_LEVEL";
|
||||||
let env_filter = if let Ok(log_level) = std::env::var(varname) {
|
let env_filter = if let Ok(log_level) = std::env::var(varname) {
|
||||||
@ -2263,6 +2277,7 @@ fn main() -> Result<(), LauncherError> {
|
|||||||
&status_receiver,
|
&status_receiver,
|
||||||
status_sender,
|
status_sender,
|
||||||
running.clone(),
|
running.clone(),
|
||||||
|
graceful_termination_timeout,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
// We might have received a termination signal
|
// We might have received a termination signal
|
||||||
@ -2307,7 +2322,12 @@ fn main() -> Result<(), LauncherError> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Graceful termination
|
// Graceful termination
|
||||||
terminate("webserver", webserver, Duration::from_secs(90)).unwrap();
|
terminate(
|
||||||
|
"webserver",
|
||||||
|
webserver,
|
||||||
|
Duration::from_secs(graceful_termination_timeout),
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
shutdown_shards(shutdown, &shutdown_receiver);
|
shutdown_shards(shutdown, &shutdown_receiver);
|
||||||
|
|
||||||
exit_code
|
exit_code
|
||||||
|
Loading…
Reference in New Issue
Block a user