Wait 2 seconds once the shard is ready to improve stability (#92) (#94)

Co-authored-by: mswiniarsk <156412439+mswiniarsk@users.noreply.github.com>
2025-06-12 12:22:07 +00:00 · 2024-03-04 12:17:24 +01:00 · 2024-03-04 12:17:24 +01:00 · 8e14780bf4
commit 8e14780bf4
parent 80ae9ead28
2 changed files with 5 additions and 2 deletions
--- a/launcher/src/main.rs
+++ b/launcher/src/main.rs
@ -463,7 +463,7 @@ fn shard_manager(
    envs.push(("MAX_TOTAL_TOKENS".into(), max_total_tokens.to_string().into()));

    // Torch Distributed Env vars
-    if  world_size == 1 {
+    if world_size == 1 {
        envs.push(("RANK".into(), rank.to_string().into()));
    }
    envs.push(("WORLD_SIZE".into(), world_size.to_string().into()));
@ -603,6 +603,7 @@ fn shard_manager(
        // Shard is ready
        if uds.exists() && !ready {
            tracing::info!("Shard ready in {:?}", start_time.elapsed());
+            sleep(Duration::from_millis(2000));
            status_sender.send(ShardStatus::Ready).unwrap();
            ready = true;
        } else if !ready && wait_time.elapsed() > Duration::from_secs(10) {
--- a/router/src/main.rs
+++ b/router/src/main.rs
@ -142,7 +142,9 @@ fn main() -> Result<(), RouterError> {
    // This will only be used to validate payloads
    let local_path = Path::new(&tokenizer_name);
    let local_model = local_path.exists() && local_path.is_dir();
-    let skip_tokenizer_in_tgi = env::var("SKIP_TOKENIZER_IN_TGI").ok().map_or(false, |value| value.to_lowercase() == "true");
+    let skip_tokenizer_in_tgi = env::var("SKIP_TOKENIZER_IN_TGI")
+        .ok()
+        .map_or(false, |value| value.to_lowercase() == "true");
    let tokenizer = if skip_tokenizer_in_tgi {
        None
    } else if local_model {