diff --git a/router/src/health.rs b/router/src/health.rs index ab290fc1..24888a0b 100644 --- a/router/src/health.rs +++ b/router/src/health.rs @@ -23,7 +23,7 @@ impl Health { } pub(crate) async fn check(&mut self) -> bool { - if self.generation_health.load(Ordering::SeqCst) { + if self.generation_health.load(Ordering::Relaxed) { // Generation is healthy, we only check that the shards are answering gRPC calls self.client.health().await.is_ok() } else { @@ -61,7 +61,7 @@ impl Health { // Skips the queue let value = self.client.prefill(batch).await.is_ok(); // Update generation health - self.generation_health.store(value, Ordering::SeqCst); + self.generation_health.store(value, Ordering::Relaxed); value } } diff --git a/router/src/infer.rs b/router/src/infer.rs index 8547df98..22131945 100644 --- a/router/src/infer.rs +++ b/router/src/infer.rs @@ -385,7 +385,7 @@ async fn prefill( match client.prefill(batch).await { Ok((generations, next_batch)) => { // Update health - generation_health.store(true, Ordering::SeqCst); + generation_health.store(true, Ordering::Relaxed); // Send generated tokens and filter stopped entries filter_send_generations(generations, entries); @@ -399,7 +399,7 @@ async fn prefill( // If we have an error, we discard the whole batch Err(err) => { // Update health - generation_health.store(false, Ordering::SeqCst); + generation_health.store(false, Ordering::Relaxed); let _ = client.clear_cache(Some(batch_id)).await; send_errors(err, entries); metrics::increment_counter!("tgi_batch_inference_failure", "method" => "prefill"); @@ -424,7 +424,7 @@ async fn decode( match client.decode(batches).await { Ok((generations, next_batch)) => { // Update health - generation_health.store(true, Ordering::SeqCst); + generation_health.store(true, Ordering::Relaxed); // Send generated tokens and filter stopped entries filter_send_generations(generations, entries); @@ -437,7 +437,7 @@ async fn decode( } // If we have an error, we discard the whole batch Err(err) => { - generation_health.store(false, Ordering::SeqCst); + generation_health.store(false, Ordering::Relaxed); for id in batch_ids { let _ = client.clear_cache(Some(id)).await; }