mirror of https://github.com/huggingface/text-generation-inference.git
synced 2025-09-10 20:04:52 +00:00

commit df419a21e0 (parent 3756a5f1e2)

    use relaxed
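This commit downgrades every atomic access to the `generation_health` flag from `Ordering::SeqCst` to `Ordering::Relaxed`. The flag is a lone shared `AtomicBool`: each site in the diff is an isolated load or store of the boolean, and no other memory is published through it, so the global-ordering guarantee of SeqCst buys nothing here. Below is a minimal sketch of the pattern, assuming the flag is shared as an `Arc<AtomicBool>` between the batching loop and the health check; the thread structure and names are illustrative, not the router's actual code.

use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use std::thread;

fn main() {
    // Shared health flag, standing in for the router's generation_health.
    let generation_health = Arc::new(AtomicBool::new(false));

    // Writer side: the batching loop records whether the last prefill/decode
    // call succeeded. Relaxed is enough because only the bool itself is
    // communicated; no other writes need to become visible along with it.
    let flag = Arc::clone(&generation_health);
    let batching_loop = thread::spawn(move || {
        let prefill_ok = true; // stand-in for client.prefill(batch).await.is_ok()
        flag.store(prefill_ok, Ordering::Relaxed);
    });
    batching_loop.join().unwrap();

    // Reader side: the health check only branches on the latest value it sees.
    if generation_health.load(Ordering::Relaxed) {
        println!("generation healthy: only ping the shards over gRPC");
    } else {
        println!("generation unhealthy: run a real dummy prefill");
    }
}

On x86 a SeqCst load and a Relaxed load compile to the same plain load anyway, but a SeqCst store carries a full fence (a lock-prefixed instruction) that Relaxed drops; beyond the minor saving, Relaxed states the actual synchronization requirement precisely.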
@@ -23,7 +23,7 @@ impl Health {
     }
 
     pub(crate) async fn check(&mut self) -> bool {
-        if self.generation_health.load(Ordering::SeqCst) {
+        if self.generation_health.load(Ordering::Relaxed) {
             // Generation is healthy, we only check that the shards are answering gRPC calls
             self.client.health().await.is_ok()
         } else {
@@ -61,7 +61,7 @@ impl Health {
             // Skips the queue
             let value = self.client.prefill(batch).await.is_ok();
             // Update generation health
-            self.generation_health.store(value, Ordering::SeqCst);
+            self.generation_health.store(value, Ordering::Relaxed);
             value
         }
     }
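The two hunks above cover `Health::check`: it reads the flag to choose between a cheap path (pinging the shards over gRPC) and an expensive one (sending a small probe prefill through the client, skipping the queue), then stores the probe's result back. The remaining hunks apply the same downgrade in the batching loop, where the flag is stored after every real prefill and decode call.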
@@ -385,7 +385,7 @@ async fn prefill(
     match client.prefill(batch).await {
         Ok((generations, next_batch)) => {
             // Update health
-            generation_health.store(true, Ordering::SeqCst);
+            generation_health.store(true, Ordering::Relaxed);
             // Send generated tokens and filter stopped entries
             filter_send_generations(generations, entries);
 
@@ -399,7 +399,7 @@ async fn prefill(
         // If we have an error, we discard the whole batch
         Err(err) => {
             // Update health
-            generation_health.store(false, Ordering::SeqCst);
+            generation_health.store(false, Ordering::Relaxed);
             let _ = client.clear_cache(Some(batch_id)).await;
             send_errors(err, entries);
             metrics::increment_counter!("tgi_batch_inference_failure", "method" => "prefill");
@@ -424,7 +424,7 @@ async fn decode(
     match client.decode(batches).await {
         Ok((generations, next_batch)) => {
             // Update health
-            generation_health.store(true, Ordering::SeqCst);
+            generation_health.store(true, Ordering::Relaxed);
             // Send generated tokens and filter stopped entries
             filter_send_generations(generations, entries);
 
@@ -437,7 +437,7 @@ async fn decode(
         }
         // If we have an error, we discard the whole batch
         Err(err) => {
-            generation_health.store(false, Ordering::SeqCst);
+            generation_health.store(false, Ordering::Relaxed);
             for id in batch_ids {
                 let _ = client.clear_cache(Some(id)).await;
             }
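All six sites touch the same lone boolean, which is why the downgrade is behavior-preserving here. For contrast, a hypothetical sketch of when Relaxed would not be enough: if the flag were used to publish other data, the writer would need at least a Release store paired with an Acquire load on the reader to establish a happens-before edge. This is not the situation in this commit.

use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
use std::sync::Arc;
use std::thread;

fn main() {
    let payload = Arc::new(AtomicUsize::new(0));
    let ready = Arc::new(AtomicBool::new(false));

    let (p, r) = (Arc::clone(&payload), Arc::clone(&ready));
    let producer = thread::spawn(move || {
        p.store(42, Ordering::Relaxed); // data written before the flag
        r.store(true, Ordering::Release); // publish: makes the write above visible
    });

    // Acquire pairs with the Release store, so once `ready` reads true,
    // the payload write is guaranteed to be visible.
    while !ready.load(Ordering::Acquire) {
        std::hint::spin_loop();
    }
    assert_eq!(payload.load(Ordering::Relaxed), 42);
    producer.join().unwrap();
}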