mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-10 11:54:52 +00:00
add metrics
This commit is contained in:
parent
889897fe69
commit
c3ad942e9f
@ -265,12 +265,13 @@ async fn batching_task(
|
|||||||
let batch_max_tokens = batch.max_tokens;
|
let batch_max_tokens = batch.max_tokens;
|
||||||
let mut batches = vec![batch];
|
let mut batches = vec![batch];
|
||||||
metrics::gauge!("tgi_batch_current_size", batch_size as f64);
|
metrics::gauge!("tgi_batch_current_size", batch_size as f64);
|
||||||
|
metrics::gauge!("tgi_batch_current_max_tokens", batch_max_tokens as f64);
|
||||||
|
|
||||||
let min_size = match waiting_tokens {
|
let min_size = match waiting_tokens {
|
||||||
// If we didn't onboard any new requests since >= max_waiting_tokens, we try
|
// If we didn't onboard any new requests since >= max_waiting_tokens, we try
|
||||||
// to add a new batch even though its size might be small
|
// to add a new batch even though its size might be small
|
||||||
_ if waiting_tokens >= max_waiting_tokens => None,
|
_ if waiting_tokens >= max_waiting_tokens => None,
|
||||||
// Minimum size criteria
|
// Minimum batch size
|
||||||
_ => Some((batch_size as f32 * waiting_served_ratio).floor() as usize),
|
_ => Some((batch_size as f32 * waiting_served_ratio).floor() as usize),
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -281,6 +282,11 @@ async fn batching_task(
|
|||||||
queue.next_batch(min_size, token_budget).await
|
queue.next_batch(min_size, token_budget).await
|
||||||
{
|
{
|
||||||
// Tracking metrics
|
// Tracking metrics
|
||||||
|
if min_size.is_some() {
|
||||||
|
metrics::increment_counter!("tgi_batch_concat", "reason" => "backpressure");
|
||||||
|
} else {
|
||||||
|
metrics::increment_counter!("tgi_batch_concat", "reason" => "wait_exceeded");
|
||||||
|
}
|
||||||
|
|
||||||
entries.iter_mut().for_each(|(_, entry)| {
|
entries.iter_mut().for_each(|(_, entry)| {
|
||||||
// Create a new span to add the info that this entry is waiting
|
// Create a new span to add the info that this entry is waiting
|
||||||
@ -326,6 +332,7 @@ async fn batching_task(
|
|||||||
waiting_tokens += 1;
|
waiting_tokens += 1;
|
||||||
}
|
}
|
||||||
metrics::gauge!("tgi_batch_current_size", 0.0);
|
metrics::gauge!("tgi_batch_current_size", 0.0);
|
||||||
|
metrics::gauge!("tgi_batch_current_max_tokens", 0.0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user