mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-21 14:52:20 +00:00
feat(metrics): exposes queue size as tokens along with individual requests count
This commit is contained in:
parent
5eec3a8bb6
commit
bb8f59632f
@ -138,8 +138,9 @@ async fn queue_task(
|
||||
while let Some(cmd) = receiver.recv().await {
|
||||
match cmd {
|
||||
QueueCommand::Append(entry, span) => {
|
||||
- span.in_scope(|| state.append(*entry));
|
||||
  metrics::gauge!("tgi_queue_size").increment(1.0);
|
||||
+ metrics::gauge!("tgi_queue_size_tokens").increment(entry.request.input_length);
|
||||
+ span.in_scope(|| state.append(*entry));
|
||||
}
|
||||
QueueCommand::NextBatch {
|
||||
min_size,
|
||||
@ -154,7 +155,15 @@ async fn queue_task(
|
||||
.instrument(span)
|
||||
.await;
|
||||
response_sender.send(next_batch).unwrap();
|
||||
|
||||
metrics::gauge!("tgi_queue_size").set(state.entries.len() as f64);
|
||||
metrics::gauge!("tgi_queue_size_tokens").set(
|
||||
state
|
||||
.entries
|
||||
.iter()
|
||||
.map(|(_, e)| e.request.input_length as f64)
|
||||
.sum::<f64>(),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -67,6 +67,7 @@ use tracing::{info_span, instrument, Instrument};
|
||||
use utoipa::OpenApi;
|
||||
use utoipa_swagger_ui::SwaggerUi;
|
||||
|
||||
|
||||
fn encoding_to_tokens(encoding: &tokenizers::Encoding, input: &str) -> Vec<SimpleToken> {
|
||||
let offsets = encoding.get_offsets();
|
||||
let input_ids = encoding.get_ids();
|
||||
@ -2171,6 +2172,11 @@ async fn start(
|
||||
"Current batch size"
|
||||
);
|
||||
metrics::describe_gauge!("tgi_queue_size", metrics::Unit::Count, "Current queue size");
|
||||
metrics::describe_gauge!(
|
||||
"tgi_queue_size_tokens",
|
||||
metrics::Unit::Count,
|
||||
"Current queue size in number of tokens"
|
||||
);
|
||||
metrics::describe_gauge!(
|
||||
"tgi_batch_current_max_tokens",
|
||||
metrics::Unit::Count,
|
||||
|
Loading…
Reference in New Issue
Block a user