feat(metrics): expose queue size in tokens alongside the individual request count

This commit is contained in:
Morgan Funtowicz 2025-02-27 14:32:51 +01:00
parent 5eec3a8bb6
commit bb8f59632f
2 changed files with 16 additions and 1 deletion

View File

@ -138,8 +138,9 @@ async fn queue_task(
while let Some(cmd) = receiver.recv().await { while let Some(cmd) = receiver.recv().await {
match cmd { match cmd {
QueueCommand::Append(entry, span) => { QueueCommand::Append(entry, span) => {
span.in_scope(|| state.append(*entry));
metrics::gauge!("tgi_queue_size").increment(1.0); metrics::gauge!("tgi_queue_size").increment(1.0);
metrics::gauge!("tgi_queue_size_tokens").increment(entry.request.input_length);
span.in_scope(|| state.append(*entry));
} }
QueueCommand::NextBatch { QueueCommand::NextBatch {
min_size, min_size,
@ -154,7 +155,15 @@ async fn queue_task(
.instrument(span) .instrument(span)
.await; .await;
response_sender.send(next_batch).unwrap(); response_sender.send(next_batch).unwrap();
metrics::gauge!("tgi_queue_size").set(state.entries.len() as f64); metrics::gauge!("tgi_queue_size").set(state.entries.len() as f64);
metrics::gauge!("tgi_queue_size_tokens").set(
state
.entries
.iter()
.map(|(_, e)| e.request.input_length as f64)
.sum::<f64>(),
);
} }
} }
} }

View File

@ -67,6 +67,7 @@ use tracing::{info_span, instrument, Instrument};
use utoipa::OpenApi; use utoipa::OpenApi;
use utoipa_swagger_ui::SwaggerUi; use utoipa_swagger_ui::SwaggerUi;
fn encoding_to_tokens(encoding: &tokenizers::Encoding, input: &str) -> Vec<SimpleToken> { fn encoding_to_tokens(encoding: &tokenizers::Encoding, input: &str) -> Vec<SimpleToken> {
let offsets = encoding.get_offsets(); let offsets = encoding.get_offsets();
let input_ids = encoding.get_ids(); let input_ids = encoding.get_ids();
@ -2171,6 +2172,11 @@ async fn start(
"Current batch size" "Current batch size"
); );
metrics::describe_gauge!("tgi_queue_size", metrics::Unit::Count, "Current queue size"); metrics::describe_gauge!("tgi_queue_size", metrics::Unit::Count, "Current queue size");
metrics::describe_gauge!(
"tgi_queue_size_tokens",
metrics::Unit::Count,
"Current queue size in number of tokens"
);
metrics::describe_gauge!( metrics::describe_gauge!(
"tgi_batch_current_max_tokens", "tgi_batch_current_max_tokens",
metrics::Unit::Count, metrics::Unit::Count,