mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-21 23:12:07 +00:00
feat(metrics): expose the queue size in tokens alongside the individual request count
This commit is contained in:
parent
5eec3a8bb6
commit
bb8f59632f
@ -138,8 +138,9 @@ async fn queue_task(
|
|||||||
while let Some(cmd) = receiver.recv().await {
|
while let Some(cmd) = receiver.recv().await {
|
||||||
match cmd {
|
match cmd {
|
||||||
QueueCommand::Append(entry, span) => {
|
QueueCommand::Append(entry, span) => {
|
||||||
span.in_scope(|| state.append(*entry));
|
|
||||||
metrics::gauge!("tgi_queue_size").increment(1.0);
|
metrics::gauge!("tgi_queue_size").increment(1.0);
|
||||||
|
metrics::gauge!("tgi_queue_size_tokens").increment(entry.request.input_length);
|
||||||
|
span.in_scope(|| state.append(*entry));
|
||||||
}
|
}
|
||||||
QueueCommand::NextBatch {
|
QueueCommand::NextBatch {
|
||||||
min_size,
|
min_size,
|
||||||
@ -154,7 +155,15 @@ async fn queue_task(
|
|||||||
.instrument(span)
|
.instrument(span)
|
||||||
.await;
|
.await;
|
||||||
response_sender.send(next_batch).unwrap();
|
response_sender.send(next_batch).unwrap();
|
||||||
|
|
||||||
metrics::gauge!("tgi_queue_size").set(state.entries.len() as f64);
|
metrics::gauge!("tgi_queue_size").set(state.entries.len() as f64);
|
||||||
|
metrics::gauge!("tgi_queue_size_tokens").set(
|
||||||
|
state
|
||||||
|
.entries
|
||||||
|
.iter()
|
||||||
|
.map(|(_, e)| e.request.input_length as f64)
|
||||||
|
.sum::<f64>(),
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -67,6 +67,7 @@ use tracing::{info_span, instrument, Instrument};
|
|||||||
use utoipa::OpenApi;
|
use utoipa::OpenApi;
|
||||||
use utoipa_swagger_ui::SwaggerUi;
|
use utoipa_swagger_ui::SwaggerUi;
|
||||||
|
|
||||||
|
|
||||||
fn encoding_to_tokens(encoding: &tokenizers::Encoding, input: &str) -> Vec<SimpleToken> {
|
fn encoding_to_tokens(encoding: &tokenizers::Encoding, input: &str) -> Vec<SimpleToken> {
|
||||||
let offsets = encoding.get_offsets();
|
let offsets = encoding.get_offsets();
|
||||||
let input_ids = encoding.get_ids();
|
let input_ids = encoding.get_ids();
|
||||||
@ -2171,6 +2172,11 @@ async fn start(
|
|||||||
"Current batch size"
|
"Current batch size"
|
||||||
);
|
);
|
||||||
metrics::describe_gauge!("tgi_queue_size", metrics::Unit::Count, "Current queue size");
|
metrics::describe_gauge!("tgi_queue_size", metrics::Unit::Count, "Current queue size");
|
||||||
|
metrics::describe_gauge!(
|
||||||
|
"tgi_queue_size_tokens",
|
||||||
|
metrics::Unit::Count,
|
||||||
|
"Current queue size in number of tokens"
|
||||||
|
);
|
||||||
metrics::describe_gauge!(
|
metrics::describe_gauge!(
|
||||||
"tgi_batch_current_max_tokens",
|
"tgi_batch_current_max_tokens",
|
||||||
metrics::Unit::Count,
|
metrics::Unit::Count,
|
||||||
|
Loading…
Reference in New Issue
Block a user