mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-12 04:44:52 +00:00
Adding max capacity metric.
This commit is contained in:
parent
3b0b66bedb
commit
6e7189f2aa
@ -100,6 +100,7 @@ pub async fn connect_backend(
|
|||||||
.map_err(V3Error::Warmup)?,
|
.map_err(V3Error::Warmup)?,
|
||||||
)?;
|
)?;
|
||||||
tracing::info!("Setting max batch total tokens to {max_batch_total_tokens}");
|
tracing::info!("Setting max batch total tokens to {max_batch_total_tokens}");
|
||||||
|
metrics::gauge!("tgi_batch_max_total_tokens").set(max_batch_total_tokens);
|
||||||
|
|
||||||
let backend_info = BackendInfo {
|
let backend_info = BackendInfo {
|
||||||
waiting_served_ratio,
|
waiting_served_ratio,
|
||||||
|
@ -1993,8 +1993,6 @@ async fn start(
|
|||||||
"Batch size of the next batch"
|
"Batch size of the next batch"
|
||||||
);
|
);
|
||||||
|
|
||||||
metrics::counter!("tgi_max_token_capacity").increment(u64::from(max_batch_total_tokens));
|
|
||||||
|
|
||||||
// CORS layer
|
// CORS layer
|
||||||
let allow_origin = allow_origin.unwrap_or(AllowOrigin::any());
|
let allow_origin = allow_origin.unwrap_or(AllowOrigin::any());
|
||||||
let cors_layer = CorsLayer::new()
|
let cors_layer = CorsLayer::new()
|
||||||
|
Loading…
Reference in New Issue
Block a user