From a3bdaca01433ac7041f26bcde86c9c97bcdc207a Mon Sep 17 00:00:00 2001 From: OlivierDehaene <23298448+OlivierDehaene@users.noreply.github.com> Date: Sun, 9 Apr 2023 18:30:08 +0200 Subject: [PATCH] force as_secs --- router/src/infer.rs | 6 +- router/src/server.rs | 189 +++++++++++++++++++++++-------------------- 2 files changed, 104 insertions(+), 91 deletions(-) diff --git a/router/src/infer.rs b/router/src/infer.rs index a13c464d..5a4375ae 100644 --- a/router/src/infer.rs +++ b/router/src/infer.rs @@ -335,11 +335,12 @@ async fn prefill( ) -> Option { let start_time = Instant::now(); let batch_id = batch.id; + metrics::increment_counter!("tgi_batch_inference_count", "method" => "prefill"); match client.prefill(batch).await { Ok((generations, next_batch)) => { send_generations(generations, entries); - metrics::histogram!("tgi_batch_inference_duration", start_time.elapsed(), "method" => "prefill"); + metrics::histogram!("tgi_batch_inference_duration", start_time.elapsed().as_secs_f64(), "method" => "prefill"); metrics::increment_counter!("tgi_batch_inference_success", "method" => "prefill"); next_batch } @@ -360,11 +361,12 @@ async fn decode( entries: &mut IntMap, ) -> Option { let start_time = Instant::now(); + metrics::increment_counter!("tgi_batch_inference_count", "method" => "decode"); match client.decode(batches).await { Ok((generations, next_batch)) => { send_generations(generations, entries); - metrics::histogram!("tgi_batch_inference_duration", start_time.elapsed(), "method" => "decode"); + metrics::histogram!("tgi_batch_inference_duration", start_time.elapsed().as_secs_f64(), "method" => "decode"); metrics::increment_counter!("tgi_batch_inference_success", "method" => "decode"); next_batch } diff --git a/router/src/server.rs b/router/src/server.rs index 9bf59175..174403b0 100644 --- a/router/src/server.rs +++ b/router/src/server.rs @@ -87,21 +87,21 @@ async fn health(infer: Extension) -> Result<(), (StatusCode, Json) -> String { prom_handle.render() @@ -465,36 +476,36 @@ pub async fn run( // OpenAPI documentation #[derive(OpenApi)] #[openapi( - paths( - generate, - generate_stream, - metrics, - ), - components( - schemas( - GenerateRequest, - GenerateParameters, - PrefillToken, - Token, - GenerateResponse, - BestOfSequence, - Details, - FinishReason, - StreamResponse, - StreamDetails, - ErrorResponse, - ) - ), - tags( - (name = "Text Generation Inference", description = "Hugging Face Text Generation Inference API") - ), - info( - title = "Text Generation Inference", - license( - name = "Apache 2.0", - url = "https://www.apache.org/licenses/LICENSE-2.0" - ) - ) + paths( + generate, + generate_stream, + metrics, + ), + components( + schemas( + GenerateRequest, + GenerateParameters, + PrefillToken, + Token, + GenerateResponse, + BestOfSequence, + Details, + FinishReason, + StreamResponse, + StreamDetails, + ErrorResponse, + ) + ), + tags( + (name = "Text Generation Inference", description = "Hugging Face Text Generation Inference API") + ), + info( + title = "Text Generation Inference", + license( + name = "Apache 2.0", + url = "https://www.apache.org/licenses/LICENSE-2.0" + ) + ) )] struct ApiDoc;