diff --git a/load_tests/starcoder_load.js b/load_tests/starcoder_load.js index 76316b65..d429ce07 100644 --- a/load_tests/starcoder_load.js +++ b/load_tests/starcoder_load.js @@ -55,6 +55,7 @@ export default function () { if (res.status === 200) { totalTime.add(res.headers["X-Total-Time"]); + totalTokens.add(res.headers["X-Total-Tokens"]); validationTime.add(res.headers["X-Validation-Time"]); queueTime.add(res.headers["X-Queue-Time"]); inferenceTime.add(res.headers["X-Inference-Time"]); diff --git a/router/src/server.rs b/router/src/server.rs index 8ca463c2..1246b17e 100644 --- a/router/src/server.rs +++ b/router/src/server.rs @@ -241,6 +241,10 @@ async fn generate( "x-total-time", total_time.as_millis().to_string().parse().unwrap(), ); + headers.insert( + "x-total-tokens", + response.generated_text.generated_tokens.to_string().parse().unwrap(), + ); headers.insert( "x-validation-time", validation_time.as_millis().to_string().parse().unwrap(),