Add a configurable Prometheus metrics port (`--prometheus-port`, default 9000)

This commit is contained in:
Mohit Sharma 2025-04-22 12:44:15 +00:00
parent 6545cdde0d
commit 136b9897d4
4 changed files with 21 additions and 0 deletions

View File

@ -36,6 +36,8 @@ struct Args {
hostname: String,
#[clap(default_value = "3000", long, short, env)]
port: u16,
#[clap(default_value = "9000", long, short, env)]
prometheus_port: u16,
#[clap(default_value = "/tmp/text-generation-server-0", long, env)]
master_shard_uds_path: String,
#[clap(default_value = "bigscience/bloom", long, env)]
@ -99,6 +101,7 @@ async fn main() -> Result<(), RouterError> {
max_batch_size,
hostname,
port,
prometheus_port,
master_shard_uds_path,
tokenizer_name,
tokenizer_config_path,
@ -198,6 +201,7 @@ async fn main() -> Result<(), RouterError> {
max_client_batch_size,
usage_stats,
payload_limit,
prometheus_port,
)
.await?;
Ok(())

View File

@ -36,6 +36,8 @@ struct Args {
hostname: String,
#[clap(default_value = "3000", long, short, env)]
port: u16,
#[clap(default_value = "9000", long, short, env)]
prometheus_port: u16,
#[clap(default_value = "/tmp/text-generation-server-0", long, env)]
master_shard_uds_path: String,
#[clap(default_value = "bigscience/bloom", long, env)]
@ -99,6 +101,7 @@ async fn main() -> Result<(), RouterError> {
max_batch_size,
hostname,
port,
prometheus_port,
master_shard_uds_path,
tokenizer_name,
tokenizer_config_path,
@ -214,6 +217,7 @@ async fn main() -> Result<(), RouterError> {
max_client_batch_size,
usage_stats,
payload_limit,
prometheus_port,
)
.await?;
Ok(())

View File

@ -773,6 +773,10 @@ struct Args {
#[clap(default_value = "3000", long, short, env)]
port: u16,
/// The Prometheus port to listen on.
#[clap(default_value = "9000", long, short, env)]
prometheus_port: u16,
/// The name of the socket for gRPC communication between the webserver
/// and the shards.
#[clap(default_value = "/tmp/text-generation-server", long, env)]
@ -1848,6 +1852,8 @@ fn spawn_webserver(
args.hostname.to_string(),
"--port".to_string(),
args.port.to_string(),
"--prometheus-port".to_string(),
args.prometheus_port.to_string(),
"--master-shard-uds-path".to_string(),
format!("{}-0", args.shard_uds_path),
"--tokenizer-name".to_string(),

View File

@ -1496,6 +1496,7 @@ pub async fn run(
max_client_batch_size: usize,
usage_stats_level: usage_stats::UsageStatsLevel,
payload_limit: usize,
prometheus_port: u16,
) -> Result<(), WebServerError> {
// CORS allowed origins
// map to go inside the option and then map to parse from String to HeaderValue
@ -1799,6 +1800,7 @@ pub async fn run(
compat_return_full_text,
allow_origin,
payload_limit,
prometheus_port,
)
.await;
@ -1860,6 +1862,7 @@ async fn start(
compat_return_full_text: bool,
allow_origin: Option<AllowOrigin>,
payload_limit: usize,
prometheus_port: u16,
) -> Result<(), WebServerError> {
// Determine the server port based on the feature and environment variable.
let port = if cfg!(feature = "google") {
@ -1933,8 +1936,12 @@ async fn start(
// let skipped_matcher = Matcher::Full(String::from("tgi_request_skipped_tokens"));
// let skipped_buckets: Vec<f64> = (0..shard_info.speculate + 1).map(|x| x as f64).collect();
let mut p_addr = addr;
p_addr.set_port(prometheus_port);
// Prometheus handler
let builder = PrometheusBuilder::new()
.with_http_listener(p_addr)
.set_buckets_for_metric(duration_matcher, &duration_buckets)
.unwrap()
.set_buckets_for_metric(input_length_matcher, &input_length_buckets)