diff --git a/backends/llamacpp/src/main.rs b/backends/llamacpp/src/main.rs
index b99e9591..9ee61ce6 100644
--- a/backends/llamacpp/src/main.rs
+++ b/backends/llamacpp/src/main.rs
@@ -119,6 +119,9 @@ struct Args {
     #[clap(default_value = "3000", long, short, env)]
     port: u16,
 
+    #[clap(default_value = "9000", long, env)]
+    prometheus_port: u16,
+
     /// Enable JSON output format.
     #[clap(long, env)]
     json_output: bool,
@@ -317,6 +320,7 @@ async fn main() -> Result<(), RouterError> {
         args.max_client_batch_size,
         args.usage_stats,
         args.payload_limit,
+        args.prometheus_port,
     )
     .await?;
     Ok(())
diff --git a/backends/trtllm/src/main.rs b/backends/trtllm/src/main.rs
index 9d4bf8f2..ab9ba4f9 100644
--- a/backends/trtllm/src/main.rs
+++ b/backends/trtllm/src/main.rs
@@ -37,6 +37,8 @@ struct Args {
     hostname: String,
     #[clap(default_value = "3000", long, short, env)]
     port: u16,
+    #[clap(default_value = "9000", long, env)]
+    prometheus_port: u16,
     #[clap(long, env, required = true)]
     tokenizer_name: String,
     #[clap(long, env)]
@@ -227,6 +229,7 @@ async fn main() -> Result<(), TensorRtLlmBackendError> {
         max_batch_total_tokens,
         hostname,
         port,
+        prometheus_port,
         tokenizer_name,
         tokenizer_config_path,
         revision,
@@ -322,6 +325,7 @@ async fn main() -> Result<(), TensorRtLlmBackendError> {
         max_client_batch_size,
         usage_stats,
         payload_limit,
+        prometheus_port,
     )
     .await?;
     Ok(())