mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-06-19 15:52:08 +00:00
Add Prometheus port option for the trtllm and llamacpp backends
This commit is contained in:
parent
12b1cf89cf
commit
1cbda4f541
@ -119,6 +119,9 @@ struct Args {
|
|||||||
#[clap(default_value = "3000", long, short, env)]
|
#[clap(default_value = "3000", long, short, env)]
|
||||||
port: u16,
|
port: u16,
|
||||||
|
|
||||||
|
#[clap(default_value = "9000", long, short, env)]
|
||||||
|
prometheus_port: u16,
|
||||||
|
|
||||||
/// Enable JSON output format.
|
/// Enable JSON output format.
|
||||||
#[clap(long, env)]
|
#[clap(long, env)]
|
||||||
json_output: bool,
|
json_output: bool,
|
||||||
@ -317,6 +320,7 @@ async fn main() -> Result<(), RouterError> {
|
|||||||
args.max_client_batch_size,
|
args.max_client_batch_size,
|
||||||
args.usage_stats,
|
args.usage_stats,
|
||||||
args.payload_limit,
|
args.payload_limit,
|
||||||
|
args.prometheus_port,
|
||||||
)
|
)
|
||||||
.await?;
|
.await?;
|
||||||
Ok(())
|
Ok(())
|
||||||
|
@ -37,6 +37,8 @@ struct Args {
|
|||||||
hostname: String,
|
hostname: String,
|
||||||
#[clap(default_value = "3000", long, short, env)]
|
#[clap(default_value = "3000", long, short, env)]
|
||||||
port: u16,
|
port: u16,
|
||||||
|
#[clap(default_value = "9000", long, short, env)]
|
||||||
|
prometheus_port: u16,
|
||||||
#[clap(long, env, required = true)]
|
#[clap(long, env, required = true)]
|
||||||
tokenizer_name: String,
|
tokenizer_name: String,
|
||||||
#[clap(long, env)]
|
#[clap(long, env)]
|
||||||
@ -227,6 +229,7 @@ async fn main() -> Result<(), TensorRtLlmBackendError> {
|
|||||||
max_batch_total_tokens,
|
max_batch_total_tokens,
|
||||||
hostname,
|
hostname,
|
||||||
port,
|
port,
|
||||||
|
prometheus_port,
|
||||||
tokenizer_name,
|
tokenizer_name,
|
||||||
tokenizer_config_path,
|
tokenizer_config_path,
|
||||||
revision,
|
revision,
|
||||||
@ -322,6 +325,7 @@ async fn main() -> Result<(), TensorRtLlmBackendError> {
|
|||||||
max_client_batch_size,
|
max_client_batch_size,
|
||||||
usage_stats,
|
usage_stats,
|
||||||
payload_limit,
|
payload_limit,
|
||||||
|
prometheus_port
|
||||||
)
|
)
|
||||||
.await?;
|
.await?;
|
||||||
Ok(())
|
Ok(())
|
||||||
|
Loading…
Reference in New Issue
Block a user