mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-12 04:44:52 +00:00
environment variable approach
This commit is contained in:
parent
c27075d349
commit
9697d16207
@ -1162,7 +1162,7 @@ fn spawn_webserver(
|
|||||||
max_input_tokens: usize,
|
max_input_tokens: usize,
|
||||||
max_total_tokens: usize,
|
max_total_tokens: usize,
|
||||||
max_batch_prefill_tokens: u32,
|
max_batch_prefill_tokens: u32,
|
||||||
startup_time: u64,
|
download_time: u64,
|
||||||
shutdown: Arc<AtomicBool>,
|
shutdown: Arc<AtomicBool>,
|
||||||
shutdown_receiver: &mpsc::Receiver<()>,
|
shutdown_receiver: &mpsc::Receiver<()>,
|
||||||
) -> Result<Child, LauncherError> {
|
) -> Result<Child, LauncherError> {
|
||||||
@ -1200,8 +1200,6 @@ fn spawn_webserver(
|
|||||||
format!("{}-0", args.shard_uds_path),
|
format!("{}-0", args.shard_uds_path),
|
||||||
"--tokenizer-name".to_string(),
|
"--tokenizer-name".to_string(),
|
||||||
args.model_id,
|
args.model_id,
|
||||||
"--startup-time".to_string(),
|
|
||||||
startup_time.to_string(),
|
|
||||||
];
|
];
|
||||||
|
|
||||||
// Grammar support
|
// Grammar support
|
||||||
@ -1278,6 +1276,8 @@ fn spawn_webserver(
|
|||||||
envs.push(("COMPUTE_TYPE".into(), compute_type.into()))
|
envs.push(("COMPUTE_TYPE".into(), compute_type.into()))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
envs.push(("DOWNLOAD_TIME".into(), download_time.to_string().into()));
|
||||||
|
|
||||||
let mut webserver = match Command::new("text-generation-router")
|
let mut webserver = match Command::new("text-generation-router")
|
||||||
.args(router_args)
|
.args(router_args)
|
||||||
.envs(envs)
|
.envs(envs)
|
||||||
|
@ -87,8 +87,6 @@ struct Args {
|
|||||||
disable_grammar_support: bool,
|
disable_grammar_support: bool,
|
||||||
#[clap(default_value = "4", long, env)]
|
#[clap(default_value = "4", long, env)]
|
||||||
max_client_batch_size: usize,
|
max_client_batch_size: usize,
|
||||||
#[clap(long, env)]
|
|
||||||
startup_time: u64,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Subcommand)]
|
#[derive(Debug, Subcommand)]
|
||||||
@ -131,7 +129,6 @@ async fn main() -> Result<(), RouterError> {
|
|||||||
disable_grammar_support,
|
disable_grammar_support,
|
||||||
max_client_batch_size,
|
max_client_batch_size,
|
||||||
command,
|
command,
|
||||||
startup_time,
|
|
||||||
} = args;
|
} = args;
|
||||||
|
|
||||||
let print_schema_command = match command {
|
let print_schema_command = match command {
|
||||||
@ -381,8 +378,6 @@ async fn main() -> Result<(), RouterError> {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
tracing::info!("start time of the model is {startup_time}");
|
|
||||||
|
|
||||||
// Run server
|
// Run server
|
||||||
server::run(
|
server::run(
|
||||||
master_shard_uds_path,
|
master_shard_uds_path,
|
||||||
@ -414,7 +409,6 @@ async fn main() -> Result<(), RouterError> {
|
|||||||
disable_grammar_support,
|
disable_grammar_support,
|
||||||
max_client_batch_size,
|
max_client_batch_size,
|
||||||
print_schema_command,
|
print_schema_command,
|
||||||
startup_time,
|
|
||||||
)
|
)
|
||||||
.await?;
|
.await?;
|
||||||
Ok(())
|
Ok(())
|
||||||
|
@ -1434,7 +1434,6 @@ pub async fn run(
|
|||||||
grammar_support: bool,
|
grammar_support: bool,
|
||||||
max_client_batch_size: usize,
|
max_client_batch_size: usize,
|
||||||
print_schema_command: bool,
|
print_schema_command: bool,
|
||||||
start_time: u64,
|
|
||||||
) -> Result<(), WebServerError> {
|
) -> Result<(), WebServerError> {
|
||||||
// OpenAPI documentation
|
// OpenAPI documentation
|
||||||
#[derive(OpenApi)]
|
#[derive(OpenApi)]
|
||||||
@ -1514,6 +1513,7 @@ pub async fn run(
|
|||||||
)
|
)
|
||||||
)]
|
)]
|
||||||
struct ApiDoc;
|
struct ApiDoc;
|
||||||
|
let download_time = std::env::var("DOWNLOAD_TIME").unwrap_or("30".to_string()).parse::<u64>().unwrap_or(30);
|
||||||
let length_time = Instant::now();
|
let length_time = Instant::now();
|
||||||
|
|
||||||
// Create state
|
// Create state
|
||||||
@ -1895,11 +1895,11 @@ pub async fn run(
|
|||||||
.layer(cors_layer);
|
.layer(cors_layer);
|
||||||
|
|
||||||
tracing::info!("Connected");
|
tracing::info!("Connected");
|
||||||
let total_time = length_time.elapsed() + Duration::from_secs(start_time);
|
let total_time = length_time.elapsed() + Duration::from_secs(download_time);
|
||||||
tracing::info!("total time for router to boot up and connect to model server {:?}", length_time.elapsed());
|
tracing::info!("total time for router to boot up and connect to model server {:?}", length_time.elapsed());
|
||||||
tracing::info!("the total time in secs of boot time is {:?}", total_time);
|
tracing::info!("the total time in secs of boot time is {:?}", total_time);
|
||||||
metrics::gauge!("tgi_model_load_time").set(total_time.as_secs_f64());
|
metrics::gauge!("tgi_model_load_time").set(total_time.as_secs_f64());
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if ngrok {
|
if ngrok {
|
||||||
|
Loading…
Reference in New Issue
Block a user