environment variable approach

This commit is contained in:
Edwinhr716 2024-07-25 23:12:33 +00:00
parent c27075d349
commit 9697d16207
3 changed files with 6 additions and 12 deletions

View File

@ -1162,7 +1162,7 @@ fn spawn_webserver(
max_input_tokens: usize, max_input_tokens: usize,
max_total_tokens: usize, max_total_tokens: usize,
max_batch_prefill_tokens: u32, max_batch_prefill_tokens: u32,
startup_time: u64, download_time: u64,
shutdown: Arc<AtomicBool>, shutdown: Arc<AtomicBool>,
shutdown_receiver: &mpsc::Receiver<()>, shutdown_receiver: &mpsc::Receiver<()>,
) -> Result<Child, LauncherError> { ) -> Result<Child, LauncherError> {
@ -1200,8 +1200,6 @@ fn spawn_webserver(
format!("{}-0", args.shard_uds_path), format!("{}-0", args.shard_uds_path),
"--tokenizer-name".to_string(), "--tokenizer-name".to_string(),
args.model_id, args.model_id,
"--startup-time".to_string(),
startup_time.to_string(),
]; ];
// Grammar support // Grammar support
@ -1278,6 +1276,8 @@ fn spawn_webserver(
envs.push(("COMPUTE_TYPE".into(), compute_type.into())) envs.push(("COMPUTE_TYPE".into(), compute_type.into()))
} }
envs.push(("DOWNLOAD_TIME".into(), download_time.to_string().into()));
let mut webserver = match Command::new("text-generation-router") let mut webserver = match Command::new("text-generation-router")
.args(router_args) .args(router_args)
.envs(envs) .envs(envs)

View File

@ -87,8 +87,6 @@ struct Args {
disable_grammar_support: bool, disable_grammar_support: bool,
#[clap(default_value = "4", long, env)] #[clap(default_value = "4", long, env)]
max_client_batch_size: usize, max_client_batch_size: usize,
#[clap(long, env)]
startup_time: u64,
} }
#[derive(Debug, Subcommand)] #[derive(Debug, Subcommand)]
@ -131,7 +129,6 @@ async fn main() -> Result<(), RouterError> {
disable_grammar_support, disable_grammar_support,
max_client_batch_size, max_client_batch_size,
command, command,
startup_time,
} = args; } = args;
let print_schema_command = match command { let print_schema_command = match command {
@ -381,8 +378,6 @@ async fn main() -> Result<(), RouterError> {
} }
}; };
tracing::info!("start time of the model is {startup_time}");
// Run server // Run server
server::run( server::run(
master_shard_uds_path, master_shard_uds_path,
@ -414,7 +409,6 @@ async fn main() -> Result<(), RouterError> {
disable_grammar_support, disable_grammar_support,
max_client_batch_size, max_client_batch_size,
print_schema_command, print_schema_command,
startup_time,
) )
.await?; .await?;
Ok(()) Ok(())

View File

@ -1434,7 +1434,6 @@ pub async fn run(
grammar_support: bool, grammar_support: bool,
max_client_batch_size: usize, max_client_batch_size: usize,
print_schema_command: bool, print_schema_command: bool,
start_time: u64,
) -> Result<(), WebServerError> { ) -> Result<(), WebServerError> {
// OpenAPI documentation // OpenAPI documentation
#[derive(OpenApi)] #[derive(OpenApi)]
@ -1514,6 +1513,7 @@ pub async fn run(
) )
)] )]
struct ApiDoc; struct ApiDoc;
let download_time = std::env::var("DOWNLOAD_TIME").unwrap_or("30".to_string()).parse::<u64>().unwrap_or(30);
let length_time = Instant::now(); let length_time = Instant::now();
// Create state // Create state
@ -1895,7 +1895,7 @@ pub async fn run(
.layer(cors_layer); .layer(cors_layer);
tracing::info!("Connected"); tracing::info!("Connected");
let total_time = length_time.elapsed() + Duration::from_secs(start_time); let total_time = length_time.elapsed() + Duration::from_secs(download_time);
tracing::info!("total time for router to boot up and connect to model server {:?}", length_time.elapsed()); tracing::info!("total time for router to boot up and connect to model server {:?}", length_time.elapsed());
tracing::info!("the total time in secs of boot time is {:?}", total_time); tracing::info!("the total time in secs of boot time is {:?}", total_time);
metrics::gauge!("tgi_model_load_time").set(total_time.as_secs_f64()); metrics::gauge!("tgi_model_load_time").set(total_time.as_secs_f64());