Merge 592ea3f2f8 into 0b95693fb8

2025-09-12 12:54:52 +00:00 · 2024-07-29 11:22:36 -05:00 · 2024-07-29 11:22:36 -05:00 · 1246e2193f
commit 1246e2193f
parent 0b95693fb8 592ea3f2f8
2 changed files with 13 additions and 0 deletions
--- a/launcher/src/main.rs
+++ b/launcher/src/main.rs
@@ -1178,6 +1178,7 @@ fn spawn_webserver(
    max_input_tokens: usize,
    max_total_tokens: usize,
    max_batch_prefill_tokens: u32,
    download_time: u64,
    shutdown: Arc<AtomicBool>,
    shutdown_receiver: &mpsc::Receiver<()>,
 ) -> Result<Child, LauncherError> {
@@ -1304,6 +1305,8 @@ fn spawn_webserver(
        envs.push(("COMPUTE_TYPE".into(), compute_type.into()))
    }
    envs.push(("DOWNLOAD_TIME".into(), download_time.to_string().into()));
    let mut webserver = match Command::new("text-generation-router")
        .args(router_args)
        .envs(envs)
@@ -1370,6 +1373,7 @@ fn terminate(process_name: &str, mut process: Child, timeout: Duration) -> io::R
 fn main() -> Result<(), LauncherError> {
    // Pattern match configuration
    let args: Args = Args::parse();
    let start_time = Instant::now();
    // Filter events with LOG_LEVEL
    let varname = "LOG_LEVEL";
@@ -1666,12 +1670,14 @@ fn main() -> Result<(), LauncherError> {
        return Ok(());
    }
    let download_time = start_time.elapsed().as_secs();
    let mut webserver = spawn_webserver(
        num_shard,
        args,
        max_input_tokens,
        max_total_tokens,
        max_batch_prefill_tokens,
        download_time,
        shutdown.clone(),
        &shutdown_receiver,
    )
--- a/router/src/server.rs
+++ b/router/src/server.rs
@@ -55,6 +55,7 @@ use tower_http::cors::{AllowOrigin, CorsLayer};
 use tracing::{info_span, instrument, Instrument};
 use utoipa::OpenApi;
 use utoipa_swagger_ui::SwaggerUi;
 use tokio::time::Duration;
 /// Generate tokens if `stream == false` or a stream of token if `stream == true`
 #[utoipa::path(
@@ -1509,6 +1510,8 @@ pub async fn run(
    )
    )]
    struct ApiDoc;
    let download_time = std::env::var("DOWNLOAD_TIME").unwrap_or("30".to_string()).parse::<u64>().unwrap_or(30);
    let length_time = Instant::now();
    // Create state
    if print_schema_command {
@@ -1916,6 +1919,10 @@ pub async fn run(
        .layer(cors_layer);
    tracing::info!("Connected");
    let total_time = length_time.elapsed() + Duration::from_secs(download_time);
    metrics::gauge!("tgi_model_load_time").set(total_time.as_secs_f64());
    if ngrok {
        #[cfg(feature = "ngrok")]