From c5a4a1faf3c430c11f09128a6e1dc40b4736e212 Mon Sep 17 00:00:00 2001
From: OlivierDehaene
Date: Wed, 15 Feb 2023 16:11:32 +0100
Subject: [PATCH] feat(server): improve download logging (#66)

---
 launcher/src/main.rs                     | 29 +++++++++++++------------
 server/text_generation/utils/convert.py  |  6 +----
 server/text_generation/utils/hub.py      | 13 +++++------
 3 files changed, 21 insertions(+), 27 deletions(-)

diff --git a/launcher/src/main.rs b/launcher/src/main.rs
index 0848dd9a..ecb2c0b6 100644
--- a/launcher/src/main.rs
+++ b/launcher/src/main.rs
@@ -22,8 +22,8 @@ struct Args {
     model_id: String,
     #[clap(long, env)]
     revision: Option<String>,
-    #[clap(long, env)]
-    num_shard: Option<usize>,
+    #[clap(default_value = "1", long, env)]
+    num_shard: usize,
     #[clap(long, env)]
     quantize: bool,
     #[clap(default_value = "128", long, env)]
@@ -54,6 +54,16 @@ struct Args {
 
 fn main() -> ExitCode {
     // Pattern match configuration
+    let args = Args::parse();
+
+    if args.json_output {
+        tracing_subscriber::fmt().json().init();
+    } else {
+        tracing_subscriber::fmt().compact().init();
+    }
+
+    tracing::info!("{:?}", args);
+
     let Args {
         model_id,
         revision,
@@ -71,16 +81,7 @@ fn main() -> ExitCode {
         weights_cache_override,
         json_output,
         otlp_endpoint,
-    } = Args::parse();
-
-    if json_output {
-        tracing_subscriber::fmt().json().init();
-    } else {
-        tracing_subscriber::fmt().compact().init();
-    }
-
-    // By default we only have one master shard
-    let num_shard = num_shard.unwrap_or(1);
+    } = args;
 
     // Signal handler
     let running = Arc::new(AtomicBool::new(true));
@@ -123,7 +124,7 @@ fn main() -> ExitCode {
     };
 
     // Start process
-    tracing::info!("Starting download");
+    tracing::info!("Starting download process.");
     let mut download_process = match Popen::create(
         &download_argv,
         PopenConfig {
@@ -184,7 +185,7 @@ fn main() -> ExitCode {
             }
         }
         _ => {
-            tracing::error!("Download process exited with an unkown status.");
+            tracing::error!("Download process exited with an unknown status.");
             return ExitCode::FAILURE;
         }
     }
diff --git a/server/text_generation/utils/convert.py b/server/text_generation/utils/convert.py
index e7f9660c..30144f0c 100644
--- a/server/text_generation/utils/convert.py
+++ b/server/text_generation/utils/convert.py
@@ -83,14 +83,10 @@ def convert_files(pt_files: List[Path], st_files: List[Path]):
     ]
 
     # We do this instead of using tqdm because we want to parse the logs with the launcher
-    logger.info("Converting weights...")
     start_time = time.time()
     for i, future in enumerate(concurrent.futures.as_completed(futures)):
         elapsed = timedelta(seconds=int(time.time() - start_time))
         remaining = len(futures) - (i + 1)
-        if remaining != 0:
-            eta = (elapsed / (i + 1)) * remaining
-        else:
-            eta = 0
+        eta = (elapsed / (i + 1)) * remaining if remaining > 0 else 0
 
         logger.info(f"Convert: [{i + 1}/{len(futures)}] -- ETA: {eta}")
diff --git a/server/text_generation/utils/hub.py b/server/text_generation/utils/hub.py
index 60072a20..372df306 100644
--- a/server/text_generation/utils/hub.py
+++ b/server/text_generation/utils/hub.py
@@ -134,6 +134,7 @@ def download_weights(
             logger.info(f"File {filename} already present in cache.")
             return local_file
 
+        logger.info(f"Starting {filename} download.")
         start_time = time.time()
         local_file = hf_hub_download(
             filename=filename,
@@ -144,7 +145,7 @@ def download_weights(
         logger.info(
             f"Downloaded {filename} at {local_file} in {timedelta(seconds=int(time.time() - start_time))}."
         )
-        return local_file
+        return Path(local_file)
 
     executor = ThreadPoolExecutor(max_workers=5)
     futures = [
@@ -152,18 +153,14 @@ def download_weights(
     ]
 
     # We do this instead of using tqdm because we want to parse the logs with the launcher
-    logger.info("Downloading weights...")
     start_time = time.time()
     files = []
    for i, future in enumerate(concurrent.futures.as_completed(futures)):
         elapsed = timedelta(seconds=int(time.time() - start_time))
         remaining = len(futures) - (i + 1)
-        if remaining != 0:
-            eta = (elapsed / (i + 1)) * remaining
-        else:
-            eta = 0
+        eta = (elapsed / (i + 1)) * remaining if remaining > 0 else 0
 
         logger.info(f"Download: [{i + 1}/{len(futures)}] -- ETA: {eta}")
-        files.append(Path(future.result()))
+        files.append(future.result())
 
-    return [Path(p) for p in files]
+    return files
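
Note: the ETA logging loop that convert.py and hub.py converge on in this patch can be exercised on its own roughly as below. This is a minimal sketch, not part of the patch: the fake_task workload and the loguru logger are assumptions standing in for the real download/convert jobs; only the loop body mirrors the code above.

# Standalone sketch of the per-future progress logging pattern.
import concurrent.futures
import time
from concurrent.futures import ThreadPoolExecutor
from datetime import timedelta

from loguru import logger


def fake_task(seconds: float) -> float:
    # Stand-in for a download or conversion job (illustrative only).
    time.sleep(seconds)
    return seconds


executor = ThreadPoolExecutor(max_workers=5)
futures = [executor.submit(fake_task, s) for s in (0.2, 0.4, 0.6)]

# One log line per completed future, so a parent process (the launcher) can
# parse progress from stdout instead of rendering a tqdm bar.
start_time = time.time()
for i, future in enumerate(concurrent.futures.as_completed(futures)):
    elapsed = timedelta(seconds=int(time.time() - start_time))
    remaining = len(futures) - (i + 1)
    eta = (elapsed / (i + 1)) * remaining if remaining > 0 else 0
    logger.info(f"Task: [{i + 1}/{len(futures)}] -- ETA: {eta}")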