From 43f30553318eeefe5bb0af766a5c7f5166d887a5 Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Thu, 4 May 2023 10:58:20 +0200 Subject: [PATCH] Removes the parallelism in file conversion (during download) - Parallelism increases memory usage (each process has to convert a 10GB file on average, meaning 20GB per process, so could be 100GB for 5 workers) - Parallelism might not speed the process up (Looking into it). Since everything should be limited by disk speed, making it concurrent shouldn't really help. Updated the timings from ETA to elapsed (since ETA was imprecise anyway). Also using `datetime` directly to get simpler code IMO. --- .../text_generation_server/utils/convert.py | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/server/text_generation_server/utils/convert.py b/server/text_generation_server/utils/convert.py index 437e2308..8eddd76f 100644 --- a/server/text_generation_server/utils/convert.py +++ b/server/text_generation_server/utils/convert.py @@ -1,5 +1,6 @@ import concurrent import time +import datetime import torch from concurrent.futures import ThreadPoolExecutor @@ -78,17 +79,9 @@ def convert_file(pt_file: Path, st_file: Path): def convert_files(pt_files: List[Path], st_files: List[Path]): assert len(pt_files) == len(st_files) - executor = ThreadPoolExecutor(max_workers=5) - futures = [ - executor.submit(convert_file, pt_file=pt_file, st_file=st_file) - for pt_file, st_file in zip(pt_files, st_files) - ] - + N = len(pt_files) # We do this instead of using tqdm because we want to parse the logs with the launcher - start_time = time.time() - for i, future in enumerate(concurrent.futures.as_completed(futures)): - elapsed = timedelta(seconds=int(time.time() - start_time)) - remaining = len(futures) - (i + 1) - eta = (elapsed / (i + 1)) * remaining if remaining > 0 else 0 - - logger.info(f"Convert: [{i + 1}/{len(futures)}] -- ETA: {eta}") + start = datetime.datetime.now() + for i, (pt_file, sf_file) in
enumerate(zip(pt_files, st_files)): + elapsed = datetime.datetime.now() - start + logger.info(f"Convert: [{i + 1}/{N}] -- Took: {elapsed}")