Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-04-19 22:02:06 +00:00)
fix: also show total memory after full warmup
This commit is contained in:
parent
8b4cd2a9fc
commit
e152cb022b
@@ -1386,7 +1386,8 @@ class FlashCausalLM(Model):
                 total_cuda_graph_memory = free_memory_post_alloc - last_available_memory
                 log_master(
                     logger.info,
-                    f"Total memory used for CUDA graphs: {total_cuda_graph_memory/1024/1024:.2f} MB",
+                    f"Total memory used for CUDA graphs: {total_cuda_graph_memory/1024/1024:.2f} MB"
+                    f"\nTotal memory available: {last_available_memory/1024/1024:.2f} MB",
                 )
             except torch.cuda.OutOfMemoryError:
                 logger.exception("Decode cuda graph warmup failed")
|
Loading…
Reference in New Issue
Block a user