From 9904f66966f51cbd1f6d0a407179d433b09fec6d Mon Sep 17 00:00:00 2001
From: OlivierDehaene <23298448+OlivierDehaene@users.noreply.github.com>
Date: Wed, 10 Jan 2024 16:49:00 +0100
Subject: [PATCH] fix value

---
 integration-tests/conftest.py                           | 2 +-
 server/text_generation_server/models/flash_causal_lm.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/integration-tests/conftest.py b/integration-tests/conftest.py
index 69b3fe67..efeda08d 100644
--- a/integration-tests/conftest.py
+++ b/integration-tests/conftest.py
@@ -319,7 +319,7 @@ def launcher(event_loop):
         env = {
             "LOG_LEVEL": "info,text_generation_router=debug",
-            "ENABLE_CUDA_GRAPHS": "True",
+            "ENABLE_CUDA_GRAPHS": "true",
         }
         if not use_flash_attention:
             env["USE_FLASH_ATTENTION"] = "false"
diff --git a/server/text_generation_server/models/flash_causal_lm.py b/server/text_generation_server/models/flash_causal_lm.py
index 21ed4f6c..e68a2100 100644
--- a/server/text_generation_server/models/flash_causal_lm.py
+++ b/server/text_generation_server/models/flash_causal_lm.py
@@ -777,7 +777,7 @@ class FlashCausalLM(Model):
             self.device,
         )
 
-        if os.getenv("ENABLE_CUDA_GRAPHS", "False") == "True":
+        if os.getenv("ENABLE_CUDA_GRAPHS", "false") == "true":
            try:
                # Warmup cuda graphs for all power of twos until 64
                for i in range(6):
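
Note (not part of the patch): the bug was a capitalization mismatch, the test harness exported ENABLE_CUDA_GRAPHS="True" while the server compared against "true", so CUDA graph warmup was silently skipped. A minimal sketch of a case-insensitive check that would tolerate either spelling is below; the helper name cuda_graphs_enabled is hypothetical and does not exist in the TGI codebase.

    import os

    def cuda_graphs_enabled() -> bool:
        # Accept common truthy spellings so a "True" vs "true" mismatch
        # like the one fixed above cannot silently disable the feature.
        return os.getenv("ENABLE_CUDA_GRAPHS", "false").strip().lower() in ("1", "true", "yes")

    if __name__ == "__main__":
        os.environ["ENABLE_CUDA_GRAPHS"] = "True"
        print(cuda_graphs_enabled())  # prints True regardless of capitalization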