From 0d9b2f2541ef366090f1a18f1d817e9210a2560f Mon Sep 17 00:00:00 2001
From: fxmarty <9808326+fxmarty@users.noreply.github.com>
Date: Thu, 6 Jun 2024 14:02:50 +0000
Subject: [PATCH] enable tunableop by default

---
 server/text_generation_server/models/flash_causal_lm.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/server/text_generation_server/models/flash_causal_lm.py b/server/text_generation_server/models/flash_causal_lm.py
index b46707b3..8b9d4a2e 100644
--- a/server/text_generation_server/models/flash_causal_lm.py
+++ b/server/text_generation_server/models/flash_causal_lm.py
@@ -899,6 +899,8 @@ class FlashCausalLM(Model):
             os.environ.get("PYTORCH_TUNABLEOP_ENABLED") is None
             or os.environ.get("PYTORCH_TUNABLEOP_ENABLED") == "1"
         ):
+            torch.cuda.tunable.enable()
+
             if os.environ.get("PYTORCH_TUNABLEOP_TUNING") != "0":
                 torch.cuda.tunable.tuning_enable(True)