Fix bug where TunableOp is bound to CUDA graph sizes even when CUDA graphs are disabled

This commit is contained in:
fxmarty 2024-06-06 13:53:43 +00:00
parent 35d1946e67
commit c36c7ec83b

View File

@@ -907,8 +907,11 @@ class FlashCausalLM(Model):
                     int(val)
                     for val in os.environ["PYTORCH_TUNABLEOP_SEQLENS"].split(",")
                 ]
-            else:
+            elif CUDA_GRAPHS is not None:
                 tuning_sequences = CUDA_GRAPHS
+            else:
+                # For seqlen = 1, we dispatch to LLMM1 kernel.
+                tuning_sequences = [2, 3, 4, 5, 6, 7]
             tunableop_filepath = os.path.join(
                 HUGGINGFACE_HUB_CACHE,