From e0db633396dbf9eaac26db982fc77a6e3b5b296b Mon Sep 17 00:00:00 2001
From: drbh
Date: Wed, 4 Dec 2024 15:26:09 -0500
Subject: [PATCH] fix: avoid setting use_sgmv if no kernels present (#2796)

---
 server/text_generation_server/adapters/lora.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/server/text_generation_server/adapters/lora.py b/server/text_generation_server/adapters/lora.py
index a00338e7..f1edd9a0 100644
--- a/server/text_generation_server/adapters/lora.py
+++ b/server/text_generation_server/adapters/lora.py
@@ -24,6 +24,7 @@ from text_generation_server.utils.sgmv import (
     orient_for_rank,
     pad_rank,
     use_cutlass_shrink,
+    has_sgmv,
 )
 
 
@@ -325,8 +326,10 @@ class BatchLoraWeights(BatchAdapterWeights):
             default=0,
         )
 
+        use_sgmv = False
         if prefill or max_rank > BGMV_MAX_RANK:
-            use_sgmv = True
+            if has_sgmv():
+                use_sgmv = True
             lora_a_ptr = torch.tensor(
                 [
                     (
@@ -352,7 +355,6 @@ class BatchLoraWeights(BatchAdapterWeights):
                     device=device,
                 )
             else:
-                use_sgmv = False
                 lora_a_ptr = torch.tensor(
                     [
                         (