fix: avoid setting use_sgmv if no kernels present (#2796)

Author: drbh (committed by GitHub)
Date:   2024-12-04 15:26:09 -05:00
Parent: b57f370386
Commit: e0db633396


@@ -24,6 +24,7 @@ from text_generation_server.utils.sgmv import (
     orient_for_rank,
     pad_rank,
     use_cutlass_shrink,
+    has_sgmv,
 )
@@ -325,7 +326,9 @@ class BatchLoraWeights(BatchAdapterWeights):
             default=0,
         )
+        use_sgmv = False
         if prefill or max_rank > BGMV_MAX_RANK:
-            use_sgmv = True
+            if has_sgmv():
+                use_sgmv = True
             lora_a_ptr = torch.tensor(
                 [
@@ -352,7 +355,6 @@ class BatchLoraWeights(BatchAdapterWeights):
                 device=device,
             )
         else:
-            use_sgmv = False
             lora_a_ptr = torch.tensor(
                 [
                     (
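Net effect: use_sgmv now defaults to False and is only flipped to True when the SGMV kernels are actually available via has_sgmv(), rather than whenever the batch is a prefill or the rank exceeds BGMV_MAX_RANK. A minimal, self-contained sketch of that selection logic is below; it is illustrative only, not the library's real module layout, and the punica_kernels import and the bgmv_max_rank parameter are assumptions rather than names taken from this diff.

# Illustrative sketch only; anything marked "assumed" is not from the diff.
def _detect_sgmv_kernels() -> bool:
    # Assumption: has_sgmv() in text_generation_server.utils.sgmv reports
    # whether the optional SGMV kernel extension could be imported.
    try:
        import punica_kernels  # noqa: F401  (assumed optional extension name)
        return True
    except ImportError:
        return False

HAS_SGMV = _detect_sgmv_kernels()

def has_sgmv() -> bool:
    return HAS_SGMV

def use_sgmv_path(prefill: bool, max_rank: int, bgmv_max_rank: int) -> bool:
    """Fixed selection logic: prefer SGMV only when its kernels are present."""
    use_sgmv = False
    if prefill or max_rank > bgmv_max_rank:
        # Before this commit, use_sgmv was set True here regardless of
        # whether the SGMV kernels were installed.
        if has_sgmv():
            use_sgmv = True
    return use_sgmv

With this shape, installs without the kernels simply fall through to the else/BGMV path, which is why the last hunk drops the now-redundant use_sgmv = False from the else branch.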