mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-21 23:12:07 +00:00
fix: avoid setting use_sgmv if no kernels present (#2796)
This commit is contained in:
parent
b57f370386
commit
e0db633396
@@ -24,6 +24,7 @@ from text_generation_server.utils.sgmv import (
|
||||
orient_for_rank,
|
||||
pad_rank,
|
||||
use_cutlass_shrink,
|
||||
has_sgmv,
|
||||
)
|
||||
|
||||
|
||||
@@ -325,7 +326,9 @@ class BatchLoraWeights(BatchAdapterWeights):
|
||||
default=0,
|
||||
)
|
||||
|
||||
use_sgmv = False
|
||||
if prefill or max_rank > BGMV_MAX_RANK:
|
||||
if has_sgmv():
|
||||
use_sgmv = True
|
||||
lora_a_ptr = torch.tensor(
|
||||
[
|
||||
@@ -352,7 +355,6 @@ class BatchLoraWeights(BatchAdapterWeights):
|
||||
device=device,
|
||||
)
|
||||
else:
|
||||
use_sgmv = False
|
||||
lora_a_ptr = torch.tensor(
|
||||
[
|
||||
(
|
||||
|
Loading…
Reference in New Issue
Block a user