mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-25 03:52:08 +00:00
reenable _custom_C.LLMM1 as the culprit was FA2 triton
This commit is contained in:
parent
81c27ba9c2
commit
325f9774fe
@@ -355,8 +355,7 @@ class FastLinearROCm(nn.Module):
|
||||
weight = self.weight
|
||||
bias = self.bias
|
||||
|
||||
# TODO: fix for TP>=2, this only works for TP=1
|
||||
if False and IS_ROCM_SYSTEM and inp.numel() // inp.size(-1) == 1:
|
||||
if IS_ROCM_SYSTEM and inp.numel() // inp.size(-1) == 1:
|
||||
batched = False
|
||||
|
||||
if inp.dim() == 3:
|
||||
|
Loading…
Reference in New Issue
Block a user