reenable _custom_C.LLMM1 as the culprit was FA2 triton

2025-09-19 00:04:51 +00:00 · 2024-04-19 16:19:47 +00:00 · 2024-04-19 16:19:47 +00:00 · 325f9774fe
commit 325f9774fe
parent 81c27ba9c2
1 changed file with 1 addition and 2 deletions
--- a/server/text_generation_server/utils/layers.py
+++ b/server/text_generation_server/utils/layers.py
@@ -355,8 +355,7 @@ class FastLinearROCm(nn.Module):
        weight = self.weight
        bias = self.bias
-        # TODO: fix for TP>=2, this only works for TP=1
+        if IS_ROCM_SYSTEM and inp.numel() // inp.size(-1) == 1:
-        if False and IS_ROCM_SYSTEM and inp.numel() // inp.size(-1) == 1:
            batched = False
            if inp.dim() == 3: