From 81c27ba9c2e46196bb6fdca554b6e6ee6caff28b Mon Sep 17 00:00:00 2001 From: fxmarty <9808326+fxmarty@users.noreply.github.com> Date: Fri, 19 Apr 2024 15:59:31 +0000 Subject: [PATCH] disable _custom_C.LLMM1 as it is broken for TP>=2 --- server/text_generation_server/utils/layers.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/server/text_generation_server/utils/layers.py b/server/text_generation_server/utils/layers.py index 8e36f6542..27adc775f 100644 --- a/server/text_generation_server/utils/layers.py +++ b/server/text_generation_server/utils/layers.py @@ -355,7 +355,8 @@ class FastLinearROCm(nn.Module): weight = self.weight bias = self.bias - if IS_ROCM_SYSTEM and inp.numel() // inp.size(-1) == 1: + # TODO: re-enable once _custom_C.LLMM1 is fixed for TP>=2; it currently only works for TP=1, so this branch is disabled via `False`. + if False and IS_ROCM_SYSTEM and inp.numel() // inp.size(-1) == 1: batched = False if inp.dim() == 3: