Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-09-11 20:34:54 +00:00)
commit 61b49859da (parent f2fecdceca)

    fix
@@ -18,7 +18,6 @@ vllm-rocm:

 build-vllm-rocm: vllm-rocm
     cd vllm && git fetch && git checkout ca6913b3c2ffacdcb7d15e914dc34adbc6c89479
     cd vllm && patch /opt/rocm/include/hip/amd_detail/amd_hip_bf16.h ./rocm_patch/rocm_bf16.patch
     cd vllm && PYTORCH_ROCM_ARCH="gfx90a;gfx942" python setup.py install

 install-vllm-rocm: build-vllm-rocm
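The two Python hunks below re-enable ROCm-only fast paths that had been switched off with a "False and" guard. For orientation, a minimal sketch of how an IS_ROCM_SYSTEM-style flag is commonly derived from the PyTorch build; the exact definition used in this repo is not shown in the diff and may differ:

import torch

# ROCm builds of PyTorch report a HIP version string in torch.version.hip,
# while CUDA builds leave it as None. An IS_ROCM_SYSTEM-style flag can be
# derived from that; the repo's own helper may be defined differently.
IS_ROCM_SYSTEM = torch.version.hip is not None
IS_CUDA_SYSTEM = torch.version.cuda is not None

print(f"ROCm build: {IS_ROCM_SYSTEM}, CUDA build: {IS_CUDA_SYSTEM}")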
@@ -244,7 +244,7 @@ class LlamaMLP(nn.Module):
         )

     def forward(self, hidden_states):
-        if False and IS_ROCM_SYSTEM and self.hidden_act == "silu" and hidden_states.shape[0] == 1:
+        if IS_ROCM_SYSTEM and self.hidden_act == "silu" and hidden_states.shape[0] == 1:
             out = torch.empty(
                 hidden_states.shape[0],
                 self.intermediate_size,
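For reference, a minimal sketch of what this single-token branch has to compute: with silu activation and hidden_states.shape[0] == 1, the ROCm path produces silu(gate) * up into the preallocated out buffer via a fused kernel. The sketch below uses a plain SwiGLU formulation instead of the custom kernel, and the fused gate_up_proj layout is an assumption about the surrounding module, not something shown in this hunk:

import torch
import torch.nn.functional as F

def llama_mlp_single_token_reference(hidden_states, gate_up_proj, down_proj, intermediate_size):
    # Plain SwiGLU reference for the single-token (hidden_states.shape[0] == 1)
    # case; the ROCm branch replaces the silu(gate) * up product with a fused
    # kernel writing into a preallocated buffer of shape [1, intermediate_size].
    gate_up = gate_up_proj(hidden_states)                # [1, 2 * intermediate_size] (assumed layout)
    gate, up = gate_up.split(intermediate_size, dim=-1)
    out = F.silu(gate) * up                              # what the fused kernel computes
    return down_proj(out)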
@@ -366,7 +366,7 @@ class FastLinearROCm(nn.Module):
         weight = self.weight
         bias = self.bias

-        if False and IS_ROCM_SYSTEM and inp.numel() // inp.size(-1) == 1:
+        if IS_ROCM_SYSTEM and inp.numel() // inp.size(-1) == 1:
             batched = False

             if inp.dim() == 3:
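The guard inp.numel() // inp.size(-1) == 1 is true exactly when the input holds a single row (one decode token), whatever its rank, which is the shape this ROCm fast path targets. A minimal sketch of that control flow, with torch.nn.functional.linear standing in for the custom ROCm kernel:

import torch
import torch.nn.functional as F

def rocm_single_row_linear_sketch(inp, weight, bias=None):
    # numel() // size(-1) counts how many rows (tokens) the input contains;
    # the ROCm fast path only fires when there is exactly one.
    if inp.numel() // inp.size(-1) == 1:
        batched = False
        if inp.dim() == 3:
            # Flatten e.g. [1, 1, hidden] to [1, hidden] so a single skinny
            # matrix-vector product can be issued.
            inp = inp.view(-1, inp.size(-1))
            batched = True
        out = F.linear(inp, weight, bias)  # stand-in for the custom ROCm kernel
        if batched:
            out = out.view(1, 1, -1)
        return out
    # General case: fall back to the regular dense matmul.
    return F.linear(inp, weight, bias)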