diff --git a/server/Makefile-flash-att-v2 b/server/Makefile-flash-att-v2
index 03f30f8a..b67803fe 100644
--- a/server/Makefile-flash-att-v2
+++ b/server/Makefile-flash-att-v2
@@ -9,10 +9,16 @@ install-flash-attention-v2-cuda:
 	pip install -U packaging wheel
 	pip install flash-attn==$(flash_att_v2_commit_cuda)
 
-install-flash-attention-v2-rocm: build-flash-attention-v2-rocm
+build-flash-attention-v2-rocm:
 	if [ ! -d 'flash-attention-v2' ]; then \
 		pip install -U packaging ninja --no-cache-dir && \
 		git clone https://github.com/ROCm/flash-attention.git flash-attention-v2 && \
 		cd flash-attention-v2 && git fetch && git checkout $(flash_att_v2_commit_rocm) && \
-		git submodule update --init --recursive && GPU_ARCHS="gfx90a;gfx942" PYTORCH_ROCM_ARCH="gfx90a;gfx942" python setup.py install; \
+		git submodule update --init --recursive && GPU_ARCHS="gfx90a;gfx942" PYTORCH_ROCM_ARCH="gfx90a;gfx942" python setup.py build; \
+	fi
+
+install-flash-attention-v2-rocm: build-flash-attention-v2-rocm
+	if [ -d 'flash-attention-v2' ]; then \
+		cd flash-attention-v2 && \
+		GPU_ARCHS="gfx90a;gfx942" PYTORCH_ROCM_ARCH="gfx90a;gfx942" python setup.py install; \
 	fi
diff --git a/server/Makefile-vllm b/server/Makefile-vllm
index 9bb0bd50..de3b4611 100644
--- a/server/Makefile-vllm
+++ b/server/Makefile-vllm
@@ -11,10 +11,16 @@ install-vllm-cuda: build-vllm-cuda
 		cd vllm && pip install -e .; \
 	fi
 
-install-vllm-rocm:
+build-vllm-rocm:
 	if [ ! -d 'vllm' ]; then \
 		pip install -U ninja packaging --no-cache-dir && \
 		git clone https://github.com/fxmarty/rocm-vllm.git vllm && \
 		cd vllm && git fetch && git checkout ca6913b3c2ffacdcb7d15e914dc34adbc6c89479 && \
+		PYTORCH_ROCM_ARCH="gfx90a;gfx942" python setup.py build; \
+	fi
+
+install-vllm-rocm: build-vllm-rocm
+	if [ -d 'vllm' ]; then \
+		cd vllm && \
 		PYTORCH_ROCM_ARCH="gfx90a;gfx942" pip install -e .; \
 	fi
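
For reference, a minimal sketch of how the split build/install targets introduced above might be invoked from the repository root; this assumes a ROCm environment with PyTorch already installed and that the commit variables (e.g. $(flash_att_v2_commit_rocm)) are defined at the top of each Makefile. The exact invocation used in the Docker build may differ.

# Build only: clones the sources and compiles, without installing.
make -C server -f Makefile-flash-att-v2 build-flash-attention-v2-rocm
make -C server -f Makefile-vllm build-vllm-rocm

# Install from the already-built trees; each install-* target depends on
# its build-* target, so this step also builds if the previous one was skipped.
make -C server -f Makefile-flash-att-v2 install-flash-attention-v2-rocm
make -C server -f Makefile-vllm install-vllm-rocm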