diff --git a/server/Makefile-flash-att-v2 b/server/Makefile-flash-att-v2
index 8b9f289df..71c6cabe0 100644
--- a/server/Makefile-flash-att-v2
+++ b/server/Makefile-flash-att-v2
@@ -1,26 +1,29 @@
-flash_att_v2_commit := 02ac572f3ffc4f402e4183aaa6824b45859d3ed3
+flash_att_v2_commit_cuda := 02ac572f3ffc4f402e4183aaa6824b45859d3ed3
+flash_att_v2_commit_rocm := 8736558c287ff2ef28b24878e42828c595ac3e69
 
-build-flash-attention-v2-cuda: FLASH_ATTN_V2_COMMIT=02ac572f3ffc4f402e4183aaa6824b45859d3ed3
-build-flash-attention-v2-cuda: FLASH_REPOSITORY=https://github.com/HazyResearch/flash-attention.git
-build-flash-attention-v2-cuda: BRANCH=main
-build-flash-attention-v2-cuda: PYTORCH_ROCM_ARCH=""
-build-flash-attention-v2-cuda: build-flash-attention-v2
 
-build-flash-attention-v2-rocm: FLASH_ATTN_V2_COMMIT=8736558c287ff2ef28b24878e42828c595ac3e69
-build-flash-attention-v2-rocm: FLASH_REPOSITORY=https://github.com/fxmarty/flash-attention-rocm
-build-flash-attention-v2-rocm: BRANCH=remove-offload-arch-native
-build-flash-attention-v2-rocm: PYTORCH_ROCM_ARCH=gfx90a
-build-flash-attention-v2-rocm: build-flash-attention-v2
-
-flash-attention-v2:
+flash-attention-v2-cuda:
 	# Clone flash attention
 	pip install -U packaging ninja --no-cache-dir
-	git clone --single-branch --branch $(BRANCH) $(FLASH_REPOSITORY) flash-attention-v2
+	git clone https://github.com/HazyResearch/flash-attention.git flash-attention-v2
 
-build-flash-attention-v2: flash-attention-v2
-	cd flash-attention-v2 && git fetch && git checkout $(FLASH_ATTN_V2_COMMIT)
+build-flash-attention-v2-cuda: flash-attention-v2-cuda
+	cd flash-attention-v2 && git fetch && git checkout $(flash_att_v2_commit_cuda)
 	cd flash-attention-v2 && git submodule update --init --recursive
-	cd flash-attention-v2 && PYTORCH_ROCM_ARCH=$(PYTORCH_ROCM_ARCH) python setup.py build
+	cd flash-attention-v2 && python setup.py build
 
-install-flash-attention-v2: build-flash-attention-v2
-	cd flash-attention-v2 && git submodule update --init --recursive && python setup.py install
\ No newline at end of file
+install-flash-attention-v2-cuda: build-flash-attention-v2-cuda
+	cd flash-attention-v2 && git submodule update --init --recursive && python setup.py install
+
+flash-attention-v2-rocm:
+	# Clone flash attention
+	pip install -U packaging ninja --no-cache-dir
+	git clone https://github.com/fxmarty/flash-attention-rocm flash-attention-v2
+
+build-flash-attention-v2-rocm: flash-attention-v2-rocm
+	cd flash-attention-v2 && git fetch && git checkout $(flash_att_v2_commit_rocm)
+	cd flash-attention-v2 && git submodule update --init --recursive
+	cd flash-attention-v2 && PYTORCH_ROCM_ARCH=gfx90a python setup.py build
+
+install-flash-attention-v2-rocm: build-flash-attention-v2-rocm
+	cd flash-attention-v2 && git submodule update --init --recursive && python setup.py install
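
A minimal usage sketch, not part of the patch, assuming the Makefile is invoked directly from the repository root rather than through a parent Makefile: with the single build-flash-attention-v2 target split into per-backend chains, each backend is built and installed by its own install-* target, which pulls in the matching clone and build targets at the pinned commit.

	# NVIDIA build (HazyResearch/flash-attention at $(flash_att_v2_commit_cuda))
	make -f server/Makefile-flash-att-v2 install-flash-attention-v2-cuda

	# AMD build (fxmarty/flash-attention-rocm at $(flash_att_v2_commit_rocm), PYTORCH_ROCM_ARCH hard-coded to gfx90a)
	make -f server/Makefile-flash-att-v2 install-flash-attention-v2-rocm

Because install-flash-attention-v2-cuda depends on build-flash-attention-v2-cuda, which depends on flash-attention-v2-cuda (and likewise for the rocm chain), invoking the install target alone is sufficient.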