diff --git a/server/Makefile b/server/Makefile
index 6b6da230..089917ce 100644
--- a/server/Makefile
+++ b/server/Makefile
@@ -10,7 +10,7 @@ unit-tests:
 
 gen-server:
 	# Compile protos
-	pip install grpcio-tools==1.64.0 mypy-protobuf==3.6.0 'types-protobuf' --no-cache-dir
+	pip install grpcio-tools==1.62.2 mypy-protobuf==3.6.0 'types-protobuf' --no-cache-dir
 	mkdir text_generation_server/pb || true
 	python -m grpc_tools.protoc -I../proto --python_out=text_generation_server/pb \
 		--grpc_python_out=text_generation_server/pb --mypy_out=text_generation_server/pb ../proto/generate.proto
diff --git a/server/Makefile-flash-att b/server/Makefile-flash-att
index 9982421a..79928606 100644
--- a/server/Makefile-flash-att
+++ b/server/Makefile-flash-att
@@ -1,16 +1,9 @@
 flash_att_commit := 3a9bfd076f98746c73362328958dbc68d145fbec
 
-flash-attention:
-	# Clone flash attention
-	pip install -U packaging ninja --no-cache-dir
-	git clone https://github.com/HazyResearch/flash-attention.git
-
-build-flash-attention: flash-attention
-	cd flash-attention && git fetch && git checkout $(flash_att_commit)
-	cd flash-attention && MAX_JOBS=8 python setup.py build
-	cd flash-attention/csrc/rotary && python setup.py build
-	cd flash-attention/csrc/layer_norm && python setup.py build
-
-install-flash-attention: build-flash-attention
-	pip uninstall flash_attn rotary_emb dropout_layer_norm -y || true
-	cd flash-attention && python setup.py install && cd csrc/layer_norm && python setup.py install && cd ../rotary && python setup.py install
+install-flash-attention:
+	if [ ! -d 'flash-attention' ]; then \
+		pip install -U packaging ninja --no-cache-dir && \
+		git clone https://github.com/HazyResearch/flash-attention.git && \
+		cd flash-attention && git fetch && git checkout $(flash_att_commit) && \
+		MAX_JOBS=8 python setup.py install && cd csrc/layer_norm && python setup.py install && cd ../rotary && python setup.py install; \
+	fi
diff --git a/server/Makefile-flash-att-v2 b/server/Makefile-flash-att-v2
index 01586269..eba98888 100644
--- a/server/Makefile-flash-att-v2
+++ b/server/Makefile-flash-att-v2
@@ -4,15 +4,10 @@ flash_att_v2_commit_rocm := 2554f490101742ccdc56620a938f847f61754be6
 install-flash-attention-v2-cuda:
 	pip install flash-attn==$(flash_att_v2_commit_cuda)
 
-flash-attention-v2-rocm:
-	# Clone flash attention
-	pip install -U packaging ninja --no-cache-dir
-	git clone https://github.com/ROCm/flash-attention.git flash-attention-v2
-
-build-flash-attention-v2-rocm: flash-attention-v2-rocm
-	cd flash-attention-v2 && git fetch && git checkout $(flash_att_v2_commit_rocm)
-	cd flash-attention-v2 && git submodule update --init --recursive
-	cd flash-attention-v2 && GPU_ARCHS="gfx90a;gfx942" PYTORCH_ROCM_ARCH="gfx90a;gfx942" python setup.py build
-
-install-flash-attention-v2-rocm: build-flash-attention-v2-rocm
-	cd flash-attention-v2 && git submodule update --init --recursive && python setup.py install
+install-flash-attention-v2-rocm:
+	if [ ! -d 'flash-attention-v2' ]; then \
+		pip install -U packaging ninja --no-cache-dir && \
+		git clone https://github.com/ROCm/flash-attention.git flash-attention-v2 && \
+		cd flash-attention-v2 && git fetch && git checkout $(flash_att_v2_commit_rocm) && \
+		git submodule update --init --recursive && GPU_ARCHS="gfx90a;gfx942" PYTORCH_ROCM_ARCH="gfx90a;gfx942" python setup.py install; \
+	fi
diff --git a/server/Makefile-vllm b/server/Makefile-vllm
index 62fa413f..30528cb3 100644
--- a/server/Makefile-vllm
+++ b/server/Makefile-vllm
@@ -1,25 +1,16 @@
-vllm-cuda:
-	# Clone vllm
-	pip install -U ninja packaging --no-cache-dir
-	git clone https://github.com/Narsil/vllm.git vllm
+install-vllm-cuda:
+	if [ ! -d 'vllm' ]; then \
+		pip install -U ninja packaging --no-cache-dir && \
+		git clone https://github.com/Narsil/vllm.git vllm &&\
+		cd vllm && \
+		git fetch && git checkout b5dfc61db88a81069e45b44f7cc99bd9e62a60fa &&\
+		pip install -e .; \
+	fi
 
-build-vllm-cuda: vllm-cuda
-	cd vllm && git fetch && git checkout b5dfc61db88a81069e45b44f7cc99bd9e62a60fa
-	cd vllm && python setup.py build
-
-install-vllm-cuda: build-vllm-cuda
-	pip uninstall vllm -y || true
-	cd vllm && python setup.py install
-
-vllm-rocm:
-	# Clone vllm
-	pip install -U ninja packaging --no-cache-dir
-	git clone https://github.com/fxmarty/rocm-vllm.git vllm
-
-build-vllm-rocm: vllm-rocm
-	cd vllm && git fetch && git checkout ca6913b3c2ffacdcb7d15e914dc34adbc6c89479
-	cd vllm && PYTORCH_ROCM_ARCH="gfx90a;gfx942" python setup.py install
-
-install-vllm-rocm: build-vllm-rocm
-	pip uninstall vllm -y || true
-	cd vllm && python setup.py install
+install-vllm-rocm:
+	if [ ! -d 'vllm' ]; then \
+		pip install -U ninja packaging --no-cache-dir && \
+		git clone https://github.com/fxmarty/rocm-vllm.git vllm && \
+		cd vllm && git fetch && git checkout ca6913b3c2ffacdcb7d15e914dc34adbc6c89479 && \
+		PYTORCH_ROCM_ARCH="gfx90a;gfx942" pip install -e .; \
+	fi
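For reference, a minimal usage sketch of the reworked targets; only the target names come from the patch above, and the `make -C server ...` invocation is an assumption about how these Makefiles are driven from the repository root:

	# Regenerate the gRPC stubs with the pinned grpcio-tools, then install kernels.
	# The source-build install targets check for an existing checkout, so rerunning
	# them is a no-op once the clone and build have succeeded.
	make -C server gen-server
	make -C server install-flash-attention            # flash-attention v1 kernels
	make -C server install-flash-attention-v2-rocm    # flash-attention v2 (ROCm)
	make -C server install-vllm-cuda                  # vLLM fork (CUDA)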