From ad94f299f48de3b38438f5b66fddc20ac3b672f5 Mon Sep 17 00:00:00 2001
From: drbh
Date: Sun, 26 May 2024 23:21:07 -0400
Subject: [PATCH] feat: compile vllm for cuda after flash_attn

Build the vllm kernels in the same step as the flash_attn extensions.

The vllm checkout lives two directory levels below the shared wheel
directory (the layer_norm build, three levels down, writes to
../../../dist; after `cd ../..` and `cd vllm` that same directory is
../../dist), so the vllm wheel is written there as well. The build is
capped with MAX_JOBS=2 to match the other setup.py invocations in this
step.

---
NOTE(review): the YAML indentation inside the hunk was reconstructed and
must match the target file for the patch to apply; the post-image hash on
the `index` line is stale after editing the added lines - regenerate the
patch with `git format-patch` before sending.

 .github/workflows/python-packaging.yaml | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/.github/workflows/python-packaging.yaml b/.github/workflows/python-packaging.yaml
index c0a42201d..0f37ef966 100644
--- a/.github/workflows/python-packaging.yaml
+++ b/.github/workflows/python-packaging.yaml
@@ -159,6 +159,12 @@ jobs:
           MAX_JOBS=2 python setup.py bdist_wheel --dist-dir=../../../dist
           cd ../layer_norm
           MAX_JOBS=2 python setup.py bdist_wheel --dist-dir=../../../dist
+          # build the vllm kernels too; the wheel goes to the same dist dir as the wheels above
+          cd ../..
+          git clone https://github.com/Narsil/vllm.git
+          cd vllm
+          git checkout b5dfc61db88a81069e45b44f7cc99bd9e62a60fa
+          MAX_JOBS=2 python setup.py bdist_wheel --dist-dir=../../dist
           # Generate a custom name for the wheel to include CUDA and Torch versions
           tmpname=cu${MATRIX_CUDA_VERSION}torch${MATRIX_TORCH_VERSION}cxx11abi${{ matrix.cxx11_abi }}
           wheel_name=$(ls dist/*whl | xargs -n 1 basename | sed "s/-/+$tmpname-/2")