mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-24 16:32:12 +00:00
* update vllm commit & fix models using sliding window * update * update commit * fix bug where tunableop is bound to cuda graph even when cuda graph are disabled * enable tunableop by default * fix sliding window * address review * dead code * precise comment * is it flaky?
24 lines
884 B
Plaintext
24 lines
884 B
Plaintext
# Pinned vLLM revisions checked out by the build/install targets in this file.
#   commit_cuda - revision used by the *-vllm-cuda targets
#   commit_rocm - revision used by the *-vllm-rocm targets
commit_cuda := b5dfc61db88a81069e45b44f7cc99bd9e62a60fa
commit_rocm := 559200c1a028de990c1ddea761b0ccd62109e3a0
|
|
# build-vllm-cuda: fetch and build the vLLM fork at the pinned CUDA revision.
#
# If no `vllm` directory exists, installs the `ninja` and `packaging` Python
# packages and clones https://github.com/Narsil/vllm.git into `vllm`.
# Then, inside `vllm`, fetches, checks out $(commit_cuda) and runs
# `python setup.py build`.
#
# NOTE: the clone is skipped whenever a `vllm` directory already exists,
# whichever repository it was cloned from (the ROCm target clones into the
# same directory name).
#
# Declared phony: the target names a command, not a file, so a stray file
# called `build-vllm-cuda` must never make it look up to date.
.PHONY: build-vllm-cuda
build-vllm-cuda:
	if [ ! -d 'vllm' ]; then \
		pip install -U ninja packaging --no-cache-dir && \
		git clone https://github.com/Narsil/vllm.git vllm; \
	fi
	cd vllm && git fetch && git checkout $(commit_cuda) && python setup.py build
|
|
|
|
# install-vllm-cuda: install the CUDA vLLM checkout in editable mode.
#
# Depends on build-vllm-cuda, then, inside `vllm`, fetches, checks out
# $(commit_cuda) again and runs `pip install -e .`.
#
# Declared phony: the target names a command, not a file, so a stray file
# called `install-vllm-cuda` must never make it look up to date.
.PHONY: install-vllm-cuda
install-vllm-cuda: build-vllm-cuda
	cd vllm && git fetch && git checkout $(commit_cuda) && pip install -e .
|
|
|
|
# build-vllm-rocm: fetch and build the ROCm vLLM fork at the pinned revision.
#
# If no `vllm` directory exists, installs the `ninja` and `packaging` Python
# packages and clones https://github.com/fxmarty/rocm-vllm.git into `vllm`.
# Then, inside `vllm`, fetches, checks out $(commit_rocm) and runs
# `python setup.py build` with PYTORCH_ROCM_ARCH set to "gfx90a;gfx942".
#
# NOTE: the clone is skipped whenever a `vllm` directory already exists,
# whichever repository it was cloned from (the CUDA target clones into the
# same directory name).
#
# Declared phony: the target names a command, not a file, so a stray file
# called `build-vllm-rocm` must never make it look up to date.
.PHONY: build-vllm-rocm
build-vllm-rocm:
	if [ ! -d 'vllm' ]; then \
		pip install -U ninja packaging --no-cache-dir && \
		git clone https://github.com/fxmarty/rocm-vllm.git vllm; \
	fi
	cd vllm && git fetch && git checkout $(commit_rocm) && \
		PYTORCH_ROCM_ARCH="gfx90a;gfx942" python setup.py build
|
|
|
|
# install-vllm-rocm: install the ROCm vLLM checkout in editable mode.
#
# Depends on build-vllm-rocm, then, inside `vllm`, fetches, checks out
# $(commit_rocm) again and runs `pip install -e .` with PYTORCH_ROCM_ARCH
# set to "gfx90a;gfx942".
#
# Declared phony: the target names a command, not a file, so a stray file
# called `install-vllm-rocm` must never make it look up to date.
.PHONY: install-vllm-rocm
install-vllm-rocm: build-vllm-rocm
	cd vllm && git fetch && git checkout $(commit_rocm) && \
		PYTORCH_ROCM_ARCH="gfx90a;gfx942" pip install -e .
|