Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-04-24 16:32:12 +00:00)
This PR adds support for AMD Instinct MI210 & MI250 GPUs, with paged attention and Flash Attention v2 (FAv2) support.

Remaining items to discuss, on top of possible others:

* Should we have a `ghcr.io/huggingface/text-generation-inference:1.1.0+rocm` hosted image, or is it too early?
* Should we set up a CI on MI210/MI250? I don't have access to the TGI runners, though.
* Are we comfortable with these changes living directly in TGI, or do we need a fork?

---------

Co-authored-by: Felix Marty <felix@hf.co>
Co-authored-by: OlivierDehaene <olivier@huggingface.co>
Co-authored-by: Your Name <you@example.com>
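For context, if the `1.1.0+rocm` image discussed above were published, launching it would presumably mirror the usual TGI Docker invocation, with the ROCm device nodes passed through instead of the NVIDIA container runtime. This is a hedged sketch, not usage documented by this PR: the image tag is the one proposed above, and the model ID and volume path are illustrative placeholders.

```shell
# Hypothetical launch of the proposed ROCm image (tag taken from the discussion above,
# not yet published at the time of this PR). /dev/kfd and /dev/dri are the ROCm device
# nodes; --group-add video gives the container access to the GPUs.
docker run --device=/dev/kfd --device=/dev/dri --group-add video \
    --ipc=host --shm-size 1g -p 8080:80 \
    -v $PWD/data:/data \
    ghcr.io/huggingface/text-generation-inference:1.1.0+rocm \
    --model-id meta-llama/Llama-2-7b-hf
```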
23 lines
758 B
Plaintext
build-vllm-cuda: REPOSITORY=https://github.com/vllm-project/vllm.git
build-vllm-cuda: VLLM_COMMIT=f8a1e39fae05ca610be8d5a78be9d40f5274e5fc
build-vllm-cuda: BRANCH=main
build-vllm-cuda: build-vllm

build-vllm-rocm: REPOSITORY=https://github.com/fxmarty/vllm-public.git
build-vllm-rocm: VLLM_COMMIT=ad9b7c4095ef54419a0533d254f2ad84bd2dfcae
build-vllm-rocm: BRANCH=rotary-no-positions-split-cos-sin
build-vllm-rocm: build-vllm

vllm:
	# Clone vllm
	pip install -U ninja packaging --no-cache-dir
	git clone --single-branch --branch $(BRANCH) $(REPOSITORY) vllm

build-vllm: vllm
	cd vllm && git fetch && git checkout $(VLLM_COMMIT)
	cd vllm && python setup.py build

install-vllm: build-vllm
	pip uninstall vllm -y || true
	cd vllm && python setup.py install
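The `build-vllm-cuda` and `build-vllm-rocm` entry points set `REPOSITORY`, `VLLM_COMMIT`, and `BRANCH` as target-specific variables (which GNU Make also applies to their prerequisites) and then both funnel into the shared `vllm`, `build-vllm`, and `install-vllm` targets. Below is a minimal sketch of how they would typically be invoked, assuming a working CUDA or ROCm toolchain and the desired Python environment already active (neither is stated in this Makefile):

```shell
# NVIDIA GPUs: clone upstream vLLM at the pinned commit on main and compile the kernels.
make build-vllm-cuda

# AMD Instinct (ROCm) GPUs: same flow, but from the pinned commit of fxmarty's patched fork.
make build-vllm-rocm
```

The `install-vllm` target then uninstalls any vLLM already present in the environment before installing the freshly built copy with `setup.py install`.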