Use ROCM 6.3.1 (#3141)

* update dockerfile

* add updated makefile

* fix docker

* Lint.

---------

Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>
This commit is contained in:
Mohit Sharma 2025-04-07 16:25:11 +05:30 committed by GitHub
parent d23b385eee
commit 9c26b52940
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 163 additions and 221 deletions

View File

@ -41,303 +41,244 @@ COPY backends backends
COPY launcher launcher COPY launcher launcher
RUN cargo build --profile release-opt --frozen RUN cargo build --profile release-opt --frozen
# Text Generation Inference base image for RoCm FROM rocm/dev-ubuntu-22.04:6.3.1-complete AS base
FROM rocm/dev-ubuntu-22.04:6.2 AS base
# Pinned revisions and repositories for every component built from source.
# They are declared once here with their defaults; the build stages further
# down re-declare the ones they need by name (e.g. `ARG HIPBLASLT_BRANCH`).
ARG HIPBLASLT_BRANCH="4d40e36"
ARG HIPBLAS_COMMON_BRANCH="7c1566b"
# Empty by default; the hipBLASLt stage passes it verbatim to install.sh.
ARG LEGACY_HIPBLASLT_OPTION=
ARG RCCL_BRANCH="648a58d"
ARG RCCL_REPO="https://github.com/ROCm/rccl"
ARG TRITON_BRANCH="e5be006"
ARG TRITON_REPO="https://github.com/triton-lang/triton.git"
ARG PYTORCH_BRANCH="3a585126"
ARG PYTORCH_VISION_BRANCH="v0.19.1"
ARG PYTORCH_REPO="https://github.com/pytorch/pytorch.git"
ARG PYTORCH_VISION_REPO="https://github.com/pytorch/vision.git"
ARG FA_BRANCH="b7d29fb"
ARG FA_REPO="https://github.com/ROCm/flash-attention.git"
ARG AITER_BRANCH="21d47a9"
ARG AITER_REPO="https://github.com/ROCm/aiter.git"

# ROCm toolchain locations.
ENV PATH=/opt/rocm/llvm/bin:$PATH
ENV ROCM_PATH=/opt/rocm
# No trailing ':' here: an empty entry in LD_LIBRARY_PATH makes the dynamic
# loader search the current working directory, which is never intended.
ENV LD_LIBRARY_PATH=/opt/rocm/lib:/usr/local/lib

# GPU architectures to compile for; exported as ENV so that every RUN in this
# stage and in the stages derived from it sees the same value.
ARG PYTORCH_ROCM_ARCH="gfx90a;gfx942"
ENV PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH}

# Python version handed to `uv python install` / `uv venv` below.
ARG PYTHON_VERSION=3.11

# All sources are cloned and built under /app.
RUN mkdir -p /app
WORKDIR /app

# NOTE(review): as an ENV this also persists into every stage built FROM base,
# including runtime images; an ARG would keep it build-only — confirm no later
# apt-get call relies on it before changing.
ENV DEBIAN_FRONTEND=noninteractive
# Install Python and other dependencies
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
build-essential \ build-essential \
ca-certificates \ ca-certificates \
ccache \ ccache \
curl \ curl \
git \ git \
make \ ninja-build \
libmsgpack-dev \
libssl-dev \
llvm-dev \
g++ \
# Needed to build VLLM & flash.
rocthrust-dev \
hipsparse-dev \
hipblas-dev \
hipcub-dev \
rocblas-dev \
hiprand-dev \
hipfft-dev \
rocrand-dev \
miopen-hip-dev \
hipsolver-dev \
rccl-dev \
cmake \ cmake \
software-properties-common \
python3.11-dev \
python3.11-venv && \ python3.11-venv && \
rm -rf /var/lib/apt/lists/* rm -rf /var/lib/apt/lists/*
# Keep in sync with `server/pyproject.toml COPY --from=ghcr.io/astral-sh/uv:0.5.31 /uv /uvx /bin/
ARG MAMBA_VERSION=23.1.0-1 ENV PATH="$PATH:/root/.local/bin"
ARG PYTHON_VERSION='3.11.10' RUN uv python install ${PYTHON_VERSION}
# Automatically set by buildx RUN uv venv --python ${PYTHON_VERSION} && uv pip install pip setuptools packaging
ARG TARGETPLATFORM ENV VIRTUAL_ENV=/usr/src/.venv/
ENV PATH=/opt/conda/bin:$PATH ENV PATH="$PATH:/usr/src/.venv/bin/"
ARG PYTORCH_ROCM_ARCH="gfx90a;gfx942" RUN . .venv/bin/activate && pip install -U packaging cmake ninja wheel setuptools pybind11 Cython
# TGI seems to require libssl.so.1.1 instead of libssl.so.3, so we can't use Ubuntu 22.04. Ubuntu 20.04 has python==3.8, and TGI requires python>=3.9, hence the need for miniconda.
# Install mamba
# translating Docker's TARGETPLATFORM into mamba arches
RUN case ${TARGETPLATFORM} in \
"linux/arm64") MAMBA_ARCH=aarch64 ;; \
*) MAMBA_ARCH=x86_64 ;; \
esac && \
curl -fsSL -v -o ~/mambaforge.sh -O "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh"
RUN chmod +x ~/mambaforge.sh && \
bash ~/mambaforge.sh -b -p /opt/conda && \
mamba init && \
rm ~/mambaforge.sh
# RUN conda install intel::mkl-static intel::mkl-include
# Install pytorch
# On arm64 we exit with an error code
RUN case ${TARGETPLATFORM} in \
"linux/arm64") exit 1 ;; \
*) /opt/conda/bin/conda update -y conda && \
/opt/conda/bin/conda install -y "python=${PYTHON_VERSION}" ;; \
esac && \
/opt/conda/bin/conda clean -ya
# Install flash-attention, torch dependencies
RUN python3 -m pip install --upgrade pip uv && pip install numpy einops ninja joblib msgpack cmake --no-cache-dir && rm -rf /var/lib/apt/lists/*
RUN conda install mkl=2021
ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/rocm/lib/:/opt/conda/lib/python3.11/site-packages/torch/lib:/opt/conda/lib/
ARG COMMON_WORKDIR=/
WORKDIR ${COMMON_WORKDIR}
# Install HIPBLASLt
FROM base AS build_hipblaslt FROM base AS build_hipblaslt
ARG HIPBLASLT_BRANCH="e6da924" ARG HIPBLASLT_BRANCH
RUN git clone https://github.com/ROCm/hipBLASLt.git \ ARG HIPBLAS_COMMON_BRANCH
&& cd hipBLASLt \ # Set to "--legacy_hipblas_direct" for ROCm<=6.2
ARG LEGACY_HIPBLASLT_OPTION
# Fetch hipBLAS-common into the current working directory. The clone is kept
# in its own layer, separate from the checkout/build step below.
RUN git clone https://github.com/ROCm/hipBLAS-common.git
# Build hipBLAS-common at the pinned HIPBLAS_COMMON_BRANCH revision:
#   1. source the virtualenv at ./.venv (path is relative to the working dir),
#   2. configure with CMake in ./build and run the `package` target,
#   3. install the resulting .deb file(s) into this stage with dpkg, so the
#      package is already present when hipBLASLt is built in the next steps.
# The .deb files are left in hipBLAS-common/build for later collection.
RUN . .venv/bin/activate && cd hipBLAS-common \
&& git checkout ${HIPBLAS_COMMON_BRANCH} \
&& mkdir build \
&& cd build \
&& cmake .. \
&& make package \
&& dpkg -i ./*.deb
RUN git clone https://github.com/ROCm/hipBLASLt
RUN . .venv/bin/activate && cd hipBLASLt \
&& git checkout ${HIPBLASLT_BRANCH} \ && git checkout ${HIPBLASLT_BRANCH} \
&& SCCACHE_IDLE_TIMEOUT=1800 ./install.sh --architecture ${PYTORCH_ROCM_ARCH} --legacy_hipblas_direct \ && ./install.sh -d --architecture ${PYTORCH_ROCM_ARCH} ${LEGACY_HIPBLASLT_OPTION} \
&& cd build/release \ && cd build/release \
&& make package && make package
RUN mkdir -p /app/install && cp /app/hipBLASLt/build/release/*.deb /app/hipBLAS-common/build/*.deb /app/install
FROM scratch AS export_hipblaslt
ARG COMMON_WORKDIR
COPY --from=build_hipblaslt ${COMMON_WORKDIR}/hipBLASLt/build/release/*.deb /
# RCCL build stages
FROM base AS build_rccl FROM base AS build_rccl
ARG RCCL_BRANCH="rocm-6.2.0" ARG RCCL_BRANCH
RUN git clone https://github.com/ROCm/rccl \ ARG RCCL_REPO
&& cd rccl \ RUN git clone ${RCCL_REPO}
RUN . .venv/bin/activate && cd rccl \
&& git checkout ${RCCL_BRANCH} \ && git checkout ${RCCL_BRANCH} \
&& ./install.sh -p --amdgpu_targets ${PYTORCH_ROCM_ARCH} && ./install.sh -p --amdgpu_targets ${PYTORCH_ROCM_ARCH}
FROM scratch AS export_rccl RUN mkdir -p /app/install && cp /app/rccl/build/release/*.deb /app/install
ARG COMMON_WORKDIR
COPY --from=build_rccl ${COMMON_WORKDIR}/rccl/build/release/*.deb /
# Triton build stages
FROM base AS build_triton FROM base AS build_triton
ARG TRITON_BRANCH="e192dba" ARG TRITON_BRANCH
ARG TRITON_REPO="https://github.com/triton-lang/triton.git" ARG TRITON_REPO
RUN python3 -m pip install ninja cmake wheel pybind11 && git clone ${TRITON_REPO} \ RUN git clone ${TRITON_REPO}
&& cd triton \ RUN . .venv/bin/activate && cd triton \
&& git checkout ${TRITON_BRANCH} \ && git checkout ${TRITON_BRANCH} \
&& cd python \ && cd python \
&& python3 setup.py bdist_wheel --dist-dir=dist && python3 setup.py bdist_wheel --dist-dir=dist
FROM scratch AS export_triton RUN mkdir -p /app/install && cp /app/triton/python/dist/*.whl /app/install
ARG COMMON_WORKDIR
COPY --from=build_triton ${COMMON_WORKDIR}/triton/python/dist/*.whl /
# # AMD-SMI build stages
FROM base AS build_amdsmi FROM base AS build_amdsmi
RUN cd /opt/rocm/share/amd_smi \ RUN . .venv/bin/activate && cd /opt/rocm/share/amd_smi \
&& pip wheel . --wheel-dir=dist && pip wheel . --wheel-dir=dist
FROM scratch AS export_amdsmi RUN mkdir -p /app/install && cp /opt/rocm/share/amd_smi/dist/*.whl /app/install
COPY --from=build_amdsmi /opt/rocm/share/amd_smi/dist/*.whl /
# Stage: build PyTorch, torchvision and flash-attention wheels from source and
# collect all of them in /app/install for the final stage to bind-mount.
FROM base AS build_pytorch
# Revisions and repositories; declared by name only, no defaults in this stage.
ARG PYTORCH_BRANCH
ARG PYTORCH_VISION_BRANCH
ARG PYTORCH_REPO
ARG PYTORCH_VISION_REPO
ARG FA_BRANCH
ARG FA_REPO
# --- PyTorch ---------------------------------------------------------------
RUN git clone ${PYTORCH_REPO} pytorch
# Check out the pinned revision, install its Python requirements, pull all
# submodules, run tools/amd_build/build_amd.py (presumably the ROCm source
# conversion step — confirm against the PyTorch docs), then build a wheel into
# pytorch/dist. CMAKE_PREFIX_PATH is set to the active interpreter's
# sys.prefix. The wheel is also pip-installed into the venv, presumably so the
# torchvision and flash-attention builds below compile against it.
RUN . .venv/bin/activate && cd pytorch && git checkout ${PYTORCH_BRANCH} && \
pip install -r requirements.txt && git submodule update --init --recursive \
&& python3 tools/amd_build/build_amd.py \
&& CMAKE_PREFIX_PATH=$(python3 -c 'import sys; print(sys.prefix)') python3 setup.py bdist_wheel --dist-dir=dist \
&& pip install dist/*.whl
# --- torchvision -----------------------------------------------------------
RUN git clone ${PYTORCH_VISION_REPO} vision
# Build the torchvision wheel into vision/dist and install it into the venv.
RUN . .venv/bin/activate && cd vision && git checkout ${PYTORCH_VISION_BRANCH} \
&& python3 setup.py bdist_wheel --dist-dir=dist \
&& pip install dist/*.whl
# --- flash-attention -------------------------------------------------------
RUN git clone ${FA_REPO}
# Build the flash-attention wheel into flash-attention/dist. GPU_ARCHS is set
# from PYTORCH_ROCM_ARCH and MAX_JOBS is fixed at 64. Unlike the two builds
# above, the wheel is only built here, not pip-installed in this stage.
RUN . .venv/bin/activate && cd flash-attention \
&& git checkout ${FA_BRANCH} \
&& git submodule update --init \
&& MAX_JOBS=64 GPU_ARCHS=${PYTORCH_ROCM_ARCH} python3 setup.py bdist_wheel --dist-dir=dist
# Gather the three wheel sets in one directory. The absolute /app/... paths
# assume the clones above were made with /app as the working directory.
RUN mkdir -p /app/install && cp /app/pytorch/dist/*.whl /app/install \
&& cp /app/vision/dist/*.whl /app/install \
&& cp /app/flash-attention/dist/*.whl /app/install
FROM base as build_pytorch FROM base AS final
RUN --mount=type=bind,from=build_hipblaslt,src=/app/install/,target=/install \
RUN --mount=type=bind,from=export_hipblaslt,src=/,target=/install \ dpkg -i /install/*deb \
if ls /install/*.deb; then \
dpkg -i /install/*.deb \
&& sed -i 's/, hipblaslt-dev \(.*\), hipcub-dev/, hipcub-dev/g' /var/lib/dpkg/status \ && sed -i 's/, hipblaslt-dev \(.*\), hipcub-dev/, hipcub-dev/g' /var/lib/dpkg/status \
&& sed -i 's/, hipblaslt \(.*\), hipfft/, hipfft/g' /var/lib/dpkg/status; \ && sed -i 's/, hipblaslt \(.*\), hipfft/, hipfft/g' /var/lib/dpkg/status
fi RUN --mount=type=bind,from=build_rccl,src=/app/install/,target=/install \
dpkg -i /install/*deb \
ARG BUILD_ENVIRONMENT=pytorch-linux-jammy-rocm6.2-py3.11
ARG PYTORCH_ROCM_ARCH="gfx90a;gfx942"
# A commit to fix the output scaling factor issue in _scaled_mm
# Not yet in 2.5.0-rc1
ARG PYTORCH_BRANCH="cedc116"
ARG PYTORCH_VISION_BRANCH="v0.19.1"
ARG PYTORCH_REPO="https://github.com/ROCm/pytorch.git"
RUN git clone ${PYTORCH_REPO} pytorch \
&& cd pytorch && git checkout ${PYTORCH_BRANCH} && git submodule update --init --recursive \
&& pip install -r requirements.txt --no-cache-dir \
&& python tools/amd_build/build_amd.py \
&& CMAKE_PREFIX_PATH=$(python3 -c 'import sys; print(sys.prefix)') python3 setup.py bdist_wheel --dist-dir=dist
FROM scratch as export_pytorch
ARG COMMON_WORKDIR
COPY --from=build_pytorch ${COMMON_WORKDIR}/pytorch/dist/*.whl /
FROM base AS install_deps
ARG COMMON_WORKDIR
# Install hipblaslt
RUN --mount=type=bind,from=export_hipblaslt,src=/,target=/install \
if ls /install/*.deb; then \
dpkg -i /install/*.deb \
&& sed -i 's/, hipblaslt-dev \(.*\), hipcub-dev/, hipcub-dev/g' /var/lib/dpkg/status \
&& sed -i 's/, hipblaslt \(.*\), hipfft/, hipfft/g' /var/lib/dpkg/status; \
fi
RUN --mount=type=bind,from=export_rccl,src=/,target=/install \
if ls /install/*.deb; then \
dpkg -i /install/*.deb \
# RCCL needs to be installed twice
&& dpkg -i /install/*.deb \
&& sed -i 's/, rccl-dev \(.*\), rocalution/, rocalution/g' /var/lib/dpkg/status \ && sed -i 's/, rccl-dev \(.*\), rocalution/, rocalution/g' /var/lib/dpkg/status \
&& sed -i 's/, rccl \(.*\), rocalution/, rocalution/g' /var/lib/dpkg/status; \ && sed -i 's/, rccl \(.*\), rocalution/, rocalution/g' /var/lib/dpkg/status
fi RUN --mount=type=bind,from=build_triton,src=/app/install/,target=/install \
. .venv/bin/activate && \
pip install /install/*.whl
# Install the amdsmi wheel(s) collected in build_amdsmi's /app/install. The
# directory is bind-mounted at /install for this RUN only, so the wheel files
# themselves are not copied into an image layer.
RUN --mount=type=bind,from=build_amdsmi,src=/app/install/,target=/install \
. .venv/bin/activate && \
pip install /install/*.whl
# Install every wheel collected in build_pytorch's /app/install (that stage
# copies the pytorch, vision and flash-attention wheels there) into the venv.
RUN --mount=type=bind,from=build_pytorch,src=/app/install/,target=/install \
. .venv/bin/activate && \
pip install /install/*.whl
RUN --mount=type=bind,from=export_triton,src=/,target=/install \ ARG AITER_REPO
if ls /install/*.whl; then \ ARG AITER_BRANCH
# Preemptively uninstall to prevent pip same-version no-installs RUN git clone --recursive ${AITER_REPO}
pip uninstall -y triton \ RUN . .venv/bin/activate && cd aiter \
&& pip install /install/*.whl; \ && git checkout ${AITER_BRANCH} \
fi && git submodule update --init --recursive \
&& pip install -r requirements.txt \
&& PREBUILD_KERNELS=1 GPU_ARCHS=gfx942 python3 setup.py develop && pip show aiter
RUN --mount=type=bind,from=export_amdsmi,src=/,target=/install \ RUN rm -rf /var/lib/apt/lists/*
# Preemptively uninstall to prevent pip same-version no-installs
pip uninstall -y amdsmi \
&& pip install /install/*.whl;
RUN --mount=type=bind,from=export_pytorch,src=/,target=/install \
if ls /install/*.whl; then \
# Preemptively uninstall to prevent pip same-version no-installs
pip uninstall -y torch torchvision \
&& pip install /install/*.whl; \
fi
FROM install_deps AS kernel-builder
FROM final AS kernel-builder
# # Build vllm kernels # # Build vllm kernels
FROM kernel-builder AS vllm-builder FROM kernel-builder AS vllm-builder
WORKDIR /usr/src
COPY server/Makefile-vllm Makefile COPY server/Makefile-vllm Makefile
RUN pip install setuptools_scm RUN . .venv/bin/activate && pip install setuptools_scm
# Build specific version of vllm # Build specific version of vllm
RUN make build-vllm-rocm RUN . .venv/bin/activate && make build-vllm-rocm
# Build Flash Attention v2 kernels
FROM kernel-builder AS flash-att-v2-builder
WORKDIR /usr/src
COPY server/Makefile-flash-att-v2 Makefile
# Build specific version of flash attention v2
RUN make build-flash-attention-v2-rocm
# Build Transformers CUDA kernels (gpt-neox and bloom) # Build Transformers CUDA kernels (gpt-neox and bloom)
FROM kernel-builder AS custom-kernels-builder FROM kernel-builder AS custom-kernels-builder
WORKDIR /usr/src
COPY server/custom_kernels/ . COPY server/custom_kernels/ .
RUN python setup.py build RUN . .venv/bin/activate && python3 setup.py bdist_wheel --dist-dir=dist
# Build exllama kernels # Build exllama kernels
FROM kernel-builder AS exllama-kernels-builder FROM kernel-builder AS exllama-kernels-builder
WORKDIR /usr/src
COPY server/exllama_kernels/ . COPY server/exllama_kernels/ .
RUN . .venv/bin/activate && python3 setup.py bdist_wheel --dist-dir=dist
RUN python setup.py build
# Build exllama v2 kernels # Build exllama v2 kernels
FROM kernel-builder AS exllamav2-kernels-builder FROM kernel-builder AS exllamav2-kernels-builder
WORKDIR /usr/src
COPY server/exllamav2_kernels/ . COPY server/exllamav2_kernels/ .
RUN . .venv/bin/activate && python3 setup.py bdist_wheel --dist-dir=dist
RUN python setup.py build
FROM kernel-builder AS marlin-kernels FROM kernel-builder AS marlin-kernels
WORKDIR /usr/src
ENV MARLIN_KERNELS_BRANCH=v0.3.6 ENV MARLIN_KERNELS_BRANCH=v0.3.6
ENV VLLM_TARGET_DEVICE=rocm ENV VLLM_TARGET_DEVICE=rocm
RUN git clone https://github.com/danieldk/marlin-kernels.git && \ RUN . .venv/bin/activate && git clone https://github.com/danieldk/marlin-kernels.git && \
cd marlin-kernels && \ cd marlin-kernels && \
git checkout ${MARLIN_KERNELS_BRANCH} && \ git checkout ${MARLIN_KERNELS_BRANCH} && \
python setup.py install python3 setup.py bdist_wheel --dist-dir=dist
FROM kernel-builder AS moe-kernels FROM kernel-builder AS moe-kernels
WORKDIR /usr/src
ENV MOE_KERNELS_BRANCH=v0.8.2 ENV MOE_KERNELS_BRANCH=v0.8.2
ENV VLLM_TARGET_DEVICE=rocm ENV VLLM_TARGET_DEVICE=rocm
RUN git clone https://github.com/danieldk/moe-kernels.git && \ RUN . .venv/bin/activate && git clone https://github.com/danieldk/moe-kernels.git && \
cd moe-kernels && \ cd moe-kernels && \
git checkout ${MOE_KERNELS_BRANCH} && \ git checkout ${MOE_KERNELS_BRANCH} && \
python setup.py install python3 setup.py bdist_wheel --dist-dir=dist
FROM install_deps AS base-copy FROM final AS base-copy
# Text Generation Inference base env # Text Generation Inference base env
ENV HF_HOME=/data \ ENV HF_HOME=/data \
HF_HUB_ENABLE_HF_TRANSFER=1 \ HF_HUB_ENABLE_HF_TRANSFER=1 \
PORT=80 PORT=80
# Copy builds artifacts from vllm builder ENV VIRTUAL_ENV=/app/.venv/
COPY --from=vllm-builder /usr/src/vllm/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages ENV PATH="$PATH:/app/.venv/bin/"
# Copy build artifacts from flash attention v2 builder
COPY --from=flash-att-v2-builder /usr/src/flash-attention-v2/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages
# Copy build artifacts from custom kernels builder
COPY --from=custom-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages
# Copy build artifacts from exllama kernels builder
COPY --from=exllama-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages
# Copy build artifacts from exllamav2 kernels builder
COPY --from=exllamav2-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages
# Copy build artifacts from marlin kernels
COPY --from=marlin-kernels /usr/src/marlin-kernels/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages
# Copy build artifacts from moe kernels
COPY --from=moe-kernels /usr/src/moe-kernels/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages
# Install server # Install server
COPY proto proto COPY proto proto
COPY server server COPY server server
COPY server/Makefile server/Makefile COPY server/Makefile server/Makefile
ENV UV_SYSTEM_PYTHON=1
RUN cd server && \ RUN cd server && \
pip install -U pip uv && \ uv pip install grpcio-tools mypy-protobuf && \
uv sync --frozen --extra gen --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines --no-install-project && \ uv pip install -e ".[accelerate, compressed-tensors, peft, outlines]" --no-cache-dir && \
. ./.venv/bin/activate && \
make gen-server-raw make gen-server-raw
RUN cd server && \ RUN cd server && \
uv sync --frozen --extra gen --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines && \
. ./.venv/bin/activate && \
pwd && \ pwd && \
text-generation-server --help text-generation-server --help
# Install the kernel wheels produced by the builder stages. Each builder's
# dist directory is bind-mounted at /install for the duration of one RUN, so
# the wheel files are never copied into an image layer. Every builder is
# installed exactly once (a repeated custom-kernels step was removed: it
# reinstalled the same wheel and only added an extra layer).

# vLLM ROCm kernels
RUN --mount=type=bind,from=vllm-builder,src=/app/vllm/dist,target=/install \
    uv pip install /install/*.whl
# Custom Transformers kernels
RUN --mount=type=bind,from=custom-kernels-builder,src=/app/dist,target=/install \
    uv pip install /install/*.whl
# exllama kernels
RUN --mount=type=bind,from=exllama-kernels-builder,src=/app/dist,target=/install \
    uv pip install /install/*.whl
# exllama v2 kernels
RUN --mount=type=bind,from=exllamav2-kernels-builder,src=/app/dist,target=/install \
    uv pip install /install/*.whl
# marlin kernels
RUN --mount=type=bind,from=marlin-kernels,src=/app/marlin-kernels/dist,target=/install \
    uv pip install /install/*.whl
# moe kernels
RUN --mount=type=bind,from=moe-kernels,src=/app/moe-kernels/dist,target=/install \
    uv pip install /install/*.whl
# Install benchmarker # Install benchmarker
COPY --from=builder /usr/src/target/release-opt/text-generation-benchmark /usr/local/bin/text-generation-benchmark COPY --from=builder /usr/src/target/release-opt/text-generation-benchmark /usr/local/bin/text-generation-benchmark
# Install router # Install router
COPY --from=builder /usr/src/target/release-opt/text-generation-router /usr/local/bin/text-generation-router COPY --from=builder /usr/src/target/release-opt/text-generation-router /usr/local/bin/text-generation-router
# Install launcher # Install launcher
COPY --from=builder /usr/src/target/release-opt/text-generation-launcher /usr/local/bin/text-generation-launcher COPY --from=builder /usr/src/target/release-opt/text-generation-launcher /usr/local/bin/text-generation-launcher
ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/opt/conda/lib/"
# AWS Sagemaker compatible image # AWS Sagemaker compatible image
FROM base AS sagemaker FROM base AS sagemaker
@ -368,4 +309,6 @@ COPY ./tgi-entrypoint.sh /tgi-entrypoint.sh
RUN chmod +x /tgi-entrypoint.sh RUN chmod +x /tgi-entrypoint.sh
ENTRYPOINT ["/tgi-entrypoint.sh"] ENTRYPOINT ["/tgi-entrypoint.sh"]
CMD ["--json-output"] ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/root/.local/share/uv/python/cpython-3.11.11-linux-x86_64-gnu/lib"
ENV PYTHONPATH=/app/.venv/lib/python3.11/site-packages
# CMD ["--json-output"]

View File

@ -6,8 +6,7 @@ build-vllm-rocm:
git clone https://github.com/mht-sharma/vllm.git vllm; \ git clone https://github.com/mht-sharma/vllm.git vllm; \
fi fi
cd vllm && git fetch && git checkout $(commit_rocm) && \ cd vllm && git fetch && git checkout $(commit_rocm) && \
PYTORCH_ROCM_ARCH="gfx90a;gfx942" python setup.py build PYTORCH_ROCM_ARCH="gfx90a;gfx942" python3 setup.py bdist_wheel --dist-dir=dist
install-vllm-rocm: build-vllm-rocm install-vllm-rocm: build-vllm-rocm
cd vllm && git fetch && git checkout $(commit_rocm) && \ cd vllm && git fetch && git checkout $(commit_rocm)
PYTORCH_ROCM_ARCH="gfx90a;gfx942" pip install -e .