# --- Base operating-system layers -------------------------------------------
# NOTE(review): this listing has no FROM instruction and every line ends in
# " |", so it looks like an image layer-history rendering rather than a
# buildable Dockerfile — confirm before passing it to `docker build`.
#
# Build arguments declared without default values.
ARG RELEASE |
ARG LAUNCHPAD_BUILD_ARCH |
# OCI labels identifying the base image as ubuntu, version 22.04.
LABEL org.opencontainers.image.ref.name=ubuntu |
LABEL org.opencontainers.image.version=22.04 |
# Adds a content-addressed file into "/". Presumably this is the root
# filesystem archive of the base image — TODO confirm. The "file:<hash> in /"
# spelling is not regular Dockerfile ADD syntax.
ADD file:63d5ab3ef0aab308c0e71cb67292c5467f60deafa9b0418cbb220affcd078444 in / |
# Default command in exec (JSON-array) form: start bash.
CMD ["/bin/bash"] |
# --- CUDA 12.1 runtime layers ------------------------------------------------
# NOTE(review): the "|1 TARGETARCH=amd64" prefix on the RUN lines and the
# trailing "# buildkit" marker are not Dockerfile syntax; presumably they are
# added by the tool that produced this listing (build args in scope / builder
# used) — TODO confirm.
#
# Architecture string; interpolated into the CUDA repository URL in the
# keyring download step below.
ENV NVARCH=x86_64 |
# Requirement string: cuda>=12.1 plus, per GPU brand, driver ranges
# [470, 471) and [525, 526). Presumably evaluated by the NVIDIA container
# runtime at container start — TODO confirm.
ENV NVIDIA_REQUIRE_CUDA=cuda>=12.1 brand=tesla,driver>=470,driver<471 brand=unknown,driver>=470,driver<471 brand=nvidia,driver>=470,driver<471 brand=nvidiartx,driver>=470,driver<471 brand=geforce,driver>=470,driver<471 brand=geforcertx,driver>=470,driver<471 brand=quadro,driver>=470,driver<471 brand=quadrortx,driver>=470,driver<471 brand=titan,driver>=470,driver<471 brand=titanrtx,driver>=470,driver<471 brand=tesla,driver>=525,driver<526 brand=unknown,driver>=525,driver<526 brand=nvidia,driver>=525,driver<526 brand=nvidiartx,driver>=525,driver<526 brand=geforce,driver>=525,driver<526 brand=geforcertx,driver>=525,driver<526 brand=quadro,driver>=525,driver<526 brand=quadrortx,driver>=525,driver<526 brand=titan,driver>=525,driver<526 brand=titanrtx,driver>=525,driver<526 |
# Package version and compat package name; both are consumed by the
# cuda-cudart apt-get install step below.
ENV NV_CUDA_CUDART_VERSION=12.1.55-1 |
ENV NV_CUDA_COMPAT_PACKAGE=cuda-compat-12-1 |
ARG TARGETARCH |
LABEL maintainer=NVIDIA CORPORATION <cudatools@nvidia.com> |
# Installs gnupg2, curl and ca-certificates, downloads the cuda-keyring .deb
# for ubuntu2204/${NVARCH} and installs it with dpkg, then purges curl and
# clears the apt package lists in the same step. The downloaded .deb file
# itself is not removed here, and gnupg2 stays installed.
RUN |1 TARGETARCH=amd64 /bin/sh -c apt-get update && apt-get install -y --no-install-recommends gnupg2 curl ca-certificates && curl -fsSLO https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/${NVARCH}/cuda-keyring_1.0-1_all.deb && dpkg -i cuda-keyring_1.0-1_all.deb && apt-get purge --autoremove -y curl && rm -rf /var/lib/apt/lists/* # buildkit |
ENV CUDA_VERSION=12.1.0 |
# Installs cuda-cudart-12-1 pinned to ${NV_CUDA_CUDART_VERSION} together with
# the (unpinned) ${NV_CUDA_COMPAT_PACKAGE}, then clears the apt package lists.
RUN |1 TARGETARCH=amd64 /bin/sh -c apt-get update && apt-get install -y --no-install-recommends cuda-cudart-12-1=${NV_CUDA_CUDART_VERSION} ${NV_CUDA_COMPAT_PACKAGE} && rm -rf /var/lib/apt/lists/* # buildkit |
# Appends the two /usr/local/nvidia library directories to
# /etc/ld.so.conf.d/nvidia.conf.
RUN |1 TARGETARCH=amd64 /bin/sh -c echo "/usr/local/nvidia/lib" >> /etc/ld.so.conf.d/nvidia.conf && echo "/usr/local/nvidia/lib64" >> /etc/ld.so.conf.d/nvidia.conf # buildkit |
# Puts the nvidia and cuda bin directories ahead of the standard system paths.
ENV PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin |
ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64 |
# Copies the license file to the filesystem root.
COPY NGC-DL-CONTAINER-LICENSE / # buildkit |
# Presumably read by the NVIDIA container runtime to select which GPUs and
# which driver capabilities are exposed to the container — TODO confirm.
ENV NVIDIA_VISIBLE_DEVICES=all |
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility |
# --- Conda environment and prebuilt Python extension modules -----------------
# Replaces PATH with a value that puts /opt/conda/bin first, and sets
# CONDA_PREFIX to the same conda root.
ENV PATH=/opt/conda/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin CONDA_PREFIX=/opt/conda |
# HF_HOME presumably points the Hugging Face cache at /data, and PORT is
# presumably the port the service listens on — TODO confirm; nothing in this
# listing reads these variables directly.
ENV HF_HOME=/data HF_HUB_ENABLE_HF_TRANSFER=1 PORT=80 |
WORKDIR /usr/src |
# Installs libssl-dev, ca-certificates, make, curl and git (versions unpinned)
# with DEBIAN_FRONTEND set inline only, then clears the apt package lists.
RUN /bin/sh -c apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends libssl-dev ca-certificates make curl git && rm -rf /var/lib/apt/lists/* # buildkit |
# NOTE(review): the COPY lines below use absolute source paths and show no
# --from flag. Presumably they were copied from earlier build stages whose
# names are not recorded in this listing — TODO confirm against the real
# Dockerfile.
#
# The whole conda installation.
COPY /opt/conda /opt/conda # buildkit |
# Build outputs (lib.linux-x86_64-cpython-311 directories) copied into the
# conda Python 3.11 site-packages directory.
COPY /usr/src/flash-attention/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages # buildkit |
COPY /usr/src/flash-attention/csrc/layer_norm/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages # buildkit |
COPY /usr/src/flash-attention/csrc/rotary/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages # buildkit |
# A single shared object (flash_attn_2_cuda) copied into site-packages.
COPY /opt/conda/lib/python3.11/site-packages/flash_attn_2_cuda.cpython-311-x86_64-linux-gnu.so /opt/conda/lib/python3.11/site-packages # buildkit |
# NOTE(review): the next two lines are identical as recorded. Presumably each
# came from a different source stage; if not, the second is redundant —
# TODO confirm.
COPY /usr/src/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages # buildkit |
COPY /usr/src/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages # buildkit |
COPY /usr/src/exllamav2/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages # buildkit |
COPY /usr/src/llm-awq/awq/kernels/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages # buildkit |
COPY /usr/src/eetq/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages # buildkit |
COPY /usr/src/lorax-punica/server/punica_kernels/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages # buildkit |
COPY /usr/src/vllm/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages # buildkit |
COPY /usr/src/mamba/build/lib.linux-x86_64-cpython-311/ /opt/conda/lib/python3.11/site-packages # buildkit |
COPY /usr/src/causal-conv1d/build/lib.linux-x86_64-cpython-311/ /opt/conda/lib/python3.11/site-packages # buildkit |
# The flashinfer package directory, copied to the same path in this image.
COPY /opt/conda/lib/python3.11/site-packages/flashinfer/ /opt/conda/lib/python3.11/site-packages/flashinfer/ # buildkit |
# --- Python server installation ----------------------------------------------
# Installs einops without a version pin; pip's download cache is disabled.
RUN /bin/sh -c pip install einops --no-cache-dir # buildkit |
# Copies the proto and server directories to relative destinations, which
# resolve against the current working directory.
COPY proto proto # buildkit |
COPY server server # buildkit |
# NOTE(review): server/Makefile looks already covered by the COPY of the
# server directory on the previous line — confirm whether this copy is needed.
COPY server/Makefile server/Makefile # buildkit |
# Inside server/: runs the gen-server make target, installs
# requirements_cuda.txt, installs the local package with the listed extras,
# then installs nvidia-nccl-cu12 pinned to 2.22.3. Only the local-package
# install passes --no-cache-dir; the other two pip invocations do not.
RUN /bin/sh -c cd server && make gen-server && pip install -r requirements_cuda.txt && pip install ".[bnb, accelerate, marlin, moe, quantize, peft, outlines]" --no-cache-dir && pip install nvidia-nccl-cu12==2.22.3 # buildkit |
# Preloads libnccl.so.2 from the pip "nvidia/nccl" package directory.
# Presumably this forces the NCCL build installed in the step above to be
# used in place of any other copy — TODO confirm.
ENV LD_PRELOAD=/opt/conda/lib/python3.11/site-packages/nvidia/nccl/lib/libnccl.so.2 |
# Replaces LD_LIBRARY_PATH with a value that also includes /opt/conda/lib/.
ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/opt/conda/lib/ |
# NOTE(review): the effect of this flag is not visible in this listing;
# presumably it is read by the exllama integration at runtime — TODO confirm.
ENV EXLLAMA_NO_FLASH_ATTN=1 |
# --- Toolchain, service binaries and entrypoint ------------------------------
# Installs build-essential and g++ (versions unpinned) and clears the apt
# package lists in the same step.
# NOTE(review): why a compiler toolchain is needed at this point is not
# visible here; presumably something compiles code at runtime — TODO confirm.
RUN /bin/sh -c apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends build-essential g++ && rm -rf /var/lib/apt/lists/* # buildkit |
# Copies three executables from a target/release-opt directory into
# /usr/local/bin. No --from flag is recorded; presumably they come from an
# earlier build stage — TODO confirm.
COPY /usr/src/target/release-opt/text-generation-benchmark /usr/local/bin/text-generation-benchmark # buildkit |
COPY /usr/src/target/release-opt/text-generation-router /usr/local/bin/text-generation-router # buildkit |
COPY /usr/src/target/release-opt/text-generation-launcher /usr/local/bin/text-generation-launcher # buildkit |
# Copies the entrypoint script to the filesystem root and marks it executable
# in a separate step.
COPY ./tgi-entrypoint.sh /tgi-entrypoint.sh # buildkit |
RUN /bin/sh -c chmod +x /tgi-entrypoint.sh # buildkit |
# Exec-form entrypoint: the script is run directly, not through a shell.
# NOTE(review): no USER instruction appears anywhere in this listing, so the
# container presumably runs as root — confirm this is intended.
ENTRYPOINT ["/tgi-entrypoint.sh"] |