text-generation-inference (2.4.0)

Published 2024-11-04 09:12:22 +00:00 by ivampiresp in huggingface/text-generation-inference

Installation

docker pull leafdev.top/huggingface/text-generation-inference:2.4.0
sha256:9948b6bd60459171e64badd1c2c79df370057eb79c6eb7e9bec6ebd84eabd29b

About this package

Large Language Model Text Generation Inference

Image Layers

ARG RELEASE
ARG LAUNCHPAD_BUILD_ARCH
LABEL org.opencontainers.image.ref.name=ubuntu
LABEL org.opencontainers.image.version=22.04
ADD file:63d5ab3ef0aab308c0e71cb67292c5467f60deafa9b0418cbb220affcd078444 in /
CMD ["/bin/bash"]
ENV NVARCH=x86_64
ENV NVIDIA_REQUIRE_CUDA=cuda>=12.1 brand=tesla,driver>=470,driver<471 brand=unknown,driver>=470,driver<471 brand=nvidia,driver>=470,driver<471 brand=nvidiartx,driver>=470,driver<471 brand=geforce,driver>=470,driver<471 brand=geforcertx,driver>=470,driver<471 brand=quadro,driver>=470,driver<471 brand=quadrortx,driver>=470,driver<471 brand=titan,driver>=470,driver<471 brand=titanrtx,driver>=470,driver<471 brand=tesla,driver>=525,driver<526 brand=unknown,driver>=525,driver<526 brand=nvidia,driver>=525,driver<526 brand=nvidiartx,driver>=525,driver<526 brand=geforce,driver>=525,driver<526 brand=geforcertx,driver>=525,driver<526 brand=quadro,driver>=525,driver<526 brand=quadrortx,driver>=525,driver<526 brand=titan,driver>=525,driver<526 brand=titanrtx,driver>=525,driver<526
ENV NV_CUDA_CUDART_VERSION=12.1.55-1
ENV NV_CUDA_COMPAT_PACKAGE=cuda-compat-12-1
ARG TARGETARCH
LABEL maintainer=NVIDIA CORPORATION <cudatools@nvidia.com>
RUN |1 TARGETARCH=amd64 /bin/sh -c apt-get update && apt-get install -y --no-install-recommends gnupg2 curl ca-certificates && curl -fsSLO https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/${NVARCH}/cuda-keyring_1.0-1_all.deb && dpkg -i cuda-keyring_1.0-1_all.deb && apt-get purge --autoremove -y curl && rm -rf /var/lib/apt/lists/* # buildkit
ENV CUDA_VERSION=12.1.0
RUN |1 TARGETARCH=amd64 /bin/sh -c apt-get update && apt-get install -y --no-install-recommends cuda-cudart-12-1=${NV_CUDA_CUDART_VERSION} ${NV_CUDA_COMPAT_PACKAGE} && rm -rf /var/lib/apt/lists/* # buildkit
RUN |1 TARGETARCH=amd64 /bin/sh -c echo "/usr/local/nvidia/lib" >> /etc/ld.so.conf.d/nvidia.conf && echo "/usr/local/nvidia/lib64" >> /etc/ld.so.conf.d/nvidia.conf # buildkit
ENV PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64
COPY NGC-DL-CONTAINER-LICENSE / # buildkit
ENV NVIDIA_VISIBLE_DEVICES=all
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
ENV PATH=/opt/conda/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin CONDA_PREFIX=/opt/conda
ENV HF_HOME=/data HF_HUB_ENABLE_HF_TRANSFER=1 PORT=80
WORKDIR /usr/src
RUN /bin/sh -c apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends libssl-dev ca-certificates make curl git && rm -rf /var/lib/apt/lists/* # buildkit
COPY /opt/conda /opt/conda # buildkit
COPY /usr/src/flash-attention/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages # buildkit
COPY /usr/src/flash-attention/csrc/layer_norm/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages # buildkit
COPY /usr/src/flash-attention/csrc/rotary/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages # buildkit
COPY /opt/conda/lib/python3.11/site-packages/flash_attn_2_cuda.cpython-311-x86_64-linux-gnu.so /opt/conda/lib/python3.11/site-packages # buildkit
COPY /usr/src/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages # buildkit
COPY /usr/src/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages # buildkit
COPY /usr/src/exllamav2/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages # buildkit
COPY /usr/src/llm-awq/awq/kernels/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages # buildkit
COPY /usr/src/eetq/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages # buildkit
COPY /usr/src/lorax-punica/server/punica_kernels/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages # buildkit
COPY /usr/src/vllm/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages # buildkit
COPY /usr/src/mamba/build/lib.linux-x86_64-cpython-311/ /opt/conda/lib/python3.11/site-packages # buildkit
COPY /usr/src/causal-conv1d/build/lib.linux-x86_64-cpython-311/ /opt/conda/lib/python3.11/site-packages # buildkit
COPY /opt/conda/lib/python3.11/site-packages/flashinfer/ /opt/conda/lib/python3.11/site-packages/flashinfer/ # buildkit
RUN /bin/sh -c pip install einops --no-cache-dir # buildkit
COPY proto proto # buildkit
COPY server server # buildkit
COPY server/Makefile server/Makefile # buildkit
RUN /bin/sh -c cd server && make gen-server && pip install -r requirements_cuda.txt && pip install ".[bnb, accelerate, marlin, moe, quantize, peft, outlines]" --no-cache-dir && pip install nvidia-nccl-cu12==2.22.3 # buildkit
ENV LD_PRELOAD=/opt/conda/lib/python3.11/site-packages/nvidia/nccl/lib/libnccl.so.2
ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/opt/conda/lib/
ENV EXLLAMA_NO_FLASH_ATTN=1
RUN /bin/sh -c apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends build-essential g++ && rm -rf /var/lib/apt/lists/* # buildkit
COPY /usr/src/target/release-opt/text-generation-benchmark /usr/local/bin/text-generation-benchmark # buildkit
COPY /usr/src/target/release-opt/text-generation-router /usr/local/bin/text-generation-router # buildkit
COPY /usr/src/target/release-opt/text-generation-launcher /usr/local/bin/text-generation-launcher # buildkit
COPY ./tgi-entrypoint.sh /tgi-entrypoint.sh # buildkit
RUN /bin/sh -c chmod +x /tgi-entrypoint.sh # buildkit
ENTRYPOINT ["/tgi-entrypoint.sh"]

Labels

Key Value
maintainer NVIDIA CORPORATION <cudatools@nvidia.com>
org.opencontainers.image.created 2024-10-25T21:16:32.523Z
org.opencontainers.image.description Large Language Model Text Generation Inference
org.opencontainers.image.licenses Apache-2.0
org.opencontainers.image.ref.name ubuntu
org.opencontainers.image.revision 0a655a0ab5db15f08e45d8c535e263044b944190
org.opencontainers.image.source https://github.com/huggingface/text-generation-inference
org.opencontainers.image.title text-generation-inference
org.opencontainers.image.url https://github.com/huggingface/text-generation-inference
org.opencontainers.image.version 2.4.0

Details
Container
2024-11-04 09:12:22 +00:00
2
OCI / Docker
linux/amd64
Apache-2.0
6.5 GiB

Versions (1) View all
2.4.0 2024-11-04