mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-10 03:44:54 +00:00
use mamba
This commit is contained in:
parent
4cfef0441f
commit
f1ddbf5c72
3
.github/workflows/build.yaml
vendored
3
.github/workflows/build.yaml
vendored
@ -91,9 +91,6 @@ jobs:
|
|||||||
uses: docker/build-push-action@v4
|
uses: docker/build-push-action@v4
|
||||||
with:
|
with:
|
||||||
context: .
|
context: .
|
||||||
build-args: |
|
|
||||||
KERNEL_BUILDER_IMAGE=registry.internal.huggingface.tech/pytorch-base-images/kernel-builder:2.0.0-cuda11.8
|
|
||||||
PYTORCH_IMAGE=registry.internal.huggingface.tech/pytorch-base-images/torch:2.0.0-cuda11.8
|
|
||||||
file: Dockerfile
|
file: Dockerfile
|
||||||
push: ${{ github.event_name != 'pull_request' }}
|
push: ${{ github.event_name != 'pull_request' }}
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
|
32
Dockerfile
32
Dockerfile
@ -1,8 +1,3 @@
|
|||||||
# allow using other images to build kernels
|
|
||||||
ARG KERNEL_BUILDER_IMAGE=kernel-builder
|
|
||||||
# Allow using other images as pytorch base image
|
|
||||||
ARG PYTORCH_IMAGE=pytorch-install
|
|
||||||
|
|
||||||
# Rust builder
|
# Rust builder
|
||||||
FROM lukemathwalker/cargo-chef:latest-rust-1.67 AS chef
|
FROM lukemathwalker/cargo-chef:latest-rust-1.67 AS chef
|
||||||
WORKDIR /usr/src
|
WORKDIR /usr/src
|
||||||
@ -40,6 +35,7 @@ FROM ubuntu:22.04 as pytorch-install
|
|||||||
ARG PYTORCH_VERSION=2.0.0
|
ARG PYTORCH_VERSION=2.0.0
|
||||||
ARG PYTHON_VERSION=3.9
|
ARG PYTHON_VERSION=3.9
|
||||||
ARG CUDA_VERSION=11.8
|
ARG CUDA_VERSION=11.8
|
||||||
|
ARG MAMBA_VERSION=23.1.0-1
|
||||||
ARG CUDA_CHANNEL=nvidia
|
ARG CUDA_CHANNEL=nvidia
|
||||||
ARG INSTALL_CHANNEL=pytorch
|
ARG INSTALL_CHANNEL=pytorch
|
||||||
# Automatically set by buildx
|
# Automatically set by buildx
|
||||||
@ -59,16 +55,15 @@ RUN /usr/sbin/update-ccache-symlinks && \
|
|||||||
ENV PATH /opt/conda/bin:$PATH
|
ENV PATH /opt/conda/bin:$PATH
|
||||||
|
|
||||||
# Install conda
|
# Install conda
|
||||||
# translating Docker's TARGETPLATFORM into miniconda arches
|
# translating Docker's TARGETPLATFORM into mamba arches
|
||||||
RUN case ${TARGETPLATFORM} in \
|
RUN case ${TARGETPLATFORM} in \
|
||||||
"linux/arm64") MINICONDA_ARCH=aarch64 ;; \
|
"linux/arm64") MAMBA_ARCH=aarch64 ;; \
|
||||||
*) MINICONDA_ARCH=x86_64 ;; \
|
*) MAMBA_ARCH=x86_64 ;; \
|
||||||
esac && \
|
esac && \
|
||||||
curl -fsSL -v -o ~/miniconda.sh -O "https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-${MINICONDA_ARCH}.sh"
|
curl -fsSL -v -o ~/mambaforge.sh -O "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh"
|
||||||
# Manually invoke bash on miniconda script per https://github.com/conda/conda/issues/10431
|
RUN chmod +x ~/mambaforge.sh && \
|
||||||
RUN chmod +x ~/miniconda.sh && \
|
bash ~/mambaforge.sh -b -p /opt/conda && \
|
||||||
bash ~/miniconda.sh -b -p /opt/conda && \
|
rm ~/mambaforge.sh
|
||||||
rm ~/miniconda.sh
|
|
||||||
|
|
||||||
# Install pytorch
|
# Install pytorch
|
||||||
# On arm64 we exit with an error code
|
# On arm64 we exit with an error code
|
||||||
@ -80,7 +75,7 @@ RUN case ${TARGETPLATFORM} in \
|
|||||||
/opt/conda/bin/conda clean -ya
|
/opt/conda/bin/conda clean -ya
|
||||||
|
|
||||||
# CUDA kernels builder image
|
# CUDA kernels builder image
|
||||||
FROM $PYTORCH_IMAGE as kernel-builder
|
FROM pytorch-install as kernel-builder
|
||||||
|
|
||||||
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
|
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
|
||||||
ninja-build \
|
ninja-build \
|
||||||
@ -91,7 +86,7 @@ RUN /opt/conda/bin/conda install -c "nvidia/label/cuda-11.8.0" cuda==11.8 && \
|
|||||||
|
|
||||||
|
|
||||||
# Build Flash Attention CUDA kernels
|
# Build Flash Attention CUDA kernels
|
||||||
FROM $KERNEL_BUILDER_IMAGE as flash-att-builder
|
FROM kernel-builder as flash-att-builder
|
||||||
|
|
||||||
WORKDIR /usr/src
|
WORKDIR /usr/src
|
||||||
|
|
||||||
@ -101,7 +96,7 @@ COPY server/Makefile-flash-att Makefile
|
|||||||
RUN make build-flash-attention
|
RUN make build-flash-attention
|
||||||
|
|
||||||
# Build Transformers CUDA kernels
|
# Build Transformers CUDA kernels
|
||||||
FROM $KERNEL_BUILDER_IMAGE as transformers-builder
|
FROM kernel-builder as transformers-builder
|
||||||
|
|
||||||
WORKDIR /usr/src
|
WORKDIR /usr/src
|
||||||
|
|
||||||
@ -110,9 +105,6 @@ COPY server/Makefile-transformers Makefile
|
|||||||
# Build specific version of transformers
|
# Build specific version of transformers
|
||||||
RUN BUILD_EXTENSIONS="True" make build-transformers
|
RUN BUILD_EXTENSIONS="True" make build-transformers
|
||||||
|
|
||||||
# re-export because `COPY --from` does not support ARG vars directly
|
|
||||||
FROM $PYTORCH_IMAGE as pytorch
|
|
||||||
|
|
||||||
# Text Generation Inference base image
|
# Text Generation Inference base image
|
||||||
FROM ubuntu:22.04 as base
|
FROM ubuntu:22.04 as base
|
||||||
|
|
||||||
@ -144,7 +136,7 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-ins
|
|||||||
&& rm -rf /var/lib/apt/lists/*
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
# Copy conda with PyTorch installed
|
# Copy conda with PyTorch installed
|
||||||
COPY --from=pytorch /opt/conda /opt/conda
|
COPY --from=pytorch-install /opt/conda /opt/conda
|
||||||
|
|
||||||
# Copy build artifacts from flash attention builder
|
# Copy build artifacts from flash attention builder
|
||||||
COPY --from=flash-att-builder /usr/src/flash-attention/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages
|
COPY --from=flash-att-builder /usr/src/flash-attention/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages
|
||||||
|
Loading…
Reference in New Issue
Block a user