Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-09-10 20:04:52 +00:00)
use private registry for caching

commit a89d745d02
parent 158d803383
.github/workflows/build.yaml (vendored) | 3 +++

@@ -91,6 +91,9 @@ jobs:
         uses: docker/build-push-action@v4
         with:
           context: .
+          build-args: |
+            KERNEL_BUILDER_IMAGE=registry.internal.huggingface.tech/pytorch-base-images/kernel-builder:2.0.0-cuda11.8
+            PYTORCH_IMAGE=registry.internal.huggingface.tech/pytorch-base-images/torch:2.0.0-cuda11.8
           file: Dockerfile
           push: ${{ github.event_name != 'pull_request' }}
           platforms: 'linux/amd64'
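Note: the two build-args above override the ARG defaults introduced at the top of the Dockerfile (see the Dockerfile diff further down), so CI pulls prebuilt kernel-builder and PyTorch images from the private registry, while a plain local build still falls back to the in-file stages. A minimal sketch of that mechanism, with illustrative names only (registry.example.com, BUILDER_IMAGE, and the builder stage are not from this repo):

# An ARG declared before the first FROM may name either a local stage or a
# remote image, so a CI job can swap in a prebuilt image at build time:
#   docker build --build-arg BUILDER_IMAGE=registry.example.com/builder:1.0 .
ARG BUILDER_IMAGE=builder

FROM ubuntu:22.04 as builder
RUN echo "expensive compile steps happen here" > /artifact

# Without --build-arg this resolves to the local `builder` stage above;
# with it, Docker pulls the cached image from the registry instead.
FROM ${BUILDER_IMAGE} as final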
.github/workflows/tests.yaml (vendored) | 2 +-

@@ -17,7 +17,7 @@ concurrency:

 jobs:
   run_tests:
-    runs-on: self-hosted
+    runs-on: ubuntu-20.04

     env:
       SCCACHE_GHA_ENABLED: "on"
Dockerfile | 16 ++++++++++++----

@@ -1,3 +1,8 @@
+# allow using other images to build kernels
+ARG KERNEL_BUILDER_IMAGE=kernel-builder
+# Allow using other images as pytorch base image
+ARG PYTORCH_IMAGE=pytorch-install
+
 # Rust builder
 FROM lukemathwalker/cargo-chef:latest-rust-1.67 AS chef
 WORKDIR /usr/src

@@ -75,7 +80,7 @@ RUN case ${TARGETPLATFORM} in \
     /opt/conda/bin/conda clean -ya

 # CUDA kernels builder image
-FROM pytorch-install as kernel-builder
+FROM $PYTORCH_IMAGE as kernel-builder

 RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
         ninja-build \

@@ -86,7 +91,7 @@ RUN /opt/conda/bin/conda install -c "nvidia/label/cuda-11.8.0" cuda==11.8 && \


 # Build Flash Attention CUDA kernels
-FROM kernel-builder as flash-att-builder
+FROM $KERNEL_BUILDER_IMAGE as flash-att-builder

 WORKDIR /usr/src


@@ -96,7 +101,7 @@ COPY server/Makefile-flash-att Makefile
 RUN make build-flash-attention

 # Build Transformers CUDA kernels
-FROM kernel-builder as transformers-builder
+FROM $KERNEL_BUILDER_IMAGE as transformers-builder

 WORKDIR /usr/src


@@ -105,6 +110,9 @@ COPY server/Makefile-transformers Makefile
 # Build specific version of transformers
 RUN BUILD_EXTENSIONS="True" make build-transformers

+# re-export because `COPY --from` does not support ARG vars directly
+FROM $PYTORCH_IMAGE as pytorch
+
 # Text Generation Inference base image
 FROM ubuntu:22.04 as base


@@ -136,7 +144,7 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-ins
     && rm -rf /var/lib/apt/lists/*

 # Copy conda with PyTorch installed
-COPY --from=pytorch-install /opt/conda /opt/conda
+COPY --from=pytorch /opt/conda /opt/conda

 # Copy build artifacts from flash attention builder
 COPY --from=flash-att-builder /usr/src/flash-attention/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages
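Note: the final hunk works only because of the re-export stage added in the hunk before it: `COPY --from` does not expand ARG variables, but `FROM` does, so `FROM $PYTORCH_IMAGE as pytorch` binds the build arg to a fixed stage name that `COPY --from=pytorch` can reference. A minimal sketch of the pattern, with illustrative names only (TOOLS_IMAGE, tools-resolved, and /opt/tools are not from this repo):

ARG TOOLS_IMAGE=tools

FROM ubuntu:22.04 as tools
RUN mkdir -p /opt/tools && echo ok > /opt/tools/marker

# Re-export: $TOOLS_IMAGE is expanded here, pinning it to a stage name.
FROM ${TOOLS_IMAGE} as tools-resolved

FROM ubuntu:22.04 as final
# COPY --from=${TOOLS_IMAGE} would not expand the variable;
# --from=tools-resolved always resolves.
COPY --from=tools-resolved /opt/tools /opt/tools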