mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-10 11:54:52 +00:00
use private registry for caching
This commit is contained in:
parent
158d803383
commit
a89d745d02
3
.github/workflows/build.yaml
vendored
3
.github/workflows/build.yaml
vendored
@ -91,6 +91,9 @@ jobs:
|
||||
uses: docker/build-push-action@v4
|
||||
with:
|
||||
context: .
|
||||
build-args:
|
||||
- KERNEL_BUILDER_IMAGE=registry.internal.huggingface.tech/pytorch-base-images/kernel-builder:2.0.0-cuda11.8
|
||||
- PYTORCH_IMAGE=registry.internal.huggingface.tech/pytorch-base-images/torch:2.0.0-cuda11.8
|
||||
file: Dockerfile
|
||||
push: ${{ github.event_name != 'pull_request' }}
|
||||
platforms: 'linux/amd64'
|
||||
|
2
.github/workflows/tests.yaml
vendored
2
.github/workflows/tests.yaml
vendored
@ -17,7 +17,7 @@ concurrency:
|
||||
|
||||
jobs:
|
||||
run_tests:
|
||||
runs-on: self-hosted
|
||||
runs-on: ubuntu-20.04
|
||||
|
||||
env:
|
||||
SCCACHE_GHA_ENABLED: "on"
|
||||
|
16
Dockerfile
16
Dockerfile
@ -1,3 +1,8 @@
|
||||
# Allow using other images to build kernels
|
||||
ARG KERNEL_BUILDER_IMAGE=kernel-builder
|
||||
# Allow using other images as the PyTorch base image
|
||||
ARG PYTORCH_IMAGE=pytorch-install
|
||||
|
||||
# Rust builder
|
||||
FROM lukemathwalker/cargo-chef:latest-rust-1.67 AS chef
|
||||
WORKDIR /usr/src
|
||||
@ -75,7 +80,7 @@ RUN case ${TARGETPLATFORM} in \
|
||||
/opt/conda/bin/conda clean -ya
|
||||
|
||||
# CUDA kernels builder image
|
||||
FROM pytorch-install as kernel-builder
|
||||
FROM $PYTORCH_IMAGE as kernel-builder
|
||||
|
||||
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
|
||||
ninja-build \
|
||||
@ -86,7 +91,7 @@ RUN /opt/conda/bin/conda install -c "nvidia/label/cuda-11.8.0" cuda==11.8 && \
|
||||
|
||||
|
||||
# Build Flash Attention CUDA kernels
|
||||
FROM kernel-builder as flash-att-builder
|
||||
FROM $KERNEL_BUILDER_IMAGE as flash-att-builder
|
||||
|
||||
WORKDIR /usr/src
|
||||
|
||||
@ -96,7 +101,7 @@ COPY server/Makefile-flash-att Makefile
|
||||
RUN make build-flash-attention
|
||||
|
||||
# Build Transformers CUDA kernels
|
||||
FROM kernel-builder as transformers-builder
|
||||
FROM $KERNEL_BUILDER_IMAGE as transformers-builder
|
||||
|
||||
WORKDIR /usr/src
|
||||
|
||||
@ -105,6 +110,9 @@ COPY server/Makefile-transformers Makefile
|
||||
# Build a specific version of transformers
|
||||
RUN BUILD_EXTENSIONS="True" make build-transformers
|
||||
|
||||
# Re-export the image as a named stage because `COPY --from` does not support ARG variables directly
|
||||
FROM $PYTORCH_IMAGE as pytorch
|
||||
|
||||
# Text Generation Inference base image
|
||||
FROM ubuntu:22.04 as base
|
||||
|
||||
@ -136,7 +144,7 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-ins
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Copy conda with PyTorch installed
|
||||
COPY --from=pytorch-install /opt/conda /opt/conda
|
||||
COPY --from=pytorch /opt/conda /opt/conda
|
||||
|
||||
# Copy build artifacts from flash attention builder
|
||||
COPY --from=flash-att-builder /usr/src/flash-attention/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages
|
||||
|
Loading…
Reference in New Issue
Block a user