Torch 2.6, fork of rotary, eetq updated.

This commit is contained in:
Nicolas Patry 2025-02-12 12:26:15 +01:00
parent 13decd6d44
commit 59ef177d5f
No known key found for this signature in database
GPG Key ID: 4242CEF24CB6DBF9
2 changed files with 12 additions and 12 deletions

View File

@ -48,7 +48,7 @@ FROM nvidia/cuda:12.4.1-devel-ubuntu22.04 AS pytorch-install
WORKDIR /usr/src/
# NOTE: When updating the PyTorch version, remember to remove `pip install nvidia-nccl-cu12==2.22.3` further down in the Dockerfile. Context: https://github.com/huggingface/text-generation-inference/pull/2099
ARG PYTORCH_VERSION=2.5.1
ARG PYTORCH_VERSION=2.6
ARG PYTHON_VERSION=3.11
# Keep in sync with `server/pyproject.toml`
@ -87,7 +87,7 @@ WORKDIR /usr/src
COPY server/Makefile-flash-att Makefile
# Build specific version of flash attention
RUN make build-flash-attention
RUN . .venv/bin/activate && make build-flash-attention
# Build Flash Attention v2 CUDA kernels
FROM kernel-builder AS flash-att-v2-builder
@ -97,14 +97,14 @@ WORKDIR /usr/src
COPY server/Makefile-flash-att-v2 Makefile
# Build specific version of flash attention v2
RUN make build-flash-attention-v2-cuda
RUN . .venv/bin/activate && make build-flash-attention-v2-cuda
# Build Transformers exllama kernels
FROM kernel-builder AS exllama-kernels-builder
WORKDIR /usr/src
COPY server/exllama_kernels/ .
RUN python setup.py build
RUN . .venv/bin/activate && python setup.py build
# Build Transformers exllamav2 kernels
FROM kernel-builder AS exllamav2-kernels-builder
@ -112,47 +112,47 @@ WORKDIR /usr/src
COPY server/Makefile-exllamav2/ Makefile
# Build specific version of exllamav2
RUN make build-exllamav2
RUN . .venv/bin/activate && make build-exllamav2
# Build Transformers awq kernels
FROM kernel-builder AS awq-kernels-builder
WORKDIR /usr/src
COPY server/Makefile-awq Makefile
# Build specific version of awq
RUN make build-awq
RUN . .venv/bin/activate && make build-awq
# Build eetq kernels
FROM kernel-builder AS eetq-kernels-builder
WORKDIR /usr/src
COPY server/Makefile-eetq Makefile
# Build specific version of eetq
RUN make build-eetq
RUN . .venv/bin/activate && make build-eetq
# Build Lorax Punica kernels
FROM kernel-builder AS lorax-punica-builder
WORKDIR /usr/src
COPY server/Makefile-lorax-punica Makefile
# Build specific version of lorax punica
RUN TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" make build-lorax-punica
RUN . .venv/bin/activate && TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" make build-lorax-punica
# Build Transformers CUDA kernels
FROM kernel-builder AS custom-kernels-builder
WORKDIR /usr/src
COPY server/custom_kernels/ .
# Build the custom CUDA kernels copied from server/custom_kernels/
RUN python setup.py build
RUN . .venv/bin/activate && python setup.py build
# Build mamba kernels
FROM kernel-builder AS mamba-builder
WORKDIR /usr/src
COPY server/Makefile-selective-scan Makefile
RUN make build-all
RUN . .venv/bin/activate && make build-all
# Build flashinfer
FROM kernel-builder AS flashinfer-builder
WORKDIR /usr/src
COPY server/Makefile-flashinfer Makefile
RUN make install-flashinfer
RUN . .venv/bin/activate && make install-flashinfer
# Text Generation Inference base image
FROM nvidia/cuda:12.4.0-base-ubuntu22.04 AS base

View File

@ -1,4 +1,4 @@
eetq_commit := 81e0b14d64088d58ef6acd2c8f3e788d59324407
eetq_commit := 465e9726bf7ae30803a2d0dd9e5d4315aef17491
eetq:
# Clone eetq