diff --git a/Dockerfile b/Dockerfile
index e7630ddf..cf5e0ed6 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -39,10 +39,13 @@ RUN cargo build --release
 # Adapted from: https://github.com/pytorch/pytorch/blob/master/Dockerfile
 FROM nvidia/cuda:12.1.0-devel-ubuntu20.04 as pytorch-install
 
-ARG PYTORCH_VERSION=2.1.0
+ARG PYTORCH_VERSION=2.1.1
 ARG PYTHON_VERSION=3.10
 # Keep in sync with `server/pyproject.toml
+ARG CUDA_VERSION=12.1
 ARG MAMBA_VERSION=23.3.1-1
+ARG CUDA_CHANNEL=nvidia
+ARG INSTALL_CHANNEL=pytorch
 # Automatically set by buildx
 ARG TARGETPLATFORM
 
@@ -72,12 +75,10 @@ RUN chmod +x ~/mambaforge.sh && \
 RUN case ${TARGETPLATFORM} in \
     "linux/arm64") exit 1 ;; \
     *) /opt/conda/bin/conda update -y conda && \
-    /opt/conda/bin/conda install -y "python=${PYTHON_VERSION}" ;; \
+    /opt/conda/bin/conda install -c "${INSTALL_CHANNEL}" -c "${CUDA_CHANNEL}" -y "python=${PYTHON_VERSION}" "pytorch=$PYTORCH_VERSION" "pytorch-cuda=$(echo $CUDA_VERSION | cut -d'.' -f 1-2)" ;; \
     esac && \
     /opt/conda/bin/conda clean -ya
 
-RUN pip install torch==${PYTORCH_VERSION} --extra-index-url https://download.pytorch.org/whl/cu121 --no-cache-dir
-
 # CUDA kernels builder image
 FROM pytorch-install as kernel-builder
 
diff --git a/server/Makefile b/server/Makefile
index 4509af50..92958d02 100644
--- a/server/Makefile
+++ b/server/Makefile
@@ -16,14 +16,10 @@ gen-server:
 	find text_generation_server/pb/ -type f -name "*.py" -print0 -exec sed -i -e 's/^\(import.*pb2\)/from . \1/g' {} \;
 	touch text_generation_server/pb/__init__.py
 
-install-torch:
-	# Install specific version of torch
-	pip install torch==2.1.0 --extra-index-url https://download.pytorch.org/whl/cu121 --no-cache-dir
-
-install: gen-server install-torch
+install: gen-server
 	pip install pip --upgrade
 	pip install -r requirements.txt
-	pip install -e ".[bnb, accelerate, torch, peft]"
+	pip install -e ".[bnb, accelerate, quantize, peft]"
 
 run-dev:
 	SAFETENSORS_FAST_GPU=1 python -m torch.distributed.run --nproc_per_node=2 text_generation_server/cli.py serve bigscience/bloom-560m --sharded
diff --git a/server/poetry.lock b/server/poetry.lock
index d07c2c85..48ae40fe 100644
--- a/server/poetry.lock
+++ b/server/poetry.lock
@@ -2782,4 +2782,4 @@ torch = ["torch"]
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.9,<3.13"
-content-hash = "354dffb296d5b93ff2df541498cf03176ca0c18639d79fdd7339cd33caecde50"
+content-hash = "cd3fb4b4e4aaf100f6015afa8c9adc28c22e6c0b48752452892dc3d004c1562a"
diff --git a/server/pyproject.toml b/server/pyproject.toml
index 15b49343..52431eea 100644
--- a/server/pyproject.toml
+++ b/server/pyproject.toml
@@ -31,7 +31,7 @@ einops = "^0.6.1"
 texttable = { version = "^1.6.7", optional = true }
 datasets = { version = "^2.14.0", optional = true }
 peft = { version = "^0.4.0", optional = true }
-torch = { version = "^2.1.0", optional = true }
+torch = { version = "^2.1.1", optional = true }
 scipy = "^1.11.1"
 pillow = "^10.0.0"
 