Mirror of https://github.com/huggingface/text-generation-inference.git, synced 2025-04-19 22:02:06 +00:00

chore: update torch (#1730)

Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>

Parent: c99ecd77ec
Commit: b4ef038837
Dockerfile

@@ -39,7 +39,7 @@ RUN cargo build --release
 # Adapted from: https://github.com/pytorch/pytorch/blob/master/Dockerfile
 FROM nvidia/cuda:12.1.0-devel-ubuntu22.04 as pytorch-install

-ARG PYTORCH_VERSION=2.1.1
+ARG PYTORCH_VERSION=2.3.0
 ARG PYTHON_VERSION=3.10
 # Keep in sync with `server/pyproject.toml
 ARG CUDA_VERSION=12.1
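The torch pin above is duplicated in server/pyproject.toml (hence the "Keep in sync" comment). A quick way to confirm an image built from this Dockerfile actually ships the bumped version is to query torch inside the container; a minimal sketch, assuming the published image name and tag (both illustrative):

    docker run --rm --entrypoint python \
        ghcr.io/huggingface/text-generation-inference:latest \
        -c "import torch; print(torch.__version__, torch.version.cuda)"
    # should report torch 2.3.0 built against CUDA 12.1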
@@ -149,6 +149,8 @@ FROM kernel-builder as vllm-builder

 WORKDIR /usr/src

+ENV TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"
+
 COPY server/Makefile-vllm Makefile

 # Build specific version of vllm
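TORCH_CUDA_ARCH_LIST tells the CUDA extension build in this stage (vllm's setup.py) which compute capabilities to compile kernels for, rather than auto-detecting them from a GPU on the build machine, which CI builders typically lack. For comparison, the architectures the installed torch wheel itself was compiled for can be listed with the standard torch API (a sketch; output shown is illustrative):

    python -c "import torch; print(torch.cuda.get_arch_list())"
    # e.g. ['sm_70', 'sm_75', 'sm_80', 'sm_86', 'sm_89', 'sm_90']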
@@ -210,7 +212,7 @@ COPY --from=vllm-builder /usr/src/vllm/build/lib.linux-x86_64-cpython-310 /opt/c
 COPY --from=mamba-builder /usr/src/mamba/build/lib.linux-x86_64-cpython-310/ /opt/conda/lib/python3.10/site-packages
 COPY --from=mamba-builder /usr/src/causal-conv1d/build/lib.linux-x86_64-cpython-310/ /opt/conda/lib/python3.10/site-packages

-# Install vllm/flash-attention dependencies
+# Install flash-attention dependencies
 RUN pip install einops --no-cache-dir

 # Install server
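The reworded comment suggests einops is now needed only by the flash-attention Python layer, not by vllm. A trivial smoke test inside the built image (illustrative):

    python -c "import einops; print(einops.__version__)"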
@@ -246,6 +248,7 @@ ENTRYPOINT ["./entrypoint.sh"]
 FROM base

 COPY ./tgi-entrypoint.sh /tgi-entrypoint.sh
+RUN chmod +x /tgi-entrypoint.sh

 ENTRYPOINT ["/tgi-entrypoint.sh"]
 CMD ["--json-output"]
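The added chmod +x guards against the script losing its execute bit in the build context, since COPY preserves source permissions. With the exec-form ENTRYPOINT plus CMD above, "--json-output" is only a default argument: anything passed after the image name on docker run replaces it. A usage sketch (image tag and model id are illustrative):

    docker run --gpus all ghcr.io/huggingface/text-generation-inference:latest \
        --model-id mistralai/Mistral-7B-Instruct-v0.2 --json-output
    # trailing args replace CMD, so --json-output must be repeated if still wanted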
server/Makefile-vllm

@@ -4,7 +4,7 @@ vllm-cuda:
 	git clone https://github.com/OlivierDehaene/vllm.git vllm

 build-vllm-cuda: vllm-cuda
-	cd vllm && git fetch && git checkout 4bec8cee87f6bb8cebaec297029713cd2082e0b2
+	cd vllm && git fetch && git checkout 3d4693536dcb69f036c26b016a35839b99ebed59
 	cd vllm && python setup.py build

 install-vllm-cuda: build-vllm-cuda
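To reproduce the pinned build outside the Dockerfile and confirm the new revision is what gets compiled, a minimal sketch, assuming it is run from the repository root:

    make -f server/Makefile-vllm build-vllm-cuda
    cd vllm && git rev-parse HEAD
    # should print 3d4693536dcb69f036c26b016a35839b99ebed59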