mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-06-19 15:52:08 +00:00
Set TGI_LLAMA_PKG_CUDA from CUDA_VERSION
Signed-off-by: Adrien Gallouët <angt@huggingface.co>
This commit is contained in:
parent
dbee804129
commit
c52f08351f
@ -2,6 +2,7 @@ FROM nvidia/cuda:12.6.3-cudnn-devel-ubuntu24.04 AS deps
|
|||||||
|
|
||||||
ARG llama_version=b4628
|
ARG llama_version=b4628
|
||||||
ARG llama_cuda_arch=75-real;80-real;86-real;89-real;90-real
|
ARG llama_cuda_arch=75-real;80-real;86-real;89-real;90-real
|
||||||
|
ENV TGI_LLAMA_PKG_CUDA=cuda-${CUDA_VERSION%.*}
|
||||||
|
|
||||||
WORKDIR /opt/src
|
WORKDIR /opt/src
|
||||||
|
|
||||||
@ -50,7 +51,6 @@ RUN cargo chef cook \
|
|||||||
--profile release-opt \
|
--profile release-opt \
|
||||||
--package text-generation-router-llamacpp
|
--package text-generation-router-llamacpp
|
||||||
COPY . .
|
COPY . .
|
||||||
ENV TGI_LLAMA_PKG_CUDA=cuda-12.6
|
|
||||||
RUN cargo build \
|
RUN cargo build \
|
||||||
--profile release-opt \
|
--profile release-opt \
|
||||||
--package text-generation-router-llamacpp --frozen
|
--package text-generation-router-llamacpp --frozen
|
||||||
|
Loading…
Reference in New Issue
Block a user