mirror of https://github.com/huggingface/text-generation-inference.git

commit 98c1496ea6 (parent 1e5d27a256)

Moving those deps directly into pyproject.
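In short: instead of "pip install <wheel URL>" RUN steps baked into the Dockerfile, each Intel wheel is now a named optional dependency in pyproject.toml, and [tool.uv.sources] maps that name to its wheel URL, so adding --extra intel-xpu or --extra intel-cpu to uv sync pulls the Intel stack together with everything else. A minimal sketch of the pattern, assuming the extras live under the standard [project.optional-dependencies] table (that table header is not visible in the diff); the package name and URL are taken from the hunks below:

[project.optional-dependencies]
intel-xpu = [
    "torch-intel-xpu",
]

[tool.uv.sources]
# Dotted-key form, matching the existing entries in this file;
# equivalent to torch-intel-xpu = { url = "..." }.
torch-intel-xpu.url = "https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/ipex_dev/xpu/torch-2.5.0a0%2Bgite84e33f-cp311-cp311-linux_x86_64.whl"

With the sources declared this way, the Dockerfile only needs the extra flag added to its existing "uv sync --frozen ..." invocations, which is what the first and third hunks below do.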

@@ -105,23 +105,16 @@ COPY server/Makefile server/Makefile
 ENV UV_SYSTEM_PYTHON=1
 RUN cd server && \
     pip install -U pip uv && \
-    uv sync --frozen --extra gen --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines --no-install-project && \
+    uv sync --frozen --extra gen --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines --extra intel-xpu --no-install-project && \
     . ./.venv/bin/activate && \
     make gen-server-raw

 RUN cd server && \
-    uv sync --frozen --extra gen --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines && \
+    uv sync --frozen --extra gen --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines --extra intel-xpu && \
     . ./.venv/bin/activate && \
     pwd && \
     text-generation-server --help

-RUN . ./server/.venv/bin/activate && pip install https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/ipex_dev/xpu/torch-2.5.0a0%2Bgite84e33f-cp311-cp311-linux_x86_64.whl --no-cache-dir
-RUN . ./server/.venv/bin/activate && pip install https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/ipex_dev/xpu/torchaudio-2.5.0a0%2B56bc006-cp311-cp311-linux_x86_64.whl --no-cache-dir
-RUN . ./server/.venv/bin/activate && pip install https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/ipex_dev/xpu/torchvision-0.20.0a0%2B8e8a208-cp311-cp311-linux_x86_64.whl --no-cache-dir
-RUN . ./server/.venv/bin/activate && pip install https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/ipex_dev/xpu/oneccl_bind_pt-2.5.0%2Bxpu-cp311-cp311-linux_x86_64.whl --no-cache-dir
-
-RUN . ./server/.venv/bin/activate pip install triton-xpu==3.0.0b2 --no-cache-dir
-
 ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/intel/oneapi/pti/0.9/lib:/opt/conda/lib
 ENV CCL_ZE_IPC_EXCHANGE=sockets
 #ENV TORCH_LLM_ALLREDUCE=1

@@ -194,12 +187,6 @@ RUN case ${TARGETPLATFORM} in \
 RUN conda install -c conda-forge gperftools mkl


-RUN pip install https://download.pytorch.org/whl/nightly/cpu/torch-2.5.0.dev20240815%2Bcpu-cp311-cp311-linux_x86_64.whl
-RUN pip install https://download.pytorch.org/whl/nightly/cpu/torchvision-0.20.0.dev20240815%2Bcpu-cp311-cp311-linux_x86_64.whl
-RUN pip install https://download.pytorch.org/whl/nightly/cpu/torchaudio-2.4.0.dev20240815%2Bcpu-cp311-cp311-linux_x86_64.whl
-
-RUN pip install triton==3.1.0 py-libnuma
-
 WORKDIR /usr/src

 RUN git clone https://github.com/intel/intel-extension-for-pytorch && cd intel-extension-for-pytorch && git checkout b7b552baf64283b594665b8687430fe92990e497

@@ -224,12 +211,12 @@ COPY server/Makefile server/Makefile
 ENV UV_SYSTEM_PYTHON=1
 RUN cd server && \
     pip install -U pip uv && \
-    uv sync --frozen --extra gen --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines --no-install-project && \
+    uv sync --frozen --extra gen --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines --extra intel-cpu --no-install-project && \
     . ./.venv/bin/activate && \
     make gen-server-raw

 RUN cd server && \
-    uv sync --frozen --extra gen --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines && \
+    uv sync --frozen --extra gen --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines --extra intel-cpu && \
     . ./.venv/bin/activate && \
     pwd && \
     text-generation-server --help

@@ -67,6 +67,21 @@ gen = [
     "grpcio-tools>=1.69.0",
     "mypy-protobuf>=3.6.0",
 ]
+intel-xpu = [
+    "torch-intel-xpu",
+    "torchaudio-intel-xpu",
+    "torchvision-intel-xpu",
+    "oneccl-intel-xpu",
+    "triton-xpu==3.0.0b2"
+]
+intel-cpu = [
+    "torch-intel-cpu",
+    "torchaudio-intel-cpu",
+    "torchvision-intel-cpu",
+    "oneccl-intel-cpu",
+    "triton==3.1.0",
+    "py-libnuma"
+]

 [tool.uv.sources]
 attention-kernels.url = "https://github.com/danieldk/attention-kernels/releases/download/v0.2.0.post2/attention_kernels-0.2.0.post2+cu123torch2.5-cp39-abi3-linux_x86_64.whl"
||||||
@ -77,6 +92,13 @@ marlin-kernels = [
|
|||||||
{ url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.7/marlin_kernels-0.3.7+cu123torch2.5-cp312-cp312-linux_x86_64.whl", marker = "python_version == '3.12'" },
|
{ url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.7/marlin_kernels-0.3.7+cu123torch2.5-cp312-cp312-linux_x86_64.whl", marker = "python_version == '3.12'" },
|
||||||
]
|
]
|
||||||
moe-kernels.url = "https://github.com/danieldk/moe-kernels/releases/download/v0.8.2/moe_kernels-0.8.2+cu123torch2.5-cp39-abi3-linux_x86_64.whl"
|
moe-kernels.url = "https://github.com/danieldk/moe-kernels/releases/download/v0.8.2/moe_kernels-0.8.2+cu123torch2.5-cp39-abi3-linux_x86_64.whl"
|
||||||
|
torch-intel-xpu.url = "https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/ipex_dev/xpu/torch-2.5.0a0%2Bgite84e33f-cp311-cp311-linux_x86_64.whl"
|
||||||
|
torchaudio-intel-xpu.url = "https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/ipex_dev/xpu/torchaudio-2.5.0a0%2B56bc006-cp311-cp311-linux_x86_64.whl"
|
||||||
|
torchvision-intel-xpu.url = "https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/ipex_dev/xpu/torchvision-0.20.0a0%2B8e8a208-cp311-cp311-linux_x86_64.whl"
|
||||||
|
oneccl-intel-xpu.url = "https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/ipex_dev/xpu/oneccl_bind_pt-2.5.0%2Bxpu-cp311-cp311-linux_x86_64.whl"
|
||||||
|
torch-intel-cpu.url = "https://download.pytorch.org/whl/nightly/cpu/torch-2.5.0.dev20240815%2Bcpu-cp311-cp311-linux_x86_64.whl"
|
||||||
|
torchaudio-intel-cpu.url = "https://download.pytorch.org/whl/nightly/cpu/torchaudio-2.4.0.dev20240815%2Bcpu-cp311-cp311-linux_x86_64.whl"
|
||||||
|
torchvision-intel-cpu.url = "https://download.pytorch.org/whl/nightly/cpu/torchvision-0.20.0.dev20240815%2Bcpu-cp311-cp311-linux_x86_64.whl"
|
||||||
|
|
||||||
[tool.pytest.ini_options]
|
[tool.pytest.ini_options]
|
||||||
markers = ["private: marks tests as requiring an admin hf token (deselect with '-m \"not private\"')"]
|
markers = ["private: marks tests as requiring an admin hf token (deselect with '-m \"not private\"')"]
|
||||||
|