From 951eb62b56e83beb90d440b21189bcdf78520a8f Mon Sep 17 00:00:00 2001
From: Nicolas Patry <patry.nicolas@protonmail.com>
Date: Wed, 5 Feb 2025 11:48:40 +0100
Subject: [PATCH] Using the "lockfile".

---
 Dockerfile                    |   3 +-
 Dockerfile_amd                |  34 +--
 Dockerfile_intel              |  31 +--
 server/Makefile               |   5 +
 server/requirements_cuda.txt  | 440 +++++++++++++++++++++++++++++-----
 server/requirements_intel.txt | 423 +++++++++++++++++++++++++++-----
 server/requirements_rocm.txt  | 423 +++++++++++++++++++++++++++-----
 7 files changed, 1144 insertions(+), 215 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 72005333..4f272679 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -233,9 +233,8 @@ COPY server/Makefile server/Makefile
 ENV UV_SYSTEM_PYTHON=1
 RUN cd server && \
     make gen-server && \
-    python -c "from text_generation_server.pb import generate_pb2" && \
     pip install -U pip uv && \
-    uv pip install -e ".[attention, bnb, accelerate, compressed-tensors, marlin, moe, quantize, peft, outlines]" --no-cache-dir # && \
+    uv pip install -r requirements_cuda.txt --no-cache-dir && uv pip install -e . --no-deps --no-cache-dir
     # uv pip install nvidia-nccl-cu12==2.22.3
 
 ENV LD_PRELOAD=/opt/conda/lib/python3.11/site-packages/nvidia/nccl/lib/libnccl.so.2
diff --git a/Dockerfile_amd b/Dockerfile_amd
index 205e46d9..92a9fed7 100644
--- a/Dockerfile_amd
+++ b/Dockerfile_amd
@@ -1,5 +1,5 @@
 # Rust builder
-FROM lukemathwalker/cargo-chef:latest-rust-1.84.0 AS chef
+FROM lukemathwalker/cargo-chef:latest-rust-1.80.1 AS chef
 WORKDIR /usr/src
 
 ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
@@ -104,7 +104,7 @@ RUN case ${TARGETPLATFORM} in \
     /opt/conda/bin/conda clean -ya
 
 # Install flash-attention, torch dependencies
-RUN python3 -m pip install --upgrade pip uv && pip install numpy einops ninja joblib msgpack cmake --no-cache-dir && rm -rf /var/lib/apt/lists/*
+RUN python3 -m pip install --upgrade pip && pip install numpy einops ninja joblib msgpack cmake --no-cache-dir && rm -rf /var/lib/apt/lists/*
 
 RUN conda install mkl=2021
 ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/rocm/lib/:/opt/conda/lib/python3.11/site-packages/torch/lib:/opt/conda/lib/
@@ -234,7 +234,6 @@ FROM kernel-builder AS vllm-builder
 WORKDIR /usr/src
 
 COPY server/Makefile-vllm Makefile
-RUN pip install setuptools_scm
 
 # Build specific version of vllm
 RUN make build-vllm-rocm
@@ -268,24 +267,6 @@ COPY server/exllamav2_kernels/ .
 
 RUN python setup.py build
 
-FROM kernel-builder AS marlin-kernels
-WORKDIR /usr/src
-ENV MARLIN_KERNELS_BRANCH=v0.3.6
-ENV VLLM_TARGET_DEVICE=rocm
-RUN git clone https://github.com/danieldk/marlin-kernels.git && \
-    cd marlin-kernels && \
-    git checkout ${MARLIN_KERNELS_BRANCH} && \
-    python setup.py install
-
-FROM kernel-builder AS moe-kernels
-WORKDIR /usr/src
-ENV MOE_KERNELS_BRANCH=v0.8.2
-ENV VLLM_TARGET_DEVICE=rocm
-RUN git clone https://github.com/danieldk/moe-kernels.git && \
-    cd moe-kernels && \
-    git checkout ${MOE_KERNELS_BRANCH} && \
-    python setup.py install
-
 FROM install_deps AS base-copy
 
 # Text Generation Inference base env
@@ -308,21 +289,14 @@ COPY --from=exllama-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-311
 # Copy build artifacts from exllamav2 kernels builder
 COPY --from=exllamav2-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages
 
-# Copy build artifacts from marlin kernels
-COPY --from=marlin-kernels /usr/src/marlin-kernels/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages
-
-# Copy build artifacts from moe kernels
-COPY --from=moe-kernels /usr/src/moe-kernels/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages
-
 # Install server
 COPY proto proto
 COPY server server
 COPY server/Makefile server/Makefile
-ENV UV_SYSTEM_PYTHON=1
 RUN cd server && \
     make gen-server && \
-    pip install -U pip uv && \
-    uv pip install -e ".[accelerate, compressed-tensors, peft, outlines]" --no-cache-dir
+    # Install the locked dependencies first, then the server package itself.
+    pip install -r requirements_rocm.txt --no-cache-dir && pip install ".[accelerate, compressed-tensors, peft, outlines]" --no-cache-dir
 
 # Install benchmarker
 COPY --from=builder /usr/src/target/release-opt/text-generation-benchmark /usr/local/bin/text-generation-benchmark
diff --git a/Dockerfile_intel b/Dockerfile_intel
index 0f0d4383..62fad650 100644
--- a/Dockerfile_intel
+++ b/Dockerfile_intel
@@ -1,6 +1,6 @@
 ARG PLATFORM=xpu
 
-FROM lukemathwalker/cargo-chef:latest-rust-1.84.0 AS chef
+FROM lukemathwalker/cargo-chef:latest-rust-1.80.1 AS chef
 WORKDIR /usr/src
 
 ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
@@ -97,10 +97,11 @@ ENV HF_HOME=/data \
 
 
 WORKDIR /usr/src
-RUN pip install https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/ipex_dev/xpu/torch-2.5.0a0%2Bgite84e33f-cp311-cp311-linux_x86_64.whl --no-cache-dir
-RUN pip install https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/ipex_dev/xpu/torchaudio-2.5.0a0%2B56bc006-cp311-cp311-linux_x86_64.whl --no-cache-dir
-RUN pip install https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/ipex_dev/xpu/torchvision-0.20.0a0%2B8e8a208-cp311-cp311-linux_x86_64.whl --no-cache-dir
-RUN pip install https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/ipex_dev/xpu/oneccl_bind_pt-2.5.0%2Bxpu-cp311-cp311-linux_x86_64.whl --no-cache-dir
+RUN pip install https://intel-extension-for-pytorch.s3.us-east-1.amazonaws.com/ipex_dev/xpu/torch-2.5.0a0%2Bgite84e33f-cp311-cp311-linux_x86_64.whl --no-cache-dir
+RUN pip install https://intel-extension-for-pytorch.s3.us-east-1.amazonaws.com/ipex_dev/xpu/torchaudio-2.5.0a0%2B56bc006-cp311-cp311-linux_x86_64.whl --no-cache-dir
+RUN pip install https://intel-extension-for-pytorch.s3.us-east-1.amazonaws.com/ipex_dev/xpu/torchvision-0.20.0a0%2B8e8a208-cp311-cp311-linux_x86_64.whl --no-cache-dir
+RUN pip install https://intel-extension-for-pytorch.s3.us-east-1.amazonaws.com/ipex_dev/xpu/intel_extension_for_pytorch-2.5.10%2Bgit9d489a8-cp311-cp311-linux_x86_64.whl --no-cache-dir
+RUN pip install https://intel-extension-for-pytorch.s3.us-east-1.amazonaws.com/ipex_dev/xpu/oneccl_bind_pt-2.5.0%2Bxpu-cp311-cp311-linux_x86_64.whl --no-cache-dir
 
 RUN pip install triton-xpu==3.0.0b2 --no-cache-dir
 
@@ -108,20 +109,15 @@ RUN pip install triton-xpu==3.0.0b2 --no-cache-dir
 COPY proto proto
 COPY server server
 COPY server/Makefile server/Makefile
-ENV UV_SYSTEM_PYTHON=1
 RUN cd server && \
     make gen-server && \
-    pip install -U pip uv && \
-    uv pip install -e ".[accelerate, compressed-tensors, peft, outlines]" --no-cache-dir
+    pip install -r requirements_intel.txt --no-cache-dir && \
+    pip install ".[accelerate, compressed-tensors, peft, outlines]" --no-cache-dir
 
 ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/intel/oneapi/pti/0.9/lib:/opt/conda/lib
 ENV CCL_ZE_IPC_EXCHANGE=sockets
 #ENV TORCH_LLM_ALLREDUCE=1
 #ENV CCL_TOPO_FABRIC_VERTEX_CONNECTION_CHECK=0
-ENV TORCH_DEVICE_BACKEND_AUTOLOAD=0
-
-RUN git clone https://github.com/intel/intel-extension-for-pytorch && cd intel-extension-for-pytorch && git checkout 1ccf72b2d11cd00b47aef6d6cd054c088aa6f083
-RUN cd intel-extension-for-pytorch && git submodule update --init --recursive && USE_AOT_DEVLIST='pvc,ats-m150' BUILD_SEPARATE_OPS=OFF BUILD_WITH_CPU=OFF USE_XETLA=ON python setup.py install && rm -rf /usr/src/intel-extension-for-pytorch
 
 # Install benchmarker
 COPY --from=builder /usr/src/target/release-opt/text-generation-benchmark /usr/local/bin/text-generation-benchmark
@@ -213,11 +209,10 @@ ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/opt/conda/lib/"
 COPY proto proto
 COPY server server
 COPY server/Makefile server/Makefile
-ENV UV_SYSTEM_PYTHON=1
 RUN cd server && \
     make gen-server && \
-    pip install -U pip uv && \
-    uv pip install -e ".[accelerate, compressed-tensors, peft, outlines]" --no-cache-dir
+    # Use pip: this step no longer bootstraps uv. Install the locked deps, then the server package.
+    pip install -r requirements_intel.txt --no-cache-dir && pip install ".[accelerate, compressed-tensors, peft, outlines]" --no-cache-dir
 
 # Install benchmarker
 COPY --from=builder /usr/src/target/release-opt/text-generation-benchmark /usr/local/bin/text-generation-benchmark
@@ -227,9 +222,9 @@ COPY --from=builder /usr/src/target/release-opt/text-generation-router /usr/loca
 COPY --from=builder /usr/src/target/release-opt/text-generation-launcher /usr/local/bin/text-generation-launcher
 
 FROM ${PLATFORM} AS final
-ENV ATTENTION=flashdecoding-ipex
-ENV PREFIX_CACHING=1
-ENV PREFILL_CHUNKING=1
+ENV ATTENTION=paged
+ENV PREFIX_CACHING=0
+ENV PREFILL_CHUNKING=0
 ENV CUDA_GRAPHS=0
 ENTRYPOINT ["text-generation-launcher"]
 CMD ["--json-output"]
diff --git a/server/Makefile b/server/Makefile
index 252e355d..30a31a1c 100644
--- a/server/Makefile
+++ b/server/Makefile
@@ -35,3 +35,8 @@ install-cuda: install-server install-flash-attention-v2-cuda install-flash-atten
 	uv pip install nvidia-nccl-cu12==2.22.3
 
 install-rocm: install-server install-flash-attention-v2-rocm  install-vllm-rocm
+
+export-requirements:
+	uv pip compile pyproject.toml --python-version 3.11 --extra attention --extra bnb --extra accelerate --extra compressed-tensors --extra marlin --extra moe --extra quantize --extra peft --extra outlines -o requirements_cuda.txt
+	uv pip compile pyproject.toml --python-version 3.11 --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines -o requirements_intel.txt
+	uv pip compile pyproject.toml --python-version 3.11 --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines -o requirements_rocm.txt
diff --git a/server/requirements_cuda.txt b/server/requirements_cuda.txt
index ee75b2b5..d450c8a8 100644
--- a/server/requirements_cuda.txt
+++ b/server/requirements_cuda.txt
@@ -1,55 +1,385 @@
-certifi==2024.8.30 ; python_version >= "3.9" and python_version < "3.13"
-charset-normalizer==3.4.0 ; python_version >= "3.9" and python_version < "3.13"
-click==8.1.7 ; python_version >= "3.9" and python_version < "3.13"
-colorama==0.4.6 ; python_version >= "3.9" and python_version < "3.13" and (sys_platform == "win32" or platform_system == "Windows")
-deprecated==1.2.14 ; python_version >= "3.9" and python_version < "3.13"
-einops==0.8.0 ; python_version >= "3.9" and python_version < "3.13"
-filelock==3.16.1 ; python_version >= "3.9" and python_version < "3.13"
-fsspec==2024.6.1 ; python_version >= "3.9" and python_version < "3.13"
-googleapis-common-protos==1.65.0 ; python_version >= "3.9" and python_version < "3.13"
-grpc-interceptor==0.15.4 ; python_version >= "3.9" and python_version < "3.13"
-grpcio-reflection==1.62.3 ; python_version >= "3.9" and python_version < "3.13"
-grpcio-status==1.62.3 ; python_version >= "3.9" and python_version < "3.13"
-grpcio==1.68.0 ; python_version >= "3.9" and python_version < "3.13"
-hf-transfer==0.1.8 ; python_version >= "3.9" and python_version < "3.13"
-huggingface-hub==0.23.5 ; python_version >= "3.9" and python_version < "3.13"
-idna==3.10 ; python_version >= "3.9" and python_version < "3.13"
-importlib-metadata==7.1.0 ; python_version >= "3.9" and python_version < "3.13"
-loguru==0.7.2 ; python_version >= "3.9" and python_version < "3.13"
-markdown-it-py==3.0.0 ; python_version >= "3.9" and python_version < "3.13"
-mdurl==0.1.2 ; python_version >= "3.9" and python_version < "3.13"
-numpy==1.26.4 ; python_version >= "3.9" and python_version < "3.13"
-opentelemetry-api==1.27.0 ; python_version >= "3.9" and python_version < "3.13"
-opentelemetry-exporter-otlp-proto-common==1.27.0 ; python_version >= "3.9" and python_version < "3.13"
-opentelemetry-exporter-otlp-proto-grpc==1.27.0 ; python_version >= "3.9" and python_version < "3.13"
-opentelemetry-exporter-otlp-proto-http==1.27.0 ; python_version >= "3.9" and python_version < "3.13"
-opentelemetry-exporter-otlp==1.27.0 ; python_version >= "3.9" and python_version < "3.13"
-opentelemetry-instrumentation-grpc==0.48b0 ; python_version >= "3.9" and python_version < "3.13"
-opentelemetry-instrumentation==0.48b0 ; python_version >= "3.9" and python_version < "3.13"
-opentelemetry-proto==1.27.0 ; python_version >= "3.9" and python_version < "3.13"
-opentelemetry-sdk==1.27.0 ; python_version >= "3.9" and python_version < "3.13"
-opentelemetry-semantic-conventions==0.48b0 ; python_version >= "3.9" and python_version < "3.13"
-packaging==24.1 ; python_version >= "3.9" and python_version < "3.13"
-pillow==11.0.0 ; python_version >= "3.9" and python_version < "3.13"
-prometheus-client==0.20.0 ; python_version >= "3.9" and python_version < "3.13"
-protobuf==4.25.5 ; python_version >= "3.9" and python_version < "3.13"
-py-cpuinfo==9.0.0 ; python_version >= "3.9" and python_version < "3.13"
-pygments==2.18.0 ; python_version >= "3.9" and python_version < "3.13"
-pyyaml==6.0.2 ; python_version >= "3.9" and python_version < "3.13"
-regex==2024.9.11 ; python_version >= "3.9" and python_version < "3.13"
-requests==2.32.3 ; python_version >= "3.9" and python_version < "3.13"
-rich==13.9.4 ; python_version >= "3.9" and python_version < "3.13"
-safetensors==0.4.5 ; python_version >= "3.9" and python_version < "3.13"
-scipy==1.13.1 ; python_version >= "3.9" and python_version < "3.13"
-sentencepiece==0.2.0 ; python_version >= "3.9" and python_version < "3.13"
-setuptools==75.2.0 ; python_version >= "3.9" and python_version < "3.13"
-shellingham==1.5.4 ; python_version >= "3.9" and python_version < "3.13"
-tokenizers==0.20.3 ; python_version >= "3.9" and python_version < "3.13"
-tqdm==4.66.5 ; python_version >= "3.9" and python_version < "3.13"
-transformers==4.46.3 ; python_version >= "3.9" and python_version < "3.13"
-typer==0.12.5 ; python_version >= "3.9" and python_version < "3.13"
-typing-extensions==4.12.2 ; python_version >= "3.9" and python_version < "3.13"
-urllib3==2.2.3 ; python_version >= "3.9" and python_version < "3.13"
-win32-setctime==1.1.0 ; python_version >= "3.9" and python_version < "3.13" and sys_platform == "win32"
-wrapt==1.16.0 ; python_version >= "3.9" and python_version < "3.13"
-zipp==3.20.2 ; python_version >= "3.9" and python_version < "3.13"
+# This file was autogenerated by uv via the following command:
+#    uv pip compile pyproject.toml --extra attention --extra bnb --extra accelerate --extra compressed-tensors --extra marlin --extra moe --extra quantize --extra peft --extra outlines -o requirements_cuda.txt
+accelerate==1.3.0
+    # via
+    #   text-generation-server (pyproject.toml)
+    #   peft
+aiohappyeyeballs==2.4.4
+    # via aiohttp
+aiohttp==3.11.11
+    # via
+    #   datasets
+    #   fsspec
+aiosignal==1.3.2
+    # via aiohttp
+airportsdata==20241001
+    # via outlines
+annotated-types==0.7.0
+    # via pydantic
+attention-kernels @ https://github.com/danieldk/attention-kernels/releases/download/v0.2.0.post2/attention_kernels-0.2.0.post2+cu123torch2.5-cp39-abi3-linux_x86_64.whl
+    # via text-generation-server (pyproject.toml)
+attrs==25.1.0
+    # via
+    #   aiohttp
+    #   jsonschema
+    #   referencing
+bitsandbytes==0.45.1
+    # via text-generation-server (pyproject.toml)
+certifi==2024.8.30
+    # via requests
+charset-normalizer==3.4.0
+    # via requests
+click==8.1.7
+    # via typer
+cloudpickle==3.1.1
+    # via outlines
+compressed-tensors==0.9.1
+    # via text-generation-server (pyproject.toml)
+datasets==2.21.0
+    # via text-generation-server (pyproject.toml)
+deprecated==1.2.14
+    # via
+    #   opentelemetry-api
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
+    #   opentelemetry-semantic-conventions
+dill==0.3.8
+    # via
+    #   datasets
+    #   multiprocess
+diskcache==5.6.3
+    # via outlines
+einops==0.8.0
+    # via text-generation-server (pyproject.toml)
+filelock==3.16.1
+    # via
+    #   datasets
+    #   huggingface-hub
+    #   torch
+    #   transformers
+frozenlist==1.5.0
+    # via
+    #   aiohttp
+    #   aiosignal
+fsspec==2024.6.1
+    # via
+    #   datasets
+    #   huggingface-hub
+    #   torch
+genson==1.3.0
+    # via outlines
+googleapis-common-protos==1.65.0
+    # via
+    #   grpcio-status
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
+grpc-interceptor==0.15.4
+    # via text-generation-server (pyproject.toml)
+grpcio==1.68.0
+    # via
+    #   text-generation-server (pyproject.toml)
+    #   grpc-interceptor
+    #   grpcio-reflection
+    #   grpcio-status
+    #   opentelemetry-exporter-otlp-proto-grpc
+grpcio-reflection==1.68.0
+    # via text-generation-server (pyproject.toml)
+grpcio-status==1.68.0
+    # via text-generation-server (pyproject.toml)
+hf-transfer==0.1.8
+    # via text-generation-server (pyproject.toml)
+huggingface-hub==0.28.1
+    # via
+    #   accelerate
+    #   datasets
+    #   peft
+    #   tokenizers
+    #   transformers
+idna==3.10
+    # via
+    #   requests
+    #   yarl
+importlib-metadata==7.1.0
+    # via opentelemetry-api
+interegular==0.3.3
+    # via
+    #   outlines
+    #   outlines-core
+jinja2==3.1.5
+    # via
+    #   outlines
+    #   torch
+jsonschema==4.23.0
+    # via
+    #   outlines
+    #   outlines-core
+jsonschema-specifications==2024.10.1
+    # via jsonschema
+lark==1.2.2
+    # via outlines
+loguru==0.7.3
+    # via text-generation-server (pyproject.toml)
+markdown-it-py==3.0.0
+    # via rich
+markupsafe==3.0.2
+    # via jinja2
+marlin-kernels @ https://github.com/danieldk/marlin-kernels/releases/download/v0.3.7/marlin_kernels-0.3.7+cu123torch2.5-cp311-cp311-linux_x86_64.whl
+    # via text-generation-server (pyproject.toml)
+mdurl==0.1.2
+    # via markdown-it-py
+moe-kernels @ https://github.com/danieldk/moe-kernels/releases/download/v0.8.2/moe_kernels-0.8.2+cu123torch2.5-cp39-abi3-linux_x86_64.whl
+    # via text-generation-server (pyproject.toml)
+mpmath==1.3.0
+    # via sympy
+multidict==6.1.0
+    # via
+    #   aiohttp
+    #   yarl
+multiprocess==0.70.16
+    # via datasets
+nest-asyncio==1.6.0
+    # via outlines
+networkx==3.4.2
+    # via torch
+numpy==1.26.4
+    # via
+    #   text-generation-server (pyproject.toml)
+    #   accelerate
+    #   bitsandbytes
+    #   datasets
+    #   outlines
+    #   pandas
+    #   peft
+    #   scipy
+    #   transformers
+nvidia-cublas-cu12==12.4.5.8
+    # via
+    #   nvidia-cudnn-cu12
+    #   nvidia-cusolver-cu12
+    #   torch
+nvidia-cuda-cupti-cu12==12.4.127
+    # via torch
+nvidia-cuda-nvrtc-cu12==12.4.127
+    # via torch
+nvidia-cuda-runtime-cu12==12.4.127
+    # via torch
+nvidia-cudnn-cu12==9.1.0.70
+    # via torch
+nvidia-cufft-cu12==11.2.1.3
+    # via torch
+nvidia-curand-cu12==10.3.5.147
+    # via torch
+nvidia-cusolver-cu12==11.6.1.9
+    # via torch
+nvidia-cusparse-cu12==12.3.1.170
+    # via
+    #   nvidia-cusolver-cu12
+    #   torch
+nvidia-cusparselt-cu12==0.6.2
+    # via torch
+nvidia-ml-py==12.570.86
+    # via moe-kernels
+nvidia-nccl-cu12==2.21.5
+    # via torch
+nvidia-nvjitlink-cu12==12.4.127
+    # via
+    #   nvidia-cusolver-cu12
+    #   nvidia-cusparse-cu12
+    #   torch
+nvidia-nvtx-cu12==12.4.127
+    # via torch
+opentelemetry-api==1.30.0
+    # via
+    #   text-generation-server (pyproject.toml)
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
+    #   opentelemetry-instrumentation
+    #   opentelemetry-instrumentation-grpc
+    #   opentelemetry-sdk
+    #   opentelemetry-semantic-conventions
+opentelemetry-exporter-otlp==1.30.0
+    # via text-generation-server (pyproject.toml)
+opentelemetry-exporter-otlp-proto-common==1.30.0
+    # via
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
+opentelemetry-exporter-otlp-proto-grpc==1.30.0
+    # via opentelemetry-exporter-otlp
+opentelemetry-exporter-otlp-proto-http==1.30.0
+    # via opentelemetry-exporter-otlp
+opentelemetry-instrumentation==0.51b0
+    # via opentelemetry-instrumentation-grpc
+opentelemetry-instrumentation-grpc==0.51b0
+    # via text-generation-server (pyproject.toml)
+opentelemetry-proto==1.30.0
+    # via
+    #   opentelemetry-exporter-otlp-proto-common
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
+opentelemetry-sdk==1.30.0
+    # via
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
+opentelemetry-semantic-conventions==0.51b0
+    # via
+    #   opentelemetry-instrumentation
+    #   opentelemetry-instrumentation-grpc
+    #   opentelemetry-sdk
+outlines==0.1.14
+    # via text-generation-server (pyproject.toml)
+outlines-core==0.1.26
+    # via outlines
+packaging==24.1
+    # via
+    #   accelerate
+    #   datasets
+    #   huggingface-hub
+    #   opentelemetry-instrumentation
+    #   peft
+    #   transformers
+pandas==2.2.3
+    # via datasets
+peft==0.14.0
+    # via text-generation-server (pyproject.toml)
+pillow==11.1.0
+    # via text-generation-server (pyproject.toml)
+prometheus-client==0.21.1
+    # via text-generation-server (pyproject.toml)
+propcache==0.2.1
+    # via
+    #   aiohttp
+    #   yarl
+protobuf==5.29.3
+    # via
+    #   text-generation-server (pyproject.toml)
+    #   googleapis-common-protos
+    #   grpcio-reflection
+    #   grpcio-status
+    #   opentelemetry-proto
+psutil==6.1.1
+    # via
+    #   accelerate
+    #   peft
+py-cpuinfo==9.0.0
+    # via text-generation-server (pyproject.toml)
+pyarrow==19.0.0
+    # via datasets
+pycountry==24.6.1
+    # via outlines
+pydantic==2.10.6
+    # via
+    #   compressed-tensors
+    #   outlines
+pydantic-core==2.27.2
+    # via pydantic
+pygments==2.18.0
+    # via rich
+python-dateutil==2.9.0.post0
+    # via pandas
+pytz==2025.1
+    # via pandas
+pyyaml==6.0.2
+    # via
+    #   accelerate
+    #   datasets
+    #   huggingface-hub
+    #   peft
+    #   transformers
+referencing==0.36.2
+    # via
+    #   jsonschema
+    #   jsonschema-specifications
+    #   outlines
+regex==2024.9.11
+    # via transformers
+requests==2.32.3
+    # via
+    #   datasets
+    #   huggingface-hub
+    #   opentelemetry-exporter-otlp-proto-http
+    #   outlines
+    #   transformers
+rich==13.9.4
+    # via
+    #   text-generation-server (pyproject.toml)
+    #   typer
+rpds-py==0.22.3
+    # via
+    #   jsonschema
+    #   referencing
+safetensors==0.4.5
+    # via
+    #   text-generation-server (pyproject.toml)
+    #   accelerate
+    #   peft
+    #   transformers
+scipy==1.13.1
+    # via text-generation-server (pyproject.toml)
+sentencepiece==0.2.0
+    # via text-generation-server (pyproject.toml)
+setuptools==75.2.0
+    # via torch
+shellingham==1.5.4
+    # via typer
+six==1.17.0
+    # via python-dateutil
+sympy==1.13.1
+    # via torch
+texttable==1.7.0
+    # via text-generation-server (pyproject.toml)
+tokenizers==0.20.3
+    # via
+    #   text-generation-server (pyproject.toml)
+    #   transformers
+torch==2.6.0
+    # via
+    #   accelerate
+    #   attention-kernels
+    #   bitsandbytes
+    #   compressed-tensors
+    #   marlin-kernels
+    #   moe-kernels
+    #   outlines
+    #   peft
+tqdm==4.66.5
+    # via
+    #   datasets
+    #   huggingface-hub
+    #   outlines
+    #   peft
+    #   transformers
+transformers==4.46.3
+    # via
+    #   compressed-tensors
+    #   peft
+triton==3.2.0
+    # via
+    #   moe-kernels
+    #   torch
+typer==0.15.1
+    # via text-generation-server (pyproject.toml)
+typing-extensions==4.12.2
+    # via
+    #   huggingface-hub
+    #   opentelemetry-sdk
+    #   outlines
+    #   pydantic
+    #   pydantic-core
+    #   referencing
+    #   torch
+    #   typer
+tzdata==2025.1
+    # via pandas
+urllib3==2.2.3
+    # via requests
+wrapt==1.16.0
+    # via
+    #   deprecated
+    #   opentelemetry-instrumentation
+    #   opentelemetry-instrumentation-grpc
+xxhash==3.5.0
+    # via datasets
+yarl==1.18.3
+    # via aiohttp
+zipp==3.20.2
+    # via importlib-metadata
diff --git a/server/requirements_intel.txt b/server/requirements_intel.txt
index ee75b2b5..cdadb937 100644
--- a/server/requirements_intel.txt
+++ b/server/requirements_intel.txt
@@ -1,55 +1,368 @@
-certifi==2024.8.30 ; python_version >= "3.9" and python_version < "3.13"
-charset-normalizer==3.4.0 ; python_version >= "3.9" and python_version < "3.13"
-click==8.1.7 ; python_version >= "3.9" and python_version < "3.13"
-colorama==0.4.6 ; python_version >= "3.9" and python_version < "3.13" and (sys_platform == "win32" or platform_system == "Windows")
-deprecated==1.2.14 ; python_version >= "3.9" and python_version < "3.13"
-einops==0.8.0 ; python_version >= "3.9" and python_version < "3.13"
-filelock==3.16.1 ; python_version >= "3.9" and python_version < "3.13"
-fsspec==2024.6.1 ; python_version >= "3.9" and python_version < "3.13"
-googleapis-common-protos==1.65.0 ; python_version >= "3.9" and python_version < "3.13"
-grpc-interceptor==0.15.4 ; python_version >= "3.9" and python_version < "3.13"
-grpcio-reflection==1.62.3 ; python_version >= "3.9" and python_version < "3.13"
-grpcio-status==1.62.3 ; python_version >= "3.9" and python_version < "3.13"
-grpcio==1.68.0 ; python_version >= "3.9" and python_version < "3.13"
-hf-transfer==0.1.8 ; python_version >= "3.9" and python_version < "3.13"
-huggingface-hub==0.23.5 ; python_version >= "3.9" and python_version < "3.13"
-idna==3.10 ; python_version >= "3.9" and python_version < "3.13"
-importlib-metadata==7.1.0 ; python_version >= "3.9" and python_version < "3.13"
-loguru==0.7.2 ; python_version >= "3.9" and python_version < "3.13"
-markdown-it-py==3.0.0 ; python_version >= "3.9" and python_version < "3.13"
-mdurl==0.1.2 ; python_version >= "3.9" and python_version < "3.13"
-numpy==1.26.4 ; python_version >= "3.9" and python_version < "3.13"
-opentelemetry-api==1.27.0 ; python_version >= "3.9" and python_version < "3.13"
-opentelemetry-exporter-otlp-proto-common==1.27.0 ; python_version >= "3.9" and python_version < "3.13"
-opentelemetry-exporter-otlp-proto-grpc==1.27.0 ; python_version >= "3.9" and python_version < "3.13"
-opentelemetry-exporter-otlp-proto-http==1.27.0 ; python_version >= "3.9" and python_version < "3.13"
-opentelemetry-exporter-otlp==1.27.0 ; python_version >= "3.9" and python_version < "3.13"
-opentelemetry-instrumentation-grpc==0.48b0 ; python_version >= "3.9" and python_version < "3.13"
-opentelemetry-instrumentation==0.48b0 ; python_version >= "3.9" and python_version < "3.13"
-opentelemetry-proto==1.27.0 ; python_version >= "3.9" and python_version < "3.13"
-opentelemetry-sdk==1.27.0 ; python_version >= "3.9" and python_version < "3.13"
-opentelemetry-semantic-conventions==0.48b0 ; python_version >= "3.9" and python_version < "3.13"
-packaging==24.1 ; python_version >= "3.9" and python_version < "3.13"
-pillow==11.0.0 ; python_version >= "3.9" and python_version < "3.13"
-prometheus-client==0.20.0 ; python_version >= "3.9" and python_version < "3.13"
-protobuf==4.25.5 ; python_version >= "3.9" and python_version < "3.13"
-py-cpuinfo==9.0.0 ; python_version >= "3.9" and python_version < "3.13"
-pygments==2.18.0 ; python_version >= "3.9" and python_version < "3.13"
-pyyaml==6.0.2 ; python_version >= "3.9" and python_version < "3.13"
-regex==2024.9.11 ; python_version >= "3.9" and python_version < "3.13"
-requests==2.32.3 ; python_version >= "3.9" and python_version < "3.13"
-rich==13.9.4 ; python_version >= "3.9" and python_version < "3.13"
-safetensors==0.4.5 ; python_version >= "3.9" and python_version < "3.13"
-scipy==1.13.1 ; python_version >= "3.9" and python_version < "3.13"
-sentencepiece==0.2.0 ; python_version >= "3.9" and python_version < "3.13"
-setuptools==75.2.0 ; python_version >= "3.9" and python_version < "3.13"
-shellingham==1.5.4 ; python_version >= "3.9" and python_version < "3.13"
-tokenizers==0.20.3 ; python_version >= "3.9" and python_version < "3.13"
-tqdm==4.66.5 ; python_version >= "3.9" and python_version < "3.13"
-transformers==4.46.3 ; python_version >= "3.9" and python_version < "3.13"
-typer==0.12.5 ; python_version >= "3.9" and python_version < "3.13"
-typing-extensions==4.12.2 ; python_version >= "3.9" and python_version < "3.13"
-urllib3==2.2.3 ; python_version >= "3.9" and python_version < "3.13"
-win32-setctime==1.1.0 ; python_version >= "3.9" and python_version < "3.13" and sys_platform == "win32"
-wrapt==1.16.0 ; python_version >= "3.9" and python_version < "3.13"
-zipp==3.20.2 ; python_version >= "3.9" and python_version < "3.13"
+# This file was autogenerated by uv via the following command:
+#    uv pip compile pyproject.toml --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines -o requirements_intel.txt
+accelerate==1.3.0
+    # via
+    #   text-generation-server (pyproject.toml)
+    #   peft
+aiohappyeyeballs==2.4.4
+    # via aiohttp
+aiohttp==3.11.11
+    # via
+    #   datasets
+    #   fsspec
+aiosignal==1.3.2
+    # via aiohttp
+airportsdata==20241001
+    # via outlines
+annotated-types==0.7.0
+    # via pydantic
+attrs==25.1.0
+    # via
+    #   aiohttp
+    #   jsonschema
+    #   referencing
+certifi==2024.8.30
+    # via requests
+charset-normalizer==3.4.0
+    # via requests
+click==8.1.7
+    # via typer
+cloudpickle==3.1.1
+    # via outlines
+compressed-tensors==0.9.1
+    # via text-generation-server (pyproject.toml)
+datasets==2.21.0
+    # via text-generation-server (pyproject.toml)
+deprecated==1.2.14
+    # via
+    #   opentelemetry-api
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
+    #   opentelemetry-semantic-conventions
+dill==0.3.8
+    # via
+    #   datasets
+    #   multiprocess
+diskcache==5.6.3
+    # via outlines
+einops==0.8.0
+    # via text-generation-server (pyproject.toml)
+filelock==3.16.1
+    # via
+    #   datasets
+    #   huggingface-hub
+    #   torch
+    #   transformers
+frozenlist==1.5.0
+    # via
+    #   aiohttp
+    #   aiosignal
+fsspec==2024.6.1
+    # via
+    #   datasets
+    #   huggingface-hub
+    #   torch
+genson==1.3.0
+    # via outlines
+googleapis-common-protos==1.65.0
+    # via
+    #   grpcio-status
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
+grpc-interceptor==0.15.4
+    # via text-generation-server (pyproject.toml)
+grpcio==1.68.0
+    # via
+    #   text-generation-server (pyproject.toml)
+    #   grpc-interceptor
+    #   grpcio-reflection
+    #   grpcio-status
+    #   opentelemetry-exporter-otlp-proto-grpc
+grpcio-reflection==1.68.0
+    # via text-generation-server (pyproject.toml)
+grpcio-status==1.68.0
+    # via text-generation-server (pyproject.toml)
+hf-transfer==0.1.8
+    # via text-generation-server (pyproject.toml)
+huggingface-hub==0.28.1
+    # via
+    #   accelerate
+    #   datasets
+    #   peft
+    #   tokenizers
+    #   transformers
+idna==3.10
+    # via
+    #   requests
+    #   yarl
+importlib-metadata==7.1.0
+    # via opentelemetry-api
+interegular==0.3.3
+    # via
+    #   outlines
+    #   outlines-core
+jinja2==3.1.5
+    # via
+    #   outlines
+    #   torch
+jsonschema==4.23.0
+    # via
+    #   outlines
+    #   outlines-core
+jsonschema-specifications==2024.10.1
+    # via jsonschema
+lark==1.2.2
+    # via outlines
+loguru==0.7.3
+    # via text-generation-server (pyproject.toml)
+markdown-it-py==3.0.0
+    # via rich
+markupsafe==3.0.2
+    # via jinja2
+mdurl==0.1.2
+    # via markdown-it-py
+mpmath==1.3.0
+    # via sympy
+multidict==6.1.0
+    # via
+    #   aiohttp
+    #   yarl
+multiprocess==0.70.16
+    # via datasets
+nest-asyncio==1.6.0
+    # via outlines
+networkx==3.4.2
+    # via torch
+numpy==1.26.4
+    # via
+    #   text-generation-server (pyproject.toml)
+    #   accelerate
+    #   datasets
+    #   outlines
+    #   pandas
+    #   peft
+    #   scipy
+    #   transformers
+nvidia-cublas-cu12==12.4.5.8
+    # via
+    #   nvidia-cudnn-cu12
+    #   nvidia-cusolver-cu12
+    #   torch
+nvidia-cuda-cupti-cu12==12.4.127
+    # via torch
+nvidia-cuda-nvrtc-cu12==12.4.127
+    # via torch
+nvidia-cuda-runtime-cu12==12.4.127
+    # via torch
+nvidia-cudnn-cu12==9.1.0.70
+    # via torch
+nvidia-cufft-cu12==11.2.1.3
+    # via torch
+nvidia-curand-cu12==10.3.5.147
+    # via torch
+nvidia-cusolver-cu12==11.6.1.9
+    # via torch
+nvidia-cusparse-cu12==12.3.1.170
+    # via
+    #   nvidia-cusolver-cu12
+    #   torch
+nvidia-cusparselt-cu12==0.6.2
+    # via torch
+nvidia-nccl-cu12==2.21.5
+    # via torch
+nvidia-nvjitlink-cu12==12.4.127
+    # via
+    #   nvidia-cusolver-cu12
+    #   nvidia-cusparse-cu12
+    #   torch
+nvidia-nvtx-cu12==12.4.127
+    # via torch
+opentelemetry-api==1.30.0
+    # via
+    #   text-generation-server (pyproject.toml)
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
+    #   opentelemetry-instrumentation
+    #   opentelemetry-instrumentation-grpc
+    #   opentelemetry-sdk
+    #   opentelemetry-semantic-conventions
+opentelemetry-exporter-otlp==1.30.0
+    # via text-generation-server (pyproject.toml)
+opentelemetry-exporter-otlp-proto-common==1.30.0
+    # via
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
+opentelemetry-exporter-otlp-proto-grpc==1.30.0
+    # via opentelemetry-exporter-otlp
+opentelemetry-exporter-otlp-proto-http==1.30.0
+    # via opentelemetry-exporter-otlp
+opentelemetry-instrumentation==0.51b0
+    # via opentelemetry-instrumentation-grpc
+opentelemetry-instrumentation-grpc==0.51b0
+    # via text-generation-server (pyproject.toml)
+opentelemetry-proto==1.30.0
+    # via
+    #   opentelemetry-exporter-otlp-proto-common
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
+opentelemetry-sdk==1.30.0
+    # via
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
+opentelemetry-semantic-conventions==0.51b0
+    # via
+    #   opentelemetry-instrumentation
+    #   opentelemetry-instrumentation-grpc
+    #   opentelemetry-sdk
+outlines==0.1.14
+    # via text-generation-server (pyproject.toml)
+outlines-core==0.1.26
+    # via outlines
+packaging==24.1
+    # via
+    #   accelerate
+    #   datasets
+    #   huggingface-hub
+    #   opentelemetry-instrumentation
+    #   peft
+    #   transformers
+pandas==2.2.3
+    # via datasets
+peft==0.14.0
+    # via text-generation-server (pyproject.toml)
+pillow==11.1.0
+    # via text-generation-server (pyproject.toml)
+prometheus-client==0.21.1
+    # via text-generation-server (pyproject.toml)
+propcache==0.2.1
+    # via
+    #   aiohttp
+    #   yarl
+protobuf==5.29.3
+    # via
+    #   text-generation-server (pyproject.toml)
+    #   googleapis-common-protos
+    #   grpcio-reflection
+    #   grpcio-status
+    #   opentelemetry-proto
+psutil==6.1.1
+    # via
+    #   accelerate
+    #   peft
+py-cpuinfo==9.0.0
+    # via text-generation-server (pyproject.toml)
+pyarrow==19.0.0
+    # via datasets
+pycountry==24.6.1
+    # via outlines
+pydantic==2.10.6
+    # via
+    #   compressed-tensors
+    #   outlines
+pydantic-core==2.27.2
+    # via pydantic
+pygments==2.18.0
+    # via rich
+python-dateutil==2.9.0.post0
+    # via pandas
+pytz==2025.1
+    # via pandas
+pyyaml==6.0.2
+    # via
+    #   accelerate
+    #   datasets
+    #   huggingface-hub
+    #   peft
+    #   transformers
+referencing==0.36.2
+    # via
+    #   jsonschema
+    #   jsonschema-specifications
+    #   outlines
+regex==2024.9.11
+    # via transformers
+requests==2.32.3
+    # via
+    #   datasets
+    #   huggingface-hub
+    #   opentelemetry-exporter-otlp-proto-http
+    #   outlines
+    #   transformers
+rich==13.9.4
+    # via
+    #   text-generation-server (pyproject.toml)
+    #   typer
+rpds-py==0.22.3
+    # via
+    #   jsonschema
+    #   referencing
+safetensors==0.4.5
+    # via
+    #   text-generation-server (pyproject.toml)
+    #   accelerate
+    #   peft
+    #   transformers
+scipy==1.13.1
+    # via text-generation-server (pyproject.toml)
+sentencepiece==0.2.0
+    # via text-generation-server (pyproject.toml)
+setuptools==75.2.0
+    # via torch
+shellingham==1.5.4
+    # via typer
+six==1.17.0
+    # via python-dateutil
+sympy==1.13.1
+    # via torch
+texttable==1.7.0
+    # via text-generation-server (pyproject.toml)
+tokenizers==0.20.3
+    # via
+    #   text-generation-server (pyproject.toml)
+    #   transformers
+torch==2.6.0
+    # via
+    #   accelerate
+    #   compressed-tensors
+    #   outlines
+    #   peft
+tqdm==4.66.5
+    # via
+    #   datasets
+    #   huggingface-hub
+    #   outlines
+    #   peft
+    #   transformers
+transformers==4.46.3
+    # via
+    #   compressed-tensors
+    #   peft
+triton==3.2.0
+    # via torch
+typer==0.15.1
+    # via text-generation-server (pyproject.toml)
+typing-extensions==4.12.2
+    # via
+    #   huggingface-hub
+    #   opentelemetry-sdk
+    #   outlines
+    #   pydantic
+    #   pydantic-core
+    #   referencing
+    #   torch
+    #   typer
+tzdata==2025.1
+    # via pandas
+urllib3==2.2.3
+    # via requests
+wrapt==1.16.0
+    # via
+    #   deprecated
+    #   opentelemetry-instrumentation
+    #   opentelemetry-instrumentation-grpc
+xxhash==3.5.0
+    # via datasets
+yarl==1.18.3
+    # via aiohttp
+zipp==3.20.2
+    # via importlib-metadata
diff --git a/server/requirements_rocm.txt b/server/requirements_rocm.txt
index ee75b2b5..2c768eaf 100644
--- a/server/requirements_rocm.txt
+++ b/server/requirements_rocm.txt
@@ -1,55 +1,368 @@
-certifi==2024.8.30 ; python_version >= "3.9" and python_version < "3.13"
-charset-normalizer==3.4.0 ; python_version >= "3.9" and python_version < "3.13"
-click==8.1.7 ; python_version >= "3.9" and python_version < "3.13"
-colorama==0.4.6 ; python_version >= "3.9" and python_version < "3.13" and (sys_platform == "win32" or platform_system == "Windows")
-deprecated==1.2.14 ; python_version >= "3.9" and python_version < "3.13"
-einops==0.8.0 ; python_version >= "3.9" and python_version < "3.13"
-filelock==3.16.1 ; python_version >= "3.9" and python_version < "3.13"
-fsspec==2024.6.1 ; python_version >= "3.9" and python_version < "3.13"
-googleapis-common-protos==1.65.0 ; python_version >= "3.9" and python_version < "3.13"
-grpc-interceptor==0.15.4 ; python_version >= "3.9" and python_version < "3.13"
-grpcio-reflection==1.62.3 ; python_version >= "3.9" and python_version < "3.13"
-grpcio-status==1.62.3 ; python_version >= "3.9" and python_version < "3.13"
-grpcio==1.68.0 ; python_version >= "3.9" and python_version < "3.13"
-hf-transfer==0.1.8 ; python_version >= "3.9" and python_version < "3.13"
-huggingface-hub==0.23.5 ; python_version >= "3.9" and python_version < "3.13"
-idna==3.10 ; python_version >= "3.9" and python_version < "3.13"
-importlib-metadata==7.1.0 ; python_version >= "3.9" and python_version < "3.13"
-loguru==0.7.2 ; python_version >= "3.9" and python_version < "3.13"
-markdown-it-py==3.0.0 ; python_version >= "3.9" and python_version < "3.13"
-mdurl==0.1.2 ; python_version >= "3.9" and python_version < "3.13"
-numpy==1.26.4 ; python_version >= "3.9" and python_version < "3.13"
-opentelemetry-api==1.27.0 ; python_version >= "3.9" and python_version < "3.13"
-opentelemetry-exporter-otlp-proto-common==1.27.0 ; python_version >= "3.9" and python_version < "3.13"
-opentelemetry-exporter-otlp-proto-grpc==1.27.0 ; python_version >= "3.9" and python_version < "3.13"
-opentelemetry-exporter-otlp-proto-http==1.27.0 ; python_version >= "3.9" and python_version < "3.13"
-opentelemetry-exporter-otlp==1.27.0 ; python_version >= "3.9" and python_version < "3.13"
-opentelemetry-instrumentation-grpc==0.48b0 ; python_version >= "3.9" and python_version < "3.13"
-opentelemetry-instrumentation==0.48b0 ; python_version >= "3.9" and python_version < "3.13"
-opentelemetry-proto==1.27.0 ; python_version >= "3.9" and python_version < "3.13"
-opentelemetry-sdk==1.27.0 ; python_version >= "3.9" and python_version < "3.13"
-opentelemetry-semantic-conventions==0.48b0 ; python_version >= "3.9" and python_version < "3.13"
-packaging==24.1 ; python_version >= "3.9" and python_version < "3.13"
-pillow==11.0.0 ; python_version >= "3.9" and python_version < "3.13"
-prometheus-client==0.20.0 ; python_version >= "3.9" and python_version < "3.13"
-protobuf==4.25.5 ; python_version >= "3.9" and python_version < "3.13"
-py-cpuinfo==9.0.0 ; python_version >= "3.9" and python_version < "3.13"
-pygments==2.18.0 ; python_version >= "3.9" and python_version < "3.13"
-pyyaml==6.0.2 ; python_version >= "3.9" and python_version < "3.13"
-regex==2024.9.11 ; python_version >= "3.9" and python_version < "3.13"
-requests==2.32.3 ; python_version >= "3.9" and python_version < "3.13"
-rich==13.9.4 ; python_version >= "3.9" and python_version < "3.13"
-safetensors==0.4.5 ; python_version >= "3.9" and python_version < "3.13"
-scipy==1.13.1 ; python_version >= "3.9" and python_version < "3.13"
-sentencepiece==0.2.0 ; python_version >= "3.9" and python_version < "3.13"
-setuptools==75.2.0 ; python_version >= "3.9" and python_version < "3.13"
-shellingham==1.5.4 ; python_version >= "3.9" and python_version < "3.13"
-tokenizers==0.20.3 ; python_version >= "3.9" and python_version < "3.13"
-tqdm==4.66.5 ; python_version >= "3.9" and python_version < "3.13"
-transformers==4.46.3 ; python_version >= "3.9" and python_version < "3.13"
-typer==0.12.5 ; python_version >= "3.9" and python_version < "3.13"
-typing-extensions==4.12.2 ; python_version >= "3.9" and python_version < "3.13"
-urllib3==2.2.3 ; python_version >= "3.9" and python_version < "3.13"
-win32-setctime==1.1.0 ; python_version >= "3.9" and python_version < "3.13" and sys_platform == "win32"
-wrapt==1.16.0 ; python_version >= "3.9" and python_version < "3.13"
-zipp==3.20.2 ; python_version >= "3.9" and python_version < "3.13"
+# This file was autogenerated by uv via the following command:
+#    uv pip compile pyproject.toml --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines -o requirements_rocm.txt
+accelerate==1.3.0
+    # via
+    #   text-generation-server (pyproject.toml)
+    #   peft
+aiohappyeyeballs==2.4.4
+    # via aiohttp
+aiohttp==3.11.11
+    # via
+    #   datasets
+    #   fsspec
+aiosignal==1.3.2
+    # via aiohttp
+airportsdata==20241001
+    # via outlines
+annotated-types==0.7.0
+    # via pydantic
+attrs==25.1.0
+    # via
+    #   aiohttp
+    #   jsonschema
+    #   referencing
+certifi==2024.8.30
+    # via requests
+charset-normalizer==3.4.0
+    # via requests
+click==8.1.7
+    # via typer
+cloudpickle==3.1.1
+    # via outlines
+compressed-tensors==0.9.1
+    # via text-generation-server (pyproject.toml)
+datasets==2.21.0
+    # via text-generation-server (pyproject.toml)
+deprecated==1.2.14
+    # via
+    #   opentelemetry-api
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
+    #   opentelemetry-semantic-conventions
+dill==0.3.8
+    # via
+    #   datasets
+    #   multiprocess
+diskcache==5.6.3
+    # via outlines
+einops==0.8.0
+    # via text-generation-server (pyproject.toml)
+filelock==3.16.1
+    # via
+    #   datasets
+    #   huggingface-hub
+    #   torch
+    #   transformers
+frozenlist==1.5.0
+    # via
+    #   aiohttp
+    #   aiosignal
+fsspec==2024.6.1
+    # via
+    #   datasets
+    #   huggingface-hub
+    #   torch
+genson==1.3.0
+    # via outlines
+googleapis-common-protos==1.65.0
+    # via
+    #   grpcio-status
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
+grpc-interceptor==0.15.4
+    # via text-generation-server (pyproject.toml)
+grpcio==1.68.0
+    # via
+    #   text-generation-server (pyproject.toml)
+    #   grpc-interceptor
+    #   grpcio-reflection
+    #   grpcio-status
+    #   opentelemetry-exporter-otlp-proto-grpc
+grpcio-reflection==1.68.0
+    # via text-generation-server (pyproject.toml)
+grpcio-status==1.68.0
+    # via text-generation-server (pyproject.toml)
+hf-transfer==0.1.8
+    # via text-generation-server (pyproject.toml)
+huggingface-hub==0.28.1
+    # via
+    #   accelerate
+    #   datasets
+    #   peft
+    #   tokenizers
+    #   transformers
+idna==3.10
+    # via
+    #   requests
+    #   yarl
+importlib-metadata==7.1.0
+    # via opentelemetry-api
+interegular==0.3.3
+    # via
+    #   outlines
+    #   outlines-core
+jinja2==3.1.5
+    # via
+    #   outlines
+    #   torch
+jsonschema==4.23.0
+    # via
+    #   outlines
+    #   outlines-core
+jsonschema-specifications==2024.10.1
+    # via jsonschema
+lark==1.2.2
+    # via outlines
+loguru==0.7.3
+    # via text-generation-server (pyproject.toml)
+markdown-it-py==3.0.0
+    # via rich
+markupsafe==3.0.2
+    # via jinja2
+mdurl==0.1.2
+    # via markdown-it-py
+mpmath==1.3.0
+    # via sympy
+multidict==6.1.0
+    # via
+    #   aiohttp
+    #   yarl
+multiprocess==0.70.16
+    # via datasets
+nest-asyncio==1.6.0
+    # via outlines
+networkx==3.4.2
+    # via torch
+numpy==1.26.4
+    # via
+    #   text-generation-server (pyproject.toml)
+    #   accelerate
+    #   datasets
+    #   outlines
+    #   pandas
+    #   peft
+    #   scipy
+    #   transformers
+nvidia-cublas-cu12==12.4.5.8
+    # via
+    #   nvidia-cudnn-cu12
+    #   nvidia-cusolver-cu12
+    #   torch
+nvidia-cuda-cupti-cu12==12.4.127
+    # via torch
+nvidia-cuda-nvrtc-cu12==12.4.127
+    # via torch
+nvidia-cuda-runtime-cu12==12.4.127
+    # via torch
+nvidia-cudnn-cu12==9.1.0.70
+    # via torch
+nvidia-cufft-cu12==11.2.1.3
+    # via torch
+nvidia-curand-cu12==10.3.5.147
+    # via torch
+nvidia-cusolver-cu12==11.6.1.9
+    # via torch
+nvidia-cusparse-cu12==12.3.1.170
+    # via
+    #   nvidia-cusolver-cu12
+    #   torch
+nvidia-cusparselt-cu12==0.6.2
+    # via torch
+nvidia-nccl-cu12==2.21.5
+    # via torch
+nvidia-nvjitlink-cu12==12.4.127
+    # via
+    #   nvidia-cusolver-cu12
+    #   nvidia-cusparse-cu12
+    #   torch
+nvidia-nvtx-cu12==12.4.127
+    # via torch
+opentelemetry-api==1.30.0
+    # via
+    #   text-generation-server (pyproject.toml)
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
+    #   opentelemetry-instrumentation
+    #   opentelemetry-instrumentation-grpc
+    #   opentelemetry-sdk
+    #   opentelemetry-semantic-conventions
+opentelemetry-exporter-otlp==1.30.0
+    # via text-generation-server (pyproject.toml)
+opentelemetry-exporter-otlp-proto-common==1.30.0
+    # via
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
+opentelemetry-exporter-otlp-proto-grpc==1.30.0
+    # via opentelemetry-exporter-otlp
+opentelemetry-exporter-otlp-proto-http==1.30.0
+    # via opentelemetry-exporter-otlp
+opentelemetry-instrumentation==0.51b0
+    # via opentelemetry-instrumentation-grpc
+opentelemetry-instrumentation-grpc==0.51b0
+    # via text-generation-server (pyproject.toml)
+opentelemetry-proto==1.30.0
+    # via
+    #   opentelemetry-exporter-otlp-proto-common
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
+opentelemetry-sdk==1.30.0
+    # via
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
+opentelemetry-semantic-conventions==0.51b0
+    # via
+    #   opentelemetry-instrumentation
+    #   opentelemetry-instrumentation-grpc
+    #   opentelemetry-sdk
+outlines==0.1.14
+    # via text-generation-server (pyproject.toml)
+outlines-core==0.1.26
+    # via outlines
+packaging==24.1
+    # via
+    #   accelerate
+    #   datasets
+    #   huggingface-hub
+    #   opentelemetry-instrumentation
+    #   peft
+    #   transformers
+pandas==2.2.3
+    # via datasets
+peft==0.14.0
+    # via text-generation-server (pyproject.toml)
+pillow==11.1.0
+    # via text-generation-server (pyproject.toml)
+prometheus-client==0.21.1
+    # via text-generation-server (pyproject.toml)
+propcache==0.2.1
+    # via
+    #   aiohttp
+    #   yarl
+protobuf==5.29.3
+    # via
+    #   text-generation-server (pyproject.toml)
+    #   googleapis-common-protos
+    #   grpcio-reflection
+    #   grpcio-status
+    #   opentelemetry-proto
+psutil==6.1.1
+    # via
+    #   accelerate
+    #   peft
+py-cpuinfo==9.0.0
+    # via text-generation-server (pyproject.toml)
+pyarrow==19.0.0
+    # via datasets
+pycountry==24.6.1
+    # via outlines
+pydantic==2.10.6
+    # via
+    #   compressed-tensors
+    #   outlines
+pydantic-core==2.27.2
+    # via pydantic
+pygments==2.18.0
+    # via rich
+python-dateutil==2.9.0.post0
+    # via pandas
+pytz==2025.1
+    # via pandas
+pyyaml==6.0.2
+    # via
+    #   accelerate
+    #   datasets
+    #   huggingface-hub
+    #   peft
+    #   transformers
+referencing==0.36.2
+    # via
+    #   jsonschema
+    #   jsonschema-specifications
+    #   outlines
+regex==2024.9.11
+    # via transformers
+requests==2.32.3
+    # via
+    #   datasets
+    #   huggingface-hub
+    #   opentelemetry-exporter-otlp-proto-http
+    #   outlines
+    #   transformers
+rich==13.9.4
+    # via
+    #   text-generation-server (pyproject.toml)
+    #   typer
+rpds-py==0.22.3
+    # via
+    #   jsonschema
+    #   referencing
+safetensors==0.4.5
+    # via
+    #   text-generation-server (pyproject.toml)
+    #   accelerate
+    #   peft
+    #   transformers
+scipy==1.13.1
+    # via text-generation-server (pyproject.toml)
+sentencepiece==0.2.0
+    # via text-generation-server (pyproject.toml)
+setuptools==75.2.0
+    # via torch
+shellingham==1.5.4
+    # via typer
+six==1.17.0
+    # via python-dateutil
+sympy==1.13.1
+    # via torch
+texttable==1.7.0
+    # via text-generation-server (pyproject.toml)
+tokenizers==0.20.3
+    # via
+    #   text-generation-server (pyproject.toml)
+    #   transformers
+torch==2.6.0
+    # via
+    #   accelerate
+    #   compressed-tensors
+    #   outlines
+    #   peft
+tqdm==4.66.5
+    # via
+    #   datasets
+    #   huggingface-hub
+    #   outlines
+    #   peft
+    #   transformers
+transformers==4.46.3
+    # via
+    #   compressed-tensors
+    #   peft
+triton==3.2.0
+    # via torch
+typer==0.15.1
+    # via text-generation-server (pyproject.toml)
+typing-extensions==4.12.2
+    # via
+    #   huggingface-hub
+    #   opentelemetry-sdk
+    #   outlines
+    #   pydantic
+    #   pydantic-core
+    #   referencing
+    #   torch
+    #   typer
+tzdata==2025.1
+    # via pandas
+urllib3==2.2.3
+    # via requests
+wrapt==1.16.0
+    # via
+    #   deprecated
+    #   opentelemetry-instrumentation
+    #   opentelemetry-instrumentation-grpc
+xxhash==3.5.0
+    # via datasets
+yarl==1.18.3
+    # via aiohttp
+zipp==3.20.2
+    # via importlib-metadata