Mirror of https://github.com/huggingface/text-generation-inference.git
(synced 2025-06-19 15:52:08 +00:00)

Commit 951eb62b56: Using the "lockfile".
Parent: c1cf36c0dc
@@ -233,9 +233,8 @@ COPY server/Makefile server/Makefile
 ENV UV_SYSTEM_PYTHON=1
 RUN cd server && \
 make gen-server && \
-python -c "from text_generation_server.pb import generate_pb2" && \
 pip install -U pip uv && \
-uv pip install -e ".[attention, bnb, accelerate, compressed-tensors, marlin, moe, quantize, peft, outlines]" --no-cache-dir # && \
+uv pip install -r requirements_cuda.txt
 # uv pip install nvidia-nccl-cu12==2.22.3

 ENV LD_PRELOAD=/opt/conda/lib/python3.11/site-packages/nvidia/nccl/lib/libnccl.so.2
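For orientation, the new CUDA install step amounts to a few shell commands; a minimal sketch, assuming a checkout whose server/ directory contains the compiled requirements_cuda.txt and that UV_SYSTEM_PYTHON=1 keeps uv installing into the container's interpreter rather than a virtualenv:

  export UV_SYSTEM_PYTHON=1
  cd server
  make gen-server                          # regenerate the gRPC stubs the old "python -c" line used to check
  pip install -U pip uv                    # make sure recent pip and uv are available
  uv pip install -r requirements_cuda.txt  # install the pinned ("lockfile") dependency set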
@@ -1,5 +1,5 @@
 # Rust builder
-FROM lukemathwalker/cargo-chef:latest-rust-1.84.0 AS chef
+FROM lukemathwalker/cargo-chef:latest-rust-1.80.1 AS chef
 WORKDIR /usr/src

 ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
@@ -104,7 +104,7 @@ RUN case ${TARGETPLATFORM} in \
 /opt/conda/bin/conda clean -ya

 # Install flash-attention, torch dependencies
-RUN python3 -m pip install --upgrade pip uv && pip install numpy einops ninja joblib msgpack cmake --no-cache-dir && rm -rf /var/lib/apt/lists/*
+RUN python3 -m pip install --upgrade pip && pip install numpy einops ninja joblib msgpack cmake --no-cache-dir && rm -rf /var/lib/apt/lists/*

 RUN conda install mkl=2021
 ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/rocm/lib/:/opt/conda/lib/python3.11/site-packages/torch/lib:/opt/conda/lib/
@@ -234,7 +234,6 @@ FROM kernel-builder AS vllm-builder
 WORKDIR /usr/src

 COPY server/Makefile-vllm Makefile
-RUN pip install setuptools_scm

 # Build specific version of vllm
 RUN make build-vllm-rocm
@@ -268,24 +267,6 @@ COPY server/exllamav2_kernels/ .

 RUN python setup.py build

-FROM kernel-builder AS marlin-kernels
-WORKDIR /usr/src
-ENV MARLIN_KERNELS_BRANCH=v0.3.6
-ENV VLLM_TARGET_DEVICE=rocm
-RUN git clone https://github.com/danieldk/marlin-kernels.git && \
-cd marlin-kernels && \
-git checkout ${MARLIN_KERNELS_BRANCH} && \
-python setup.py install
-
-FROM kernel-builder AS moe-kernels
-WORKDIR /usr/src
-ENV MOE_KERNELS_BRANCH=v0.8.2
-ENV VLLM_TARGET_DEVICE=rocm
-RUN git clone https://github.com/danieldk/moe-kernels.git && \
-cd moe-kernels && \
-git checkout ${MOE_KERNELS_BRANCH} && \
-python setup.py install
-
 FROM install_deps AS base-copy

 # Text Generation Inference base env
@@ -308,21 +289,14 @@ COPY --from=exllama-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-311
 # Copy build artifacts from exllamav2 kernels builder
 COPY --from=exllamav2-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages

-# Copy build artifacts from marlin kernels
-COPY --from=marlin-kernels /usr/src/marlin-kernels/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages
-
-# Copy build artifacts from moe kernels
-COPY --from=moe-kernels /usr/src/moe-kernels/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages
-
 # Install server
 COPY proto proto
 COPY server server
 COPY server/Makefile server/Makefile
-ENV UV_SYSTEM_PYTHON=1
 RUN cd server && \
 make gen-server && \
-pip install -U pip uv && \
-uv pip install -e ".[accelerate, compressed-tensors, peft, outlines]" --no-cache-dir
+pip install -r requirements_rocm.txt --no-cache-dir
+# pip install ".[accelerate, compressed-tensors, peft, outlines]" --no-cache-dir

 # Install benchmarker
 COPY --from=builder /usr/src/target/release-opt/text-generation-benchmark /usr/local/bin/text-generation-benchmark
@@ -1,6 +1,6 @@
 ARG PLATFORM=xpu

-FROM lukemathwalker/cargo-chef:latest-rust-1.84.0 AS chef
+FROM lukemathwalker/cargo-chef:latest-rust-1.80.1 AS chef
 WORKDIR /usr/src

 ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
@@ -97,10 +97,11 @@ ENV HF_HOME=/data \


 WORKDIR /usr/src
-RUN pip install https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/ipex_dev/xpu/torch-2.5.0a0%2Bgite84e33f-cp311-cp311-linux_x86_64.whl --no-cache-dir
-RUN pip install https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/ipex_dev/xpu/torchaudio-2.5.0a0%2B56bc006-cp311-cp311-linux_x86_64.whl --no-cache-dir
-RUN pip install https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/ipex_dev/xpu/torchvision-0.20.0a0%2B8e8a208-cp311-cp311-linux_x86_64.whl --no-cache-dir
-RUN pip install https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/ipex_dev/xpu/oneccl_bind_pt-2.5.0%2Bxpu-cp311-cp311-linux_x86_64.whl --no-cache-dir
+RUN pip install https://intel-extension-for-pytorch.s3.us-east-1.amazonaws.com/ipex_dev/xpu/torch-2.5.0a0%2Bgite84e33f-cp311-cp311-linux_x86_64.whl --no-cache-dir
+RUN pip install https://intel-extension-for-pytorch.s3.us-east-1.amazonaws.com/ipex_dev/xpu/torchaudio-2.5.0a0%2B56bc006-cp311-cp311-linux_x86_64.whl --no-cache-dir
+RUN pip install https://intel-extension-for-pytorch.s3.us-east-1.amazonaws.com/ipex_dev/xpu/torchvision-0.20.0a0%2B8e8a208-cp311-cp311-linux_x86_64.whl --no-cache-dir
+RUN pip install https://intel-extension-for-pytorch.s3.us-east-1.amazonaws.com/ipex_dev/xpu/intel_extension_for_pytorch-2.5.10%2Bgit9d489a8-cp311-cp311-linux_x86_64.whl --no-cache-dir
+RUN pip install https://intel-extension-for-pytorch.s3.us-east-1.amazonaws.com/ipex_dev/xpu/oneccl_bind_pt-2.5.0%2Bxpu-cp311-cp311-linux_x86_64.whl --no-cache-dir

 RUN pip install triton-xpu==3.0.0b2 --no-cache-dir

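A quick sanity check that the prebuilt XPU wheels above installed correctly is to import them; a small sketch, with import names inferred from the wheel filenames (treat them as assumptions):

  python -c "import torch; print(torch.__version__)"
  python -c "import intel_extension_for_pytorch as ipex; print(ipex.__version__)"
  # assumed import name for the oneccl_bind_pt wheel
  python -c "import oneccl_bindings_for_pytorch"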
@@ -108,20 +109,15 @@ RUN pip install triton-xpu==3.0.0b2 --no-cache-dir
 COPY proto proto
 COPY server server
 COPY server/Makefile server/Makefile
-ENV UV_SYSTEM_PYTHON=1
 RUN cd server && \
 make gen-server && \
-pip install -U pip uv && \
-uv pip install -e ".[accelerate, compressed-tensors, peft, outlines]" --no-cache-dir
+pip install -r requirements_intel.txt && \
+pip install ".[accelerate, compressed-tensors, peft, outlines]" --no-cache-dir

 ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/intel/oneapi/pti/0.9/lib:/opt/conda/lib
 ENV CCL_ZE_IPC_EXCHANGE=sockets
 #ENV TORCH_LLM_ALLREDUCE=1
 #ENV CCL_TOPO_FABRIC_VERTEX_CONNECTION_CHECK=0
-ENV TORCH_DEVICE_BACKEND_AUTOLOAD=0
-
-RUN git clone https://github.com/intel/intel-extension-for-pytorch && cd intel-extension-for-pytorch && git checkout 1ccf72b2d11cd00b47aef6d6cd054c088aa6f083
-RUN cd intel-extension-for-pytorch && git submodule update --init --recursive && USE_AOT_DEVLIST='pvc,ats-m150' BUILD_SEPARATE_OPS=OFF BUILD_WITH_CPU=OFF USE_XETLA=ON python setup.py install && rm -rf /usr/src/intel-extension-for-pytorch

 # Install benchmarker
 COPY --from=builder /usr/src/target/release-opt/text-generation-benchmark /usr/local/bin/text-generation-benchmark
@@ -213,11 +209,10 @@ ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/opt/conda/lib/"
 COPY proto proto
 COPY server server
 COPY server/Makefile server/Makefile
-ENV UV_SYSTEM_PYTHON=1
 RUN cd server && \
 make gen-server && \
-pip install -U pip uv && \
-uv pip install -e ".[accelerate, compressed-tensors, peft, outlines]" --no-cache-dir
+uv pip install -r requirements_intel.txt --no-cache-dir
+# pip install ".[accelerate, compressed-tensors, peft, outlines]" --no-cache-dir

 # Install benchmarker
 COPY --from=builder /usr/src/target/release-opt/text-generation-benchmark /usr/local/bin/text-generation-benchmark
@@ -227,9 +222,9 @@ COPY --from=builder /usr/src/target/release-opt/text-generation-router /usr/loca
 COPY --from=builder /usr/src/target/release-opt/text-generation-launcher /usr/local/bin/text-generation-launcher

 FROM ${PLATFORM} AS final
-ENV ATTENTION=flashdecoding-ipex
-ENV PREFIX_CACHING=1
-ENV PREFILL_CHUNKING=1
+ENV ATTENTION=paged
+ENV PREFIX_CACHING=0
+ENV PREFILL_CHUNKING=0
 ENV CUDA_GRAPHS=0
 ENTRYPOINT ["text-generation-launcher"]
 CMD ["--json-output"]
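The ENV lines above only set image defaults; a container can still be started with the previous values if needed. A sketch, using a hypothetical local image tag and an example model id (XPU device flags omitted):

  docker run --rm \
    -e ATTENTION=flashdecoding-ipex \
    -e PREFIX_CACHING=1 \
    -e PREFILL_CHUNKING=1 \
    tgi-xpu:local --model-id teknium/OpenHermes-2.5-Mistral-7B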
@@ -35,3 +35,8 @@ install-cuda: install-server install-flash-attention-v2-cuda install-flash-atten
 uv pip install nvidia-nccl-cu12==2.22.3

 install-rocm: install-server install-flash-attention-v2-rocm install-vllm-rocm
+
+export-requirements:
+uv pip compile pyproject.toml --extra attention --extra bnb --extra accelerate --extra compressed-tensors --extra marlin --extra moe --extra quantize --extra peft --extra outlines -o requirements_cuda.txt
+uv pip compile pyproject.toml --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines -o requirements_intel.txt
+uv pip compile pyproject.toml --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines -o requirements_rocm.txt
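With this target in place, refreshing the pinned files after editing pyproject.toml should be a single make invocation, or one of the underlying uv commands run by hand; a sketch, assuming uv is installed and the commands are run from server/:

  cd server
  make export-requirements   # rewrites requirements_cuda.txt, requirements_intel.txt and requirements_rocm.txt
  # or regenerate just one file directly, e.g.:
  uv pip compile pyproject.toml --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines -o requirements_rocm.txt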
@@ -1,55 +1,385 @@
-certifi==2024.8.30 ; python_version >= "3.9" and python_version < "3.13"
-charset-normalizer==3.4.0 ; python_version >= "3.9" and python_version < "3.13"
-click==8.1.7 ; python_version >= "3.9" and python_version < "3.13"
(the rest of the old requirements_cuda.txt, 55 pins in this hand-exported format with python_version markers, is removed in full)
+# This file was autogenerated by uv via the following command:
+#    uv pip compile pyproject.toml --extra attention --extra bnb --extra accelerate --extra compressed-tensors --extra marlin --extra moe --extra quantize --extra peft --extra outlines -o requirements_cuda.txt
+accelerate==1.3.0
+#   via
+#     text-generation-server (pyproject.toml)
+#     peft
(the remaining lines of the new 385-line uv-compiled lockfile follow: pinned direct extras such as attention-kernels, bitsandbytes==0.45.1, compressed-tensors==0.9.1, marlin-kernels, moe-kernels, outlines==0.1.14 and peft==0.14.0, plus their transitive dependencies, including torch==2.6.0, transformers==4.46.3 and the nvidia-*-cu12 wheels, each entry annotated with "# via" provenance comments)
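Because the compiled file is now committed, a simple drift check (not part of this commit, just a sketch assuming uv and git are available) is to recompile and diff:

  cd server
  uv pip compile pyproject.toml --extra attention --extra bnb --extra accelerate --extra compressed-tensors --extra marlin --extra moe --extra quantize --extra peft --extra outlines -o requirements_cuda.txt
  git diff --exit-code requirements_cuda.txt   # non-zero exit means the lockfile is stale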
@@ -1,55 +1,368 @@
-certifi==2024.8.30 ; python_version >= "3.9" and python_version < "3.13"
-charset-normalizer==3.4.0 ; python_version >= "3.9" and python_version < "3.13"
(the rest of the old requirements_intel.txt, 55 pins in the same hand-exported format, is removed in full)
+# This file was autogenerated by uv via the following command:
+#    uv pip compile pyproject.toml --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines -o requirements_intel.txt
+accelerate==1.3.0
+#   via
+#     text-generation-server (pyproject.toml)
+#     peft
(the remaining lines of the new 368-line uv-compiled lockfile follow: the same layout as requirements_cuda.txt, but compiled without the attention/bnb/marlin/moe extras, so no kernel wheels or bitsandbytes; torch==2.6.0, transformers==4.46.3 and the other transitive pins are listed with "# via" provenance comments)
@@ -1,55 +1,368 @@
-certifi==2024.8.30 ; python_version >= "3.9" and python_version < "3.13"
-charset-normalizer==3.4.0 ; python_version >= "3.9" and python_version < "3.13"
(the rest of the old requirements_rocm.txt, 55 pins in the same hand-exported format, is removed in full)
+# This file was autogenerated by uv via the following command:
+#    uv pip compile pyproject.toml --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines -o requirements_rocm.txt
+accelerate==1.3.0
+#   via
+#     text-generation-server (pyproject.toml)
+#     peft
(the remaining lines of the new 368-line uv-compiled lockfile follow, matching requirements_intel.txt in content and layout, with each pinned package annotated by "# via" provenance comments)