# text-generation-inference/server/requirements_cuda.txt

# This file was autogenerated by uv via the following command:
#    uv pip compile pyproject.toml --extra bnb --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines -o requirements_cuda.txt --python-version 3.11
accelerate==1.6.0
    # via
    #   text-generation-server (pyproject.toml)
    #   peft
aiohappyeyeballs==2.6.1
    # via aiohttp
aiohttp==3.11.18
    # via
    #   datasets
    #   fsspec
aiosignal==1.3.2
    # via aiohttp
airportsdata==20250224
    # via outlines
annotated-types==0.7.0
    # via pydantic
attrs==25.3.0
    # via
    #   aiohttp
    #   jsonschema
    #   referencing
bitsandbytes==0.45.5
    # via text-generation-server (pyproject.toml)
certifi==2025.4.26
    # via requests
charset-normalizer==3.4.2
    # via requests
click==8.1.8
    # via
    #   text-generation-server (pyproject.toml)
    #   typer
cloudpickle==3.1.1
    # via outlines
compressed-tensors==0.9.4
    # via text-generation-server (pyproject.toml)
datasets==2.21.0
    # via text-generation-server (pyproject.toml)
deprecated==1.2.18
    # via
    #   opentelemetry-api
    #   opentelemetry-exporter-otlp-proto-grpc
    #   opentelemetry-exporter-otlp-proto-http
    #   opentelemetry-semantic-conventions
dill==0.3.8
    # via
    #   datasets
    #   multiprocess
diskcache==5.6.3
    # via outlines
einops==0.8.1
    # via text-generation-server (pyproject.toml)
filelock==3.18.0
    # via
    #   datasets
    #   huggingface-hub
    #   torch
    #   transformers
frozenlist==1.6.0
    # via
    #   aiohttp
    #   aiosignal
fsspec==2024.6.1
    # via
    #   datasets
    #   huggingface-hub
    #   torch
genson==1.3.0
    # via outlines
googleapis-common-protos==1.70.0
    # via
    #   grpcio-status
    #   opentelemetry-exporter-otlp-proto-grpc
    #   opentelemetry-exporter-otlp-proto-http
grpc-interceptor==0.15.4
    # via text-generation-server (pyproject.toml)
grpcio==1.71.0
    # via
    #   text-generation-server (pyproject.toml)
    #   grpc-interceptor
    #   grpcio-reflection
    #   grpcio-status
    #   opentelemetry-exporter-otlp-proto-grpc
grpcio-reflection==1.71.0
    # via text-generation-server (pyproject.toml)
grpcio-status==1.71.0
    # via text-generation-server (pyproject.toml)
hf-transfer==0.1.9
    # via text-generation-server (pyproject.toml)
hf-xet==1.1.0
    # via
    #   text-generation-server (pyproject.toml)
    #   huggingface-hub
huggingface-hub==0.31.1
    # via
    #   text-generation-server (pyproject.toml)
    #   accelerate
    #   datasets
    #   kernels
    #   peft
    #   tokenizers
    #   transformers
idna==3.10
    # via
    #   requests
    #   yarl
importlib-metadata==8.6.1
    # via opentelemetry-api
interegular==0.3.3
    # via
    #   outlines
    #   outlines-core
iso3166==2.1.1
    # via outlines
jinja2==3.1.6
    # via
    #   outlines
    #   torch
jsonschema==4.23.0
    # via
    #   outlines
    #   outlines-core
jsonschema-specifications==2025.4.1
    # via jsonschema
kernels==0.5.0
    # via text-generation-server (pyproject.toml)
lark==1.2.2
    # via outlines
loguru==0.7.3
    # via text-generation-server (pyproject.toml)
markdown-it-py==3.0.0
    # via rich
markupsafe==3.0.2
    # via jinja2
mdurl==0.1.2
    # via markdown-it-py
mpmath==1.3.0
    # via sympy
multidict==6.4.3
    # via
    #   aiohttp
    #   yarl
multiprocess==0.70.16
    # via datasets
nest-asyncio==1.6.0
    # via outlines
networkx==3.4.2
    # via torch
numpy==2.2.5
    # via
    #   text-generation-server (pyproject.toml)
    #   accelerate
    #   bitsandbytes
    #   datasets
    #   outlines
    #   pandas
    #   peft
    #   scipy
    #   transformers
nvidia-cublas-cu12==12.6.4.1
    # via
    #   nvidia-cudnn-cu12
    #   nvidia-cusolver-cu12
    #   torch
nvidia-cuda-cupti-cu12==12.6.80
    # via torch
nvidia-cuda-nvrtc-cu12==12.6.77
    # via torch
nvidia-cuda-runtime-cu12==12.6.77
    # via torch
nvidia-cudnn-cu12==9.5.1.17
    # via torch
nvidia-cufft-cu12==11.3.0.4
    # via torch
nvidia-cufile-cu12==1.11.1.6
    # via torch
nvidia-curand-cu12==10.3.7.77
    # via torch
nvidia-cusolver-cu12==11.7.1.2
    # via torch
nvidia-cusparse-cu12==12.5.4.2
    # via
    #   nvidia-cusolver-cu12
    #   torch
nvidia-cusparselt-cu12==0.6.3
    # via torch
nvidia-nccl-cu12==2.26.2
    # via torch
nvidia-nvjitlink-cu12==12.6.85
    # via
    #   nvidia-cufft-cu12
    #   nvidia-cusolver-cu12
    #   nvidia-cusparse-cu12
    #   torch
nvidia-nvtx-cu12==12.6.77
    # via torch
opentelemetry-api==1.33.0
    # via
    #   text-generation-server (pyproject.toml)
    #   opentelemetry-exporter-otlp-proto-grpc
    #   opentelemetry-exporter-otlp-proto-http
    #   opentelemetry-instrumentation
    #   opentelemetry-instrumentation-grpc
    #   opentelemetry-sdk
    #   opentelemetry-semantic-conventions
opentelemetry-exporter-otlp==1.33.0
    # via text-generation-server (pyproject.toml)
opentelemetry-exporter-otlp-proto-common==1.33.0
    # via
    #   opentelemetry-exporter-otlp-proto-grpc
    #   opentelemetry-exporter-otlp-proto-http
opentelemetry-exporter-otlp-proto-grpc==1.33.0
    # via opentelemetry-exporter-otlp
opentelemetry-exporter-otlp-proto-http==1.33.0
    # via opentelemetry-exporter-otlp
opentelemetry-instrumentation==0.54b0
    # via opentelemetry-instrumentation-grpc
opentelemetry-instrumentation-grpc==0.54b0
    # via text-generation-server (pyproject.toml)
opentelemetry-proto==1.33.0
    # via
    #   opentelemetry-exporter-otlp-proto-common
    #   opentelemetry-exporter-otlp-proto-grpc
    #   opentelemetry-exporter-otlp-proto-http
opentelemetry-sdk==1.33.0
    # via
    #   opentelemetry-exporter-otlp-proto-grpc
    #   opentelemetry-exporter-otlp-proto-http
opentelemetry-semantic-conventions==0.54b0
    # via
    #   opentelemetry-instrumentation
    #   opentelemetry-instrumentation-grpc
    #   opentelemetry-sdk
outlines==0.2.3
    # via text-generation-server (pyproject.toml)
outlines-core==0.1.26
    # via outlines
packaging==25.0
    # via
    #   accelerate
    #   datasets
    #   huggingface-hub
    #   kernels
    #   opentelemetry-instrumentation
    #   peft
    #   transformers
pandas==2.2.3
    # via datasets
peft==0.15.2
    # via text-generation-server (pyproject.toml)
pillow==11.2.1
    # via text-generation-server (pyproject.toml)
prometheus-client==0.21.1
    # via text-generation-server (pyproject.toml)
propcache==0.3.1
    # via
    #   aiohttp
    #   yarl
protobuf==5.29.4
    # via
    #   text-generation-server (pyproject.toml)
    #   googleapis-common-protos
    #   grpcio-reflection
    #   grpcio-status
    #   opentelemetry-proto
psutil==7.0.0
    # via
    #   accelerate
    #   peft
py-cpuinfo==9.0.0
    # via text-generation-server (pyproject.toml)
pyarrow==20.0.0
    # via datasets
pydantic==2.11.4
    # via
    #   compressed-tensors
    #   outlines
pydantic-core==2.33.2
    # via pydantic
pygments==2.19.1
    # via rich
python-dateutil==2.9.0.post0
    # via pandas
pytz==2025.2
    # via pandas
pyyaml==6.0.2
    # via
    #   accelerate
    #   datasets
    #   huggingface-hub
    #   peft
    #   transformers
referencing==0.36.2
    # via
    #   jsonschema
    #   jsonschema-specifications
    #   outlines
regex==2024.11.6
    # via transformers
requests==2.32.3
    # via
    #   datasets
    #   huggingface-hub
    #   opentelemetry-exporter-otlp-proto-http
    #   outlines
    #   transformers
rich==14.0.0
    # via
    #   text-generation-server (pyproject.toml)
    #   typer
rpds-py==0.24.0
    # via
    #   jsonschema
    #   referencing
safetensors==0.5.3
    # via
    #   text-generation-server (pyproject.toml)
    #   accelerate
    #   peft
    #   transformers
scipy==1.15.3
    # via text-generation-server (pyproject.toml)
sentencepiece==0.2.0
    # via text-generation-server (pyproject.toml)
setuptools==80.4.0
    # via triton
shellingham==1.5.4
    # via typer
six==1.17.0
    # via python-dateutil
sympy==1.14.0
    # via torch
texttable==1.7.0
    # via text-generation-server (pyproject.toml)
tokenizers==0.21.1
    # via
    #   text-generation-server (pyproject.toml)
    #   transformers
torch==2.7.0
    # via
    #   accelerate
    #   bitsandbytes
    #   compressed-tensors
    #   outlines
    #   peft
tqdm==4.67.1
    # via
    #   datasets
    #   huggingface-hub
    #   outlines
    #   peft
    #   transformers
transformers==4.51.3
    # via
    #   text-generation-server (pyproject.toml)
    #   compressed-tensors
    #   peft
triton==3.3.0
    # via torch
typer==0.15.3
    # via text-generation-server (pyproject.toml)
typing-extensions==4.13.2
    # via
    #   huggingface-hub
    #   opentelemetry-sdk
    #   outlines
    #   pydantic
    #   pydantic-core
    #   referencing
    #   torch
    #   typer
    #   typing-inspection
typing-inspection==0.4.0
    # via pydantic
tzdata==2025.2
    # via pandas
urllib3==2.4.0
    # via requests
wrapt==1.17.2
    # via
    #   deprecated
    #   opentelemetry-instrumentation
    #   opentelemetry-instrumentation-grpc
xxhash==3.5.0
    # via datasets
yarl==1.20.0
    # via aiohttp
zipp==3.21.0
    # via importlib-metadata