# text-generation-inference/server/pyproject.toml

# Core distribution metadata for the Python gRPC server package.
# NOTE(review): no [build-system] table is visible in this file; if none is
# declared elsewhere, builds rely on the frontend's default backend — confirm
# that is intended.
[project]
name = "text-generation-server"
version = "2.0.5-dev0"
description = "Text Generation Inference Python gRPC Server"
readme = "README.md"
requires-python = ">=3.9"
authors = [
    { name = "Olivier Dehaene", email = "olivier@huggingface.co" },
    { name = "Nicolas Patry", email = "nicolas@huggingface.co" },
]
# Requirements installed unconditionally with the package. Optional and
# hardware-specific packages are declared as extras under
# [project.optional-dependencies].
dependencies = [
    "einops>=0.8.0",
    "grpc-interceptor>=0.15.4",
    "grpcio>=1.67.0",
    "grpcio-reflection>=1.67.0",
    "grpcio-status>=1.67.0",
    "hf-transfer>=0.1.8",
    "loguru>=0.7.3",
    # Upper bound excludes a future numpy 3 major release.
    "numpy>=1.26,<3",
    "opentelemetry-api>=1.27.0",
    "opentelemetry-exporter-otlp>=1.27.0",
    "opentelemetry-instrumentation-grpc>=0.50b0",
    "pillow>=11.1.0",
    "prometheus-client>=0.21.0",
    "protobuf>=5.28.3",
    "py-cpuinfo>=9.0.0",
    "rich>=13.8.1",
    "safetensors>=0.4.5",
    "scipy>=1.13.1",
    "sentencepiece>=0.2.0",
    "tokenizers>=0.20.3",
    "typer>=0.15.1",
    "transformers>=4.48.0",
]

# Console entry point: installs a `text-generation-server` command that
# invokes the `app` object from the `text_generation_server.cli` module.
[project.scripts]
text-generation-server = "text_generation_server.cli:app"

# Extras, selectable at install time (e.g. `text-generation-server[peft]`).
[project.optional-dependencies]

# --- Optional library integrations -----------------------------------------
accelerate = [
    "accelerate>=1.2.1,<2",
]
bnb = [
    "bitsandbytes>=0.45.0",
]
compressed-tensors = [
    "compressed-tensors>=0.9.0",
]
peft = [
    "peft>=0.14.0",
]
outlines = [
    "outlines>=0.1.13",
]
quantize = [
    "texttable>=1.6.7,<2",
    "datasets>=2.21,<3",
]

# --- Prebuilt kernel packages ----------------------------------------------
# Unversioned here; each name is mapped to a wheel URL in [tool.uv.sources].
moe = [
    "moe-kernels",
]
attention = [
    "attention-kernels",
]
marlin = [
    "marlin-kernels",
]

# --- Development and code generation ---------------------------------------
# NOTE(review): `dev` and `gen` both request grpcio-tools but with different
# floors (1.51.1 vs 1.69.0); consider aligning them.
dev = [
    "grpcio-tools>=1.51.1,<2.0",
    "pytest>=7.3.0,<8",
]
gen = [
    "grpcio-tools>=1.69.0",
    "mypy-protobuf>=3.6.0",
]

# --- Intel hardware stacks -------------------------------------------------
# The `*-intel-xpu` / `*-intel-cpu` names are mapped to wheel URLs in
# [tool.uv.sources].
intel-xpu = [
    "torch-intel-xpu",
    "torchaudio-intel-xpu",
    "torchvision-intel-xpu",
    "oneccl-intel-xpu",
    "triton-xpu==3.0.0b2",
]
# NOTE(review): unlike `oneccl-intel-xpu`, `oneccl-intel-cpu` has no URL entry
# in [tool.uv.sources] in this file — confirm where it is expected to resolve
# from.
intel-cpu = [
    "torch-intel-cpu",
    "torchaudio-intel-cpu",
    "torchvision-intel-cpu",
    "oneccl-intel-cpu",
    "triton==3.1.0",
    "py-libnuma",
]

# uv: treat this project as a package (see the uv `package` setting docs).
[tool.uv]
package = true

# Direct wheel URLs for dependencies listed by name in
# [project.optional-dependencies].
# NOTE(review): several dependency names here (e.g. `torch-intel-xpu`) differ
# from the distribution name in the wheel filename (`torch-…whl`); verify the
# resolver accepts that mapping.
[tool.uv.sources]
# Kernel wheels; filenames are tagged cu123torch2.5 / linux_x86_64.
attention-kernels = { url = "https://github.com/danieldk/attention-kernels/releases/download/v0.2.0.post2/attention_kernels-0.2.0.post2+cu123torch2.5-cp39-abi3-linux_x86_64.whl" }
moe-kernels = { url = "https://github.com/danieldk/moe-kernels/releases/download/v0.8.2/moe_kernels-0.8.2+cu123torch2.5-cp39-abi3-linux_x86_64.whl" }
# marlin-kernels ships one wheel per CPython minor version, selected by marker.
# NOTE(review): markers stop at 3.12 while requires-python is ">=3.9" with no
# upper bound — confirm behavior on newer interpreters.
marlin-kernels = [
    { url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.7/marlin_kernels-0.3.7+cu123torch2.5-cp39-cp39-linux_x86_64.whl", marker = "python_version == '3.9'" },
    { url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.7/marlin_kernels-0.3.7+cu123torch2.5-cp310-cp310-linux_x86_64.whl", marker = "python_version == '3.10'" },
    { url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.7/marlin_kernels-0.3.7+cu123torch2.5-cp311-cp311-linux_x86_64.whl", marker = "python_version == '3.11'" },
    { url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.7/marlin_kernels-0.3.7+cu123torch2.5-cp312-cp312-linux_x86_64.whl", marker = "python_version == '3.12'" },
]
# Intel XPU wheels. Filenames are tagged cp311 / linux_x86_64.
# NOTE(review): cp311-only, so presumably usable on Python 3.11 only — confirm.
torch-intel-xpu = { url = "https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/ipex_dev/xpu/torch-2.5.0a0%2Bgite84e33f-cp311-cp311-linux_x86_64.whl" }
torchaudio-intel-xpu = { url = "https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/ipex_dev/xpu/torchaudio-2.5.0a0%2B56bc006-cp311-cp311-linux_x86_64.whl" }
torchvision-intel-xpu = { url = "https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/ipex_dev/xpu/torchvision-0.20.0a0%2B8e8a208-cp311-cp311-linux_x86_64.whl" }
oneccl-intel-xpu = { url = "https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/ipex_dev/xpu/oneccl_bind_pt-2.5.0%2Bxpu-cp311-cp311-linux_x86_64.whl" }
# Intel CPU wheels, dated 20240815 nightly builds (cp311 / linux_x86_64).
torch-intel-cpu = { url = "https://download.pytorch.org/whl/nightly/cpu/torch-2.5.0.dev20240815%2Bcpu-cp311-cp311-linux_x86_64.whl" }
torchaudio-intel-cpu = { url = "https://download.pytorch.org/whl/nightly/cpu/torchaudio-2.4.0.dev20240815%2Bcpu-cp311-cp311-linux_x86_64.whl" }
torchvision-intel-cpu = { url = "https://download.pytorch.org/whl/nightly/cpu/torchvision-0.20.0.dev20240815%2Bcpu-cp311-cp311-linux_x86_64.whl" }

# isort: use the black-compatible profile.
[tool.isort]
profile = "black"

# pytest: register the custom `private` marker.
[tool.pytest.ini_options]
markers = [
    "private: marks tests as requiring an admin hf token (deselect with '-m \"not private\"')",
]

# setuptools: only package directories matching this pattern are included.
[tool.setuptools.packages.find]
include = ["text_generation_server*"]