text-generation-inference/server/pyproject.toml
Daniël de Kok 571ac9b507
Use kernels from the kernel hub (#2988)
* Use Hub kernels for Marlin and cutlass quantization kernels

* Use hub kernels for MoE/GPTQ-Marlin MoE

* Use attention kernels from the Hub

* Cache the kernels in the Docker image

* Update moe kernels

* Support loading local kernels for development

* Support latest moe kernels

* Update to moe 0.1.1

* CI: download locked kernels for server tests

* Fixup some imports

* CI: activate venv

* Fix unused imports

* Nix: add attention/moe/quantization kernels

* Update hf-kernels to 0.1.5

* Update kernels

* Update tgi-nix flake for hf-kernels

* Fix EOF

* Take `load_kernel` out of a frequently-called function

* Hoist another case of kernel loading out of a somewhat hot function

* marlin-kernels -> quantization

* attention -> paged-attention

* EOF fix

* Update hf-kernels, fixup Docker

* ipex fix

* Remove outdated TODO
2025-02-10 19:19:25 +01:00

89 lines
2.0 KiB
TOML

# Core metadata for the Python gRPC server package.
[project]
name = "text-generation-server"
version = "2.0.5-dev0"
description = "Text Generation Inference Python gRPC Server"
readme = "README.md"
requires-python = ">=3.9"
authors = [
    { name = "Olivier Dehaene", email = "olivier@huggingface.co" },
    { name = "Nicolas Patry", email = "nicolas@huggingface.co" },
]
# Requirements installed with the base package. Feature-specific extras are
# declared separately under [project.optional-dependencies].
dependencies = [
    "einops>=0.8.0",
    "grpc-interceptor>=0.15.4",
    "grpcio>=1.67.0",
    "grpcio-reflection>=1.67.0",
    "grpcio-status>=1.67.0",
    "hf-kernels>=0.1.5",
    "hf-transfer>=0.1.8",
    "loguru>=0.7.3",
    "numpy>=1.26,<3",
    "opentelemetry-api>=1.27.0",
    "opentelemetry-exporter-otlp>=1.27.0",
    "opentelemetry-instrumentation-grpc>=0.50b0",
    "pillow>=11.1.0",
    "prometheus-client>=0.21.0",
    "protobuf>=5.28.3",
    "py-cpuinfo>=9.0.0",
    "rich>=13.8.1",
    "safetensors>=0.4.5",
    "scipy>=1.13.1",
    "sentencepiece>=0.2.0",
    "tokenizers>=0.20.3",
    "typer>=0.15.1",
    "transformers>=4.48.0",
]
# Build backend configuration: packages listed in `requires` are installed
# into the isolated build environment before setuptools builds the project.
# The hf-kernels floor is kept in step with the ">=0.1.5" requirement in
# [project].dependencies so the build environment and the installed server
# agree on the minimum supported version.
# NOTE(review): presumably hf-kernels is needed at build time to process
# [tool.kernels.dependencies] — confirm against the hf-kernels documentation.
[build-system]
requires = ["hf-kernels>=0.1.5", "setuptools"]
build-backend = "setuptools.build_meta"
# Version constraints for the kernel packages, one entry per kernel.
# Keys must stay quoted because they contain a slash.
[tool.kernels.dependencies]
"kernels-community/moe" = ">=0.1.1"
"kernels-community/paged-attention" = ">=0.0.2"
"kernels-community/quantization" = ">=0.0.3"
# Console entry point: the `text-generation-server` command maps to the `app`
# object in the `text_generation_server.cli` module.
[project.scripts]
text-generation-server = "text_generation_server.cli:app"
# Optional extras, installable as `text-generation-server[<extra>]`.
# NOTE(review): "dev" and "gen" declare different grpcio-tools floors
# (1.51.1 vs 1.69.0) — confirm this difference is intentional.
[project.optional-dependencies]
accelerate = [
    "accelerate>=1.2.1,<2",
]
bnb = [
    "bitsandbytes>=0.45.0",
]
compressed-tensors = [
    "compressed-tensors>=0.9.0",
]
peft = [
    "peft>=0.14.0",
]
outlines = [
    "outlines>=0.1.13",
]
dev = [
    "grpcio-tools>=1.51.1,<2.0",
    "pytest>=7.3.0,<8",
]
quantize = [
    "texttable>=1.6.7,<2",
    "datasets>=2.21,<3",
]
gen = [
    "grpcio-tools>=1.69.0",
    "mypy-protobuf>=3.6.0",
]
# Declares the custom `private` marker; the marker's description text doubles
# as the usage hint for deselecting those tests.
[tool.pytest.ini_options]
markers = [
    "private: marks tests as requiring an admin hf token (deselect with '-m \"not private\"')",
]
# isort configuration: select isort's built-in "black" profile.
[tool.isort]
profile = "black"
# uv configuration: mark this project as a package.
# NOTE(review): per the uv docs this should make uv build and install the
# project itself rather than only its dependencies — confirm.
[tool.uv]
package = true
# setuptools package discovery: only include packages whose names match the
# `text_generation_server*` pattern.
[tool.setuptools.packages.find]
include = ["text_generation_server*"]