text-generation-inference/server/pyproject.toml
Nicolas Patry cb747b33da
Add deepseekv3 (#2968)
* Add fp8 support moe models

add deepseekv3

format codfe'

update dockerfile

update doc

* Small modifications.

* Moe kernels 0.8.1

* Upgrade to 0.8.1

* Fixing moe import.

* Black.

* Apply suggestions from code review

Co-authored-by: Mohit Sharma <mohit21sharma.ms@gmail.com>

* Fixing Mixtral + Nits.

* Put link to ref.

* Fix other call locations.

* Scoring func `softmax` is the only one that works.

---------

Co-authored-by: Mohit Sharma <mohit21sharma.ms@gmail.com>
2025-01-30 16:40:25 +01:00

88 lines
2.8 KiB
TOML

[project]
name = "text-generation-server"
version = "2.0.5-dev0"
description = "Text Generation Inference Python gRPC Server"
readme = "README.md"
requires-python = ">=3.9"
authors = [
{name = "Olivier Dehaene", email = "olivier@huggingface.co"},
{name = "Nicolas Patry", email = "nicolas@huggingface.co"},
]
dependencies = [
"einops>=0.8.0",
"grpc-interceptor>=0.15.4",
"grpcio>=1.67.0",
"grpcio-reflection>=1.67.0",
"grpcio-status>=1.67.0",
"hf-transfer>=0.1.8",
"loguru>=0.7.3",
"numpy>=1.26,<3",
"opentelemetry-api>=1.27.0",
"opentelemetry-exporter-otlp>=1.27.0",
"opentelemetry-instrumentation-grpc>=0.50b0",
"pillow>=11.1.0",
"prometheus-client>=0.21.0",
"protobuf>=5.28.3",
"py-cpuinfo>=9.0.0",
"rich>=13.8.1",
"safetensors>=0.4.5",
"scipy>=1.13.1",
"sentencepiece>=0.2.0",
"tokenizers>=0.20.3",
"typer>=0.15.1",
]
[project.scripts]
text-generation-server = "text_generation_server.cli:app"
[project.optional-dependencies]
accelerate = [
"accelerate>=1.2.1,<2",
]
bnb = [
"bitsandbytes>=0.45.0",
]
compressed-tensors = [
"compressed-tensors>=0.9.0",
]
peft = [
"peft>=0.14.0",
]
outlines = [
"outlines>=0.1.13",
]
dev = [
"grpcio-tools>=1.51.1,<2.0",
"pytest>=7.3.0,<8"
]
quantize = [
"texttable>=1.6.7,<2",
"datasets>=2.21,<3",
]
moe = [ "moe-kernels" ]
attention = [ "attention-kernels" ]
marlin = [ "marlin-kernels" ]
gen = [
"grpcio-tools>=1.69.0",
"mypy-protobuf>=3.6.0",
]
[tool.uv.sources]
attention-kernels.url = "https://github.com/danieldk/attention-kernels/releases/download/v0.2.0.post2/attention_kernels-0.2.0.post2+cu123torch2.5-cp39-abi3-linux_x86_64.whl"
marlin-kernels = [
{ url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.7/marlin_kernels-0.3.7+cu123torch2.5-cp39-cp39-linux_x86_64.whl", marker = "python_version == '3.9'" },
{ url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.7/marlin_kernels-0.3.7+cu123torch2.5-cp310-cp310-linux_x86_64.whl", marker = "python_version == '3.10'" },
{ url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.7/marlin_kernels-0.3.7+cu123torch2.5-cp311-cp311-linux_x86_64.whl", marker = "python_version == '3.11'" },
{ url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.7/marlin_kernels-0.3.7+cu123torch2.5-cp312-cp312-linux_x86_64.whl", marker = "python_version == '3.12'" },
]
moe-kernels.url = "https://github.com/danieldk/moe-kernels/releases/download/v0.8.1/moe_kernels-0.8.1+cu123torch2.5-cp39-abi3-linux_x86_64.whl"
[tool.pytest.ini_options]
markers = ["private: marks tests as requiring an admin hf token (deselect with '-m \"not private\"')"]
[tool.isort]
profile = "black"
[tool.setuptools.packages.find]
include = ["text_generation_server*"]