diff --git a/router/src/lib.rs b/router/src/lib.rs index 56107daa..dbd36827 100644 --- a/router/src/lib.rs +++ b/router/src/lib.rs @@ -3,7 +3,6 @@ pub mod config; pub mod infer; pub mod server; pub mod validation; -// Dummy change to trigger CI, TODO remove this. #[cfg(feature = "kserve")] mod kserve; diff --git a/server/.python-version b/server/.python-version new file mode 100644 index 00000000..e4fba218 --- /dev/null +++ b/server/.python-version @@ -0,0 +1 @@ +3.12 diff --git a/server/gen.txt b/server/gen.txt new file mode 100644 index 00000000..e69de29b diff --git a/server/hello.py b/server/hello.py new file mode 100644 index 00000000..e71827d7 --- /dev/null +++ b/server/hello.py @@ -0,0 +1,6 @@ +def main(): + print("Hello from server!") + + +if __name__ == "__main__": + main() diff --git a/server/pyproject.toml.back b/server/pyproject.toml.back new file mode 100644 index 00000000..bae251a3 --- /dev/null +++ b/server/pyproject.toml.back @@ -0,0 +1,98 @@ +[tool.poetry] +name = "text-generation-server" +version = "2.0.5-dev0" +description = "Text Generation Inference Python gRPC Server" +authors = ["Olivier Dehaene "] + +[tool.poetry.scripts] +text-generation-server = 'text_generation_server.cli:app' + +[dependencies] +python = ">=3.9,<3.13" +protobuf = ">=4.25.3,<6" +grpcio = "^1.51.1" +grpcio-status = "^1.51.1" +grpcio-reflection = "^1.51.1" +grpc-interceptor = "^0.15.4" +typer = "^0.12.5" +accelerate = {version = "^1.1.0", optional = true} +bitsandbytes = { version = "^0.43.0", optional = true } +safetensors = "^0.4.5" +loguru = "^0.7.2" +opentelemetry-api = "^1.27.0" +opentelemetry-exporter-otlp = "^1.27.0" +opentelemetry-instrumentation-grpc = "^0.48b0" +hf-transfer = "^0.1.2" +sentencepiece = "^0.2.0" +tokenizers = "^0.20.3" +huggingface-hub = "^0.23" +transformers = "^4.46.2" +einops = "^0.8.0" +texttable = { version = "^1.6.7", optional = true } +datasets = {version = "^2.21.0", optional = true} +peft = {version = "^0.13.2", optional = true} +torch = {version = "^2.4.1", optional = true} +scipy = "^1.13.1" +pillow = "^11.0.0" +outlines= {version = "^0.1.3", optional = true} +prometheus-client = ">=0.20.0,<0.22" +py-cpuinfo = "^9.0.0" +compressed-tensors = {version = "^0.7.1", optional = true} +# Remove later, temporary workaround for outlines. +numpy = "^1.26.4" + +attention-kernels = [ + { url = "https://github.com/danieldk/attention-kernels/releases/download/v0.1.1/attention_kernels-0.1.1+cu123torch2.4-cp39-cp39-linux_x86_64.whl", python = "~3.9", optional = true }, + { url = "https://github.com/danieldk/attention-kernels/releases/download/v0.1.1/attention_kernels-0.1.1+cu123torch2.4-cp310-cp310-linux_x86_64.whl", python = "~3.10", optional = true }, + { url = "https://github.com/danieldk/attention-kernels/releases/download/v0.1.1/attention_kernels-0.1.1+cu123torch2.4-cp311-cp311-linux_x86_64.whl", python = "~3.11", optional = true }, + { url = "https://github.com/danieldk/attention-kernels/releases/download/v0.1.1/attention_kernels-0.1.1+cu123torch2.4-cp312-cp312-linux_x86_64.whl", python = "~3.12", optional = true }, +] +marlin-kernels = [ + { url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.7/marlin_kernels-0.3.7+cu123torch2.4-cp39-cp39-linux_x86_64.whl", python = "~3.9", optional = true }, + { url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.7/marlin_kernels-0.3.7+cu123torch2.4-cp310-cp310-linux_x86_64.whl", python = "~3.10", optional = true }, + { url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.7/marlin_kernels-0.3.7+cu123torch2.4-cp311-cp311-linux_x86_64.whl", python = "~3.11", optional = true }, + { url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.7/marlin_kernels-0.3.7+cu123torch2.4-cp312-cp312-linux_x86_64.whl", python = "~3.12", optional = true }, +] +moe-kernels = [ + { url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.4-cp39-cp39-linux_x86_64.whl", python = "~3.9", optional = true }, + { url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.4-cp310-cp310-linux_x86_64.whl", python = "~3.10", optional = true }, + { url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.4-cp311-cp311-linux_x86_64.whl", python = "~3.11", optional = true }, + { url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.4-cp312-cp312-linux_x86_64.whl", python = "~3.12", optional = true }, +] +rich = "^13.8.1" + +[project.optional-dependencies] +torch = ["torch"] +accelerate = ["accelerate"] +attention = ["attention-kernels"] +bnb = ["bitsandbytes"] +compressed-tensors = ["compressed-tensors"] +marlin = ["marlin-kernels"] +moe = ["moe-kernels"] +peft = ["peft"] +quantize = ["texttable", "datasets", "accelerate"] +outlines = ["outlines"] + +[dependency-groups] +dev = [ + "grpcio-tools>=1.51.1,<2.0", + "pytest>=7.3.0,<8" +] + + +[[tool.poetry.source]] +name = "pytorch-gpu-src" +url = "https://download.pytorch.org/whl/cu121" +priority = "explicit" + +[tool.pytest.ini_options] +markers = ["private: marks tests as requiring an admin hf token (deselect with '-m \"not private\"')"] + +[build-system] +requires = [ + "poetry-core>=1.0.0", +] +build-backend = "poetry.core.masonry.api" + +[tool.isort] +profile = "black" diff --git a/server/text_generation_server/cli.py b/server/text_generation_server/cli.py index d8155b49..cc7726cd 100644 --- a/server/text_generation_server/cli.py +++ b/server/text_generation_server/cli.py @@ -9,6 +9,8 @@ from enum import Enum from huggingface_hub import hf_hub_download from text_generation_server.utils.adapter import parse_lora_adapters +# Dummy change should cache hit. + app = typer.Typer()