From c44511220d623452677f150318bcc2f36f583f1c Mon Sep 17 00:00:00 2001
From: Nicolas Patry
Date: Fri, 17 Jan 2025 18:49:49 +0100
Subject: [PATCH] Revert "Modifying this should cache hit."

This reverts commit 46a2bde108f0ce5f8a3d652c531cfbb6625c59c1.
---
 router/src/lib.rs                    |  1 +
 server/.python-version               |  1 -
 server/gen.txt                       |  0
 server/hello.py                      |  6 --
 server/pyproject.toml.back           | 98 ----------------------------
 server/text_generation_server/cli.py |  2 -
 6 files changed, 1 insertion(+), 107 deletions(-)
 delete mode 100644 server/.python-version
 delete mode 100644 server/gen.txt
 delete mode 100644 server/hello.py
 delete mode 100644 server/pyproject.toml.back

diff --git a/router/src/lib.rs b/router/src/lib.rs
index dbd36827..56107daa 100644
--- a/router/src/lib.rs
+++ b/router/src/lib.rs
@@ -3,6 +3,7 @@ pub mod config;
 pub mod infer;
 pub mod server;
 pub mod validation;
+// Dummy change to trigger CI, TODO remove this.
 
 #[cfg(feature = "kserve")]
 mod kserve;
diff --git a/server/.python-version b/server/.python-version
deleted file mode 100644
index e4fba218..00000000
--- a/server/.python-version
+++ /dev/null
@@ -1 +0,0 @@
-3.12
diff --git a/server/gen.txt b/server/gen.txt
deleted file mode 100644
index e69de29b..00000000
diff --git a/server/hello.py b/server/hello.py
deleted file mode 100644
index e71827d7..00000000
--- a/server/hello.py
+++ /dev/null
@@ -1,6 +0,0 @@
-def main():
-    print("Hello from server!")
-
-
-if __name__ == "__main__":
-    main()
diff --git a/server/pyproject.toml.back b/server/pyproject.toml.back
deleted file mode 100644
index bae251a3..00000000
--- a/server/pyproject.toml.back
+++ /dev/null
@@ -1,98 +0,0 @@
-[tool.poetry]
-name = "text-generation-server"
-version = "2.0.5-dev0"
-description = "Text Generation Inference Python gRPC Server"
-authors = ["Olivier Dehaene "]
-
-[tool.poetry.scripts]
-text-generation-server = 'text_generation_server.cli:app'
-
-[dependencies]
-python = ">=3.9,<3.13"
-protobuf = ">=4.25.3,<6"
-grpcio = "^1.51.1"
-grpcio-status = "^1.51.1"
-grpcio-reflection = "^1.51.1"
-grpc-interceptor = "^0.15.4"
-typer = "^0.12.5"
-accelerate = {version = "^1.1.0", optional = true}
-bitsandbytes = { version = "^0.43.0", optional = true }
-safetensors = "^0.4.5"
-loguru = "^0.7.2"
-opentelemetry-api = "^1.27.0"
-opentelemetry-exporter-otlp = "^1.27.0"
-opentelemetry-instrumentation-grpc = "^0.48b0"
-hf-transfer = "^0.1.2"
-sentencepiece = "^0.2.0"
-tokenizers = "^0.20.3"
-huggingface-hub = "^0.23"
-transformers = "^4.46.2"
-einops = "^0.8.0"
-texttable = { version = "^1.6.7", optional = true }
-datasets = {version = "^2.21.0", optional = true}
-peft = {version = "^0.13.2", optional = true}
-torch = {version = "^2.4.1", optional = true}
-scipy = "^1.13.1"
-pillow = "^11.0.0"
-outlines= {version = "^0.1.3", optional = true}
-prometheus-client = ">=0.20.0,<0.22"
-py-cpuinfo = "^9.0.0"
-compressed-tensors = {version = "^0.7.1", optional = true}
-# Remove later, temporary workaround for outlines.
-numpy = "^1.26.4"
-
-attention-kernels = [
-    { url = "https://github.com/danieldk/attention-kernels/releases/download/v0.1.1/attention_kernels-0.1.1+cu123torch2.4-cp39-cp39-linux_x86_64.whl", python = "~3.9", optional = true },
-    { url = "https://github.com/danieldk/attention-kernels/releases/download/v0.1.1/attention_kernels-0.1.1+cu123torch2.4-cp310-cp310-linux_x86_64.whl", python = "~3.10", optional = true },
-    { url = "https://github.com/danieldk/attention-kernels/releases/download/v0.1.1/attention_kernels-0.1.1+cu123torch2.4-cp311-cp311-linux_x86_64.whl", python = "~3.11", optional = true },
-    { url = "https://github.com/danieldk/attention-kernels/releases/download/v0.1.1/attention_kernels-0.1.1+cu123torch2.4-cp312-cp312-linux_x86_64.whl", python = "~3.12", optional = true },
-]
-marlin-kernels = [
-    { url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.7/marlin_kernels-0.3.7+cu123torch2.4-cp39-cp39-linux_x86_64.whl", python = "~3.9", optional = true },
-    { url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.7/marlin_kernels-0.3.7+cu123torch2.4-cp310-cp310-linux_x86_64.whl", python = "~3.10", optional = true },
-    { url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.7/marlin_kernels-0.3.7+cu123torch2.4-cp311-cp311-linux_x86_64.whl", python = "~3.11", optional = true },
-    { url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.7/marlin_kernels-0.3.7+cu123torch2.4-cp312-cp312-linux_x86_64.whl", python = "~3.12", optional = true },
-]
-moe-kernels = [
-    { url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.4-cp39-cp39-linux_x86_64.whl", python = "~3.9", optional = true },
-    { url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.4-cp310-cp310-linux_x86_64.whl", python = "~3.10", optional = true },
-    { url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.4-cp311-cp311-linux_x86_64.whl", python = "~3.11", optional = true },
-    { url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.4-cp312-cp312-linux_x86_64.whl", python = "~3.12", optional = true },
-]
-rich = "^13.8.1"
-
-[project.optional-dependencies]
-torch = ["torch"]
-accelerate = ["accelerate"]
-attention = ["attention-kernels"]
-bnb = ["bitsandbytes"]
-compressed-tensors = ["compressed-tensors"]
-marlin = ["marlin-kernels"]
-moe = ["moe-kernels"]
-peft = ["peft"]
-quantize = ["texttable", "datasets", "accelerate"]
-outlines = ["outlines"]
-
-[dependency-groups]
-dev = [
-    "grpcio-tools>=1.51.1,<2.0",
-    "pytest>=7.3.0,<8"
-]
-
-
-[[tool.poetry.source]]
-name = "pytorch-gpu-src"
-url = "https://download.pytorch.org/whl/cu121"
-priority = "explicit"
-
-[tool.pytest.ini_options]
-markers = ["private: marks tests as requiring an admin hf token (deselect with '-m \"not private\"')"]
-
-[build-system]
-requires = [
-    "poetry-core>=1.0.0",
-]
-build-backend = "poetry.core.masonry.api"
-
-[tool.isort]
-profile = "black"
diff --git a/server/text_generation_server/cli.py b/server/text_generation_server/cli.py
index cc7726cd..d8155b49 100644
--- a/server/text_generation_server/cli.py
+++ b/server/text_generation_server/cli.py
@@ -9,8 +9,6 @@ from enum import Enum
 from huggingface_hub import hf_hub_download
 from text_generation_server.utils.adapter import parse_lora_adapters
 
-# Dummy change should cache hit.
-
 app = typer.Typer()