Modifying this should result in a cache hit.

2025-09-08 19:04:52 +00:00 · 2025-01-17 18:49:09 +01:00 · 2025-01-17 18:49:09 +01:00 · 46a2bde108
commit 46a2bde108
parent 176f7839a6
6 changed files with 107 additions and 1 deletion
--- a/router/src/lib.rs
+++ b/router/src/lib.rs
@ -3,7 +3,6 @@ pub mod config;
 pub mod infer;
 pub mod server;
 pub mod validation;
-// Dummy change to trigger CI, TODO remove this.

 #[cfg(feature = "kserve")]
 mod kserve;
--- a/server/.python-version
+++ b/server/.python-version
@ -0,0 +1 @@
+3.12
--- a/server/gen.txt
+++ b/server/gen.txt
--- a/server/hello.py
+++ b/server/hello.py
@ -0,0 +1,6 @@
+def main():
+    print("Hello from server!")
+
+
+if __name__ == "__main__":
+    main()
--- a/server/pyproject.toml.back
+++ b/server/pyproject.toml.back
@ -0,0 +1,98 @@
+[tool.poetry]
+name = "text-generation-server"
+version = "2.0.5-dev0"
+description = "Text Generation Inference Python gRPC Server"
+authors = ["Olivier Dehaene <olivier@huggingface.co>"]
+
+[tool.poetry.scripts]
+text-generation-server = 'text_generation_server.cli:app'
+
+[dependencies]
+python = ">=3.9,<3.13"
+protobuf = ">=4.25.3,<6"
+grpcio = "^1.51.1"
+grpcio-status = "^1.51.1"
+grpcio-reflection = "^1.51.1"
+grpc-interceptor = "^0.15.4"
+typer = "^0.12.5"
+accelerate = {version = "^1.1.0", optional = true}
+bitsandbytes = { version = "^0.43.0", optional = true }
+safetensors = "^0.4.5"
+loguru = "^0.7.2"
+opentelemetry-api = "^1.27.0"
+opentelemetry-exporter-otlp = "^1.27.0"
+opentelemetry-instrumentation-grpc = "^0.48b0"
+hf-transfer = "^0.1.2"
+sentencepiece = "^0.2.0"
+tokenizers = "^0.20.3"
+huggingface-hub = "^0.23"
+transformers = "^4.46.2"
+einops = "^0.8.0"
+texttable = { version = "^1.6.7", optional = true }
+datasets = {version = "^2.21.0", optional = true}
+peft = {version = "^0.13.2", optional = true}
+torch = {version = "^2.4.1", optional = true}
+scipy = "^1.13.1"
+pillow = "^11.0.0"
+outlines= {version = "^0.1.3", optional = true}
+prometheus-client = ">=0.20.0,<0.22"
+py-cpuinfo = "^9.0.0"
+compressed-tensors = {version = "^0.7.1", optional = true}
+# Remove later, temporary workaround for outlines.
+numpy = "^1.26.4"
+
+attention-kernels = [
+  { url = "https://github.com/danieldk/attention-kernels/releases/download/v0.1.1/attention_kernels-0.1.1+cu123torch2.4-cp39-cp39-linux_x86_64.whl", python = "~3.9", optional = true },
+  { url = "https://github.com/danieldk/attention-kernels/releases/download/v0.1.1/attention_kernels-0.1.1+cu123torch2.4-cp310-cp310-linux_x86_64.whl", python = "~3.10", optional = true },
+  { url = "https://github.com/danieldk/attention-kernels/releases/download/v0.1.1/attention_kernels-0.1.1+cu123torch2.4-cp311-cp311-linux_x86_64.whl", python = "~3.11", optional = true },
+  { url = "https://github.com/danieldk/attention-kernels/releases/download/v0.1.1/attention_kernels-0.1.1+cu123torch2.4-cp312-cp312-linux_x86_64.whl", python = "~3.12", optional = true },
+]
+marlin-kernels = [
+  { url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.7/marlin_kernels-0.3.7+cu123torch2.4-cp39-cp39-linux_x86_64.whl", python = "~3.9", optional = true },
+  { url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.7/marlin_kernels-0.3.7+cu123torch2.4-cp310-cp310-linux_x86_64.whl", python = "~3.10", optional = true },
+  { url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.7/marlin_kernels-0.3.7+cu123torch2.4-cp311-cp311-linux_x86_64.whl", python = "~3.11", optional = true },
+  { url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.7/marlin_kernels-0.3.7+cu123torch2.4-cp312-cp312-linux_x86_64.whl", python = "~3.12", optional = true },
+]
+moe-kernels = [
+  { url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.4-cp39-cp39-linux_x86_64.whl", python = "~3.9", optional = true },
+  { url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.4-cp310-cp310-linux_x86_64.whl", python = "~3.10", optional = true },
+  { url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.4-cp311-cp311-linux_x86_64.whl", python = "~3.11", optional = true },
+  { url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.4-cp312-cp312-linux_x86_64.whl", python = "~3.12", optional = true },
+]
+rich = "^13.8.1"
+
+[project.optional-dependencies]
+torch = ["torch"]
+accelerate = ["accelerate"]
+attention = ["attention-kernels"]
+bnb = ["bitsandbytes"]
+compressed-tensors = ["compressed-tensors"]
+marlin = ["marlin-kernels"]
+moe = ["moe-kernels"]
+peft = ["peft"]
+quantize = ["texttable", "datasets", "accelerate"]
+outlines = ["outlines"]
+
+[dependency-groups]
+dev = [
+    "grpcio-tools>=1.51.1,<2.0",
+    "pytest>=7.3.0,<8"
+]
+
+
+[[tool.poetry.source]]
+name = "pytorch-gpu-src"
+url = "https://download.pytorch.org/whl/cu121"
+priority = "explicit"
+
+[tool.pytest.ini_options]
+markers = ["private: marks tests as requiring an admin hf token (deselect with '-m \"not private\"')"]
+
+[build-system]
+requires = [
+    "poetry-core>=1.0.0",
+]
+build-backend = "poetry.core.masonry.api"
+
+[tool.isort]
+profile = "black"
--- a/server/text_generation_server/cli.py
+++ b/server/text_generation_server/cli.py
@ -9,6 +9,8 @@ from enum import Enum
 from huggingface_hub import hf_hub_download
 from text_generation_server.utils.adapter import parse_lora_adapters

+# Dummy change should cache hit.
+

 app = typer.Typer()