Revert "Modifying this should cache hit."

This reverts commit 46a2bde108.
2025-09-11 12:24:53 +00:00 · 2025-01-17 18:49:49 +01:00 · 2025-01-17 18:49:49 +01:00 · c44511220d
commit c44511220d
parent 46a2bde108
6 changed files with 1 addition and 107 deletions
--- a/router/src/lib.rs
+++ b/router/src/lib.rs
@@ -3,6 +3,7 @@ pub mod config;
 pub mod infer;
 pub mod server;
 pub mod validation;
 // Dummy change to trigger CI, TODO remove this.
 #[cfg(feature = "kserve")]
 mod kserve;
--- a/server/.python-version
+++ b/server/.python-version
@@ -1 +0,0 @@
 3.12
--- a/server/gen.txt
+++ b/server/gen.txt
--- a/server/hello.py
+++ b/server/hello.py
@@ -1,6 +0,0 @@
 def main():
    print("Hello from server!")
 if __name__ == "__main__":
    main()
--- a/server/pyproject.toml.back
+++ b/server/pyproject.toml.back
@@ -1,98 +0,0 @@
 [tool.poetry]
 name = "text-generation-server"
 version = "2.0.5-dev0"
 description = "Text Generation Inference Python gRPC Server"
 authors = ["Olivier Dehaene <olivier@huggingface.co>"]
 [tool.poetry.scripts]
 text-generation-server = 'text_generation_server.cli:app'
 [dependencies]
 python = ">=3.9,<3.13"
 protobuf = ">=4.25.3,<6"
 grpcio = "^1.51.1"
 grpcio-status = "^1.51.1"
 grpcio-reflection = "^1.51.1"
 grpc-interceptor = "^0.15.4"
 typer = "^0.12.5"
 accelerate = {version = "^1.1.0", optional = true}
 bitsandbytes = { version = "^0.43.0", optional = true }
 safetensors = "^0.4.5"
 loguru = "^0.7.2"
 opentelemetry-api = "^1.27.0"
 opentelemetry-exporter-otlp = "^1.27.0"
 opentelemetry-instrumentation-grpc = "^0.48b0"
 hf-transfer = "^0.1.2"
 sentencepiece = "^0.2.0"
 tokenizers = "^0.20.3"
 huggingface-hub = "^0.23"
 transformers = "^4.46.2"
 einops = "^0.8.0"
 texttable = { version = "^1.6.7", optional = true }
 datasets = {version = "^2.21.0", optional = true}
 peft = {version = "^0.13.2", optional = true}
 torch = {version = "^2.4.1", optional = true}
 scipy = "^1.13.1"
 pillow = "^11.0.0"
 outlines= {version = "^0.1.3", optional = true}
 prometheus-client = ">=0.20.0,<0.22"
 py-cpuinfo = "^9.0.0"
 compressed-tensors = {version = "^0.7.1", optional = true}
 # Remove later, temporary workaround for outlines.
 numpy = "^1.26.4"
 attention-kernels = [
  { url = "https://github.com/danieldk/attention-kernels/releases/download/v0.1.1/attention_kernels-0.1.1+cu123torch2.4-cp39-cp39-linux_x86_64.whl", python = "~3.9", optional = true },
  { url = "https://github.com/danieldk/attention-kernels/releases/download/v0.1.1/attention_kernels-0.1.1+cu123torch2.4-cp310-cp310-linux_x86_64.whl", python = "~3.10", optional = true },
  { url = "https://github.com/danieldk/attention-kernels/releases/download/v0.1.1/attention_kernels-0.1.1+cu123torch2.4-cp311-cp311-linux_x86_64.whl", python = "~3.11", optional = true },
  { url = "https://github.com/danieldk/attention-kernels/releases/download/v0.1.1/attention_kernels-0.1.1+cu123torch2.4-cp312-cp312-linux_x86_64.whl", python = "~3.12", optional = true },
 ]
 marlin-kernels = [
  { url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.7/marlin_kernels-0.3.7+cu123torch2.4-cp39-cp39-linux_x86_64.whl", python = "~3.9", optional = true },
  { url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.7/marlin_kernels-0.3.7+cu123torch2.4-cp310-cp310-linux_x86_64.whl", python = "~3.10", optional = true },
  { url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.7/marlin_kernels-0.3.7+cu123torch2.4-cp311-cp311-linux_x86_64.whl", python = "~3.11", optional = true },
  { url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.7/marlin_kernels-0.3.7+cu123torch2.4-cp312-cp312-linux_x86_64.whl", python = "~3.12", optional = true },
 ]
 moe-kernels = [
  { url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.4-cp39-cp39-linux_x86_64.whl", python = "~3.9", optional = true },
  { url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.4-cp310-cp310-linux_x86_64.whl", python = "~3.10", optional = true },
  { url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.4-cp311-cp311-linux_x86_64.whl", python = "~3.11", optional = true },
  { url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.4-cp312-cp312-linux_x86_64.whl", python = "~3.12", optional = true },
 ]
 rich = "^13.8.1"
 [project.optional-dependencies]
 torch = ["torch"]
 accelerate = ["accelerate"]
 attention = ["attention-kernels"]
 bnb = ["bitsandbytes"]
 compressed-tensors = ["compressed-tensors"]
 marlin = ["marlin-kernels"]
 moe = ["moe-kernels"]
 peft = ["peft"]
 quantize = ["texttable", "datasets", "accelerate"]
 outlines = ["outlines"]
 [dependency-groups]
 dev = [
    "grpcio-tools>=1.51.1,<2.0",
    "pytest>=7.3.0,<8"
 ]
 [[tool.poetry.source]]
 name = "pytorch-gpu-src"
 url = "https://download.pytorch.org/whl/cu121"
 priority = "explicit"
 [tool.pytest.ini_options]
 markers = ["private: marks tests as requiring an admin hf token (deselect with '-m \"not private\"')"]
 [build-system]
 requires = [
    "poetry-core>=1.0.0",
 ]
 build-backend = "poetry.core.masonry.api"
 [tool.isort]
 profile = "black"
--- a/server/text_generation_server/cli.py
+++ b/server/text_generation_server/cli.py
@@ -9,8 +9,6 @@ from enum import Enum
 from huggingface_hub import hf_hub_download
 from text_generation_server.utils.adapter import parse_lora_adapters
 # Dummy change should cache hit.
 app = typer.Typer()