fix: avoid library name collision and add core deps to build

2025-10-14 17:35:24 +00:00 · 2024-05-20 22:29:13 +00:00 · 2024-05-20 22:29:13 +00:00 · 38688ba45d
commit 38688ba45d
parent 70b27c4b2a
9 changed files with 4657 additions and 132 deletions
--- a/router/client/build.rs
+++ b/router/client/build.rs
@@ -2,18 +2,20 @@ use std::fs;

 fn main() -> Result<(), Box<dyn std::error::Error>> {
    println!("cargo:rerun-if-changed=../../proto/generate.proto");
-    fs::create_dir("src/pb").unwrap_or(());
+    // TODO: protobuf codegen is disabled for every build by the `if false` below; should it only be skipped when building the Python library?
+    if false {
+        fs::create_dir("src/pb").unwrap_or(());

-    let mut config = prost_build::Config::new();
-    config.protoc_arg("--experimental_allow_proto3_optional");
-
-    tonic_build::configure()
-        .build_client(true)
-        .build_server(false)
-        .out_dir("src/pb")
-        .include_file("mod.rs")
-        .compile_with_config(config, &["../../proto/generate.proto"], &["../../proto"])
-        .unwrap_or_else(|e| panic!("protobuf compilation failed: {e}"));
+        let mut config = prost_build::Config::new();
+        config.protoc_arg("--experimental_allow_proto3_optional");

+        tonic_build::configure()
+            .build_client(true)
+            .build_server(false)
+            .out_dir("src/pb")
+            .include_file("mod.rs")
+            .compile_with_config(config, &["../../proto/generate.proto"], &["../../proto"])
+            .unwrap_or_else(|e| panic!("protobuf compilation failed: {e}"));
+    }
    Ok(())
 }
--- a/router/src/lib.rs
+++ b/router/src/lib.rs
@@ -242,8 +242,6 @@ pub async fn internal_main_args() -> Result<(), RouterError> {
        .skip_while(|a| a.contains("python"))
        .collect();
    let args = Args::parse_from(args);
-
-    println!("{:?}", args);
    let out = internal_main(
        args.max_concurrent_requests,
        args.max_best_of,
@@ -309,10 +307,7 @@ pub async fn internal_main(
    max_client_batch_size: usize,
 ) -> Result<(), RouterError> {
    // Launch Tokio runtime
-    if otlp_endpoint.is_some() {
-        // Initialize if OpenTelemetry is enabled
-        init_logging(otlp_endpoint, json_output);
-    }
+    init_logging(otlp_endpoint, json_output);

    // Validate args
    if max_input_tokens >= max_total_tokens {
--- a/tgi/Cargo.lock
+++ b/tgi/Cargo.lock
--- a/tgi/Cargo.toml
+++ b/tgi/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "tgi"
-version = "0.1.0"
+version = "0.0.1"
 edition = "2021"

 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
--- a/tgi/Makefile
+++ b/tgi/Makefile
@@ -1,6 +1,6 @@

 build:
-	maturin build
+	maturin build --sdist --interpreter python3.11

 # by commenting out the line in .gitignore, we can include the generated files in the git repository
 comment-gitignore:
@@ -9,9 +9,16 @@ comment-gitignore:
 remove-comment-gitignore:
 	sed -i 's/^# \(.*\)/\1/' ../server/text_generation_server/pb/.gitignore

+comment-gitignore2:
+	sed -i 's/^\(.*\)/# \1/' ../router/client/src/pb/.gitignore
+
+remove-comment-gitignore2:
+	sed -i 's/^# \(.*\)/\1/' ../router/client/src/pb/.gitignore
+
+
 library-install:
 	pip install -e .

-install: comment-gitignore build library-install remove-comment-gitignore
+install: comment-gitignore comment-gitignore2 build library-install remove-comment-gitignore remove-comment-gitignore2

 quick-install: build library-install
--- a/tgi/pyproject.toml
+++ b/tgi/pyproject.toml
@@ -3,25 +3,70 @@ requires = ["maturin>=1.5,<2.0"]
 build-backend = "maturin"

 [project]
+dependencies = [
+  "backoff==2.2.1",
+  "certifi==2024.2.2",
+  "charset-normalizer==3.3.2",
+  "click==8.1.7",
+  "colorama==0.4.6",
+  "deprecated==1.2.14",
+  "einops==0.6.1",
+  "filelock==3.14.0",
+  "fsspec==2024.3.1",
+  "googleapis-common-protos==1.63.0",
+  "grpc-interceptor==0.15.4",
+  "grpcio-reflection==1.62.2",
+  "grpcio-status==1.62.2",
+  "grpcio==1.63.0",
+  "hf-transfer==0.1.6",
+  "huggingface-hub==0.23.0",
+  "idna==3.7",
+  "loguru==0.6.0",
+  "numpy==1.26.4",
+  "opentelemetry-api==1.15.0",
+  "opentelemetry-exporter-otlp-proto-grpc==1.15.0",
+  "opentelemetry-exporter-otlp-proto-http==1.15.0",
+  "opentelemetry-exporter-otlp==1.15.0",
+  "opentelemetry-instrumentation-grpc==0.36b0",
+  "opentelemetry-instrumentation==0.36b0",
+  "opentelemetry-proto==1.15.0",
+  "opentelemetry-sdk==1.15.0",
+  "opentelemetry-semantic-conventions==0.36b0",
+  "packaging==24.0",
+  "pillow==10.3.0",
+  "prometheus-client==0.20.0",
+  "protobuf==4.25.3",
+  "py-cpuinfo==9.0.0",
+  "pyyaml==6.0.1",
+  "regex==2024.5.10",
+  "requests==2.31.0",
+  "safetensors==0.4.3",
+  "scipy==1.13.0",
+  "sentencepiece==0.1.99",
+  "setuptools==69.5.1",
+  "tokenizers==0.19.1",
+  "tqdm==4.66.4",
+  "transformers @ git+https://github.com/huggingface/transformers.git@b8aee2e918d7ba2d5e9e80162ae26b4806873307",
+  "typer==0.6.1",
+  "typing-extensions==4.11.0",
+  "urllib3==2.2.1",
+  "win32-setctime==1.1.0",
+  "wrapt==1.16.0",
+  "torch==2.1.0",
+  "peft==0.10.0",
+  "outlines==0.0.34",
+]
 name = "tgi"
 requires-python = ">=3.8"
 classifiers = [
-    "Programming Language :: Rust",
-    "Programming Language :: Python :: Implementation :: CPython",
-    "Programming Language :: Python :: Implementation :: PyPy",
+  "Programming Language :: Rust",
+  "Programming Language :: Python :: Implementation :: CPython",
+  "Programming Language :: Python :: Implementation :: PyPy",
 ]
 dynamic = ["version"]
 [tool.maturin]
 features = ["pyo3/extension-module"]
-
-# TODO: Compile protos before installing text_generation_server
-
-# pip install grpcio-tools==1.51.1 mypy-protobuf==3.4.0 'types-protobuf>=3.20.4' --no-cache-dir
-# mkdir text_generation_server/pb || true
-# python -m grpc_tools.protoc -I../proto --python_out=text_generation_server/pb \
-#     --grpc_python_out=text_generation_server/pb --mypy_out=text_generation_server/pb ../proto/generate.proto
-# find text_generation_server/pb/ -type f -name "*.py" -print0 -exec sed -i -e 's/^\(import.*pb2\)/from . \1/g' {} \;
-# touch text_generation_server/pb/__init__.py
+module-name = "tgi._tgi"

 # NOTE: currently we comment out server/text_generation_server/pb/.gitignore
 # which includes the generated python files. These then get included in the
--- a/tgi/requirements.txt
+++ b/tgi/requirements.txt
@@ -1,90 +0,0 @@
-accelerate==0.30.1
-annotated-types==0.6.0
-attrs==23.2.0
-beautifulsoup4==4.12.3
-certifi==2024.2.2
-charset-normalizer==3.3.2
-click==8.1.7
-cloudpickle==3.0.0
-Deprecated==1.2.14
-diskcache==5.6.3
-einops==0.8.0
-filelock==3.14.0
-fsspec==2024.5.0
-google==3.0.0
-googleapis-common-protos==1.56.1
-grpc-interceptor==0.15.4
-grpcio==1.63.0
-grpcio-reflection==1.63.0
-grpcio-status==1.63.0
-grpcio-tools==1.63.0
-huggingface-hub==0.23.0
-idna==3.7
-importlib-metadata==7.0.0
-interegular==0.3.3
-Jinja2==3.1.4
-jsonschema==4.22.0
-jsonschema-specifications==2023.12.1
-lark==1.1.9
-llvmlite==0.42.0
-loguru==0.7.2
-markdown-it-py==3.0.0
-MarkupSafe==2.1.5
-mdurl==0.1.2
-mpmath==1.3.0
-nest-asyncio==1.6.0
-networkx==3.3
-numba==0.59.1
-numpy==1.26.4
-nvidia-cublas-cu12==12.1.3.1
-nvidia-cuda-cupti-cu12==12.1.105
-nvidia-cuda-nvrtc-cu12==12.1.105
-nvidia-cuda-runtime-cu12==12.1.105
-nvidia-cudnn-cu12==8.9.2.26
-nvidia-cufft-cu12==11.0.2.54
-nvidia-curand-cu12==10.3.2.106
-nvidia-cusolver-cu12==11.4.5.107
-nvidia-cusparse-cu12==12.1.0.106
-nvidia-nccl-cu12==2.20.5
-nvidia-nvjitlink-cu12==12.4.127
-nvidia-nvtx-cu12==12.1.105
-opentelemetry-api==1.24.0
-opentelemetry-exporter-otlp==1.24.0
-opentelemetry-exporter-otlp-proto-common==1.24.0
-opentelemetry-exporter-otlp-proto-grpc==1.24.0
-opentelemetry-exporter-otlp-proto-http==1.24.0
-opentelemetry-instrumentation==0.45b0
-opentelemetry-instrumentation-grpc==0.45b0
-opentelemetry-proto==1.24.0
-opentelemetry-sdk==1.24.0
-opentelemetry-semantic-conventions==0.45b0
-outlines==0.0.41
-packaging==24.0
-peft==0.11.1
-pillow==10.3.0
-protobuf==4.25.3
-psutil==5.9.8
-pydantic==2.7.1
-pydantic_core==2.18.2
-Pygments==2.18.0
-PyYAML==6.0.1
-referencing==0.35.1
-regex==2024.5.15
-requests==2.31.0
-rich==13.7.1
-rpds-py==0.18.1
-safetensors==0.4.3
-shellingham==1.5.4
-soupsieve==2.5
-sympy==1.12
-tgi @ file:///tmp/tgi-0.1.0-cp311-cp311-manylinux_2_31_x86_64.whl#sha256=9e3906d27588e58d9771c1ff8d7b90ab09a35c2928f8e04c94ac74a39bab1a5c
-tokenizers==0.19.1
-torch==2.3.0
-tqdm==4.66.4
-transformers==4.41.0
-triton==2.3.0
-typer==0.12.3
-typing_extensions==4.11.0
-urllib3==2.2.1
-wrapt==1.16.0
-zipp==3.18.2
--- a/tgi/src/lib.rs
+++ b/tgi/src/lib.rs
@@ -192,7 +192,7 @@ fn rust_launcher_cli(_py: Python<'_>) -> PyResult<String> {
 }

 #[pymodule]
-fn tgi(_py: Python, m: &PyModule) -> PyResult<()> {
+fn _tgi(_py: Python, m: &PyModule) -> PyResult<()> {
    m.add_function(wrap_pyfunction!(rust_sleep, m)?)?;
    m.add_function(wrap_pyfunction!(rust_router, m)?)?;
    m.add_function(wrap_pyfunction!(rust_launcher, m)?)?;
--- a/tgi/tgi/__init__.py
+++ b/tgi/tgi/__init__.py
@@ -1,27 +1,25 @@
-from .tgi import *
 import threading
-from tgi import rust_router, rust_launcher, rust_launcher_cli
+from tgi import _tgi
 import asyncio
 from dataclasses import dataclass, asdict
 from text_generation_server.cli import app

 # re-export the native extension module's docstring and, if it defines one, its __all__ list
-__doc__ = tgi.__doc__
-if hasattr(tgi, "__all__"):
-    __all__ = tgi.__all__
+__doc__ = _tgi.__doc__
+if hasattr(_tgi, "__all__"):
+    __all__ = _tgi.__all__


-# wrap the app from text_generation_server.cli in a function
 def text_generation_server_cli_main():
    app()


 def text_generation_router_cli_main():
-    rust_router()
+    _tgi.rust_router()


 def text_generation_launcher_cli_main():
-    rust_launcher_cli()
+    _tgi.rust_launcher_cli()


@dataclass
@@ -88,7 +86,7 @@ class TGI(object):
        print(args)
        args = Args(**args)
        try:
-            await rust_launcher(
+            await _tgi.rust_launcher(
                args.model_id,
                args.revision,
                args.validation_workers,