fix: avoid library name collision and add core deps to build

drbh 2024-05-20 22:29:13 +00:00
parent 70b27c4b2a
commit 38688ba45d
9 changed files with 4657 additions and 132 deletions

View File

@@ -2,6 +2,8 @@ use std::fs;
 fn main() -> Result<(), Box<dyn std::error::Error>> {
     println!("cargo:rerun-if-changed=../../proto/generate.proto");
+    // TODO: avoid this when building python library?
+    if false {
     fs::create_dir("src/pb").unwrap_or(());
     let mut config = prost_build::Config::new();
@@ -14,6 +16,6 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
         .include_file("mod.rs")
         .compile_with_config(config, &["../../proto/generate.proto"], &["../../proto"])
         .unwrap_or_else(|e| panic!("protobuf compilation failed: {e}"));
+    }
     Ok(())
 }

View File

@@ -242,8 +242,6 @@ pub async fn internal_main_args() -> Result<(), RouterError> {
         .skip_while(|a| a.contains("python"))
         .collect();
     let args = Args::parse_from(args);
-    println!("{:?}", args);
     let out = internal_main(
         args.max_concurrent_requests,
         args.max_best_of,
@@ -309,10 +307,7 @@ pub async fn internal_main(
     max_client_batch_size: usize,
 ) -> Result<(), RouterError> {
     // Launch Tokio runtime
-    if otlp_endpoint.is_some() {
-        // Initialize if OpenTelemetry is enabled
     init_logging(otlp_endpoint, json_output);
-    }
     // Validate args
     if max_input_tokens >= max_total_tokens {

tgi/Cargo.lock (generated, new file, +4568 lines)

File diff suppressed because it is too large.

View File

@@ -1,6 +1,6 @@
 [package]
 name = "tgi"
-version = "0.1.0"
+version = "0.0.1"
 edition = "2021"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

View File

@ -1,6 +1,6 @@
build: build:
maturin build maturin build --sdist --interpreter python3.11
# by commenting out the line in .gitignore, we can include the generated files in the git repository # by commenting out the line in .gitignore, we can include the generated files in the git repository
comment-gitignore: comment-gitignore:
@ -9,9 +9,16 @@ comment-gitignore:
remove-comment-gitignore: remove-comment-gitignore:
sed -i 's/^# \(.*\)/\1/' ../server/text_generation_server/pb/.gitignore sed -i 's/^# \(.*\)/\1/' ../server/text_generation_server/pb/.gitignore
comment-gitignore2:
sed -i 's/^\(.*\)/# \1/' ../router/client/src/pb/.gitignore
remove-comment-gitignore2:
sed -i 's/^# \(.*\)/\1/' ../router/client/src/pb/.gitignore
library-install: library-install:
pip install -e . pip install -e .
install: comment-gitignore build library-install remove-comment-gitignore install: comment-gitignore comment-gitignore2 build library-install remove-comment-gitignore remove-comment-gitignore2
quick-install: build library-install quick-install: build library-install
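
For reference, a rough Python equivalent of what the new comment-gitignore2 / remove-comment-gitignore2 targets do (the .gitignore path is taken from the Makefile above; this is an illustrative sketch, not part of the commit):

# Illustrative sketch only: mirrors the sed one-liners in the new
# comment-gitignore2 / remove-comment-gitignore2 targets above.
from pathlib import Path

PB_GITIGNORE = Path("../router/client/src/pb/.gitignore")

def comment_out(path: Path) -> None:
    # sed -i 's/^\(.*\)/# \1/' : prefix every line with "# "
    lines = path.read_text().splitlines(keepends=True)
    path.write_text("".join(f"# {line}" for line in lines))

def uncomment(path: Path) -> None:
    # sed -i 's/^# \(.*\)/\1/' : strip a leading "# " where present
    lines = path.read_text().splitlines(keepends=True)
    path.write_text("".join(l[2:] if l.startswith("# ") else l for l in lines))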

View File

@@ -3,6 +3,59 @@ requires = ["maturin>=1.5,<2.0"]
 build-backend = "maturin"

 [project]
+dependencies = [
+    "backoff==2.2.1",
+    "certifi==2024.2.2",
+    "charset-normalizer==3.3.2",
+    "click==8.1.7",
+    "colorama==0.4.6",
+    "deprecated==1.2.14",
+    "einops==0.6.1",
+    "filelock==3.14.0",
+    "fsspec==2024.3.1",
+    "googleapis-common-protos==1.63.0",
+    "grpc-interceptor==0.15.4",
+    "grpcio-reflection==1.62.2",
+    "grpcio-status==1.62.2",
+    "grpcio==1.63.0",
+    "hf-transfer==0.1.6",
+    "huggingface-hub==0.23.0",
+    "idna==3.7",
+    "loguru==0.6.0",
+    "numpy==1.26.4",
+    "opentelemetry-api==1.15.0",
+    "opentelemetry-exporter-otlp-proto-grpc==1.15.0",
+    "opentelemetry-exporter-otlp-proto-http==1.15.0",
+    "opentelemetry-exporter-otlp==1.15.0",
+    "opentelemetry-instrumentation-grpc==0.36b0",
+    "opentelemetry-instrumentation==0.36b0",
+    "opentelemetry-proto==1.15.0",
+    "opentelemetry-sdk==1.15.0",
+    "opentelemetry-semantic-conventions==0.36b0",
+    "packaging==24.0",
+    "pillow==10.3.0",
+    "prometheus-client==0.20.0",
+    "protobuf==4.25.3",
+    "py-cpuinfo==9.0.0",
+    "pyyaml==6.0.1",
+    "regex==2024.5.10",
+    "requests==2.31.0",
+    "safetensors==0.4.3",
+    "scipy==1.13.0",
+    "sentencepiece==0.1.99",
+    "setuptools==69.5.1",
+    "tokenizers==0.19.1",
+    "tqdm==4.66.4",
+    "transformers @ git+https://github.com/huggingface/transformers.git@b8aee2e918d7ba2d5e9e80162ae26b4806873307",
+    "typer==0.6.1",
+    "typing-extensions==4.11.0",
+    "urllib3==2.2.1",
+    "win32-setctime==1.1.0",
+    "wrapt==1.16.0",
+    "torch==2.1.0",
+    "peft==0.10.0",
+    "outlines==0.0.34",
+]
 name = "tgi"
 requires-python = ">=3.8"
 classifiers = [
@@ -13,15 +66,7 @@ classifiers = [
 dynamic = ["version"]

 [tool.maturin]
 features = ["pyo3/extension-module"]
+module-name = "tgi._tgi"
-# TODO: Compile protos before installing text_generation_server
-# pip install grpcio-tools==1.51.1 mypy-protobuf==3.4.0 'types-protobuf>=3.20.4' --no-cache-dir
-# mkdir text_generation_server/pb || true
-# python -m grpc_tools.protoc -I../proto --python_out=text_generation_server/pb \
-#     --grpc_python_out=text_generation_server/pb --mypy_out=text_generation_server/pb ../proto/generate.proto
-# find text_generation_server/pb/ -type f -name "*.py" -print0 -exec sed -i -e 's/^\(import.*pb2\)/from . \1/g' {} \;
-# touch text_generation_server/pb/__init__.py

 # NOTE: currently we comment out server/text_generation_server/pb/.gitignore
 # which includes the generated python files. These then get included in the
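
Since the runtime dependencies are now declared directly in pyproject.toml, installing the built wheel (for example via the install target in the Makefile above) should pull in the full server stack. A quick, illustrative sanity check, not part of the commit; the package names below come from the dependency list:

# Illustrative only: confirm a few of the pinned core dependencies resolved
# after installing the tgi wheel.
from importlib.metadata import PackageNotFoundError, version

for pkg in ("torch", "peft", "outlines", "transformers", "grpcio"):
    try:
        print(f"{pkg}=={version(pkg)}")
    except PackageNotFoundError:
        print(f"{pkg} is not installed")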

View File

@@ -1,90 +0,0 @@
accelerate==0.30.1
annotated-types==0.6.0
attrs==23.2.0
beautifulsoup4==4.12.3
certifi==2024.2.2
charset-normalizer==3.3.2
click==8.1.7
cloudpickle==3.0.0
Deprecated==1.2.14
diskcache==5.6.3
einops==0.8.0
filelock==3.14.0
fsspec==2024.5.0
google==3.0.0
googleapis-common-protos==1.56.1
grpc-interceptor==0.15.4
grpcio==1.63.0
grpcio-reflection==1.63.0
grpcio-status==1.63.0
grpcio-tools==1.63.0
huggingface-hub==0.23.0
idna==3.7
importlib-metadata==7.0.0
interegular==0.3.3
Jinja2==3.1.4
jsonschema==4.22.0
jsonschema-specifications==2023.12.1
lark==1.1.9
llvmlite==0.42.0
loguru==0.7.2
markdown-it-py==3.0.0
MarkupSafe==2.1.5
mdurl==0.1.2
mpmath==1.3.0
nest-asyncio==1.6.0
networkx==3.3
numba==0.59.1
numpy==1.26.4
nvidia-cublas-cu12==12.1.3.1
nvidia-cuda-cupti-cu12==12.1.105
nvidia-cuda-nvrtc-cu12==12.1.105
nvidia-cuda-runtime-cu12==12.1.105
nvidia-cudnn-cu12==8.9.2.26
nvidia-cufft-cu12==11.0.2.54
nvidia-curand-cu12==10.3.2.106
nvidia-cusolver-cu12==11.4.5.107
nvidia-cusparse-cu12==12.1.0.106
nvidia-nccl-cu12==2.20.5
nvidia-nvjitlink-cu12==12.4.127
nvidia-nvtx-cu12==12.1.105
opentelemetry-api==1.24.0
opentelemetry-exporter-otlp==1.24.0
opentelemetry-exporter-otlp-proto-common==1.24.0
opentelemetry-exporter-otlp-proto-grpc==1.24.0
opentelemetry-exporter-otlp-proto-http==1.24.0
opentelemetry-instrumentation==0.45b0
opentelemetry-instrumentation-grpc==0.45b0
opentelemetry-proto==1.24.0
opentelemetry-sdk==1.24.0
opentelemetry-semantic-conventions==0.45b0
outlines==0.0.41
packaging==24.0
peft==0.11.1
pillow==10.3.0
protobuf==4.25.3
psutil==5.9.8
pydantic==2.7.1
pydantic_core==2.18.2
Pygments==2.18.0
PyYAML==6.0.1
referencing==0.35.1
regex==2024.5.15
requests==2.31.0
rich==13.7.1
rpds-py==0.18.1
safetensors==0.4.3
shellingham==1.5.4
soupsieve==2.5
sympy==1.12
tgi @ file:///tmp/tgi-0.1.0-cp311-cp311-manylinux_2_31_x86_64.whl#sha256=9e3906d27588e58d9771c1ff8d7b90ab09a35c2928f8e04c94ac74a39bab1a5c
tokenizers==0.19.1
torch==2.3.0
tqdm==4.66.4
transformers==4.41.0
triton==2.3.0
typer==0.12.3
typing_extensions==4.11.0
urllib3==2.2.1
wrapt==1.16.0
zipp==3.18.2

View File

@@ -192,7 +192,7 @@ fn rust_launcher_cli(_py: Python<'_>) -> PyResult<String> {
 }

 #[pymodule]
-fn tgi(_py: Python, m: &PyModule) -> PyResult<()> {
+fn _tgi(_py: Python, m: &PyModule) -> PyResult<()> {
     m.add_function(wrap_pyfunction!(rust_sleep, m)?)?;
     m.add_function(wrap_pyfunction!(rust_router, m)?)?;
     m.add_function(wrap_pyfunction!(rust_launcher, m)?)?;
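
Renaming the pymodule to _tgi, together with module-name = "tgi._tgi" in pyproject.toml, is what resolves the name collision: the compiled extension becomes a submodule of the pure-Python tgi package instead of shadowing it. A minimal sketch of the resulting import layout (illustrative, assuming the wheel built from this commit is installed):

# Illustrative sketch: the compiled pyo3 extension now lives at tgi._tgi,
# next to the pure-Python tgi/__init__.py changed in the next hunk.
import tgi             # pure-Python package
from tgi import _tgi   # compiled Rust extension

# functions registered above via add_function
_tgi.rust_sleep
_tgi.rust_router
_tgi.rust_launcher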

View File

@@ -1,27 +1,25 @@
-from .tgi import *
 import threading
-from tgi import rust_router, rust_launcher, rust_launcher_cli
+from tgi import _tgi
 import asyncio
 from dataclasses import dataclass, asdict

 from text_generation_server.cli import app

 # add the rust_launcher coroutine to the __all__ list
-__doc__ = tgi.__doc__
+__doc__ = _tgi.__doc__

-if hasattr(tgi, "__all__"):
-    __all__ = tgi.__all__
+if hasattr(_tgi, "__all__"):
+    __all__ = _tgi.__all__

-# wrap the app from text_generation_server.cli in a function
 def text_generation_server_cli_main():
     app()

 def text_generation_router_cli_main():
-    rust_router()
+    _tgi.rust_router()

 def text_generation_launcher_cli_main():
-    rust_launcher_cli()
+    _tgi.rust_launcher_cli()

 @dataclass
@@ -88,7 +86,7 @@ class TGI(object):
     print(args)
     args = Args(**args)
     try:
-        await rust_launcher(
+        await _tgi.rust_launcher(
             args.model_id,
             args.revision,
             args.validation_workers,