diff --git a/tgi/Makefile b/tgi/Makefile
index a06df033..b1c844df 100644
--- a/tgi/Makefile
+++ b/tgi/Makefile
@@ -2,5 +2,23 @@
 build:
 	maturin build
 
-install: build
+# Temporarily comment out every line of pb/.gitignore so that the generated
+# files it ignores get included in the built package (see NOTE in pyproject.toml).
+comment-gitignore:
+	sed -i 's/^\(.*\)/# \1/' ../server/text_generation_server/pb/.gitignore
+
+# Undo comment-gitignore by stripping one leading "# " from each line.
+remove-comment-gitignore:
+	sed -i 's/^# \(.*\)/\1/' ../server/text_generation_server/pb/.gitignore
+
+library-install:
 	pip install -e .
+
+# Run the steps as sequential sub-makes instead of prerequisites: `make -j`
+# may run prerequisites concurrently, and .gitignore must be restored even
+# when the install step fails.
+install: build
+	$(MAKE) comment-gitignore
+	$(MAKE) library-install; status=$$?; $(MAKE) remove-comment-gitignore || status=$$?; exit $$status
+
+.PHONY: build comment-gitignore remove-comment-gitignore library-install install
diff --git a/tgi/pyproject.toml b/tgi/pyproject.toml
index cb103d0b..1c824608 100644
--- a/tgi/pyproject.toml
+++ b/tgi/pyproject.toml
@@ -13,3 +13,21 @@ classifiers = [
 dynamic = ["version"]
 [tool.maturin]
 features = ["pyo3/extension-module"]
+
+# TODO: Compile protos before installing text_generation_server
+
+# pip install grpcio-tools==1.51.1 mypy-protobuf==3.4.0 'types-protobuf>=3.20.4' --no-cache-dir
+# mkdir text_generation_server/pb || true
+# python -m grpc_tools.protoc -I../proto --python_out=text_generation_server/pb \
+#     --grpc_python_out=text_generation_server/pb --mypy_out=text_generation_server/pb ../proto/generate.proto
+# find text_generation_server/pb/ -type f -name "*.py" -print0 -exec sed -i -e 's/^\(import.*pb2\)/from . \1/g' {} \;
+# touch text_generation_server/pb/__init__.py
+
+# NOTE: currently we comment out server/text_generation_server/pb/.gitignore
+# (which ignores the generated Python files) so that those files get included
+# in the package. This is not ideal, but works for now.
+python-packages = ["tgi", "text_generation_server"]
+
+
+[project.scripts]
+text-generation-server = "tgi:text_generation_server_cli_main"
diff --git a/tgi/requirements.txt b/tgi/requirements.txt
new file mode 100644
index 00000000..c3cf5d74
--- /dev/null
+++ b/tgi/requirements.txt
@@ -0,0 +1,90 @@
+accelerate==0.30.1
+annotated-types==0.6.0
+attrs==23.2.0
+beautifulsoup4==4.12.3
+certifi==2024.2.2
+charset-normalizer==3.3.2
+click==8.1.7
+cloudpickle==3.0.0
+Deprecated==1.2.14
+diskcache==5.6.3
+einops==0.8.0
+filelock==3.14.0
+fsspec==2024.5.0
+google==3.0.0
+googleapis-common-protos==1.56.1
+grpc-interceptor==0.15.4
+grpcio==1.63.0
+grpcio-reflection==1.63.0
+grpcio-status==1.63.0
+grpcio-tools==1.63.0
+huggingface-hub==0.23.0
+idna==3.7
+importlib-metadata==7.0.0
+interegular==0.3.3
+Jinja2==3.1.4
+jsonschema==4.22.0
+jsonschema-specifications==2023.12.1
+lark==1.1.9
+llvmlite==0.42.0
+loguru==0.7.2
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+mdurl==0.1.2
+mpmath==1.3.0
+nest-asyncio==1.6.0
+networkx==3.3
+numba==0.59.1
+numpy==1.26.4
+nvidia-cublas-cu12==12.1.3.1
+nvidia-cuda-cupti-cu12==12.1.105
+nvidia-cuda-nvrtc-cu12==12.1.105
+nvidia-cuda-runtime-cu12==12.1.105
+nvidia-cudnn-cu12==8.9.2.26
+nvidia-cufft-cu12==11.0.2.54
+nvidia-curand-cu12==10.3.2.106
+nvidia-cusolver-cu12==11.4.5.107
+nvidia-cusparse-cu12==12.1.0.106
+nvidia-nccl-cu12==2.20.5
+nvidia-nvjitlink-cu12==12.4.127
+nvidia-nvtx-cu12==12.1.105
+opentelemetry-api==1.24.0
+opentelemetry-exporter-otlp==1.24.0
+opentelemetry-exporter-otlp-proto-common==1.24.0
+opentelemetry-exporter-otlp-proto-grpc==1.24.0
+opentelemetry-exporter-otlp-proto-http==1.24.0
+opentelemetry-instrumentation==0.45b0
+opentelemetry-instrumentation-grpc==0.45b0
+opentelemetry-proto==1.24.0
+opentelemetry-sdk==1.24.0
+opentelemetry-semantic-conventions==0.45b0
+outlines==0.0.41
+packaging==24.0
+peft==0.11.1
+pillow==10.3.0
+protobuf==4.25.3
+psutil==5.9.8
+pydantic==2.7.1
+pydantic_core==2.18.2
+Pygments==2.18.0
+PyYAML==6.0.1
+referencing==0.35.1
+regex==2024.5.15
+requests==2.31.0
+rich==13.7.1
+rpds-py==0.18.1
+safetensors==0.4.3
+shellingham==1.5.4
+soupsieve==2.5
+sympy==1.12
+# tgi itself is not pinned here (the frozen entry pointed at a machine-local /tmp wheel); install it from this directory with `make install`.
+tokenizers==0.19.1
+torch==2.3.0
+tqdm==4.66.4
+transformers==4.41.0
+triton==2.3.0
+typer==0.12.3
+typing_extensions==4.11.0
+urllib3==2.2.1
+wrapt==1.16.0
+zipp==3.18.2
diff --git a/tgi/text_generation_server b/tgi/text_generation_server
new file mode 120000
index 00000000..ce7555e8
--- /dev/null
+++ b/tgi/text_generation_server
@@ -0,0 +1 @@
+../server/text_generation_server
\ No newline at end of file
diff --git a/tgi/tgi/__init__.py b/tgi/tgi/__init__.py
index 2826915e..2e9684a5 100644
--- a/tgi/tgi/__init__.py
+++ b/tgi/tgi/__init__.py
@@ -4,6 +4,7 @@ from tgi import rust_launcher, rust_sleep, fully_packaged
 import asyncio
 from dataclasses import dataclass, asdict
 import sys
+from text_generation_server.cli import app
 
 # add the rust_launcher coroutine to the __all__ list
 __doc__ = tgi.__doc__
@@ -11,6 +12,12 @@ if hasattr(tgi, "__all__"):
     __all__ = tgi.__all__
 
 
+# wrap the app from text_generation_server.cli in a function
+def text_generation_server_cli_main():
+    """Run the text_generation_server CLI (console-script entry point)."""
+    app()
+
+
 @dataclass
 class Args:
     model_id = "google/gemma-2b-it"