diff --git a/tgi/Makefile b/tgi/Makefile
index a06df033..b1c844df 100644
--- a/tgi/Makefile
+++ b/tgi/Makefile
@@ -2,5 +2,24 @@
 build:
 	maturin build
 
-install: build
+.PHONY: build install comment-gitignore remove-comment-gitignore library-install
+
+# Temporarily comment out every line of the pb/.gitignore so that the generated
+# files it lists are included in the package built by `pip install -e .`.
+comment-gitignore:
+	sed -i 's/^\(.*\)/# \1/' ../server/text_generation_server/pb/.gitignore
+
+# Undo comment-gitignore by stripping one leading "# " from every line.
+remove-comment-gitignore:
+	sed -i 's/^# \(.*\)/\1/' ../server/text_generation_server/pb/.gitignore
+
+library-install:
 	pip install -e .
+
+# Run the steps strictly in order through sub-makes (prerequisite order is not
+# guaranteed under `make -j`) and always restore the .gitignore afterwards,
+# even when the pip install fails, while still propagating that failure.
+install: build
+	$(MAKE) comment-gitignore
+	status=0; $(MAKE) library-install || status=$$?; \
+	$(MAKE) remove-comment-gitignore && exit $$status
diff --git a/tgi/pyproject.toml b/tgi/pyproject.toml
index cb103d0b..1c824608 100644
--- a/tgi/pyproject.toml
+++ b/tgi/pyproject.toml
@@ -13,3 +13,21 @@ classifiers = [
 dynamic = ["version"]
 [tool.maturin]
 features = ["pyo3/extension-module"]
+
+# TODO: Compile protos before installing text_generation_server
+
+# pip install grpcio-tools==1.51.1 mypy-protobuf==3.4.0 'types-protobuf>=3.20.4' --no-cache-dir
+# mkdir text_generation_server/pb || true
+# python -m grpc_tools.protoc -I../proto --python_out=text_generation_server/pb \
+#     --grpc_python_out=text_generation_server/pb --mypy_out=text_generation_server/pb ../proto/generate.proto
+# find text_generation_server/pb/ -type f -name "*.py" -print0 -exec sed -i -e 's/^\(import.*pb2\)/from . \1/g' {} \;
+# touch text_generation_server/pb/__init__.py
+
+# NOTE: during install we temporarily comment out the entries of
+# server/text_generation_server/pb/.gitignore, which lists the generated Python
+# files, so that they get included in the package. Not ideal, but works for now.
+python-packages = ["tgi", "text_generation_server"]
+
+
+[project.scripts]
+text-generation-server = "tgi:text_generation_server_cli_main"
diff --git a/tgi/requirements.txt b/tgi/requirements.txt
new file mode 100644
index 00000000..c3cf5d74
--- /dev/null
+++ b/tgi/requirements.txt
@@ -0,0 +1,90 @@
+accelerate==0.30.1
+annotated-types==0.6.0
+attrs==23.2.0
+beautifulsoup4==4.12.3
+certifi==2024.2.2
+charset-normalizer==3.3.2
+click==8.1.7
+cloudpickle==3.0.0
+Deprecated==1.2.14
+diskcache==5.6.3
+einops==0.8.0
+filelock==3.14.0
+fsspec==2024.5.0
+google==3.0.0
+googleapis-common-protos==1.56.1
+grpc-interceptor==0.15.4
+grpcio==1.63.0
+grpcio-reflection==1.63.0
+grpcio-status==1.63.0
+grpcio-tools==1.63.0
+huggingface-hub==0.23.0
+idna==3.7
+importlib-metadata==7.0.0
+interegular==0.3.3
+Jinja2==3.1.4
+jsonschema==4.22.0
+jsonschema-specifications==2023.12.1
+lark==1.1.9
+llvmlite==0.42.0
+loguru==0.7.2
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+mdurl==0.1.2
+mpmath==1.3.0
+nest-asyncio==1.6.0
+networkx==3.3
+numba==0.59.1
+numpy==1.26.4
+nvidia-cublas-cu12==12.1.3.1
+nvidia-cuda-cupti-cu12==12.1.105
+nvidia-cuda-nvrtc-cu12==12.1.105
+nvidia-cuda-runtime-cu12==12.1.105
+nvidia-cudnn-cu12==8.9.2.26
+nvidia-cufft-cu12==11.0.2.54
+nvidia-curand-cu12==10.3.2.106
+nvidia-cusolver-cu12==11.4.5.107
+nvidia-cusparse-cu12==12.1.0.106
+nvidia-nccl-cu12==2.20.5
+nvidia-nvjitlink-cu12==12.4.127
+nvidia-nvtx-cu12==12.1.105
+opentelemetry-api==1.24.0
+opentelemetry-exporter-otlp==1.24.0
+opentelemetry-exporter-otlp-proto-common==1.24.0
+opentelemetry-exporter-otlp-proto-grpc==1.24.0
+opentelemetry-exporter-otlp-proto-http==1.24.0
+opentelemetry-instrumentation==0.45b0
+opentelemetry-instrumentation-grpc==0.45b0
+opentelemetry-proto==1.24.0
+opentelemetry-sdk==1.24.0
+opentelemetry-semantic-conventions==0.45b0
+outlines==0.0.41
+packaging==24.0
+peft==0.11.1
+pillow==10.3.0
+protobuf==4.25.3
+psutil==5.9.8
+pydantic==2.7.1
+pydantic_core==2.18.2
+Pygments==2.18.0
+PyYAML==6.0.1
+referencing==0.35.1
+regex==2024.5.15
+requests==2.31.0
+rich==13.7.1
+rpds-py==0.18.1
+safetensors==0.4.3
+shellingham==1.5.4
+soupsieve==2.5
+sympy==1.12
+# NOTE: tgi itself is not pinned here; install it from this directory with `make install`.
+tokenizers==0.19.1
+torch==2.3.0
+tqdm==4.66.4
+transformers==4.41.0
+triton==2.3.0
+typer==0.12.3
+typing_extensions==4.11.0
+urllib3==2.2.1
+wrapt==1.16.0
+zipp==3.18.2
diff --git a/tgi/text_generation_server b/tgi/text_generation_server
new file mode 120000
index 00000000..ce7555e8
--- /dev/null
+++ b/tgi/text_generation_server
@@ -0,0 +1 @@
+../server/text_generation_server
\ No newline at end of file
diff --git a/tgi/tgi/__init__.py b/tgi/tgi/__init__.py
index 2826915e..2e9684a5 100644
--- a/tgi/tgi/__init__.py
+++ b/tgi/tgi/__init__.py
@@ -4,6 +4,7 @@ from tgi import rust_launcher, rust_sleep, fully_packaged
 import asyncio
 from dataclasses import dataclass, asdict
 import sys
+from text_generation_server.cli import app
 
 # add the rust_launcher coroutine to the __all__ list
 __doc__ = tgi.__doc__
@@ -11,6 +12,11 @@ if hasattr(tgi, "__all__"):
     __all__ = tgi.__all__
 
 
+# `text-generation-server` console-script entry point: runs text_generation_server.cli's app
+def text_generation_server_cli_main():
+    app()
+
+
 @dataclass
 class Args:
     model_id = "google/gemma-2b-it"