mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-25 03:52:08 +00:00
feat: package text-generation-server with tgi library
This commit is contained in:
parent
72d69071ae
commit
af2b2e8388
11
tgi/Makefile
11
tgi/Makefile
@ -2,5 +2,14 @@
|
|||||||
build:
|
build:
|
||||||
maturin build
|
maturin build
|
||||||
|
|
||||||
install: build
|
# By temporarily commenting out every line in pb/.gitignore, the generated files are no longer ignored and get included in the built package
|
||||||
|
comment-gitignore:
|
||||||
|
sed -i 's/^\(.*\)/# \1/' ../server/text_generation_server/pb/.gitignore
|
||||||
|
|
||||||
|
remove-comment-gitignore:
|
||||||
|
sed -i 's/^# \(.*\)/\1/' ../server/text_generation_server/pb/.gitignore
|
||||||
|
|
||||||
|
library-install:
|
||||||
pip install -e .
|
pip install -e .
|
||||||
|
|
||||||
|
install: build comment-gitignore library-install remove-comment-gitignore
|
||||||
|
@ -13,3 +13,21 @@ classifiers = [
|
|||||||
dynamic = ["version"]
|
dynamic = ["version"]
|
||||||
[tool.maturin]
|
[tool.maturin]
|
||||||
features = ["pyo3/extension-module"]
|
features = ["pyo3/extension-module"]
|
||||||
|
|
||||||
|
# TODO: Compile protos before installing text_generation_server
|
||||||
|
|
||||||
|
# pip install grpcio-tools==1.51.1 mypy-protobuf==3.4.0 'types-protobuf>=3.20.4' --no-cache-dir
|
||||||
|
# mkdir text_generation_server/pb || true
|
||||||
|
# python -m grpc_tools.protoc -I../proto --python_out=text_generation_server/pb \
|
||||||
|
# --grpc_python_out=text_generation_server/pb --mypy_out=text_generation_server/pb ../proto/generate.proto
|
||||||
|
# find text_generation_server/pb/ -type f -name "*.py" -print0 -exec sed -i -e 's/^\(import.*pb2\)/from . \1/g' {} \;
|
||||||
|
# touch text_generation_server/pb/__init__.py
|
||||||
|
|
||||||
|
# NOTE: currently we comment out server/text_generation_server/pb/.gitignore
|
||||||
|
# which ignores the generated python files. These then get included in the
|
||||||
|
# package. This is not ideal, but works for now.
|
||||||
|
python-packages = ["tgi", "text_generation_server"]
|
||||||
|
|
||||||
|
|
||||||
|
[project.scripts]
|
||||||
|
text-generation-server = "tgi:text_generation_server_cli_main"
|
||||||
|
90
tgi/requirements.txt
Normal file
90
tgi/requirements.txt
Normal file
@ -0,0 +1,90 @@
|
|||||||
|
accelerate==0.30.1
|
||||||
|
annotated-types==0.6.0
|
||||||
|
attrs==23.2.0
|
||||||
|
beautifulsoup4==4.12.3
|
||||||
|
certifi==2024.2.2
|
||||||
|
charset-normalizer==3.3.2
|
||||||
|
click==8.1.7
|
||||||
|
cloudpickle==3.0.0
|
||||||
|
Deprecated==1.2.14
|
||||||
|
diskcache==5.6.3
|
||||||
|
einops==0.8.0
|
||||||
|
filelock==3.14.0
|
||||||
|
fsspec==2024.5.0
|
||||||
|
google==3.0.0
|
||||||
|
googleapis-common-protos==1.56.1
|
||||||
|
grpc-interceptor==0.15.4
|
||||||
|
grpcio==1.63.0
|
||||||
|
grpcio-reflection==1.63.0
|
||||||
|
grpcio-status==1.63.0
|
||||||
|
grpcio-tools==1.63.0
|
||||||
|
huggingface-hub==0.23.0
|
||||||
|
idna==3.7
|
||||||
|
importlib-metadata==7.0.0
|
||||||
|
interegular==0.3.3
|
||||||
|
Jinja2==3.1.4
|
||||||
|
jsonschema==4.22.0
|
||||||
|
jsonschema-specifications==2023.12.1
|
||||||
|
lark==1.1.9
|
||||||
|
llvmlite==0.42.0
|
||||||
|
loguru==0.7.2
|
||||||
|
markdown-it-py==3.0.0
|
||||||
|
MarkupSafe==2.1.5
|
||||||
|
mdurl==0.1.2
|
||||||
|
mpmath==1.3.0
|
||||||
|
nest-asyncio==1.6.0
|
||||||
|
networkx==3.3
|
||||||
|
numba==0.59.1
|
||||||
|
numpy==1.26.4
|
||||||
|
nvidia-cublas-cu12==12.1.3.1
|
||||||
|
nvidia-cuda-cupti-cu12==12.1.105
|
||||||
|
nvidia-cuda-nvrtc-cu12==12.1.105
|
||||||
|
nvidia-cuda-runtime-cu12==12.1.105
|
||||||
|
nvidia-cudnn-cu12==8.9.2.26
|
||||||
|
nvidia-cufft-cu12==11.0.2.54
|
||||||
|
nvidia-curand-cu12==10.3.2.106
|
||||||
|
nvidia-cusolver-cu12==11.4.5.107
|
||||||
|
nvidia-cusparse-cu12==12.1.0.106
|
||||||
|
nvidia-nccl-cu12==2.20.5
|
||||||
|
nvidia-nvjitlink-cu12==12.4.127
|
||||||
|
nvidia-nvtx-cu12==12.1.105
|
||||||
|
opentelemetry-api==1.24.0
|
||||||
|
opentelemetry-exporter-otlp==1.24.0
|
||||||
|
opentelemetry-exporter-otlp-proto-common==1.24.0
|
||||||
|
opentelemetry-exporter-otlp-proto-grpc==1.24.0
|
||||||
|
opentelemetry-exporter-otlp-proto-http==1.24.0
|
||||||
|
opentelemetry-instrumentation==0.45b0
|
||||||
|
opentelemetry-instrumentation-grpc==0.45b0
|
||||||
|
opentelemetry-proto==1.24.0
|
||||||
|
opentelemetry-sdk==1.24.0
|
||||||
|
opentelemetry-semantic-conventions==0.45b0
|
||||||
|
outlines==0.0.41
|
||||||
|
packaging==24.0
|
||||||
|
peft==0.11.1
|
||||||
|
pillow==10.3.0
|
||||||
|
protobuf==4.25.3
|
||||||
|
psutil==5.9.8
|
||||||
|
pydantic==2.7.1
|
||||||
|
pydantic_core==2.18.2
|
||||||
|
Pygments==2.18.0
|
||||||
|
PyYAML==6.0.1
|
||||||
|
referencing==0.35.1
|
||||||
|
regex==2024.5.15
|
||||||
|
requests==2.31.0
|
||||||
|
rich==13.7.1
|
||||||
|
rpds-py==0.18.1
|
||||||
|
safetensors==0.4.3
|
||||||
|
shellingham==1.5.4
|
||||||
|
soupsieve==2.5
|
||||||
|
sympy==1.12
|
||||||
|
tgi @ file:///tmp/tgi-0.1.0-cp311-cp311-manylinux_2_31_x86_64.whl#sha256=9e3906d27588e58d9771c1ff8d7b90ab09a35c2928f8e04c94ac74a39bab1a5c
|
||||||
|
tokenizers==0.19.1
|
||||||
|
torch==2.3.0
|
||||||
|
tqdm==4.66.4
|
||||||
|
transformers==4.41.0
|
||||||
|
triton==2.3.0
|
||||||
|
typer==0.12.3
|
||||||
|
typing_extensions==4.11.0
|
||||||
|
urllib3==2.2.1
|
||||||
|
wrapt==1.16.0
|
||||||
|
zipp==3.18.2
|
1
tgi/text_generation_server
Symbolic link
1
tgi/text_generation_server
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
../server/text_generation_server
|
@ -4,6 +4,7 @@ from tgi import rust_launcher, rust_sleep, fully_packaged
|
|||||||
import asyncio
|
import asyncio
|
||||||
from dataclasses import dataclass, asdict
|
from dataclasses import dataclass, asdict
|
||||||
import sys
|
import sys
|
||||||
|
from text_generation_server.cli import app
|
||||||
|
|
||||||
# add the rust_launcher coroutine to the __all__ list
|
# add the rust_launcher coroutine to the __all__ list
|
||||||
__doc__ = tgi.__doc__
|
__doc__ = tgi.__doc__
|
||||||
@ -11,6 +12,11 @@ if hasattr(tgi, "__all__"):
|
|||||||
__all__ = tgi.__all__
|
__all__ = tgi.__all__
|
||||||
|
|
||||||
|
|
||||||
|
# wrap the app from text_generation_server.cli in a function so it can be used as the `text-generation-server` [project.scripts] entry point
|
||||||
|
def text_generation_server_cli_main():
|
||||||
|
app()
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class Args:
|
class Args:
|
||||||
model_id = "google/gemma-2b-it"
|
model_id = "google/gemma-2b-it"
|
||||||
|
Loading…
Reference in New Issue
Block a user