mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-21 14:52:20 +00:00
fix: avoid library name collision and add core deps to build
This commit is contained in:
parent
70b27c4b2a
commit
38688ba45d
@ -2,6 +2,8 @@ use std::fs;
|
|||||||
|
|
||||||
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
println!("cargo:rerun-if-changed=../../proto/generate.proto");
|
println!("cargo:rerun-if-changed=../../proto/generate.proto");
|
||||||
|
// TODO: avoid this when building python library?
|
||||||
|
if false {
|
||||||
fs::create_dir("src/pb").unwrap_or(());
|
fs::create_dir("src/pb").unwrap_or(());
|
||||||
|
|
||||||
let mut config = prost_build::Config::new();
|
let mut config = prost_build::Config::new();
|
||||||
@ -14,6 +16,6 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|||||||
.include_file("mod.rs")
|
.include_file("mod.rs")
|
||||||
.compile_with_config(config, &["../../proto/generate.proto"], &["../../proto"])
|
.compile_with_config(config, &["../../proto/generate.proto"], &["../../proto"])
|
||||||
.unwrap_or_else(|e| panic!("protobuf compilation failed: {e}"));
|
.unwrap_or_else(|e| panic!("protobuf compilation failed: {e}"));
|
||||||
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
@ -242,8 +242,6 @@ pub async fn internal_main_args() -> Result<(), RouterError> {
|
|||||||
.skip_while(|a| a.contains("python"))
|
.skip_while(|a| a.contains("python"))
|
||||||
.collect();
|
.collect();
|
||||||
let args = Args::parse_from(args);
|
let args = Args::parse_from(args);
|
||||||
|
|
||||||
println!("{:?}", args);
|
|
||||||
let out = internal_main(
|
let out = internal_main(
|
||||||
args.max_concurrent_requests,
|
args.max_concurrent_requests,
|
||||||
args.max_best_of,
|
args.max_best_of,
|
||||||
@ -309,10 +307,7 @@ pub async fn internal_main(
|
|||||||
max_client_batch_size: usize,
|
max_client_batch_size: usize,
|
||||||
) -> Result<(), RouterError> {
|
) -> Result<(), RouterError> {
|
||||||
// Launch Tokio runtime
|
// Launch Tokio runtime
|
||||||
if otlp_endpoint.is_some() {
|
|
||||||
// Initialize if OpenTelemetry is enabled
|
|
||||||
init_logging(otlp_endpoint, json_output);
|
init_logging(otlp_endpoint, json_output);
|
||||||
}
|
|
||||||
|
|
||||||
// Validate args
|
// Validate args
|
||||||
if max_input_tokens >= max_total_tokens {
|
if max_input_tokens >= max_total_tokens {
|
||||||
|
4568
tgi/Cargo.lock
generated
Normal file
4568
tgi/Cargo.lock
generated
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "tgi"
|
name = "tgi"
|
||||||
version = "0.1.0"
|
version = "0.0.1"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
|
|
||||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
11
tgi/Makefile
11
tgi/Makefile
@ -1,6 +1,6 @@
|
|||||||
|
|
||||||
build:
|
build:
|
||||||
maturin build
|
maturin build --sdist --interpreter python3.11
|
||||||
|
|
||||||
# by commenting out the line in .gitignore, we can include the generated files in the git repository
|
# by commenting out the line in .gitignore, we can include the generated files in the git repository
|
||||||
comment-gitignore:
|
comment-gitignore:
|
||||||
@ -9,9 +9,16 @@ comment-gitignore:
|
|||||||
remove-comment-gitignore:
|
remove-comment-gitignore:
|
||||||
sed -i 's/^# \(.*\)/\1/' ../server/text_generation_server/pb/.gitignore
|
sed -i 's/^# \(.*\)/\1/' ../server/text_generation_server/pb/.gitignore
|
||||||
|
|
||||||
|
comment-gitignore2:
|
||||||
|
sed -i 's/^\(.*\)/# \1/' ../router/client/src/pb/.gitignore
|
||||||
|
|
||||||
|
remove-comment-gitignore2:
|
||||||
|
sed -i 's/^# \(.*\)/\1/' ../router/client/src/pb/.gitignore
|
||||||
|
|
||||||
|
|
||||||
library-install:
|
library-install:
|
||||||
pip install -e .
|
pip install -e .
|
||||||
|
|
||||||
install: comment-gitignore build library-install remove-comment-gitignore
|
install: comment-gitignore comment-gitignore2 build library-install remove-comment-gitignore remove-comment-gitignore2
|
||||||
|
|
||||||
quick-install: build library-install
|
quick-install: build library-install
|
||||||
|
@ -3,6 +3,59 @@ requires = ["maturin>=1.5,<2.0"]
|
|||||||
build-backend = "maturin"
|
build-backend = "maturin"
|
||||||
|
|
||||||
[project]
|
[project]
|
||||||
|
dependencies = [
|
||||||
|
"backoff==2.2.1",
|
||||||
|
"certifi==2024.2.2",
|
||||||
|
"charset-normalizer==3.3.2",
|
||||||
|
"click==8.1.7",
|
||||||
|
"colorama==0.4.6",
|
||||||
|
"deprecated==1.2.14",
|
||||||
|
"einops==0.6.1",
|
||||||
|
"filelock==3.14.0",
|
||||||
|
"fsspec==2024.3.1",
|
||||||
|
"googleapis-common-protos==1.63.0",
|
||||||
|
"grpc-interceptor==0.15.4",
|
||||||
|
"grpcio-reflection==1.62.2",
|
||||||
|
"grpcio-status==1.62.2",
|
||||||
|
"grpcio==1.63.0",
|
||||||
|
"hf-transfer==0.1.6",
|
||||||
|
"huggingface-hub==0.23.0",
|
||||||
|
"idna==3.7",
|
||||||
|
"loguru==0.6.0",
|
||||||
|
"numpy==1.26.4",
|
||||||
|
"opentelemetry-api==1.15.0",
|
||||||
|
"opentelemetry-exporter-otlp-proto-grpc==1.15.0",
|
||||||
|
"opentelemetry-exporter-otlp-proto-http==1.15.0",
|
||||||
|
"opentelemetry-exporter-otlp==1.15.0",
|
||||||
|
"opentelemetry-instrumentation-grpc==0.36b0",
|
||||||
|
"opentelemetry-instrumentation==0.36b0",
|
||||||
|
"opentelemetry-proto==1.15.0",
|
||||||
|
"opentelemetry-sdk==1.15.0",
|
||||||
|
"opentelemetry-semantic-conventions==0.36b0",
|
||||||
|
"packaging==24.0",
|
||||||
|
"pillow==10.3.0",
|
||||||
|
"prometheus-client==0.20.0",
|
||||||
|
"protobuf==4.25.3",
|
||||||
|
"py-cpuinfo==9.0.0",
|
||||||
|
"pyyaml==6.0.1",
|
||||||
|
"regex==2024.5.10",
|
||||||
|
"requests==2.31.0",
|
||||||
|
"safetensors==0.4.3",
|
||||||
|
"scipy==1.13.0",
|
||||||
|
"sentencepiece==0.1.99",
|
||||||
|
"setuptools==69.5.1",
|
||||||
|
"tokenizers==0.19.1",
|
||||||
|
"tqdm==4.66.4",
|
||||||
|
"transformers @ git+https://github.com/huggingface/transformers.git@b8aee2e918d7ba2d5e9e80162ae26b4806873307",
|
||||||
|
"typer==0.6.1",
|
||||||
|
"typing-extensions==4.11.0",
|
||||||
|
"urllib3==2.2.1",
|
||||||
|
"win32-setctime==1.1.0",
|
||||||
|
"wrapt==1.16.0",
|
||||||
|
"torch==2.1.0",
|
||||||
|
"peft==0.10.0",
|
||||||
|
"outlines==0.0.34",
|
||||||
|
]
|
||||||
name = "tgi"
|
name = "tgi"
|
||||||
requires-python = ">=3.8"
|
requires-python = ">=3.8"
|
||||||
classifiers = [
|
classifiers = [
|
||||||
@ -13,15 +66,7 @@ classifiers = [
|
|||||||
dynamic = ["version"]
|
dynamic = ["version"]
|
||||||
[tool.maturin]
|
[tool.maturin]
|
||||||
features = ["pyo3/extension-module"]
|
features = ["pyo3/extension-module"]
|
||||||
|
module-name = "tgi._tgi"
|
||||||
# TODO: Compile protos before installing text_generation_server
|
|
||||||
|
|
||||||
# pip install grpcio-tools==1.51.1 mypy-protobuf==3.4.0 'types-protobuf>=3.20.4' --no-cache-dir
|
|
||||||
# mkdir text_generation_server/pb || true
|
|
||||||
# python -m grpc_tools.protoc -I../proto --python_out=text_generation_server/pb \
|
|
||||||
# --grpc_python_out=text_generation_server/pb --mypy_out=text_generation_server/pb ../proto/generate.proto
|
|
||||||
# find text_generation_server/pb/ -type f -name "*.py" -print0 -exec sed -i -e 's/^\(import.*pb2\)/from . \1/g' {} \;
|
|
||||||
# touch text_generation_server/pb/__init__.py
|
|
||||||
|
|
||||||
# NOTE: currently we comment out server/text_generation_server/pb/.gitignore
|
# NOTE: currently we comment out server/text_generation_server/pb/.gitignore
|
||||||
# which includes the generated python files. These then get included in the
|
# which includes the generated python files. These then get included in the
|
||||||
|
@ -1,90 +0,0 @@
|
|||||||
accelerate==0.30.1
|
|
||||||
annotated-types==0.6.0
|
|
||||||
attrs==23.2.0
|
|
||||||
beautifulsoup4==4.12.3
|
|
||||||
certifi==2024.2.2
|
|
||||||
charset-normalizer==3.3.2
|
|
||||||
click==8.1.7
|
|
||||||
cloudpickle==3.0.0
|
|
||||||
Deprecated==1.2.14
|
|
||||||
diskcache==5.6.3
|
|
||||||
einops==0.8.0
|
|
||||||
filelock==3.14.0
|
|
||||||
fsspec==2024.5.0
|
|
||||||
google==3.0.0
|
|
||||||
googleapis-common-protos==1.56.1
|
|
||||||
grpc-interceptor==0.15.4
|
|
||||||
grpcio==1.63.0
|
|
||||||
grpcio-reflection==1.63.0
|
|
||||||
grpcio-status==1.63.0
|
|
||||||
grpcio-tools==1.63.0
|
|
||||||
huggingface-hub==0.23.0
|
|
||||||
idna==3.7
|
|
||||||
importlib-metadata==7.0.0
|
|
||||||
interegular==0.3.3
|
|
||||||
Jinja2==3.1.4
|
|
||||||
jsonschema==4.22.0
|
|
||||||
jsonschema-specifications==2023.12.1
|
|
||||||
lark==1.1.9
|
|
||||||
llvmlite==0.42.0
|
|
||||||
loguru==0.7.2
|
|
||||||
markdown-it-py==3.0.0
|
|
||||||
MarkupSafe==2.1.5
|
|
||||||
mdurl==0.1.2
|
|
||||||
mpmath==1.3.0
|
|
||||||
nest-asyncio==1.6.0
|
|
||||||
networkx==3.3
|
|
||||||
numba==0.59.1
|
|
||||||
numpy==1.26.4
|
|
||||||
nvidia-cublas-cu12==12.1.3.1
|
|
||||||
nvidia-cuda-cupti-cu12==12.1.105
|
|
||||||
nvidia-cuda-nvrtc-cu12==12.1.105
|
|
||||||
nvidia-cuda-runtime-cu12==12.1.105
|
|
||||||
nvidia-cudnn-cu12==8.9.2.26
|
|
||||||
nvidia-cufft-cu12==11.0.2.54
|
|
||||||
nvidia-curand-cu12==10.3.2.106
|
|
||||||
nvidia-cusolver-cu12==11.4.5.107
|
|
||||||
nvidia-cusparse-cu12==12.1.0.106
|
|
||||||
nvidia-nccl-cu12==2.20.5
|
|
||||||
nvidia-nvjitlink-cu12==12.4.127
|
|
||||||
nvidia-nvtx-cu12==12.1.105
|
|
||||||
opentelemetry-api==1.24.0
|
|
||||||
opentelemetry-exporter-otlp==1.24.0
|
|
||||||
opentelemetry-exporter-otlp-proto-common==1.24.0
|
|
||||||
opentelemetry-exporter-otlp-proto-grpc==1.24.0
|
|
||||||
opentelemetry-exporter-otlp-proto-http==1.24.0
|
|
||||||
opentelemetry-instrumentation==0.45b0
|
|
||||||
opentelemetry-instrumentation-grpc==0.45b0
|
|
||||||
opentelemetry-proto==1.24.0
|
|
||||||
opentelemetry-sdk==1.24.0
|
|
||||||
opentelemetry-semantic-conventions==0.45b0
|
|
||||||
outlines==0.0.41
|
|
||||||
packaging==24.0
|
|
||||||
peft==0.11.1
|
|
||||||
pillow==10.3.0
|
|
||||||
protobuf==4.25.3
|
|
||||||
psutil==5.9.8
|
|
||||||
pydantic==2.7.1
|
|
||||||
pydantic_core==2.18.2
|
|
||||||
Pygments==2.18.0
|
|
||||||
PyYAML==6.0.1
|
|
||||||
referencing==0.35.1
|
|
||||||
regex==2024.5.15
|
|
||||||
requests==2.31.0
|
|
||||||
rich==13.7.1
|
|
||||||
rpds-py==0.18.1
|
|
||||||
safetensors==0.4.3
|
|
||||||
shellingham==1.5.4
|
|
||||||
soupsieve==2.5
|
|
||||||
sympy==1.12
|
|
||||||
tgi @ file:///tmp/tgi-0.1.0-cp311-cp311-manylinux_2_31_x86_64.whl#sha256=9e3906d27588e58d9771c1ff8d7b90ab09a35c2928f8e04c94ac74a39bab1a5c
|
|
||||||
tokenizers==0.19.1
|
|
||||||
torch==2.3.0
|
|
||||||
tqdm==4.66.4
|
|
||||||
transformers==4.41.0
|
|
||||||
triton==2.3.0
|
|
||||||
typer==0.12.3
|
|
||||||
typing_extensions==4.11.0
|
|
||||||
urllib3==2.2.1
|
|
||||||
wrapt==1.16.0
|
|
||||||
zipp==3.18.2
|
|
@ -192,7 +192,7 @@ fn rust_launcher_cli(_py: Python<'_>) -> PyResult<String> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[pymodule]
|
#[pymodule]
|
||||||
fn tgi(_py: Python, m: &PyModule) -> PyResult<()> {
|
fn _tgi(_py: Python, m: &PyModule) -> PyResult<()> {
|
||||||
m.add_function(wrap_pyfunction!(rust_sleep, m)?)?;
|
m.add_function(wrap_pyfunction!(rust_sleep, m)?)?;
|
||||||
m.add_function(wrap_pyfunction!(rust_router, m)?)?;
|
m.add_function(wrap_pyfunction!(rust_router, m)?)?;
|
||||||
m.add_function(wrap_pyfunction!(rust_launcher, m)?)?;
|
m.add_function(wrap_pyfunction!(rust_launcher, m)?)?;
|
||||||
|
@ -1,27 +1,25 @@
|
|||||||
from .tgi import *
|
|
||||||
import threading
|
import threading
|
||||||
from tgi import rust_router, rust_launcher, rust_launcher_cli
|
from tgi import _tgi
|
||||||
import asyncio
|
import asyncio
|
||||||
from dataclasses import dataclass, asdict
|
from dataclasses import dataclass, asdict
|
||||||
from text_generation_server.cli import app
|
from text_generation_server.cli import app
|
||||||
|
|
||||||
# add the rust_launcher coroutine to the __all__ list
|
# add the rust_launcher coroutine to the __all__ list
|
||||||
__doc__ = tgi.__doc__
|
__doc__ = _tgi.__doc__
|
||||||
if hasattr(tgi, "__all__"):
|
if hasattr(_tgi, "__all__"):
|
||||||
__all__ = tgi.__all__
|
__all__ = _tgi.__all__
|
||||||
|
|
||||||
|
|
||||||
# wrap the app from text_generation_server.cli in a function
|
|
||||||
def text_generation_server_cli_main():
|
def text_generation_server_cli_main():
|
||||||
app()
|
app()
|
||||||
|
|
||||||
|
|
||||||
def text_generation_router_cli_main():
|
def text_generation_router_cli_main():
|
||||||
rust_router()
|
_tgi.rust_router()
|
||||||
|
|
||||||
|
|
||||||
def text_generation_launcher_cli_main():
|
def text_generation_launcher_cli_main():
|
||||||
rust_launcher_cli()
|
_tgi.rust_launcher_cli()
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@ -88,7 +86,7 @@ class TGI(object):
|
|||||||
print(args)
|
print(args)
|
||||||
args = Args(**args)
|
args = Args(**args)
|
||||||
try:
|
try:
|
||||||
await rust_launcher(
|
await _tgi.rust_launcher(
|
||||||
args.model_id,
|
args.model_id,
|
||||||
args.revision,
|
args.revision,
|
||||||
args.validation_workers,
|
args.validation_workers,
|
||||||
|
Loading…
Reference in New Issue
Block a user