mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-24 00:12:08 +00:00
# What does this PR do? Reworked the loading logic. The idea is to use cleaner loading code: - Remove the need for `no_init_weights` - Remove all the weird `bnb_linear`, `load_weights` and `post_load_weights`. New code layout: - New class `Weights` in charge of handling loading the weights from multiple files into appropriate tensors (potentially sharded) - TP layers are now "shells": they contain the code to know what kind of sharding we need + an eventual `all_reduce`. They do not inherit from linear, but they contain some kind of Linear instead - the contained linear can be either FastLinear, BnbLinear or GPTQ Linear next. - All modeling code is explicitly made for sharding; the process group is just a no-op for non-sharded code (removes a lot of test cases)  --------- Co-authored-by: Ubuntu <ubuntu@ip-172-31-41-161.taildb5d.ts.net> Co-authored-by: Ubuntu <ubuntu@ip-172-31-41-161.ec2.internal> Co-authored-by: OlivierDehaene <olivier@huggingface.co> Co-authored-by: OlivierDehaene <23298448+OlivierDehaene@users.noreply.github.com>
45 lines
1.2 KiB
TOML
45 lines
1.2 KiB
TOML
# Poetry package metadata for the Python gRPC server.
[tool.poetry]
name = "text-generation-server"
version = "0.8.2"
description = "Text Generation Inference Python gRPC Server"
authors = ["Olivier Dehaene <olivier@huggingface.co>"]

# Console entry point: the `text-generation-server` command resolves to the
# `app` object in the `text_generation_server.cli` module.
[tool.poetry.scripts]
text-generation-server = 'text_generation_server.cli:app'

# Runtime dependencies. A caret constraint ("^X.Y.Z") accepts any later
# release that does not change the left-most non-zero version component.
[tool.poetry.dependencies]
python = "^3.9"
protobuf = "^4.21.7"
grpcio = "^1.51.1"
grpcio-status = "^1.51.1"
grpcio-reflection = "^1.51.1"
grpc-interceptor = "^0.15.0"
typer = "^0.6.1"
# Optional packages: not installed by default, only through the matching
# entry in [tool.poetry.extras].
accelerate = { version = "^0.19.0", optional = true }
bitsandbytes = { version = "^0.38.1", optional = true }
# Exact pin (no caret). NOTE(review): the reason for pinning is not recorded
# in this file - confirm before relaxing.
safetensors = "0.3.1"
loguru = "^0.6.0"
opentelemetry-api = "^1.15.0"
opentelemetry-exporter-otlp = "^1.15.0"
# Constraint is anchored on a beta (pre-release) version.
opentelemetry-instrumentation-grpc = "^0.36b0"
hf-transfer = "^0.1.2"
sentencepiece = "^0.1.97"
# Exact pin (no caret). NOTE(review): the reason for pinning is not recorded
# in this file - confirm before relaxing.
tokenizers = "0.13.3"
huggingface-hub = "^0.14.1"
transformers = "^4.29.2"

# Extras: each key is the extra's name, each value the list of optional
# dependencies that extra enables. Note that the `bnb` extra maps to the
# `bitsandbytes` package.
[tool.poetry.extras]
accelerate = ["accelerate"]
bnb = ["bitsandbytes"]

# Dependencies of the `dev` dependency group (not part of the runtime
# dependency table above).
[tool.poetry.group.dev.dependencies]
grpcio-tools = "^1.51.1"
pytest = "^7.3.0"

# pytest configuration: registers the custom `private` marker so tests that
# need an admin HF token can be deselected with `-m "not private"`.
[tool.pytest.ini_options]
markers = ["private: marks tests as requiring an admin hf token (deselect with '-m \"not private\"')"]

# PEP 517 build configuration: the package is built with poetry-core.
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"