# Mirror of https://github.com/huggingface/text-generation-inference.git
# Synced 2025-05-02 07:22:10 +00:00
#
# This PR adds basic modeling for phi-2.
#
# Run:
#   text-generation-server serve microsoft/phi-2 \
#     --revision 834565c23f9b28b96ccbeabe614dd906b6db551a
#
# Test:
#   curl -s localhost:3000/generate -X POST \
#     -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":20}}' \
#     -H 'Content-Type: application/json' | jq .
#   # {
#   #   "generated_text": "\nDeep learning is a subset of machine learning that uses artificial neural networks to learn from data. These"
#   # }
#
# Notes:
# - Recently (~1 day ago) the Phi weights and model were updated to accommodate
#   adding GQA/MQA attention to the model
#   (https://github.com/huggingface/transformers/pull/28163). This implementation
#   expects the original model format, so a fixed revision is required for now.
# - This PR only includes a basic implementation of the model; it can later be
#   extended to support Flash and Sharded versions, as well as to make use of
#   better optimizations.
# Package metadata for the Python gRPC server shipped with TGI.
[tool.poetry]
name = "text-generation-server"
version = "1.3.4"
description = "Text Generation Inference Python gRPC Server"
authors = ["Olivier Dehaene <olivier@huggingface.co>"]
# Console entry point: installs a `text-generation-server` CLI backed by
# the Typer app in text_generation_server/cli.py.
[tool.poetry.scripts]
text-generation-server = 'text_generation_server.cli:app'
# Runtime dependencies. Entries marked `optional = true` are only installed
# when the matching extra from [tool.poetry.extras] is requested.
[tool.poetry.dependencies]
python = ">=3.9,<3.13"
protobuf = "^4.21.7"
grpcio = "^1.51.1"
grpcio-status = "^1.51.1"
grpcio-reflection = "^1.51.1"
grpc-interceptor = "^0.15.0"
typer = "^0.6.1"
accelerate = { version = "^0.25.0", optional = true }
bitsandbytes = { version = "^0.41.1", optional = true }
safetensors = "^0.3.2"
loguru = "^0.6.0"
opentelemetry-api = "^1.15.0"
opentelemetry-exporter-otlp = "^1.15.0"
opentelemetry-instrumentation-grpc = "^0.36b0"
hf-transfer = "^0.1.2"
sentencepiece = "^0.1.97"
tokenizers = "^0.15.0"
huggingface-hub = "^0.19.3"
transformers = "^4.37.1"
einops = "^0.6.1"
texttable = { version = "^1.6.7", optional = true }
datasets = { version = "^2.14.0", optional = true }
peft = { version = "^0.4.0", optional = true }
# torch is optional so deployments can instead use the CUDA builds from the
# `pytorch-gpu-src` source declared below.
torch = { version = "^2.1.1", optional = true }
scipy = "^1.11.1"
pillow = "^10.0.0"
# Named extras mapping to the optional dependencies above,
# e.g. `pip install text-generation-server[quantize]`.
[tool.poetry.extras]
torch = ["torch"]
accelerate = ["accelerate"]
bnb = ["bitsandbytes"]
peft = ["peft"]
quantize = ["texttable", "datasets", "accelerate"]
# Development-only dependencies (protobuf codegen and the test runner);
# not installed with `poetry install --without dev`.
[tool.poetry.group.dev.dependencies]
grpcio-tools = "^1.51.1"
pytest = "^7.3.0"
# Extra package index serving CUDA 12.1 PyTorch wheels. With
# priority = "explicit" this source is never searched by default; it is used
# only by dependencies that explicitly name it via `source = "pytorch-gpu-src"`.
[[tool.poetry.source]]
name = "pytorch-gpu-src"
url = "https://download.pytorch.org/whl/cu121"
priority = "explicit"
# Pytest configuration: registers the `private` marker so tests needing an
# admin HF token can be deselected with `-m "not private"`.
[tool.pytest.ini_options]
markers = ["private: marks tests as requiring an admin hf token (deselect with '-m \"not private\"')"]
# PEP 517 build backend: build with poetry-core.
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"