text-generation-inference/server/pyproject.toml
drbh 7e2a7433d3
feat: adds phi model (#1442)
This PR adds basic modeling for phi-2 

run
```bash
text-generation-server \
    serve \
    microsoft/phi-2 \
    --revision 834565c23f9b28b96ccbeabe614dd906b6db551a
```


test
```bash
curl -s localhost:3000/generate \
   -X POST \
   -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":20}}' \
   -H 'Content-Type: application/json' | jq .
# {
#   "generated_text": "\nDeep learning is a subset of machine learning that uses artificial neural networks to learn from data. These"
# }
```



notes 
- recently (~1 day ago) the Phi weights and model were updated to
accommodate adding [GQA/MQA attention to the
model.](https://github.com/huggingface/transformers/pull/28163) This
impl expects the original model format so a fixed revision is required
at the moment.
- this PR only includes a basic implementation of the model and can
later be extended for support Flash and Sharded versions as well as make
use of better optimization
2024-01-25 15:37:53 +01:00

63 lines
1.6 KiB
TOML

[tool.poetry]
name = "text-generation-server"
version = "1.3.4"
description = "Text Generation Inference Python gRPC Server"
authors = ["Olivier Dehaene <olivier@huggingface.co>"]
[tool.poetry.scripts]
text-generation-server = 'text_generation_server.cli:app'
[tool.poetry.dependencies]
python = ">=3.9,<3.13"
protobuf = "^4.21.7"
grpcio = "^1.51.1"
grpcio-status = "^1.51.1"
grpcio-reflection = "^1.51.1"
grpc-interceptor = "^0.15.0"
typer = "^0.6.1"
accelerate = { version = "^0.25.0", optional = true }
bitsandbytes = { version = "^0.41.1", optional = true }
safetensors = "^0.3.2"
loguru = "^0.6.0"
opentelemetry-api = "^1.15.0"
opentelemetry-exporter-otlp = "^1.15.0"
opentelemetry-instrumentation-grpc = "^0.36b0"
hf-transfer = "^0.1.2"
sentencepiece = "^0.1.97"
tokenizers = "^0.15.0"
huggingface-hub = "^0.19.3"
transformers = "^4.37.1"
einops = "^0.6.1"
texttable = { version = "^1.6.7", optional = true }
datasets = { version = "^2.14.0", optional = true }
peft = { version = "^0.4.0", optional = true }
torch = { version = "^2.1.1", optional = true }
scipy = "^1.11.1"
pillow = "^10.0.0"
[tool.poetry.extras]
torch = ["torch"]
accelerate = ["accelerate"]
bnb = ["bitsandbytes"]
peft = ["peft"]
quantize = ["texttable", "datasets", "accelerate"]
[tool.poetry.group.dev.dependencies]
grpcio-tools = "^1.51.1"
pytest = "^7.3.0"
[[tool.poetry.source]]
name = "pytorch-gpu-src"
url = "https://download.pytorch.org/whl/cu121"
priority = "explicit"
[tool.pytest.ini_options]
markers = ["private: marks tests as requiring an admin hf token (deselect with '-m \"not private\"')"]
[build-system]
requires = [
"poetry-core>=1.0.0",
]
build-backend = "poetry.core.masonry.api"