mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-09-10 11:54:52 +00:00)
update readme
commit a3e57d3c5d (parent 478d5c1403)
Makefile (10 lines changed)
@@ -13,7 +13,7 @@ server-dev:
 	cd server && make run-dev
 
 router-dev:
-	cd router && cargo run
+	cd router && cargo run -- --port 8080
 
 integration-tests: install-router install-launcher
 	cargo test
@@ -22,16 +22,16 @@ python-tests:
 	cd server && HF_HUB_ENABLE_HF_TRANSFER=1 pytest tests
 
 run-bloom-560m:
-	text-generation-launcher --model-id bigscience/bloom-560m --num-shard 2
+	text-generation-launcher --model-id bigscience/bloom-560m --num-shard 2 --port 8080
 
 run-bloom-560m-quantize:
-	text-generation-launcher --model-id bigscience/bloom-560m --num-shard 2 --quantize
+	text-generation-launcher --model-id bigscience/bloom-560m --num-shard 2 --quantize --port 8080
 
 download-bloom:
 	HF_HUB_ENABLE_HF_TRANSFER=1 text-generation-server download-weights bigscience/bloom
 
 run-bloom:
-	text-generation-launcher --model-id bigscience/bloom --num-shard 8
+	text-generation-launcher --model-id bigscience/bloom --num-shard 8 --port 8080
 
 run-bloom-quantize:
-	text-generation-launcher --model-id bigscience/bloom --num-shard 8 --quantize
+	text-generation-launcher --model-id bigscience/bloom --num-shard 8 --quantize --port 8080
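With every run target now pinned to `--port 8080`, the Makefile and the README examples agree on where the router listens. A quick way to confirm a launched server is reachable before trying the README snippets is a one-shot request against `/generate` (a minimal sketch, not part of the commit; it assumes a launcher started via one of the `run-*` targets above and reuses the request shape from the README):

```python
import requests

# Smoke test: POST one prompt to the router that the Makefile targets
# now all start on port 8080 (assumes e.g. `make run-bloom-560m` is running).
resp = requests.post(
    "http://127.0.0.1:8080/generate",
    json={"inputs": "What is Deep Learning?", "parameters": {"max_new_tokens": 17}},
    timeout=60,
)
resp.raise_for_status()
print(resp.json())
```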
README.md (31 lines changed)
@@ -89,40 +89,35 @@ You can then query the model using either the `/generate` or `/generate_stream`
 ```shell
 curl 127.0.0.1:8080/generate \
     -X POST \
-    -d '{"inputs":"Testing API","parameters":{"max_new_tokens":9}}' \
+    -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17}}' \
     -H 'Content-Type: application/json'
 ```
 
 ```shell
 curl 127.0.0.1:8080/generate_stream \
     -X POST \
-    -d '{"inputs":"Testing API","parameters":{"max_new_tokens":9}}' \
+    -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17}}' \
     -H 'Content-Type: application/json'
 ```
 
 or from Python:
 
-```python
-import requests
-
-result = requests.post("http://127.0.0.1:8080/generate", json={"inputs":"Testing API","parameters":{"max_new_tokens":9}})
-print(result.json())
-```
-
 ```shell
-pip install sseclient-py
+pip install text-generation
 ```
 
-````python
-import sseclient
-import requests
+```python
+from text_generation import Client
 
-r = requests.post("http://127.0.0.1:8080/generate_stream", stream=True, json={"inputs":"Testing API","parameters":{"max_new_tokens":9}})
-sse_client = sseclient.SSEClient(r)
+client = Client("http://127.0.0.1:8080")
+print(client.generate("What is Deep Learning?", max_new_tokens=17).generated_text)
 
-for i, event in enumerate(sse_client.events()):
-    print(i, event.data)
-````
+text = ""
+for response in client.generate_stream("What is Deep Learning?", max_new_tokens=17):
+    if not response.token.special:
+        text += response.token.text
+print(text)
+```
 
 **Note:** To use GPUs, you need to install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html).
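For comparison with the `sseclient` snippet the commit removes, the stream the new `text_generation` client consumes is ordinary server-sent events from `/generate_stream`. A rough sketch of what `client.generate_stream(...)` wraps, assuming each event arrives as a `data:{json}` line carrying a `token` object with the `text` and `special` fields used in the diff:

```python
import json
import requests

# Read /generate_stream as raw server-sent events and reassemble the text,
# mirroring the client-side loop in the new README example.
with requests.post(
    "http://127.0.0.1:8080/generate_stream",
    json={"inputs": "What is Deep Learning?", "parameters": {"max_new_tokens": 17}},
    stream=True,
) as r:
    text = ""
    for line in r.iter_lines():
        if line.startswith(b"data:"):
            token = json.loads(line[len(b"data:"):]).get("token", {})
            if not token.get("special"):
                text += token.get("text", "")
print(text)
```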