update readme

OlivierDehaene 2023-03-07 18:43:00 +01:00
parent 478d5c1403
commit a3e57d3c5d
2 changed files with 18 additions and 23 deletions

Makefile

@@ -13,7 +13,7 @@ server-dev:
 	cd server && make run-dev
 
 router-dev:
-	cd router && cargo run
+	cd router && cargo run -- --port 8080
 
 integration-tests: install-router install-launcher
 	cargo test
@@ -22,16 +22,16 @@ python-tests:
 	cd server && HF_HUB_ENABLE_HF_TRANSFER=1 pytest tests
 
 run-bloom-560m:
-	text-generation-launcher --model-id bigscience/bloom-560m --num-shard 2
+	text-generation-launcher --model-id bigscience/bloom-560m --num-shard 2 --port 8080
 
 run-bloom-560m-quantize:
-	text-generation-launcher --model-id bigscience/bloom-560m --num-shard 2 --quantize
+	text-generation-launcher --model-id bigscience/bloom-560m --num-shard 2 --quantize --port 8080
 
 download-bloom:
 	HF_HUB_ENABLE_HF_TRANSFER=1 text-generation-server download-weights bigscience/bloom
 
 run-bloom:
-	text-generation-launcher --model-id bigscience/bloom --num-shard 8
+	text-generation-launcher --model-id bigscience/bloom --num-shard 8 --port 8080
 
 run-bloom-quantize:
-	text-generation-launcher --model-id bigscience/bloom --num-shard 8 --quantize
+	text-generation-launcher --model-id bigscience/bloom --num-shard 8 --quantize --port 8080
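Every launcher target (and `router-dev`'s `cargo run`) now passes `--port 8080` explicitly, so the README examples below can assume that address. A minimal sketch, not part of this commit, of waiting for the server to come up before querying it; the host, port, and timeout values are illustrative assumptions:

```python
import socket
import time

# Sketch: block until something accepts TCP connections on the port the
# Makefile targets now pin to 8080. Host/port/timeout are assumptions.
def wait_for_server(host="127.0.0.1", port=8080, timeout=120.0):
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            with socket.create_connection((host, port), timeout=1.0):
                return True  # the launcher's HTTP server is listening
        except OSError:
            time.sleep(1.0)  # still starting (or refused); retry
    return False

if __name__ == "__main__":
    assert wait_for_server(), "server never came up on :8080"
```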

README.md

@@ -89,40 +89,35 @@ You can then query the model using either the `/generate` or `/generate_stream` routes
 
 ```shell
 curl 127.0.0.1:8080/generate \
     -X POST \
-    -d '{"inputs":"Testing API","parameters":{"max_new_tokens":9}}' \
+    -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17}}' \
     -H 'Content-Type: application/json'
 ```
 
 ```shell
 curl 127.0.0.1:8080/generate_stream \
     -X POST \
-    -d '{"inputs":"Testing API","parameters":{"max_new_tokens":9}}' \
+    -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17}}' \
     -H 'Content-Type: application/json'
 ```
 
 or from Python:
 
-```python
-import requests
-
-result = requests.post("http://127.0.0.1:8080/generate", json={"inputs":"Testing API","parameters":{"max_new_tokens":9}})
-print(result.json())
-```
-
 ```shell
-pip install sseclient-py
+pip install text-generation
 ```
 
-````python
-import sseclient
-import requests
+```python
+from text_generation import Client
 
-r = requests.post("http://127.0.0.1:8080/generate_stream", stream=True, json={"inputs":"Testing API","parameters":{"max_new_tokens":9}})
-sse_client = sseclient.SSEClient(r)
+client = Client("http://127.0.0.1:8080")
+print(client.generate("What is Deep Learning?", max_new_tokens=17).generated_text)
 
-for i, event in enumerate(sse_client.events()):
-    print(i, event.data)
-````
+text = ""
+for response in client.generate_stream("What is Deep Learning?", max_new_tokens=17):
+    if not response.token.special:
+        text += response.token.text
+print(text)
+```
 
 **Note:** To use GPUs, you need to install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html).
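The new `text-generation` client shown in the diff above replaces the raw `sseclient` loop this commit deletes. For readers without the client installed, here is a minimal sketch of what `generate_stream` does over the wire, using only `requests`; the `data:` line framing is standard server-sent events, and the token payload shape is an assumption based on the fields the client loop reads (`token.text`, `token.special`):

```python
import json
import requests

# Sketch (not part of this commit): stream tokens from the raw
# /generate_stream SSE route, mirroring the client loop above.
resp = requests.post(
    "http://127.0.0.1:8080/generate_stream",
    json={"inputs": "What is Deep Learning?", "parameters": {"max_new_tokens": 17}},
    stream=True,
)
text = ""
for line in resp.iter_lines():
    # each SSE event arrives as a line of the form: data: {...json...}
    if line.startswith(b"data:"):
        payload = json.loads(line[len(b"data:"):])
        token = payload.get("token", {})
        if not token.get("special"):  # skip special tokens, as above
            text += token.get("text", "")
print(text)
```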