Mirror of https://github.com/huggingface/text-generation-inference.git
Synced 2025-09-10 20:04:52 +00:00
Switch async client example to use stream
parent 3dfa7d33eb
commit f3266b8a4a
@@ -38,7 +38,7 @@ To stream tokens with `InferenceClient`, simply pass `stream=True` and iterate o
 ```python
 from huggingface_hub import InferenceClient
 
-client = InferenceClient(model="http://127.0.0.1:8080")
+client = InferenceClient("http://127.0.0.1:8080")
 for token in client.text_generation("How do you make cheese?", max_new_tokens=12, stream=True):
     print(token)
 
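The unchanged synchronous example above streams plain token strings. If per-token metadata is wanted as well, `text_generation` in recent versions of `huggingface_hub` also accepts `details=True` alongside `stream=True`, in which case each item is a stream response object rather than a bare string. A minimal sketch, assuming the same local TGI endpoint at http://127.0.0.1:8080; the response field names follow `huggingface_hub`'s stream output and are worth verifying against your installed version:

```python
from huggingface_hub import InferenceClient

client = InferenceClient("http://127.0.0.1:8080")

# With details=True and stream=True, each yielded item carries the token
# text plus metadata (id, logprob, special flag) instead of a bare string.
for chunk in client.text_generation(
    "How do you make cheese?",
    max_new_tokens=12,
    stream=True,
    details=True,
):
    print(chunk.token.text, chunk.token.logprob)
```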
@@ -73,9 +73,21 @@ The `huggingface_hub` library also comes with an `AsyncInferenceClient` in case
 ```python
 from huggingface_hub import AsyncInferenceClient
 
-client = AsyncInferenceClient(URL_TO_ENDPOINT_SERVING_TGI)
-await client.text_generation("How do you make cheese?")
-# \nTo make cheese, you need to start with milk.
+client = AsyncInferenceClient("http://127.0.0.1:8080")
+async for token in await client.text_generation("How do you make cheese?", stream=True):
+    print(token)
+
+# To
+# make
+# cheese
+#,
+# you
+# need
+# to
+# start
+# with
+# milk
+#.
 ```
 
 ### Streaming with cURL
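Note that the new async snippet uses top-level `await`, which only works in a notebook or an async REPL. In a plain Python script you would wrap it in a coroutine and drive it with `asyncio.run`; a minimal sketch, assuming the same local endpoint as the diff:

```python
import asyncio

from huggingface_hub import AsyncInferenceClient


async def main() -> None:
    client = AsyncInferenceClient("http://127.0.0.1:8080")
    # Awaiting text_generation(..., stream=True) returns an async iterator
    # that yields tokens as the server produces them.
    async for token in await client.text_generation(
        "How do you make cheese?", stream=True
    ):
        print(token)


# asyncio.run supplies the event loop that a notebook provides implicitly.
asyncio.run(main())
```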