Vaibhav Srivastav 2024-08-13 19:58:21 +02:00
parent 4abd7d3971
commit 7007394766
2 changed files with 3 additions and 3 deletions

@@ -125,7 +125,7 @@ Assume you are serving your model on port 8080, we will query through [Inference
 import gradio as gr
 from huggingface_hub import InferenceClient
-client = InferenceClient(model="http://127.0.0.1:8080")
+client = InferenceClient(base_url="http://127.0.0.1:8080")
 def inference(message, history):
     partial_message = ""

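For context, the snippet this hunk touches builds a Gradio chat UI on top of the local endpoint. Below is a minimal runnable sketch of one way the surrounding code could look after the change; the body of `inference`, the prompt handling, `max_tokens`, and the bare `gr.ChatInterface(inference)` call are illustrative assumptions, not part of the diff.

```python
import gradio as gr
from huggingface_hub import InferenceClient

# Assumes a TGI server is listening locally on port 8080, as in the docs this hunk edits.
client = InferenceClient(base_url="http://127.0.0.1:8080")

def inference(message, history):
    partial_message = ""
    # Stream a chat completion and yield partial text so Gradio renders it incrementally.
    output = client.chat.completions.create(
        messages=[{"role": "user", "content": message}],
        stream=True,
        max_tokens=1024,  # illustrative value, not from the diff
    )
    for chunk in output:
        partial_message += chunk.choices[0].delta.content or ""
        yield partial_message

gr.ChatInterface(inference).queue().launch()
```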
@@ -48,7 +48,7 @@ To stream tokens with `InferenceClient`, simply pass `stream=True` and iterate o
 ```python
 from huggingface_hub import InferenceClient
-client = InferenceClient("http://127.0.0.1:8080")
+client = InferenceClient(base_url="http://127.0.0.1:8080")
 output = client.chat.completions.create(
     messages=[
         {"role": "system", "content": "You are a helpful assistant."},
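The hunk above cuts off mid-call, so here is a self-contained sketch of the streaming pattern it edits; the user prompt, the `max_tokens` value, and the printing loop are assumptions filled in around the lines shown in the diff.

```python
from huggingface_hub import InferenceClient

# Assumes the same local TGI endpoint used throughout these docs.
client = InferenceClient(base_url="http://127.0.0.1:8080")

output = client.chat.completions.create(
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Count to 10"},  # illustrative prompt
    ],
    stream=True,
    max_tokens=1024,
)

# Each streamed chunk carries a delta; print the text as it arrives.
for chunk in output:
    print(chunk.choices[0].delta.content or "", end="")
```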
@@ -78,7 +78,7 @@ The `huggingface_hub` library also comes with an `AsyncInferenceClient` in case
 ```python
 from huggingface_hub import AsyncInferenceClient
-client = AsyncInferenceClient("http://127.0.0.1:8080")
+client = AsyncInferenceClient(base_url="http://127.0.0.1:8080")
 async def main():
     stream = await client.chat.completions.create(
         messages=[{"role": "user", "content": "Say this is a test"}],
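To round out the truncated async hunk, a self-contained sketch under the same local-endpoint assumption; the `async for` loop, the printing, and the `asyncio.run` wrapper are illustrative and not shown in the diff.

```python
import asyncio
from huggingface_hub import AsyncInferenceClient

# Assumes the same local endpoint; base_url mirrors the change in this hunk.
client = AsyncInferenceClient(base_url="http://127.0.0.1:8080")

async def main():
    stream = await client.chat.completions.create(
        messages=[{"role": "user", "content": "Say this is a test"}],
        stream=True,
    )
    # Iterate asynchronously over the streamed chunks and print each token.
    async for chunk in stream:
        print(chunk.choices[0].delta.content or "", end="")

asyncio.run(main())
```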