Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-06-19 15:52:08 +00:00)

Update client SDK snippets (#3207)

* Update client SDK snippets
* good catch from copilot

This commit is contained in:
parent d303c1e37e
commit 7253be349a
@@ -14,7 +14,7 @@
 </a>

 A Rust, Python and gRPC server for text generation inference. Used in production at [Hugging Face](https://huggingface.co)
-to power Hugging Chat, the Inference API and Inference Endpoint.
+to power Hugging Chat, the Inference API and Inference Endpoints.

 </div>

@@ -22,7 +22,7 @@ To infer with vision language models through Python, you can use the [`huggingfa
 ```python
 from huggingface_hub import InferenceClient

-client = InferenceClient("http://127.0.0.1:3000")
+client = InferenceClient(base_url="http://127.0.0.1:3000")
 image = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/rabbit.png"
 prompt = f"What is this a picture of?\n\n"
 for token in client.text_generation(prompt, max_new_tokens=16, stream=True):
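Pieced together, the updated Python snippet from this hunk looks roughly like the sketch below. It assumes a TGI server is already running locally on port 3000 and that the served model accepts the prompt as-is; the lines that attach the image to the prompt fall outside this hunk, so they are not reproduced here.

```python
from huggingface_hub import InferenceClient

# The updated snippet passes the server address via the base_url keyword.
client = InferenceClient(base_url="http://127.0.0.1:3000")

image = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/rabbit.png"
prompt = "What is this a picture of?\n\n"  # how `image` is attached to the prompt is outside this hunk

# Stream generated tokens as they arrive instead of waiting for the full response.
for token in client.text_generation(prompt, max_new_tokens=16, stream=True):
    print(token, end="")
```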
@@ -37,7 +37,7 @@ import base64
 import requests
 import io

-client = InferenceClient("http://127.0.0.1:3000")
+client = InferenceClient(base_url="http://127.0.0.1:3000")

 # read image from local file
 image_path = "rabbit.png"
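One way the local-file variant can continue past this hunk is to base64-encode the file and embed it in the prompt as a data URI. This is a sketch under that assumption (the encoding lines and the exact prompt format sit outside the hunk), not the documented continuation.

```python
import base64

from huggingface_hub import InferenceClient

client = InferenceClient(base_url="http://127.0.0.1:3000")

# read image from local file
image_path = "rabbit.png"
with open(image_path, "rb") as f:
    # Encode the raw bytes so the image can travel inside a text prompt.
    image_b64 = base64.b64encode(f.read()).decode("utf-8")

# Assumption: embed the image as a base64 data URI using Markdown image syntax.
prompt = f"![](data:image/png;base64,{image_b64})What is this a picture of?\n\n"

for token in client.text_generation(prompt, max_new_tokens=16, stream=True):
    print(token, end="")
```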
@@ -58,7 +58,7 @@ or via the `chat_completion` endpoint:
 ```python
 from huggingface_hub import InferenceClient

-client = InferenceClient("http://127.0.0.1:3000")
+client = InferenceClient(base_url="http://127.0.0.1:3000")

 chat = client.chat_completion(
     messages=[
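For context, here is a sketch of how the `chat_completion` call might continue past the `messages=[` line shown above. The message payload is an assumption (an OpenAI-style list mixing an image URL and a text question), since the hunk cuts off before it.

```python
from huggingface_hub import InferenceClient

client = InferenceClient(base_url="http://127.0.0.1:3000")

image = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/rabbit.png"

chat = client.chat_completion(
    messages=[
        {
            "role": "user",
            "content": [
                # Assumption: OpenAI-style multimodal content parts.
                {"type": "image_url", "image_url": {"url": image}},
                {"type": "text", "text": "What is this a picture of?"},
            ],
        }
    ],
    max_tokens=16,
)

print(chat.choices[0].message.content)
```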
@@ -137,19 +137,19 @@ First, we need to install the `@huggingface/inference` library.
 npm install @huggingface/inference
 ```

-If you're using the free Inference API, you can use [Huggingface.js](https://huggingface.co/docs/huggingface.js/inference/README)'s `HfInference`. If you're using inference endpoints, you can use the `HfInferenceEndpoint` class to easily interact with the Inference API.
+Whether you use Inference Providers (our serverless API) or Inference Endpoints, you can call `InferenceClient`.

-We can create a `HfInferenceEndpoint` providing our endpoint URL and [Hugging Face access token](https://huggingface.co/settings/tokens).
+We can create an `InferenceClient`, providing our endpoint URL and [Hugging Face access token](https://huggingface.co/settings/tokens).

 ```js
-import { HfInferenceEndpoint } from "@huggingface/inference";
+import { InferenceClient } from "@huggingface/inference";

-const hf = new HfInferenceEndpoint("http://127.0.0.1:3000", "HF_TOKEN");
+const client = new InferenceClient('hf_YOUR_TOKEN', { endpointUrl: 'https://YOUR_ENDPOINT.endpoints.huggingface.cloud' });

 const prompt =
   "What is this a picture of?\n\n";

-const stream = hf.textGenerationStream({
+const stream = client.textGenerationStream({
   inputs: prompt,
   parameters: { max_new_tokens: 16, seed: 42 },
 });
@@ -125,24 +125,26 @@ curl localhost:8080/v1/chat/completions \
 ### Streaming with JavaScript

 First, we need to install the `@huggingface/inference` library.
-`npm install @huggingface/inference`

-If you're using the free Inference API, you can use `HfInference`. If you're using inference endpoints, you can use `HfInferenceEndpoint`.
+```bash
+npm install @huggingface/inference
+```

-We can create a `HfInferenceEndpoint` providing our endpoint URL and credential.
+Whether you use Inference Providers (our serverless API) or Inference Endpoints, you can call `InferenceClient`.

 ```js
-import { HfInferenceEndpoint } from '@huggingface/inference'
+import { InferenceClient } from '@huggingface/inference';

-const hf = new HfInferenceEndpoint('https://YOUR_ENDPOINT.endpoints.huggingface.cloud', 'hf_YOUR_TOKEN')
+const client = new InferenceClient('hf_YOUR_TOKEN', { endpointUrl: 'https://YOUR_ENDPOINT.endpoints.huggingface.cloud' });

 // prompt
-const prompt = 'What can you do in Nuremberg, Germany? Give me 3 Tips'
+const prompt = 'What can you do in Nuremberg, Germany? Give me 3 Tips';

-const stream = hf.textGenerationStream({ inputs: prompt })
+const stream = client.textGenerationStream({ inputs: prompt });
 for await (const r of stream) {
   // yield the generated token
-  process.stdout.write(r.token.text)
+  process.stdout.write(r.token.text);
 }
 ```
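The same pattern exists on the Python side of the client SDK. As a rough equivalent of the JavaScript snippet above (a sketch, assuming a deployed Inference Endpoint URL and an `hf_...` token, both placeholders), you could write:

```python
from huggingface_hub import InferenceClient

# Placeholders: substitute your own endpoint URL and access token.
client = InferenceClient(
    base_url="https://YOUR_ENDPOINT.endpoints.huggingface.cloud",
    token="hf_YOUR_TOKEN",
)

prompt = "What can you do in Nuremberg, Germany? Give me 3 Tips"

# Stream tokens and print them as they are generated.
for token in client.text_generation(prompt, stream=True):
    print(token, end="")
```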