feat: prefer huggingface_hub in docs and show image api

This commit is contained in:
drbh 2024-05-01 19:04:50 +00:00
parent 27b3a2c9fc
commit f8e31c0243
2 changed files with 151 additions and 118 deletions

View File

@ -2,7 +2,7 @@
Text Generation Inference (TGI) now supports [JSON and regex grammars](#grammar-and-constraints) and [tools and functions](#tools-and-functions) to help developers guide LLM responses to fit their needs.
These feature are available starting from version `1.4.3`. They are accessible via the [text_generation](https://pypi.org/project/text-generation/) library. The tool support is compatible with OpenAI's client libraries. The following guide will walk you through the new features and how to use them!
These feature are available starting from version `1.4.3`. They are accessible via the [`huggingface_hub`](https://pypi.org/project/huggingface-hub/) library. The tool support is compatible with OpenAI's client libraries. The following guide will walk you through the new features and how to use them!
_note: guidance is supported as grammar in the `/generate` endpoint and as tools in the `/chat/completions` endpoint._
@ -176,41 +176,32 @@ print(response.json())
```
### Using the client
### Hugging Face Hub Python Library
TGI provides a client library to that make it easy to send requests with all of the parameters we've discussed above. Here's an example of how to use the client to send a request with a grammar parameter.
The Hugging Face Hub Python library provides a client that makes it easy to interact with the Messages API. Here's an example of how to use the client to send a request with a grammar parameter.
```python
from text_generation import AsyncClient
from text_generation.types import GrammarType
from huggingface_hub import InferenceClient
# NOTE: tools defined above and removed for brevity
client = InferenceClient(
"http://localhost:3000" # local endpoint
# "meta-llama/Meta-Llama-3-8B-Instruct" # HF serverless endpoint
)
# Define an async function to encapsulate the async operation
async def main():
client = AsyncClient(base_url="http://localhost:3000")
# Use 'await' to wait for the async method 'chat' to complete
response = await client.generate(
resp = client.text_generation(
"Whats Googles DNS",
max_new_tokens=10,
decoder_input_details=True,
seed=1,
seed=42,
grammar={
"type": GrammarType.Regex,
"type": "regex",
"value": "((25[0-5]|2[0-4]\\d|[01]?\\d\\d?)\\.){3}(25[0-5]|2[0-4]\\d|[01]?\\d\\d?)",
},
)
)
# Once the response is received, you can process it
print(response.generated_text)
# Ensure the main async function is run in the event loop
if __name__ == "__main__":
import asyncio
asyncio.run(main())
# 118.8.0.84
print(resp)
# 1.0.0.1
```
@ -265,54 +256,16 @@ curl localhost:3000/v1/chat/completions \
// {"id":"","object":"text_completion","created":1709051640,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"1.4.3-native","choices":[{"index":0,"message":{"role":"assistant","tool_calls":{"id":0,"type":"function","function":{"description":null,"name":"tools","parameters":{"format":"celsius","location":"New York"}}}},"logprobs":null,"finish_reason":"eos_token"}],"usage":{"prompt_tokens":157,"completion_tokens":19,"total_tokens":176}}
```
### Text Generation Inference Client
### Chat Completion with Tools
TGI provides a client library to interact with the Messages API and Tool functions. The client library is available in both synchronous and asynchronous versions.
Grammars are supported in the `/generate` endpoint, while tools are supported in the `/chat/completions` endpoint. Here's an example of how to use the client to send a request with a tool parameter.
```python
from text_generation import AsyncClient
from huggingface_hub import InferenceClient
# NOTE: tools defined above and removed for brevity
client = InferenceClient("http://localhost:3000")
# Define an async function to encapsulate the async operation
async def main():
client = AsyncClient(base_url="http://localhost:3000")
# Use 'await' to wait for the async method 'chat' to complete
response = await client.chat(
max_tokens=100,
seed=1,
tools=tools,
presence_penalty=-1.1,
messages=[
{
"role": "system",
"content": "You're a helpful assistant! Answer the users question best you can.",
},
{
"role": "user",
"content": "What is the weather like in Brooklyn, New York?",
},
],
)
# Once the response is received, you can process it
print(response.choices[0].message.tool_calls)
# Ensure the main async function is run in the event loop
if __name__ == "__main__":
import asyncio
asyncio.run(main())
# {"id":"","object":"text_completion","created":1709051942,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"1.4.3-native","choices":[{"index":0,"message":{"role":"assistant","tool_calls":{"id":0,"type":"function","function":{"description":null,"name":"tools","parameters":{"format":"celsius","location":"New York"}}}},"logprobs":null,"finish_reason":"eos_token"}],"usage":{"prompt_tokens":157,"completion_tokens":20,"total_tokens":177}}
```
<details>
<summary>Tools used in example above</summary>
```python
tools = [
tools = [
{
"type": "function",
"function": {
@ -360,11 +313,29 @@ if __name__ == "__main__":
"required": ["location", "format", "num_days"],
},
},
}
]
```
},
]
</details>
chat = client.chat_completion(
messages=[
{
"role": "system",
"content": "You're a helpful assistant! Answer the users question best you can.",
},
{
"role": "user",
"content": "What is the weather like in Brooklyn, New York?",
},
],
tools=tools,
seed=42,
max_tokens=100,
)
print(chat.choices[0].message.tool_calls)
# [ChatCompletionOutputToolCall(function=ChatCompletionOutputFunctionDefinition(arguments={'format': 'fahrenheit', 'location': 'Brooklyn, New York', 'num_days': 7}, name='get_n_day_weather_forecast', description=None), id=0, type='function')]
```
### OpenAI integration

View File

@ -53,6 +53,68 @@ for token in client.text_generation(prompt, max_new_tokens=10, stream=True):
# This is a picture of an anthropomorphic rabbit in a space suit.
```
or via the `chat_completion` endpoint:
```python
from huggingface_hub import InferenceClient
client = InferenceClient("http://127.0.0.1:3000")
chat = client.chat_completion(
messages=[
{
"role": "user",
"content": [
{"type": "text", "text": "Whats in this image?"},
{
"type": "image_url",
"image_url": {
"url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/rabbit.png"
},
},
],
},
],
seed=42,
max_tokens=100,
)
print(chat)
# ChatCompletionOutput(choices=[ChatCompletionOutputComplete(finish_reason='length', index=0, message=ChatCompletionOutputMessage(role='assistant', content=" The image you've provided features an anthropomorphic rabbit in spacesuit attire. This rabbit is depicted with human-like posture and movement, standing on a rocky terrain with a vast, reddish-brown landscape in the background. The spacesuit is detailed with mission patches, circuitry, and a helmet that covers the rabbit's face and ear, with an illuminated red light on the chest area.\n\nThe artwork style is that of a", name=None, tool_calls=None), logprobs=None)], created=1714589614, id='', model='llava-hf/llava-v1.6-mistral-7b-hf', object='text_completion', system_fingerprint='2.0.2-native', usage=ChatCompletionOutputUsage(completion_tokens=100, prompt_tokens=2943, total_tokens=3043))
```
or with OpenAi's library:
```python
from openai import OpenAI
# init the client but point it to TGI
client = OpenAI(base_url="http://localhost:3000/v1", api_key="-")
chat_completion = client.chat.completions.create(
model="tgi",
messages=[
{
"role": "user",
"content": [
{"type": "text", "text": "Whats in this image?"},
{
"type": "image_url",
"image_url": {
"url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/rabbit.png"
},
},
],
},
],
stream=False,
)
print(chat_completion)
# ChatCompletion(id='', choices=[Choice(finish_reason='eos_token', index=0, logprobs=None, message=ChatCompletionMessage(content=' The image depicts an anthropomorphic rabbit dressed in a space suit with gear that resembles NASA attire. The setting appears to be a solar eclipse with dramatic mountain peaks and a partial celestial body in the sky. The artwork is detailed and vivid, with a warm color palette and a sense of an adventurous bunny exploring or preparing for a journey beyond Earth. ', role='assistant', function_call=None, tool_calls=None))], created=1714589732, model='llava-hf/llava-v1.6-mistral-7b-hf', object='text_completion', system_fingerprint='2.0.2-native', usage=CompletionUsage(completion_tokens=84, prompt_tokens=2943, total_tokens=3027))
```
If you want additional details, you can add `details=True`. In this case, you get a `TextGenerationStreamResponse` which contains additional information such as the probabilities and the tokens. For the final response in the stream, it also returns the full generated text.
### Inference Through Sending `cURL` Requests