diff --git a/docs/source/basic_tutorials/using_guidance.md b/docs/source/basic_tutorials/using_guidance.md
index a55082bf..d0008fdb 100644
--- a/docs/source/basic_tutorials/using_guidance.md
+++ b/docs/source/basic_tutorials/using_guidance.md
@@ -74,6 +74,45 @@ curl localhost:3000/generate \
 ```
 
+### Hugging Face Hub Python Library
+
+The Hugging Face Hub Python library provides a client that makes it easy to interact with the Messages API. Here's an example of how to use the client to send a request with a grammar parameter.
+
+```python
+from huggingface_hub import InferenceClient
+
+client = InferenceClient("http://localhost:3000")
+
+schema = {
+    "properties": {
+        "location": {"title": "Location", "type": "string"},
+        "activity": {"title": "Activity", "type": "string"},
+        "animals_seen": {
+            "maximum": 5,
+            "minimum": 1,
+            "title": "Animals Seen",
+            "type": "integer",
+        },
+        "animals": {"items": {"type": "string"}, "title": "Animals", "type": "array"},
+    },
+    "required": ["location", "activity", "animals_seen", "animals"],
+    "title": "Animals",
+    "type": "object",
+}
+
+user_input = "I saw a puppy a cat and a raccoon during my bike ride in the park"
+resp = client.text_generation(
+    f"convert to JSON: '{user_input}'. please use the following schema: {schema}",
+    max_new_tokens=100,
+    seed=42,
+    grammar={"type": "json", "value": schema},
+)
+
+print(resp)
+# { "activity": "bike ride", "animals": ["puppy", "cat", "raccoon"], "animals_seen": 3, "location": "park" }
+
+```
+
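+The example above uses the `text_generation` method. Since the client also speaks the Messages API, the same constraint can in principle be sent through the chat route. The sketch below is an assumption rather than part of the original example: it presumes a recent `huggingface_hub` whose `chat_completion` accepts a `response_format` argument, and a TGI version that maps `{"type": "json", "value": schema}` onto the same grammar machinery; check both versions before relying on it.
+
+```python
+from huggingface_hub import InferenceClient
+
+client = InferenceClient("http://localhost:3000")
+
+# Sketch only: `schema` is the JSON schema dict from the example above.
+# Assumption: the server accepts a grammar-style response_format on the chat
+# route, mirroring the `grammar` parameter passed to text_generation.
+resp = client.chat_completion(
+    messages=[
+        {
+            "role": "user",
+            "content": "Convert to JSON: 'I saw a puppy a cat and a raccoon during my bike ride in the park'",
+        }
+    ],
+    max_tokens=100,
+    seed=42,
+    response_format={"type": "json", "value": schema},  # assumed shape
+)
+
+print(resp.choices[0].message.content)
+```
+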
 A grammar can be defined using Pydantic models, JSON schemas, or regular expressions. The LLM will then generate a response that conforms to the specified grammar.
 
 > Note: A grammar must compile to an intermediate representation to constrain the output. Grammar compilation is computationally expensive and may take a few seconds to complete on the first request. Subsequent requests will use the cached grammar and will be much faster.
 
@@ -83,125 +122,55 @@ A grammar can be defined using Pydantic models, JSON schemas, or regular express
 Using Pydantic models we can define a similar grammar as the previous example in a shorter and more readable way.
 
 ```python
-import requests
+from huggingface_hub import InferenceClient
 from pydantic import BaseModel, conint
 from typing import List
+
+
 class Animals(BaseModel):
     location: str
     activity: str
     animals_seen: conint(ge=1, le=5)  # Constrained integer type
     animals: List[str]
 
-prompt = "convert to JSON: I saw a puppy a cat and a raccoon during my bike ride in the park"
-data = {
-    "inputs": prompt,
-    "parameters": {
-        "repetition_penalty": 1.3,
-        "grammar": {
-            "type": "json",
-            "value": Animals.schema()
-        }
-    }
-}
+client = InferenceClient("http://localhost:3000")
 
-headers = {
-    "Content-Type": "application/json",
-}
-
-response = requests.post(
-    'http://127.0.0.1:3000/generate',
-    headers=headers,
-    json=data
+user_input = "I saw a puppy a cat and a raccoon during my bike ride in the park"
+resp = client.text_generation(
+    f"convert to JSON: '{user_input}'. please use the following schema: {Animals.schema()}",
+    max_new_tokens=100,
+    seed=42,
+    grammar={"type": "json", "value": Animals.schema()},
 )
-print(response.json())
-# {'generated_text': '{ "activity": "bike riding", "animals": ["puppy","cat","raccoon"],"animals_seen": 3, "location":"park" }'}
+
+print(resp)
+# { "activity": "bike ride", "animals": ["puppy", "cat", "raccoon"], "animals_seen": 3, "location": "park" }
+
 ```
 
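+Because generation is constrained to the schema, the response can be loaded straight back into the model. A minimal sketch (not part of the original example), assuming the `resp` string and `Animals` class from the example above:
+
+```python
+# Parse the constrained output back into the Pydantic model. parse_raw is
+# the Pydantic v1 API, matching the v1-style conint/.schema() calls used here.
+animals = Animals.parse_raw(resp)
+
+print(animals.location)      # park
+print(animals.animals_seen)  # 3
+```
+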
-### JSON Schema Integration
-
-If Pydantic's not your style, go raw with direct JSON Schema integration. This is similar to the first example but with programmatic control.
-
-```python
-import requests
-
-json_schema = {
-    "properties": {
-        "location": {
-            "type": "string"
-        },
-        "activity": {
-            "type": "string"
-        },
-        "animals_seen": {
-            "type": "integer",
-            "minimum": 1,
-            "maximum": 5
-        },
-        "animals": {
-            "type": "array",
-            "items": {
-                "type": "string"
-            }
-        }
-    },
-    "required": ["location", "activity", "animals_seen", "animals"]
-}
-
-data = {
-    "inputs": "convert to JSON: I saw a puppy a cat and a raccoon during my bike ride in the park",
-    "parameters": {
-        "max_new_tokens": 200,
-        "repetition_penalty": 1.3,
-        "grammar": {
-            "type": "json",
-            "value": json_schema
-        }
-    }
-}
-
-headers = {
-    "Content-Type": "application/json",
-}
-
-response = requests.post(
-    'http://127.0.0.1:3000/generate',
-    headers=headers,
-    json=data
-)
-print(response.json())
-# {'generated_text': '{\n"activity": "biking",\n"animals": ["puppy","cat","raccoon"]\n , "animals_seen": 3,\n "location":"park"}'}
-
-```
-
-### Hugging Face Hub Python Library
-
-The Hugging Face Hub Python library provides a client that makes it easy to interact with the Messages API. Here's an example of how to use the client to send a request with a grammar parameter.
+A grammar can also be defined as a regular expression:
 
 ```python
 from huggingface_hub import InferenceClient
 
-client = InferenceClient(
-    "http://localhost:3000" # local endpoint
-    # "meta-llama/Meta-Llama-3-8B-Instruct" # HF serverless endpoint
-)
+client = InferenceClient("http://localhost:3000")
+
+regexp = "((25[0-5]|2[0-4]\\d|[01]?\\d\\d?)\\.){3}(25[0-5]|2[0-4]\\d|[01]?\\d\\d?)"
 
 resp = client.text_generation(
-    "Whats Googles DNS",
-    max_new_tokens=10,
-    decoder_input_details=True,
+    f"Whats Googles DNS? Please use the following regex: {regexp}",
     seed=42,
     grammar={
         "type": "regex",
-        "value": "((25[0-5]|2[0-4]\\d|[01]?\\d\\d?)\\.){3}(25[0-5]|2[0-4]\\d|[01]?\\d\\d?)",
+        "value": regexp,
     },
 )
 
 print(resp)
-# 1.0.0.1
+# 7.1.1.1
 ```
diff --git a/docs/source/basic_tutorials/visual_language_models.md b/docs/source/basic_tutorials/visual_language_models.md
index d6642173..3770db0b 100644
--- a/docs/source/basic_tutorials/visual_language_models.md
+++ b/docs/source/basic_tutorials/visual_language_models.md
@@ -115,8 +115,6 @@ print(chat_completion)
 # ChatCompletion(id='', choices=[Choice(finish_reason='eos_token', index=0, logprobs=None, message=ChatCompletionMessage(content=' The image depicts an anthropomorphic rabbit dressed in a space suit with gear that resembles NASA attire. The setting appears to be a solar eclipse with dramatic mountain peaks and a partial celestial body in the sky. The artwork is detailed and vivid, with a warm color palette and a sense of an adventurous bunny exploring or preparing for a journey beyond Earth. ', role='assistant', function_call=None, tool_calls=None))], created=1714589732, model='llava-hf/llava-v1.6-mistral-7b-hf', object='text_completion', system_fingerprint='2.0.2-native', usage=CompletionUsage(completion_tokens=84, prompt_tokens=2943, total_tokens=3027))
 ```
 
-If you want additional details, you can add `details=True`. In this case, you get a `TextGenerationStreamResponse` which contains additional information such as the probabilities and the tokens. For the final response in the stream, it also returns the full generated text.
-
 ### Inference Through Sending `cURL` Requests
 
 To use the `generate_stream` endpoint with curl, you can add the `-N` flag. This flag disables curl default buffering and shows data as it arrives from the server.
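+
+For instance, a minimal streaming request might look like the sketch below, following the pattern of the doc's earlier curl examples; the image URL and parameters are illustrative placeholders, not a recorded run:
+
+```bash
+# -N disables curl's buffering so tokens are printed as they stream in.
+curl -N localhost:3000/generate_stream \
+    -X POST \
+    -d '{"inputs":"![](https://example.com/rabbit.png)What is this a picture of?\n\n","parameters":{"max_new_tokens":16}}' \
+    -H 'Content-Type: application/json'
+```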