From a2e48ec3a22f384da0b0f3f52d2a3bb526c6c0a0 Mon Sep 17 00:00:00 2001 From: drbh Date: Tue, 30 Apr 2024 14:54:11 -0400 Subject: [PATCH] feat: fix typo and add more diagrams --- docs/source/_toctree.yml | 4 ++-- docs/source/basic_tutorials/using_guidance.md | 2 +- docs/source/conceptual/guidance.md | 13 ++++++++++++- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/docs/source/_toctree.yml b/docs/source/_toctree.yml index e3cb6c54..c815b535 100644 --- a/docs/source/_toctree.yml +++ b/docs/source/_toctree.yml @@ -26,7 +26,7 @@ - local: basic_tutorials/safety title: Safety - local: basic_tutorials/using_guidance - title: Using Guidance, JSON, tools (via outlines) + title: Using Guidance, JSON, tools - local: basic_tutorials/visual_language_models title: Visual Language Models title: Tutorials @@ -46,6 +46,6 @@ - local: conceptual/speculation title: Speculation (Medusa, ngram) - local: conceptual/guidance - title: How Guidance Works + title: How Guidance Works (via outlines) title: Conceptual Guides diff --git a/docs/source/basic_tutorials/using_guidance.md b/docs/source/basic_tutorials/using_guidance.md index a58dd9b4..606f2453 100644 --- a/docs/source/basic_tutorials/using_guidance.md +++ b/docs/source/basic_tutorials/using_guidance.md @@ -122,7 +122,7 @@ print(response.json()) ### JSON Schema Integration -If Pydantic's not your style, go raw with direct JSON Schema integration. This is simliar to the first example but with programmatic control. +If Pydantic's not your style, go raw with direct JSON Schema integration. This is similar to the first example but with programmatic control. ```python import requests diff --git a/docs/source/conceptual/guidance.md b/docs/source/conceptual/guidance.md index 6cc58594..75be4ab7 100644 --- a/docs/source/conceptual/guidance.md +++ b/docs/source/conceptual/guidance.md @@ -23,7 +23,6 @@ However these use cases can span a wide range of applications, such as: - provide reliable and consistent output for downstream tasks - extract data from multimodal inputs - ## How it works? Diving into the details, guidance is enabled by including a grammar with a generation request that is compiled, and used to modify the chosen tokens. @@ -31,6 +30,18 @@ Diving into the details, guidance is enabled by including a grammar with a gener This process can be broken down into the following steps: 1. A request is sent to the backend, it is processed and placed in batch. Processing includes compiling the grammar into a finite state machine and a grammar state. + +
+<img src="…" />
+
 2. The model does a forward pass over the batch. This returns probabilities for each token in the vocabulary for each request in the batch.
 3. The process of choosing one of those tokens is called `sampling`. The model samples from the distribution of probabilities to choose the next token. In TGI all of the steps before sampling are called `processor`. Grammars are applied as a processor that masks out tokens that are not allowed by the grammar.
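
The masking step described in the hunk above (a grammar compiled into a finite state machine, then applied as a logits processor before sampling) can be illustrated with a small sketch. This is a toy, assumption-laden example rather than TGI's or outlines' actual implementation: the vocabulary, the hand-written transition table, and the `grammar_processor` / `sample` helpers are hypothetical stand-ins for the compiled grammar and the model's forward pass.

```python
# Illustrative sketch of grammar-constrained sampling -- not TGI's real code.
# The FSM here is hand-written; in TGI/outlines the transition table is
# compiled from a regex or JSON Schema grammar.
import math
import random

# Toy vocabulary and FSM: the grammar only accepts "{ name : value }".
# States are ints; 5 is the accepting state.
VOCAB = ["{", "}", "name", ":", "value", "hello"]
TRANSITIONS = {
    (0, "{"): 1,
    (1, "name"): 2,
    (2, ":"): 3,
    (3, "value"): 4,
    (4, "}"): 5,
}

def grammar_processor(logits, state):
    """Mask out (set to -inf) every token the grammar disallows from `state`."""
    return [
        logit if (state, tok) in TRANSITIONS else -math.inf
        for tok, logit in zip(VOCAB, logits)
    ]

def sample(logits):
    """Softmax-sample a token index from the (possibly masked) logits."""
    weights = [math.exp(l) for l in logits]  # exp(-inf) == 0, so masked tokens never win
    return random.choices(range(len(VOCAB)), weights=weights)[0]

state, output = 0, []
while state != 5:
    # Stand-in for the model's forward pass over the batch.
    fake_logits = [random.uniform(-1.0, 1.0) for _ in VOCAB]
    masked = grammar_processor(fake_logits, state)
    idx = sample(masked)
    output.append(VOCAB[idx])
    state = TRANSITIONS[(state, VOCAB[idx])]

print(" ".join(output))  # always prints: { name : value }
```

The point the sketch mirrors is that the mask is applied to the logits before sampling, so whatever chain of tokens is sampled is, by construction, accepted by the grammar's state machine.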