From 8de10acdcfb640b7db44cf0595146877f4a8594d Mon Sep 17 00:00:00 2001
From: Vaibhav Srivastav
Date: Tue, 13 Aug 2024 15:47:25 +0200
Subject: [PATCH] Improve the Consuming TGI docs.

---
 docs/openapi.json                            | 18 ++-------------
 docs/source/basic_tutorials/consuming_tgi.md | 23 ++++++++++++++++----
 2 files changed, 21 insertions(+), 20 deletions(-)

diff --git a/docs/openapi.json b/docs/openapi.json
index df21e19d..9d281a48 100644
--- a/docs/openapi.json
+++ b/docs/openapi.json
@@ -819,13 +819,6 @@
             "example": "1.0",
             "nullable": true
           },
-          "guideline": {
-            "type": "string",
-            "description": "A guideline to be used in the chat_template",
-            "default": "null",
-            "example": "null",
-            "nullable": true
-          },
           "logit_bias": {
             "type": "array",
             "items": {
@@ -1824,8 +1817,7 @@
         "type": "object",
         "required": [
           "finish_reason",
-          "generated_tokens",
-          "input_length"
+          "generated_tokens"
         ],
         "properties": {
           "finish_reason": {
@@ -1837,12 +1829,6 @@
             "example": 1,
             "minimum": 0
           },
-          "input_length": {
-            "type": "integer",
-            "format": "int32",
-            "example": 1,
-            "minimum": 0
-          },
           "seed": {
             "type": "integer",
             "format": "int64",
@@ -2094,4 +2080,4 @@
       "description": "Hugging Face Text Generation Inference API"
     }
   ]
-}
+}
\ No newline at end of file
diff --git a/docs/source/basic_tutorials/consuming_tgi.md b/docs/source/basic_tutorials/consuming_tgi.md
index 4829ec7c..cda1f5ab 100644
--- a/docs/source/basic_tutorials/consuming_tgi.md
+++ b/docs/source/basic_tutorials/consuming_tgi.md
@@ -1,18 +1,33 @@
 # Consuming Text Generation Inference
 
-There are many ways you can consume Text Generation Inference server in your applications. After launching, you can use the `/generate` route and make a `POST` request to get results from the server. You can also use the `/generate_stream` route if you want TGI to return a stream of tokens. You can make the requests using the tool of your preference, such as curl, Python or TypeScrpt. For a final end-to-end experience, we also open-sourced ChatUI, a chat interface for open-source models.
+There are many ways to consume the Text Generation Inference (TGI) server in your applications. After launching the server, you can use the [Messages API](https://huggingface.co/docs/text-generation-inference/en/messages_api) `/v1/chat/completions` route and make a `POST` request to get results from the server. You can also pass `"stream": true` to the call if you want TGI to return a stream of tokens. You can make the requests using the tool of your preference, such as curl, Python or TypeScript. For a final end-to-end experience, we have also open-sourced ChatUI, a chat interface for open-source models.
 
 ## curl
 
-After the launch, you can query the model using either the `/generate` or `/generate_stream` routes:
+After a successful server launch, you can query the model using the `/v1/chat/completions` route to get responses that comply with the OpenAI Chat Completions API spec:
 
 ```bash
-curl 127.0.0.1:8080/generate \
+curl localhost:3000/v1/chat/completions \
     -X POST \
-    -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":20}}' \
+    -d '{
+  "model": "tgi",
+  "messages": [
+    {
+      "role": "system",
+      "content": "You are a helpful assistant."
+    },
+    {
+      "role": "user",
+      "content": "What is deep learning?"
+    }
+  ],
+  "stream": true,
+  "max_tokens": 20
+}' \
    -H 'Content-Type: application/json'
 ```
 
+You can update the `stream` parameter to `false` to get a non-streaming response.
 
 ## Inference Client
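
For reference, the streaming request the patch documents can also be exercised from Python. The sketch below is illustrative only and not part of the commit: it assumes a TGI server listening on `localhost:3000` as in the curl example, uses the third-party `openai` client against TGI's OpenAI-compatible route, and passes a dummy `api_key`, since a local TGI server does not check it by default.

```python
from openai import OpenAI

# Point the OpenAI client at the local TGI server's
# OpenAI-compatible endpoint (port 3000 assumed, matching
# the curl example in the patch above).
client = OpenAI(base_url="http://localhost:3000/v1", api_key="-")

chat = client.chat.completions.create(
    model="tgi",  # TGI serves a single model; "tgi" is a placeholder name
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What is deep learning?"},
    ],
    stream=True,
    max_tokens=20,
)

# With stream=True, the client yields chunks as tokens arrive;
# delta.content can be None on boundary chunks, hence the `or ""`.
for chunk in chat:
    print(chunk.choices[0].delta.content or "", end="", flush=True)
```

Setting `stream=False` instead returns a single completed response object, mirroring the `"stream": false` behavior described in the new docs text.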