From 8bc10d37ee0613be5af9ac55af7c6c2ec7b8785a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adrien=20Gallou=C3=ABt?=
Date: Thu, 6 Feb 2025 10:31:05 +0000
Subject: [PATCH] Update docs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Adrien Gallouët
---
 docs/source/_toctree.yml             | 2 ++
 docs/source/multi_backend_support.md | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/docs/source/_toctree.yml b/docs/source/_toctree.yml
index 8fcba516..e073353f 100644
--- a/docs/source/_toctree.yml
+++ b/docs/source/_toctree.yml
@@ -52,6 +52,8 @@
 - sections:
   - local: backends/trtllm
     title: TensorRT-LLM
+  - local: backends/llamacpp
+    title: Llamacpp
   title: Backends
 - sections:
   - local: reference/launcher
diff --git a/docs/source/multi_backend_support.md b/docs/source/multi_backend_support.md
index c4df15bc..03d6d30b 100644
--- a/docs/source/multi_backend_support.md
+++ b/docs/source/multi_backend_support.md
@@ -11,3 +11,5 @@ TGI remains consistent across backends, allowing you to switch between them seam
 * **[TGI TRTLLM backend](./backends/trtllm)**: This backend leverages NVIDIA's TensorRT library to accelerate LLM
   inference. It utilizes specialized optimizations and custom kernels for enhanced performance. However, it requires a
   model-specific compilation step for each GPU architecture.
+* **[TGI Llamacpp backend](./backends/llamacpp)**: This backend facilitates the deployment of large language models
+  (LLMs) by integrating [llama.cpp][llama.cpp], an advanced inference engine optimized for both CPU and GPU computation.