From 36b45c2d604508da7c1f0f4e635ebdc8eea9f074 Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Thu, 1 May 2025 11:53:23 +0200
Subject: [PATCH] Update explanation on `{% generation %}` and `{%
 endgeneration %}` removal

---
 router/src/infer/chat_template.rs | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/router/src/infer/chat_template.rs b/router/src/infer/chat_template.rs
index 890091e9..4d28956f 100644
--- a/router/src/infer/chat_template.rs
+++ b/router/src/infer/chat_template.rs
@@ -47,9 +47,8 @@ impl ChatTemplate {
         let mutated_template = mutated_template.replace("[::-1]", "|reverse");
         // TODO: replace with a better solution
         // Hack to remove the {% generation %} and {% endgeneration %} statements from
-        // Phi4 Reasoning chat templates, as those are required when generating a mask
-        // for the assistant generated tokens, not natively handled yet
-        // Reference from Transformers at https://github.com/huggingface/transformers/blob/7a3e208892c06a5e278144eaf38c8599a42f53e7/src/transformers/processing_utils.py#L382-L385
+        // the Jinja2 chat templates if present, since those are only used for assistant
+        // masking during training, and should be ignored during inference
         let mutated_template = mutated_template.replace("{% generation %}", "");
         let mutated_template = mutated_template.replace("{% endgeneration %}", "");