From 36b45c2d604508da7c1f0f4e635ebdc8eea9f074 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Thu, 1 May 2025 11:53:23 +0200 Subject: [PATCH] Update explanation on `{% generation %}` and `{% endgeneration %}` removal --- router/src/infer/chat_template.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/router/src/infer/chat_template.rs b/router/src/infer/chat_template.rs index 890091e9..4d28956f 100644 --- a/router/src/infer/chat_template.rs +++ b/router/src/infer/chat_template.rs @@ -47,9 +47,8 @@ impl ChatTemplate { let mutated_template = mutated_template.replace("[::-1]", "|reverse"); // TODO: replace with a better solution // Hack to remove the {% generation %} and {% endgeneration %} statements from - // Phi4 Reasoning chat templates, as those are required when generating a mask - // for the assistant generated tokens, not natively handled yet - // Reference from Transformers at https://github.com/huggingface/transformers/blob/7a3e208892c06a5e278144eaf38c8599a42f53e7/src/transformers/processing_utils.py#L382-L385 + // the Jinja2 chat templates if present, since those are only used for assistant + // masking during training, and should be ignored during inference let mutated_template = mutated_template.replace("{% generation %}", ""); let mutated_template = mutated_template.replace("{% endgeneration %}", "");