From 54cc24b3c920a879a497254e4ea57fa782a2c5c9 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Thu, 1 May 2025 11:14:21 +0200 Subject: [PATCH] Skip `{% generation %}` and `{% endgeneration %}` The chat templates of the Phi4 Reasoning models (e.g. https://huggingface.co/microsoft/Phi-4-reasoning-plus) use this custom syntax, which AFAIK is not handled natively yet, so strip it for now. --- router/src/infer/chat_template.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/router/src/infer/chat_template.rs b/router/src/infer/chat_template.rs index b26b3824..890091e9 100644 --- a/router/src/infer/chat_template.rs +++ b/router/src/infer/chat_template.rs @@ -45,6 +45,13 @@ impl ChatTemplate { // It uses python notation to reverse lists, which do not exist in minijinja // so we're using the reverse filter instead. let mutated_template = mutated_template.replace("[::-1]", "|reverse"); + // TODO: replace with a better solution + // Hack to remove the {% generation %} and {% endgeneration %} statements from + // Phi4 Reasoning chat templates, as those are required when generating a mask + // for the assistant generated tokens, not natively handled yet + // Reference from Transformers at https://github.com/huggingface/transformers/blob/7a3e208892c06a5e278144eaf38c8599a42f53e7/src/transformers/processing_utils.py#L382-L385 + let mutated_template = mutated_template.replace("{% generation %}", ""); + let mutated_template = mutated_template.replace("{% endgeneration %}", ""); let template_str = mutated_template.into_boxed_str(); env.add_function("raise_exception", raise_exception);