feat: validate template variables before apply and improve sliding window check

2025-09-12 12:54:52 +00:00 · 2024-08-12 14:16:09 +00:00 · 2024-08-12 14:16:09 +00:00 · 2551456fff
commit 2551456fff
parent 136bcc8128
4 changed files with 84 additions and 13 deletions
--- a/router/src/infer/chat_template.rs
+++ b/router/src/infer/chat_template.rs
@ -1,3 +1,5 @@
 use std::collections::HashSet;
 use crate::infer::InferError;
 use crate::{
    ChatTemplateInputs, GrammarType, Message, MessageChunk, TextMessage, TokenizerConfigToken,
@ -16,6 +18,7 @@ pub(crate) struct ChatTemplate {
    bos_token: Option<String>,
    eos_token: Option<String>,
    use_default_tool_template: bool,
    variables: HashSet<String>,
 }
 impl ChatTemplate {
@ -30,19 +33,22 @@ impl ChatTemplate {
        let template_str = template.into_boxed_str();
        env.add_function("raise_exception", raise_exception);
        // check if contains the tools variable within the template
        let use_default_tool_template =
            !template_str.as_ref().replace(' ', "").contains("{{tools}}");
        // leaking env and template_str as read-only, static resources for performance.
        let template = Box::leak(env)
            .template_from_str(Box::leak(template_str))
            .unwrap();
        // get the list of variables that are used in the template
        let variables = template.undeclared_variables(true);
        // check if the `tools` variable is used in the template
        let use_default_tool_template = !variables.contains("tools");
        Self {
            template,
            bos_token: bos_token.map(|token| token.as_str().to_string()),
            eos_token: eos_token.map(|token| token.as_str().to_string()),
            use_default_tool_template,
            variables,
        }
    }
@ -64,6 +70,11 @@ impl ChatTemplate {
        let messages: Vec<TextMessage> = messages.into_iter().map(|c| c.into()).collect();
        // check if guideline is expected but not provided
        if self.variables.contains("guideline") && guideline.is_none() {
            return Err(InferError::MissingTemplateVariable("guideline".to_string()));
        }
        self.template
            .render(ChatTemplateInputs {
                guideline,
@ -770,4 +781,62 @@ mod tests {
            assert_eq!(result, target);
        }
    }
    #[test]
    fn test_chat_template_invalid_with_guideline() {
        let mut env = Environment::new();
        env.add_function("raise_exception", raise_exception);
        let source = "{{- bos_token }}\n{%- if messages[-1].role == 'user' %}\n    {{- \"You are a policy expert trying to help determine whether a user prompt is in violation of the defined safety policies.\\n\\n\" }}\n    {{- \"<start_of_turn>\\n\" }}\n    {{- \"Human Question: \" + messages[-1].content }}\n    {{- \"\\n<end_of_turn>\\n\\n\" }}\n    {{- \"Our safety principle is defined in the below:\\n\\n\" }}\n    {{- \"* \" + guideline + \"\\n\" }}\n    {{- \"\\n===\\n\\n\" }}\n    {{- \"Does the human question violate the above principle? Your answer must start with 'Yes' or 'No'. And then walk through step by step to be sure we answer correctly.\\n\\n\" }}\n{%- else %}\n    {{- \"You are a policy expert trying to help determine whether an AI response to a prompt is in violation of the defined safety policies.\\n\\n\" }}\n    {{- \"<start_of_turn>\\n\" }}\n    {{- \"Human Question: \" + messages[-2].content }}\n    {{- \"\\n<end_of_turn>\\n\" }}\n    {{- \"<start_of_turn>\\n\" }}\n    {{- \"Chatbot Response: \" + messages[-1].content }}\n    {{- \"\\n<end_of_turn>\\n\\n\" }}\n    {{- \"Our safety principle is defined in the below:\\n\\n\" }}\n    {{- \"* \" + guideline + \"\\n\" }}\n    {{- \"\\n===\\n\\n\" }}\n    {{- \"Does the Chatbot Response violate the above principle? Your answer must  start with 'Yes' or 'No'. And then walk through step by step to be sure we answer correctly.\\n\\n\" }}\n{%- endif %}\n\n";
        // trim all the whitespace
        let source = source
            .lines()
            .map(|line| line.trim())
            .collect::<Vec<&str>>()
            .join("");
        let tmpl = env.template_from_str(&source);
        let chat_template_inputs = ChatTemplateInputs {
            messages: vec![
                TextMessage {
                    role: "user".to_string(),
                    content: "Hi!".to_string(),
                },
                TextMessage {
                    role: "user".to_string(),
                    content: "Hi again!".to_string(),
                },
                TextMessage {
                    role: "assistant".to_string(),
                    content: "Hello how can I help?".to_string(),
                },
                TextMessage {
                    role: "user".to_string(),
                    content: "What is Deep Learning?".to_string(),
                },
                TextMessage {
                    role: "assistant".to_string(),
                    content: "magic!".to_string(),
                },
            ],
            bos_token: Some("[BOS]"),
            eos_token: Some("[EOS]"),
            add_generation_prompt: true,
            ..Default::default()
        };
        let result = tmpl.unwrap().render(chat_template_inputs); //.err().unwrap();
        match result {
            Ok(_) => panic!("Should have failed since no guideline is provided"),
            Err(e) => {
                assert_eq!(
                    e.detail().unwrap(),
                    "Conversation roles must alternate user/assistant/user/assistant/..."
                );
            }
        }
    }
 }
--- a/router/src/infer/mod.rs
+++ b/router/src/infer/mod.rs
@ -337,6 +337,8 @@ pub enum InferError {
    IncompleteGeneration,
    #[error("Template error: {0}")]
    TemplateError(#[from] minijinja::Error),
    #[error("Missing template vatiable: {0}")]
    MissingTemplateVariable(String),
    #[error("Tool error: {0}")]
    ToolError(String),
 }
@ -349,6 +351,7 @@ impl InferError {
            InferError::ValidationError(_) => "validation",
            InferError::IncompleteGeneration => "incomplete_generation",
            InferError::TemplateError(_) => "template_error",
            InferError::MissingTemplateVariable(_) => "missing_template_variable",
            InferError::ToolError(_) => "tool_error",
        }
    }
--- a/router/src/server.rs
+++ b/router/src/server.rs
@ -2297,6 +2297,7 @@ impl From<InferError> for (StatusCode, Json<ErrorResponse>) {
            InferError::ValidationError(_) => StatusCode::UNPROCESSABLE_ENTITY,
            InferError::IncompleteGeneration => StatusCode::INTERNAL_SERVER_ERROR,
            InferError::TemplateError(_) => StatusCode::UNPROCESSABLE_ENTITY,
            InferError::MissingTemplateVariable(_) => StatusCode::UNPROCESSABLE_ENTITY,
            InferError::ToolError(_) => StatusCode::UNPROCESSABLE_ENTITY,
        };
--- a/server/text_generation_server/models/init.py
+++ b/server/text_generation_server/models/init.py
@ -497,14 +497,12 @@ def get_model(
        else -1
    )
-    if max_input_tokens is not None and max_input_tokens <= sliding_window:
+    should_use_sliding_window = (
-        sliding_window = -1
+        sliding_window is not None and sliding_window != -1 and SUPPORTS_WINDOWING
    )
-    if (
+    if should_use_sliding_window:
-        (sliding_window is not None and sliding_window != -1)
+        if max_input_tokens is not None and max_input_tokens > sliding_window:
        and not SUPPORTS_WINDOWING
        and max_input_tokens > sliding_window
    ):
            raise ValueError(
                f"The backend {SYSTEM} does not support sliding window attention that is used by the model type {model_type}. To use this model nonetheless with the {SYSTEM} backend, please launch TGI with the argument `--max-input-tokens` smaller than sliding_window={sliding_window} (got here max_input_tokens={max_input_tokens})."
            )