text-generation-inference/router/src/infer/v3/scheduler.rs

/// Batching and inference logic
use crate::infer::v3::queue::{Entry, Queue};
use crate::infer::{
    GenerateStreamResponse, GeneratedText, InferError, InferStreamResponse, Scheduler,
};
use crate::validation::ValidGenerateRequest;
use crate::{FinishReason, PrefillToken, Token};
use nohash_hasher::IntMap;
use std::sync::{
    atomic::{AtomicBool, Ordering},
    Arc,
};
use text_generation_client::v3::{Batch, CachedBatch, Generation, ShardedClient};
use text_generation_client::ClientError;
use tokio::sync::mpsc::error::SendError;
use tokio::sync::{mpsc, Notify, OwnedSemaphorePermit};
use tokio::time::Instant;
use tokio_stream::wrappers::UnboundedReceiverStream;
use tracing::{info_span, instrument, Instrument, Span};

pub(crate) struct SchedulerV3 {
    /// Request queue
    queue: Queue,
    /// Notify batcher on queue appends
    batching_task_notifier: Arc<Notify>,
}

impl SchedulerV3 {
    #[allow(clippy::too_many_arguments)]
    pub(crate) fn new(
        client: ShardedClient,
        waiting_served_ratio: f32,
        max_batch_prefill_tokens: u32,
        max_batch_total_tokens: u32,
        max_waiting_tokens: usize,
        max_batch_size: Option<usize>,
        requires_padding: bool,
        window_size: Option<u32>,
        speculate: u32,
        generation_health: Arc<AtomicBool>,
    ) -> Self {
        let queue = Queue::new(requires_padding, 16, window_size, speculate);
        let batching_task_notifier = Arc::new(Notify::new());

        // Spawn batching background task that contains all the inference logic
        tokio::spawn(batching_task(
            client,
            waiting_served_ratio,
            max_batch_prefill_tokens,
            max_batch_total_tokens,
            max_waiting_tokens,
            max_batch_size,
            queue.clone(),
            batching_task_notifier.clone(),
            generation_health,
        ));

        Self {
            queue,
            batching_task_notifier,
        }
    }
}

impl Scheduler for SchedulerV3 {
    #[instrument(skip_all)]
    fn schedule(
        &self,
        request: ValidGenerateRequest,
        permit: OwnedSemaphorePermit,
    ) -> Result<GenerateStreamResponse, InferError> {
        // MPSC channel to communicate with the background batching task
        let (response_tx, response_rx) = mpsc::unbounded_channel();
        let input_length = request.input_length;

        // Append the request to the queue
        self.queue.append(Entry {
            request,
            response_tx,
            span: Span::current(),
            temp_span: None,
            queue_time: Instant::now(),
            batch_time: None,
        });

        // Notify the background task that we have a new entry in the queue that needs
        // to be batched
        self.batching_task_notifier.notify_one();

        // Return stream
        Ok((
            permit,
            input_length,
            UnboundedReceiverStream::new(response_rx),
        ))
    }
}

/// Batching logic
/// Will be launched in a background Tokio task
///
/// Batches requests and sends them to the inference server
#[allow(clippy::too_many_arguments)]
pub(crate) async fn batching_task(
    mut client: ShardedClient,
    waiting_served_ratio: f32,
    max_batch_prefill_tokens: u32,
    max_batch_total_tokens: u32,
    max_waiting_tokens: usize,
    max_batch_size: Option<usize>,
    queue: Queue,
    notifier: Arc<Notify>,
    generation_health: Arc<AtomicBool>,
) {
    // Infinite loop
    loop {
        // Wait for a notification from the Infer struct
        notifier.notified().await;

        // Get the next batch from the queue
        // This batch might be smaller than the maximum batch size if there are not enough requests
        // waiting in the queue
        while let Some((mut entries, batch, span)) = queue
            .next_batch(
                None,
                max_batch_size,
                max_batch_prefill_tokens,
                max_batch_total_tokens,
            )
            .await
        {
            let mut cached_batch = prefill(&mut client, batch, &mut entries, &generation_health)
                .instrument(span)
                .await;
            let mut waiting_tokens = 1;

            // We loop until we do not receive any cached batch from the inference server (== until
            // all requests have met their stopping criteria)
            while let Some(batch) = cached_batch {
                // Get current batch info
                let batch_size = batch.size;
                let batch_max_tokens = batch.max_tokens;
                let mut batches = vec![batch];
                metrics::gauge!("tgi_batch_current_size", batch_size as f64);
                metrics::gauge!("tgi_batch_current_max_tokens", batch_max_tokens as f64);

                let min_size = if waiting_tokens >= max_waiting_tokens {
                    // If we didn't onboard any new requests since >= max_waiting_tokens, we try
                    // to add a new batch even though its size might be small
                    None
                } else {
                    // Minimum batch size
                    Some((batch_size as f32 * waiting_served_ratio).floor() as usize)
                };

                let token_budget = max_batch_total_tokens.saturating_sub(batch_max_tokens);
                let max_size = max_batch_size.map(|max_size| max_size - batch_size as usize);

                // Try to get a new batch
                if let Some((mut new_entries, new_batch, span)) = queue
                    .next_batch(min_size, max_size, max_batch_prefill_tokens, token_budget)
                    .await
                {
                    // Tracking metrics
                    if min_size.is_some() {
                        metrics::increment_counter!("tgi_batch_concat", "reason" => "backpressure");
                    } else {
                        metrics::increment_counter!("tgi_batch_concat", "reason" => "wait_exceeded");
                    }

                    entries.iter_mut().for_each(|(_, entry)| {
                        // Create a new span to add the info that this entry is waiting
                        // because a new batch is being computed
                        let entry_waiting_span = info_span!(parent: &entry.span, "waiting");
                        // Add relationships
                        span.follows_from(&entry_waiting_span);
                        entry_waiting_span.follows_from(&span);
                        // Update entry
                        entry.temp_span = Some(entry_waiting_span);
                    });

                    // Generate one token for this new batch to have the attention past in cache
                    let new_cached_batch =
                        prefill(&mut client, new_batch, &mut new_entries, &generation_health)
                            .instrument(span)
                            .await;
                    // Reset waiting counter
                    waiting_tokens = 1;
                    // Extend current batch with the new batch
                    if let Some(new_cached_batch) = new_cached_batch {
                        entries.extend(new_entries);
                        batches.push(new_cached_batch);
                    }
                }

                // Create span for this batch to add context to inference calls
                let next_batch_size = entries.len();
                let next_batch_span =
                    info_span!(parent: None, "batch", batch_size = next_batch_size);
                entries.iter_mut().for_each(|(_, entry)| {
                    // Create a new span to link the batch back to this entry
                    let entry_batch_span = info_span!(parent: &entry.span, "infer");
                    // Add relationships
                    next_batch_span.follows_from(&entry_batch_span);
                    entry_batch_span.follows_from(&next_batch_span);
                    // Update entry
                    entry.temp_span = Some(entry_batch_span);
                });

                cached_batch = decode(&mut client, batches, &mut entries, &generation_health)
                    .instrument(next_batch_span)
                    .await;
                waiting_tokens += 1;
            }
            metrics::gauge!("tgi_batch_current_size", 0.0);
            metrics::gauge!("tgi_batch_current_max_tokens", 0.0);
        }
    }
}

#[instrument(skip_all)]
async fn prefill(
    client: &mut ShardedClient,
    batch: Batch,
    entries: &mut IntMap<u64, Entry>,
    generation_health: &Arc<AtomicBool>,
) -> Option<CachedBatch> {
    let start_time = Instant::now();
    let batch_id = batch.id;
    metrics::increment_counter!("tgi_batch_inference_count", "method" => "prefill");

    match client.prefill(batch).await {
        Ok((generations, next_batch, timings)) => {
            // Update health
            generation_health.store(true, Ordering::SeqCst);

            let start_filtering_time = Instant::now();
            // Send generated tokens and filter stopped entries
            filter_send_generations(generations, entries);

            // Filter next batch and remove requests that were stopped
            let next_batch = filter_batch(client, next_batch, entries).await;

            metrics::histogram!("tgi_batch_forward_duration", timings.forward.as_secs_f64(), "method" => "prefill");
            metrics::histogram!("tgi_batch_decode_duration", timings.decode.as_secs_f64(), "method" => "prefill");
            metrics::histogram!("tgi_batch_filter_duration", start_filtering_time.elapsed().as_secs_f64(), "method" => "prefill");
            metrics::histogram!("tgi_batch_inference_duration", start_time.elapsed().as_secs_f64(), "method" => "prefill");
            metrics::increment_counter!("tgi_batch_inference_success", "method" => "prefill");
            next_batch
        }
        // If we have an error, we discard the whole batch
        Err(err) => {
            // Update health
            generation_health.store(false, Ordering::SeqCst);
            let _ = client.clear_cache(Some(batch_id)).await;
            send_errors(err, entries);
            metrics::increment_counter!("tgi_batch_inference_failure", "method" => "prefill");
            None
        }
    }
}

#[instrument(skip_all)]
async fn decode(
    client: &mut ShardedClient,
    batches: Vec<CachedBatch>,
    entries: &mut IntMap<u64, Entry>,
    generation_health: &Arc<AtomicBool>,
) -> Option<CachedBatch> {
    let start_time = Instant::now();
    let batch_ids: Vec<u64> = batches.iter().map(|b| b.id).collect();
    metrics::increment_counter!("tgi_batch_inference_count", "method" => "decode");

    match client.decode(batches).await {
        Ok((generations, next_batch, timings)) => {
            // Update health
            generation_health.store(true, Ordering::SeqCst);

            let start_filtering_time = Instant::now();
            // Send generated tokens and filter stopped entries
            filter_send_generations(generations, entries);

            // Filter next batch and remove requests that were stopped
            let next_batch = filter_batch(client, next_batch, entries).await;

            if let Some(concat_duration) = timings.concat {
                metrics::histogram!("tgi_batch_concat_duration", concat_duration.as_secs_f64(), "method" => "decode");
            }
            metrics::histogram!("tgi_batch_forward_duration", timings.forward.as_secs_f64(), "method" => "decode");
            metrics::histogram!("tgi_batch_decode_duration", timings.decode.as_secs_f64(), "method" => "decode");
            metrics::histogram!("tgi_batch_filter_duration", start_filtering_time.elapsed().as_secs_f64(), "method" => "decode");
            metrics::histogram!("tgi_batch_inference_duration", start_time.elapsed().as_secs_f64(), "method" => "decode");
            metrics::increment_counter!("tgi_batch_inference_success", "method" => "decode");
            next_batch
        }
        // If we have an error, we discard the whole batch
        Err(err) => {
            generation_health.store(false, Ordering::SeqCst);
            for id in batch_ids {
                let _ = client.clear_cache(Some(id)).await;
            }
            send_errors(err, entries);
            metrics::increment_counter!("tgi_batch_inference_failure", "method" => "decode");
            None
        }
    }
}

/// Filter a `batch` and remove all requests not present in `entries`
#[instrument(skip_all)]
async fn filter_batch(
    client: &mut ShardedClient,
    next_batch: Option<CachedBatch>,
    entries: &IntMap<u64, Entry>,
) -> Option<CachedBatch> {
    let mut batch = next_batch?;

    // No need to filter
    if batch.size as usize == entries.len() {
        return Some(batch);
    }

    let id = batch.id;

    // Retain only requests that are still in entries
    batch.request_ids.retain(|id| entries.contains_key(id));

    if batch.request_ids.is_empty() {
        // All requests have been filtered out
        // Next batch is now empty
        // Clear it from the Python shards cache
        // We unwrap here as we need to panic since we cannot recover if this method fails
        client.clear_cache(Some(id)).await.unwrap();
        None
    } else {
        // Filter Python shard cache
        // We unwrap here as we need to panic since we cannot recover if this method fails
        client.filter_batch(id, batch.request_ids).await.unwrap()
    }
}

/// Send one or multiple `InferStreamResponse` to Infer for all `entries`
/// and filter entries
#[instrument(skip_all)]
fn filter_send_generations(generations: Vec<Generation>, entries: &mut IntMap<u64, Entry>) {
    generations.into_iter().for_each(|generation| {
        let id = generation.request_id;
        // Get entry
        // We can `expect` here as the request id should always be in the entries
        let entry = entries
            .get(&id)
            .expect("ID not found in entries. This is a bug.");

        // Create and enter a span to link this function back to the entry
        let _span = info_span!(parent: entry.temp_span.as_ref().expect("batch_span is None. This is a bug."), "send_generation", generation = ?generation).entered();
        // Send generation responses back to the infer task
        // If the receive an error from the Flume channel, it means that the client dropped the
        // request and we need to stop generating hence why we unwrap_or(true)
        let stopped = send_responses(generation, entry).map_err(|err| {
            tracing::error!("Entry response channel error.");
            metrics::increment_counter!("tgi_request_failure", "err" => "dropped");
            err
        }).unwrap_or(true);
        if stopped {
            entries.remove(&id).expect("ID not found in entries. This is a bug.");
        }
    });
}

/// Send responses through the `entry` response channel
fn send_responses(
    generation: Generation,
    entry: &Entry,
) -> Result<bool, Box<SendError<Result<InferStreamResponse, InferError>>>> {
    // Return directly if the channel is disconnected
    if entry.response_tx.is_closed() {
        metrics::increment_counter!("tgi_request_failure", "err" => "dropped");
        return Ok(true);
    }

    let mut stopped = false;

    if let Some(prefill_tokens) = generation.prefill_tokens {
        // Create Token objects
        // We do that here instead of in the Python code as Rust for loops are faster
        let prefill_tokens = prefill_tokens
            .ids
            .into_iter()
            .zip(prefill_tokens.logprobs)
            .zip(prefill_tokens.texts)
            .map(|((id, logprob), text)| PrefillToken { id, text, logprob })
            .collect();

        // Send message
        entry
            .response_tx
            .send(Ok(InferStreamResponse::Prefill(prefill_tokens)))?;
    }

    // Create last Token
    let tokens_ = generation.tokens.expect("Non empty tokens in generation");
    let n = tokens_.ids.len();
    metrics::histogram!("tgi_request_skipped_tokens", (n - 1) as f64);
    let mut iterator = tokens_
        .ids
        .into_iter()
        .zip(tokens_.logprobs)
        .zip(tokens_.texts)
        .zip(tokens_.is_special)
        .enumerate()
        .peekable();
    while let Some((i, (((id, logprob), text), special))) = iterator.next() {
        let token = Token {
            id,
            text,
            logprob,
            special,
        };
        let top_tokens = if let Some(top_tokens_) = generation.top_tokens.get(i) {
            top_tokens_
                .ids
                .iter()
                .zip(top_tokens_.logprobs.iter())
                .zip(top_tokens_.texts.iter())
                .zip(top_tokens_.is_special.iter())
                .map(|(((&id, &logprob), text), &special)| Token {
                    id,
                    text: text.to_string(),
                    logprob,
                    special,
                })
                .collect()
        } else {
            vec![]
        };
        match (&generation.generated_text, iterator.peek()) {
            (Some(generated_text), None) => {
                // Generation has ended
                stopped = true;
                // Send message
                entry.response_tx.send(Ok(InferStreamResponse::End {
                    token,
                    top_tokens,
                    generated_text: GeneratedText::from(generated_text.clone()),
                    queued: entry.queue_time,
                    start: entry.batch_time.unwrap(),
                }))?;
            }
            _ => {
                // Send message
                entry
                    .response_tx
                    .send(Ok(InferStreamResponse::Intermediate { token, top_tokens }))?;
            }
        }
    }

    Ok(stopped)
}

/// Send errors to Infer for all `entries`
#[instrument(skip_all)]
fn send_errors(error: ClientError, entries: &mut IntMap<u64, Entry>) {
    entries.drain().for_each(|(_, entry)| {
        // Create and enter a span to link this function back to the entry
        let _send_error_span = info_span!(parent: entry.temp_span.as_ref().expect("batch_span is None. This is a bug."), "send_error").entered();
        let err = InferError::GenerationError(error.to_string());
        metrics::increment_counter!("tgi_request_failure", "err" => "generation");
        tracing::error!("{err}");

        // unwrap_or is valid here as we don't care if the receiver is gone.
        entry
            .response_tx
            .send(Err(err))
            .unwrap_or(());
    });
}

impl From<text_generation_client::v3::GeneratedText> for GeneratedText {
    fn from(value: text_generation_client::v3::GeneratedText) -> Self {
        let v3_finish_reason =
            text_generation_client::v3::FinishReason::try_from(value.finish_reason).unwrap();
        let finish_reason = match v3_finish_reason {
            text_generation_client::v3::FinishReason::Length => FinishReason::Length,
            text_generation_client::v3::FinishReason::EosToken => FinishReason::EndOfSequenceToken,
            text_generation_client::v3::FinishReason::StopSequence => FinishReason::StopSequence,
        };

        Self {
            text: value.text,
            generated_tokens: value.generated_tokens,
            finish_reason,
            seed: value.seed,
        }
    }
}

// tests
#[cfg(test)]
mod tests {
    use crate::infer::raise_exception;
    use crate::{ChatTemplateInputs, TextMessage};
    use minijinja::Environment;

    #[test]
    fn test_chat_template() {
        let env = Environment::new();

        let source = r#"
        {% for message in messages %}
            {% if message['role'] == 'system' %}
                {% if message['content']%}
                    {{'### System:\n' + message['content']+'\n\n'}}
                {% endif %}
            {% elif message['role'] == 'user' %}
                {{'### User:\n' + message['content']+'\n\n'}}
            {% elif message['role'] == 'assistant' %}
                {{'### Assistant:\n'  + message['content']}}
            {% endif %}
            {% if loop.last and add_generation_prompt %}
                {{ '### Assistant:\n' }}
            {% endif %}
        {% endfor %}"#;

        // trim all the whitespace
        let source = source
            .lines()
            .map(|line| line.trim())
            .collect::<Vec<&str>>()
            .join("");

        let tmpl = env.template_from_str(&source);

        let chat_template_inputs = ChatTemplateInputs {
            messages: vec![
                TextMessage {
                    role: "user".to_string(),
                    content: "Hi!".to_string(),
                },
                TextMessage {
                    role: "assistant".to_string(),
                    content: "Hello how can I help?".to_string(),
                },
                TextMessage {
                    role: "user".to_string(),
                    content: "What is Deep Learning?".to_string(),
                },
                TextMessage {
                    role: "assistant".to_string(),
                    content: "magic!".to_string(),
                },
            ],
            bos_token: Some("[BOS]"),
            eos_token: Some("[EOS]"),
            add_generation_prompt: true,
            ..Default::default()
        };

        let result = tmpl.unwrap().render(chat_template_inputs).unwrap();

        assert_eq!(
            result,
            "### User:\nHi!\n\n### Assistant:\nHello how can I help?### User:\nWhat is Deep Learning?\n\n### Assistant:\nmagic!### Assistant:\n"
        );
    }

    #[test]
    fn test_chat_template_invalid_with_raise() {
        let mut env = Environment::new();
        env.add_function("raise_exception", raise_exception);

        let source = r#"
        {{ bos_token }}
        {% for message in messages %}
        {% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}
        {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}
        {% endif %}
        {% if message['role'] == 'user' %}
        {{ '[INST] ' + message['content'] + ' [/INST]' }}
        {% elif message['role'] == 'assistant' %}
        {{ message['content'] + eos_token}}
        {% else %}
        {{ raise_exception('Only user and assistant roles are supported!') }}
        {% endif %}
        {% endfor %}"#;

        // trim all the whitespace
        let source = source
            .lines()
            .map(|line| line.trim())
            .collect::<Vec<&str>>()
            .join("");

        let tmpl = env.template_from_str(&source);

        let chat_template_inputs = ChatTemplateInputs {
            messages: vec![
                TextMessage {
                    role: "user".to_string(),
                    content: "Hi!".to_string(),
                },
                TextMessage {
                    role: "user".to_string(),
                    content: "Hi again!".to_string(),
                },
                TextMessage {
                    role: "assistant".to_string(),
                    content: "Hello how can I help?".to_string(),
                },
                TextMessage {
                    role: "user".to_string(),
                    content: "What is Deep Learning?".to_string(),
                },
                TextMessage {
                    role: "assistant".to_string(),
                    content: "magic!".to_string(),
                },
            ],
            bos_token: Some("[BOS]"),
            eos_token: Some("[EOS]"),
            add_generation_prompt: true,
            ..Default::default()
        };

        let result = tmpl.unwrap().render(chat_template_inputs); //.err().unwrap();

        match result {
            Ok(_) => panic!("Should have failed"),
            Err(e) => {
                assert_eq!(
                    e.detail().unwrap(),
                    "Conversation roles must alternate user/assistant/user/assistant/..."
                );
            }
        }
    }

    #[test]
    fn test_chat_template_valid_with_raise() {
        let mut env = Environment::new();
        env.add_function("raise_exception", raise_exception);

        let source = r#"
        {{ bos_token }}
        {% for message in messages %}
        {% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}
        {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}
        {% endif %}
        {% if message['role'] == 'user' %}
        {{ '[INST] ' + message['content'] + ' [/INST]' }}
        {% elif message['role'] == 'assistant' %}
        {{ message['content'] + eos_token}}
        {% else %}
        {{ raise_exception('Only user and assistant roles are supported!') }}
        {% endif %}
        {% endfor %}"#;

        // trim all the whitespace
        let source = source
            .lines()
            .map(|line| line.trim())
            .collect::<Vec<&str>>()
            .join("");

        let tmpl = env.template_from_str(&source);

        let chat_template_inputs = ChatTemplateInputs {
            messages: vec![
                TextMessage {
                    role: "user".to_string(),
                    content: "Hi!".to_string(),
                },
                TextMessage {
                    role: "assistant".to_string(),
                    content: "Hello how can I help?".to_string(),
                },
                TextMessage {
                    role: "user".to_string(),
                    content: "What is Deep Learning?".to_string(),
                },
                TextMessage {
                    role: "assistant".to_string(),
                    content: "magic!".to_string(),
                },
            ],
            bos_token: Some("[BOS]"),
            eos_token: Some("[EOS]"),
            add_generation_prompt: true,
            ..Default::default()
        };

        let result = tmpl.unwrap().render(chat_template_inputs).unwrap();
        assert_eq!(result, "[BOS][INST] Hi! [/INST]Hello how can I help?[EOS][INST] What is Deep Learning? [/INST]magic![EOS]");
    }

    #[test]
    fn test_chat_template_valid_with_add_generation_prompt() {
        let mut env = Environment::new();
        env.add_function("raise_exception", raise_exception);

        let source = r#"
        {% for message in messages %}
        {{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}
        {% endfor %}
        {% if add_generation_prompt %}
            {{ '<|im_start|>assistant\n' }}
        {% endif %}"#;

        // trim all the whitespace
        let source = source
            .lines()
            .map(|line| line.trim())
            .collect::<Vec<&str>>()
            .join("");

        let tmpl = env.template_from_str(&source);

        let chat_template_inputs = ChatTemplateInputs {
            messages: vec![
                TextMessage {
                    role: "user".to_string(),
                    content: "Hi!".to_string(),
                },
                TextMessage {
                    role: "assistant".to_string(),
                    content: "Hello how can I help?".to_string(),
                },
                TextMessage {
                    role: "user".to_string(),
                    content: "What is Deep Learning?".to_string(),
                },
                TextMessage {
                    role: "assistant".to_string(),
                    content: "magic!".to_string(),
                },
            ],
            bos_token: Some("[BOS]"),
            eos_token: Some("[EOS]"),
            add_generation_prompt: true,
            ..Default::default()
        };

        let result = tmpl.unwrap().render(chat_template_inputs).unwrap();
        assert_eq!(result, "<|im_start|>user\nHi!<|im_end|>\n<|im_start|>assistant\nHello how can I help?<|im_end|>\n<|im_start|>user\nWhat is Deep Learning?<|im_end|>\n<|im_start|>assistant\nmagic!<|im_end|>\n<|im_start|>assistant\n");
    }

    struct ChatTemplateTestItem {
        name: &'static str,
        chat_template: &'static str,
        input: ChatTemplateInputs<'static>,
        target: &'static str,
    }

    #[test]
    fn test_many_chat_templates() {
        let example_chat = vec![
            TextMessage {
                role: "user".to_string(),
                content: "Hello, how are you?".to_string(),
            },
            TextMessage {
                role: "assistant".to_string(),
                content: "I'm doing great. How can I help you today?".to_string(),
            },
            TextMessage {
                role: "user".to_string(),
                content: "I'd like to show off how chat templating works!".to_string(),
            },
        ];

        let example_chat_with_system = [TextMessage {
            role: "system".to_string(),
            content: "You are a friendly chatbot who always responds in the style of a pirate"
                .to_string(),
        }]
        .iter()
        .chain(&example_chat)
        .cloned()
        .collect::<Vec<_>>();

        let test_default_templates = vec![
            ChatTemplateTestItem {
                name: "_base",
                chat_template: "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\\n' + message['content'] + '<|im_end|>' + '\\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\\n' }}{% endif %}",
                input: ChatTemplateInputs {
                    messages: example_chat.clone(),
                    add_generation_prompt: false,
                    bos_token: Some(""),
                    eos_token: Some(""),
                    ..Default::default()
                },
                target: "<|im_start|>user\nHello, how are you?<|im_end|>\n<|im_start|>assistant\nI'm doing great. How can I help you today?<|im_end|>\n<|im_start|>user\nI'd like to show off how chat templating works!<|im_end|>\n",
            },
            ChatTemplateTestItem {
                name: "blenderbot",
                chat_template: "{% for message in messages %}{% if message['role'] == 'user' %}{{ ' ' }}{% endif %}{{ message['content'] }}{% if not loop.last %}{{ '  ' }}{% endif %}{% endfor %}{{ eos_token }}",
                input: ChatTemplateInputs {
                    messages: example_chat.clone(),
                    add_generation_prompt: false,
                    bos_token: Some(""),
                    eos_token: Some("</s>"),
                    ..Default::default()
                },
                target: " Hello, how are you?  I'm doing great. How can I help you today?   I'd like to show off how chat templating works!</s>",
            },
            ChatTemplateTestItem {
                name: "blenderbot_small",
                chat_template: "{% for message in messages %}{% if message['role'] == 'user' %}{{ ' ' }}{% endif %}{{ message['content'] }}{% if not loop.last %}{{ '  ' }}{% endif %}{% endfor %}{{ eos_token }}",
                input: ChatTemplateInputs {
                    messages: example_chat.clone(),
                    add_generation_prompt: false,
                    bos_token: Some(""),
                    eos_token: Some("</s>"),
                    ..Default::default()
                },
                target: " Hello, how are you?  I'm doing great. How can I help you today?   I'd like to show off how chat templating works!</s>",
            },
            ChatTemplateTestItem {
                name: "bloom",
                chat_template: "{% for message in messages %}{{ message.content }}{{ eos_token }}{% endfor %}",
                input: ChatTemplateInputs {
                    messages: example_chat.clone(),
                    add_generation_prompt: false,
                    bos_token: Some(""),
                    eos_token: Some("</s>"),
                    ..Default::default()
                },
                target: "Hello, how are you?</s>I'm doing great. How can I help you today?</s>I'd like to show off how chat templating works!</s>",
            },
            ChatTemplateTestItem {
                name: "gpt_neox",
                chat_template: "{% for message in messages %}{{ message.content }}{{ eos_token }}{% endfor %}",
                input: ChatTemplateInputs {
                    messages: example_chat.clone(),
                    add_generation_prompt: false,
                    bos_token: Some(""),
                    eos_token: Some("<|endoftext|>"),
                    ..Default::default()
                },
                target: "Hello, how are you?<|endoftext|>I'm doing great. How can I help you today?<|endoftext|>I'd like to show off how chat templating works!<|endoftext|>",
            },
            ChatTemplateTestItem {
                name: "gpt2",
                chat_template: "{% for message in messages %}{{ message.content }}{{ eos_token }}{% endfor %}",
                input: ChatTemplateInputs {
                    messages: example_chat.clone(),
                    add_generation_prompt: false,
                    bos_token: Some(""),
                    eos_token: Some("<|endoftext|>"),
                    ..Default::default()
                },
                target: "Hello, how are you?<|endoftext|>I'm doing great. How can I help you today?<|endoftext|>I'd like to show off how chat templating works!<|endoftext|>",
            },
            ChatTemplateTestItem {
                name: "llama",
                // NOTE: the `.strip()` has been replaced with `| trim` in the following template
                chat_template: "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif USE_DEFAULT_PROMPT == true and not '<<SYS>>' in messages[0]['content'] %}{% set loop_messages = messages %}{% set system_message = 'DEFAULT_SYSTEM_MESSAGE' %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<<SYS>>\\n' + system_message + '\\n<</SYS>>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token +'[INST] ' + content | trim + ' [/INST]' }}{% elif message['role'] == 'system' %}{{ '<<SYS>>\\n' + content | trim + '\\n<</SYS>>\\n\\n' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content | trim + ' ' + eos_token }}{% endif %}{% endfor %}",
                input: ChatTemplateInputs {
                    messages: example_chat_with_system.clone(),
                    add_generation_prompt: true,
                    bos_token: Some("<s>"),
                    eos_token: Some("</s>"),
                    ..Default::default()
                },
                target: "<s>[INST] <<SYS>>\nYou are a friendly chatbot who always responds in the style of a pirate\n<</SYS>>\n\nHello, how are you? [/INST] I'm doing great. How can I help you today? </s><s>[INST] I'd like to show off how chat templating works! [/INST]",
            },
            ChatTemplateTestItem {
                name: "whisper",
                chat_template: "{% for message in messages %}{{ message.content }}{{ eos_token }}{% endfor %}",
                input: ChatTemplateInputs {
                    messages: example_chat.clone(),
                    add_generation_prompt: true,
                    bos_token: Some(""),
                    eos_token: Some("<|endoftext|>"),
                    ..Default::default()
                },
                target: "Hello, how are you?<|endoftext|>I'm doing great. How can I help you today?<|endoftext|>I'd like to show off how chat templating works!<|endoftext|>",
            },
        ];

        #[allow(unused_variables)] // name is unused
        for ChatTemplateTestItem {
            name,
            chat_template,
            input,
            target,
        } in test_default_templates
        {
            let mut env = Environment::new();
            env.add_function("raise_exception", raise_exception);
            let tmpl = env.template_from_str(chat_template);
            let result = tmpl.unwrap().render(input).unwrap();
            assert_eq!(result, target);
        }

        let test_custom_templates = vec![
            ChatTemplateTestItem {
                name: "HuggingFaceH4/zephyr-7b-beta (add_generation_prompt=false)",
                chat_template: "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\\n'  + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
                input: ChatTemplateInputs {
                    messages: example_chat_with_system.clone(),
                    add_generation_prompt: false,
                    bos_token: Some(""),
                    eos_token: Some("</s>"),
                    ..Default::default()
                },
                target: "<|system|>\nYou are a friendly chatbot who always responds in the style of a pirate</s><|user|>\nHello, how are you?</s><|assistant|>\nI'm doing great. How can I help you today?</s><|user|>\nI'd like to show off how chat templating works!</s>",
            },
            ChatTemplateTestItem {
                name: "HuggingFaceH4/zephyr-7b-beta (add_generation_prompt=true)",
                chat_template: "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\\n'  + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
                input: ChatTemplateInputs {
                    messages: vec![
                        TextMessage {
                            role: "system".to_string(),
                            content: "You are a friendly chatbot who always responds in the style of a pirate".to_string(),
                        },
                        TextMessage {
                            role: "user".to_string(),
                            content: "How many helicopters can a human eat in one sitting?".to_string(),
                        },
                    ],
                    add_generation_prompt: true,
                    bos_token: Some(""),
                    eos_token: Some("</s>"),
                    ..Default::default()
                },
                target: "<|system|>\nYou are a friendly chatbot who always responds in the style of a pirate</s><|user|>\nHow many helicopters can a human eat in one sitting?</s><|assistant|>",
            },
            ChatTemplateTestItem {
                name: "HuggingFaceH4/zephyr-7b-gemma-v0.1",
                chat_template: "{% if messages[0]['role'] == 'user' or messages[0]['role'] == 'system' %}{{ bos_token }}{% endif %}{% for message in messages %}{{ '<|im_start|>' + message['role'] + '\\n' + message['content'] + '<|im_end|>' + '\\n' }}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% elif messages[-1]['role'] == 'assistant' %}{{ eos_token }}{% endif %}",
                input: ChatTemplateInputs {
                    messages: example_chat.clone(),
                    add_generation_prompt: false,
                    bos_token: Some("<bos>"),
                    eos_token: Some("<eos>"),
                    ..Default::default()
                },
                target: "<bos><|im_start|>user\nHello, how are you?<|im_end|>\n<|im_start|>assistant\nI'm doing great. How can I help you today?<|im_end|>\n<|im_start|>user\nI'd like to show off how chat templating works!<|im_end|>\n",
            },
            ChatTemplateTestItem {
                name: "mistralai/Mistral-7B-Instruct-v0.1",
                chat_template: "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token + ' ' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}",
                input: ChatTemplateInputs {
                    messages: example_chat.clone(),
                    add_generation_prompt: false,
                    bos_token: Some("<s>"),
                    eos_token: Some("</s>"),
                    ..Default::default()
                },
                target: "<s>[INST] Hello, how are you? [/INST]I'm doing great. How can I help you today?</s> [INST] I'd like to show off how chat templating works! [/INST]",
            },
            ChatTemplateTestItem {
                name: "mistralai/Mixtral-8x7B-Instruct-v0.1",
                chat_template: "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}",
                input: ChatTemplateInputs {
                    messages: example_chat.clone(),
                    add_generation_prompt: false,
                    bos_token: Some("<s>"),
                    eos_token: Some("</s>"),
                    ..Default::default()
                },
                target: "<s>[INST] Hello, how are you? [/INST]I'm doing great. How can I help you today?</s>[INST] I'd like to show off how chat templating works! [/INST]",
            },
            ChatTemplateTestItem {
                name: "cognitivecomputations/dolphin-2.5-mixtral-8x7b",
                chat_template: "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\\n' + message['content'] + '<|im_end|>' + '\\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\\n' }}{% endif %}",
                input: ChatTemplateInputs {
                    messages: example_chat.clone(),
                    add_generation_prompt: false,
                    bos_token: Some("<s>"),
                    eos_token: Some("</s>"),
                    ..Default::default()
                },
                target: "<|im_start|>user\nHello, how are you?<|im_end|>\n<|im_start|>assistant\nI'm doing great. How can I help you today?<|im_end|>\n<|im_start|>user\nI'd like to show off how chat templating works!<|im_end|>\n",
            },
            ChatTemplateTestItem {
                name: "openchat/openchat-3.5-0106",
                // `.title()` has been replaced with `| upper` in the following template
                chat_template: "{{ bos_token }}{% for message in messages %}{{ 'GPT4 Correct ' + (message['role'] | title) + ': ' + message['content'] + '<|end_of_turn|>'}}{% endfor %}{% if add_generation_prompt %}{{ 'GPT4 Correct Assistant:' }}{% endif %}",
                input: ChatTemplateInputs {
                    messages: example_chat.clone(),
                    add_generation_prompt: false,
                    bos_token: Some("<s>"),
                    eos_token: Some("</s>"),
                    ..Default::default()
                },
                target: "<s>GPT4 Correct User: Hello, how are you?<|end_of_turn|>GPT4 Correct Assistant: I'm doing great. How can I help you today?<|end_of_turn|>GPT4 Correct User: I'd like to show off how chat templating works!<|end_of_turn|>",
            },
            ChatTemplateTestItem {
                name: "upstage/SOLAR-10.7B-Instruct-v1.0",
                chat_template: "{% for message in messages %}{{ message.content }}{{ eos_token }}{% endfor %}",
                input: ChatTemplateInputs {
                    messages: example_chat.clone(),
                    add_generation_prompt: false,
                    bos_token: Some("<s>"),
                    eos_token: Some("</s>"),
                    ..Default::default()
                },
                target: "Hello, how are you?</s>I'm doing great. How can I help you today?</s>I'd like to show off how chat templating works!</s>",
            },
            ChatTemplateTestItem {
                name: "codellama/CodeLlama-70b-Instruct-hf",
                // NOTE: `.strip()` has been replaced with `| trim` in the following template
                chat_template: "{% if messages[0]['role'] == 'system' %}{% set user_index = 1 %}{% else %}{% set user_index = 0 %}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != ((loop.index0 + user_index) % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 %}{{ '<s>' }}{% endif %}{% set content = 'Source: ' + message['role'] + '\\n\\n ' + message['content'] | trim %}{{ content + ' <step> ' }}{% endfor %}{{'Source: assistant\\nDestination: user\\n\\n '}}",
                input: ChatTemplateInputs {
                    messages: example_chat.clone(),
                    add_generation_prompt: false,
                    bos_token: Some("<s>"),
                    eos_token: Some("</s>"),
                    ..Default::default()
                },
                target: "<s>Source: user\n\n Hello, how are you? <step> Source: assistant\n\n I'm doing great. How can I help you today? <step> Source: user\n\n I'd like to show off how chat templating works! <step> Source: assistant\nDestination: user\n\n ",
            },
            ChatTemplateTestItem {
                name: "Deci/DeciLM-7B-instruct",
                chat_template: "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '### User:\\n' + message['content'] }}\n{% elif message['role'] == 'system' %}\n{{ '### System:\\n' + message['content'] }}\n{% elif message['role'] == 'assistant' %}\n{{ '### Assistant:\\n'  + message['content'] }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '### Assistant:' }}\n{% endif %}\n{% endfor %}",
                input: ChatTemplateInputs {
                    messages: example_chat.clone(),
                    add_generation_prompt: false,
                    bos_token: Some("<s>"),
                    eos_token: Some("</s>"),
                    ..Default::default()
                },
                target: "### User:\nHello, how are you?### Assistant:\nI'm doing great. How can I help you today?### User:\nI'd like to show off how chat templating works!",
            },
            ChatTemplateTestItem {
                name: "Qwen/Qwen1.5-72B-Chat",
                chat_template: "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\\nYou are a helpful assistant<|im_end|>\\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\\n' + message['content']}}{% if (loop.last and add_generation_prompt) or not loop.last %}{{ '<|im_end|>' + '\\n'}}{% endif %}{% endfor %}{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{ '<|im_start|>assistant\\n' }}{% endif %}",
                input: ChatTemplateInputs {
                    messages: example_chat.clone(),
                    add_generation_prompt: false,
                    bos_token: Some("<s>"),
                    eos_token: Some("</s>"),
                    ..Default::default()
                },
                target: "<|im_start|>system\nYou are a helpful assistant<|im_end|>\n<|im_start|>user\nHello, how are you?<|im_end|>\n<|im_start|>assistant\nI'm doing great. How can I help you today?<|im_end|>\n<|im_start|>user\nI'd like to show off how chat templating works!",
            },
            ChatTemplateTestItem {
                name: "deepseek-ai/deepseek-llm-7b-chat",
                chat_template: "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{{ bos_token }}{% for message in messages %}{% if message['role'] == 'user' %}{{ 'User: ' + message['content'] + '\\n\\n' }}{% elif message['role'] == 'assistant' %}{{ 'Assistant: ' + message['content'] + eos_token }}{% elif message['role'] == 'system' %}{{ message['content'] + '\\n\\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'Assistant:' }}{% endif %}",
                input: ChatTemplateInputs {
                    messages: example_chat.clone(),
                    add_generation_prompt: false,
                    bos_token: Some("<｜begin▁of▁sentence｜>"),
                    eos_token: Some("<｜end▁of▁sentence｜>"),
                    ..Default::default()
                },
                target: "<｜begin▁of▁sentence｜>User: Hello, how are you?\n\nAssistant: I'm doing great. How can I help you today?<｜end▁of▁sentence｜>User: I'd like to show off how chat templating works!\n\n",
            },
            ChatTemplateTestItem {
                name: "h2oai/h2o-danube-1.8b-chat",
                chat_template: "{% for message in messages %}{% if message['role'] == 'user' %}{{ '<|prompt|>' + message['content'] + eos_token }}{% elif message['role'] == 'system' %}{{ '<|system|>' + message['content'] + eos_token }}{% elif message['role'] == 'assistant' %}{{ '<|answer|>'  + message['content'] + eos_token }}{% endif %}{% if loop.last and add_generation_prompt %}{{ '<|answer|>' }}{% endif %}{% endfor %}",
                input: ChatTemplateInputs {
                    messages: example_chat.clone(),
                    add_generation_prompt: false,
                    bos_token: Some("<s>"),
                    eos_token: Some("</s>"),
                    ..Default::default()
                },
                target: "<|prompt|>Hello, how are you?</s><|answer|>I'm doing great. How can I help you today?</s><|prompt|>I'd like to show off how chat templating works!</s>",
            },
            ChatTemplateTestItem {
                name: "internlm/internlm2-chat-7b",
                chat_template: "{% if messages[0]['role'] == 'user' or messages[0]['role'] == 'system' %}{{ bos_token }}{% endif %}{% for message in messages %}{{ '<|im_start|>' + message['role'] + '\\n' + message['content'] + '<|im_end|>' + '\\n' }}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\\n' }}{% elif messages[-1]['role'] == 'assistant' %}{{ eos_token }}{% endif %}",
                input: ChatTemplateInputs {
                    messages: example_chat.clone(),
                    add_generation_prompt: false,
                    bos_token: Some("<s>"),
                    eos_token: Some("</s>"),
                    ..Default::default()
                },
                target: "<s><|im_start|>user\nHello, how are you?<|im_end|>\n<|im_start|>assistant\nI'm doing great. How can I help you today?<|im_end|>\n<|im_start|>user\nI'd like to show off how chat templating works!<|im_end|>\n",
            },
            ChatTemplateTestItem {
                name: "TheBloke/deepseek-coder-33B-instruct-AWQ",
                chat_template: "{%- set found_item = false -%}\n{%- for message in messages -%}\n    {%- if message['role'] == 'system' -%}\n        {%- set found_item = true -%}\n    {%- endif -%}\n{%- endfor -%}\n{%- if not found_item -%}\n{{'You are an AI programming assistant, utilizing the Deepseek Coder model, developed by Deepseek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer.\\n'}}\n{%- endif %}\n{%- for message in messages %}\n    {%- if message['role'] == 'system' %}\n{{ message['content'] }}\n    {%- else %}\n        {%- if message['role'] == 'user' %}\n{{'### Instruction:\\n' + message['content'] + '\\n'}}\n        {%- else %}\n{{'### Response:\\n' + message['content'] + '\\n<|EOT|>\\n'}}\n        {%- endif %}\n    {%- endif %}\n{%- endfor %}\n{{'### Response:\\n'}}\n",
                input: ChatTemplateInputs {
                    messages: example_chat.clone(),
                    add_generation_prompt: false,
                    bos_token: Some("<｜begin▁of▁sentence｜>"),
                    eos_token: Some("<|EOT|>"),
                    ..Default::default()
                },
                target: "You are an AI programming assistant, utilizing the Deepseek Coder model, developed by Deepseek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer.\n### Instruction:\nHello, how are you?\n### Response:\nI'm doing great. How can I help you today?\n<|EOT|>\n### Instruction:\nI'd like to show off how chat templating works!\n### Response:\n",
            },
            ChatTemplateTestItem {
                name: "ericzzz/falcon-rw-1b-chat",
                // `.strip()` has been replaced with `| trim` in the following template
                chat_template: "{% for message in messages %}{% if loop.index > 1 and loop.previtem['role'] != 'assistant' %}{{ ' ' }}{% endif %}{% if message['role'] == 'system' %}{{ '[SYS] ' + message['content'] | trim }}{% elif message['role'] == 'user' %}{{ '[INST] ' + message['content'] | trim }}{% elif message['role'] == 'assistant' %}{{ '[RESP] '  + message['content'] + eos_token }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ ' [RESP] ' }}{% endif %}",
                input: ChatTemplateInputs {
                    messages: example_chat.clone(),
                    add_generation_prompt: false,
                    bos_token: Some("<|endoftext|>"),
                    eos_token: Some("<|endoftext|>"),
                    ..Default::default()
                },
                target: "[INST] Hello, how are you? [RESP] I'm doing great. How can I help you today?<|endoftext|>[INST] I'd like to show off how chat templating works!",
            },
            ChatTemplateTestItem {
                name: "abacusai/Smaug-34B-v0.1",
                chat_template: "{%- for idx in range(0, messages|length) -%}\n{%- if messages[idx]['role'] == 'user' -%}\n{%- if idx > 1 -%}\n{{- bos_token + '[INST] ' + messages[idx]['content'] + ' [/INST]' -}}\n{%- else -%}\n{{- messages[idx]['content'] + ' [/INST]' -}}\n{%- endif -%}\n{% elif messages[idx]['role'] == 'system' %}\n{{- '[INST] <<SYS>>\\n' + messages[idx]['content'] + '\\n<</SYS>>\\n\\n' -}}\n{%- elif messages[idx]['role'] == 'assistant' -%}\n{{- ' '  + messages[idx]['content'] + ' ' + eos_token -}}\n{% endif %}\n{% endfor %}",
                input: ChatTemplateInputs {
                    messages: example_chat.clone(),
                    add_generation_prompt: false,
                    bos_token: Some("<s>"),
                    eos_token: Some("</s>"),
                    ..Default::default()
                },
                target: "Hello, how are you? [/INST] I'm doing great. How can I help you today? </s><s>[INST] I'd like to show off how chat templating works! [/INST]",
            },
            ChatTemplateTestItem {
                name: "maywell/Synatra-Mixtral-8x7B",
                chat_template: "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n{% for message in messages %}{% if message['role'] == 'user' %}### Instruction:\n{{ message['content']|trim -}}{% if not loop.last %}{% endif %}\n{% elif message['role'] == 'assistant' %}### Response:\n{{ message['content']|trim -}}{% if not loop.last %}{% endif %}\n{% elif message['role'] == 'system' %}{{ message['content']|trim -}}{% if not loop.last %}{% endif %}\n{% endif %}\n{% endfor %}\n{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}\n### Response:\n{% endif %}",
                input: ChatTemplateInputs {
                    messages: example_chat.clone(),
                    add_generation_prompt: false,
                    bos_token: Some("<s>"),
                    eos_token: Some("</s>"),
                    ..Default::default()
                },
                target: "Below is an instruction that describes a task. Write a response that appropriately completes the request.### Instruction:Hello, how are you?### Response:I'm doing great. How can I help you today?### Instruction:I'd like to show off how chat templating works!",
            },
            ChatTemplateTestItem {
                name: "deepseek-ai/deepseek-coder-33b-instruct",
                chat_template: "{% if not add_generation_prompt is defined %}\n{% set add_generation_prompt = false %}\n{% endif %}\n{%- set ns = namespace(found=false) -%}\n{%- for message in messages -%}\n    {%- if message['role'] == 'system' -%}\n        {%- set ns.found = true -%}\n    {%- endif -%}\n{%- endfor -%}\n{{bos_token}}{%- if not ns.found -%}\n{{'You are an AI programming assistant, utilizing the Deepseek Coder model, developed by Deepseek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer\\n'}}\n{%- endif %}\n{%- for message in messages %}\n    {%- if message['role'] == 'system' %}\n{{ message['content'] }}\n    {%- else %}\n        {%- if message['role'] == 'user' %}\n{{'### Instruction:\\n' + message['content'] + '\\n'}}\n        {%- else %}\n{{'### Response:\\n' + message['content'] + '\\n<|EOT|>\\n'}}\n        {%- endif %}\n    {%- endif %}\n{%- endfor %}\n{% if add_generation_prompt %}\n{{'### Response:'}}\n{% endif %}",
                input: ChatTemplateInputs {
                    messages: example_chat.clone(),
                    add_generation_prompt: false,
                    bos_token: Some("<｜begin▁of▁sentence｜>"),
                    eos_token: Some("</EOT>"),
                    ..Default::default()
                },
                target: "<｜begin▁of▁sentence｜>You are an AI programming assistant, utilizing the Deepseek Coder model, developed by Deepseek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer\n### Instruction:\nHello, how are you?\n### Response:\nI'm doing great. How can I help you today?\n<|EOT|>\n### Instruction:\nI'd like to show off how chat templating works!\n",
            },
            // NOT INCLUDED
            // - meetkai/functionary-medium-v3.2
            // - fireworks-ai/firefunction-v1
            // https://github
            ChatTemplateTestItem {
                name: "maywell/PiVoT-MoE",
                chat_template: "{{ (messages|selectattr('role', 'equalto', 'system')|list|last).content|trim if (messages|selectattr('role', 'equalto', 'system')|list) else '' }}{% for message in messages %}{% if message['role'] == 'system' %}{{ message['content']|trim }}{% elif message['role'] == 'user' %}### Instruction: {{ message['content']|trim }}{% elif message['role'] == 'assistant' %}### Response: {{ message['content']|trim }}{% elif message['role'] == 'user_context' %}### Input: {{ message['content']|trim }}{% endif %}{% if not loop.last %}\n{% endif %}{% endfor %}{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}### Response:{% endif %}",
                input: ChatTemplateInputs {
                    messages: example_chat_with_system.clone(),
                    add_generation_prompt: false,
                    bos_token: Some("<s>"),
                    eos_token: Some("</s>"),
                    ..Default::default()
                },
                target: "You are a friendly chatbot who always responds in the style of a pirateYou are a friendly chatbot who always responds in the style of a pirate### Instruction: Hello, how are you?### Response: I'm doing great. How can I help you today?### Instruction: I'd like to show off how chat templating works!",
            },
        ];

        #[allow(unused_variables)] // name is unused
        for ChatTemplateTestItem {
            name,
            chat_template,
            input,
            target,
        } in test_custom_templates
        {
            let mut env = Environment::new();
            env.add_function("raise_exception", raise_exception);
            // trim all the whitespace
            let chat_template = chat_template
                .lines()
                .map(|line| line.trim())
                .collect::<Vec<&str>>()
                .join("");

            let tmpl = env.template_from_str(&chat_template);
            let result = tmpl.unwrap().render(input).unwrap();
            assert_eq!(result, target);
        }
    }
}
-												feat: Add token streaming using ServerSideEvents support (#41)


											
										
										
											2023-01-31 16:04:00 +00:00
+								/// Batching and inference logic
-												feat: add SchedulerV3  (#1996)

- Refactor code to allow supporting multiple versions of the
generate.proto at the same time
- Add v3/generate.proto (ISO to generate.proto for now but allow for
future changes without impacting v2 backends)
- Add Schedule trait to abstract queuing and batching mechanisms that
will be different in the future
- Add SchedulerV2/V3 impl
											
										
										
											2024-06-04 13:56:56 +00:00
+								use crate::infer::v3::queue::{Entry, Queue};
 								use crate::infer::{
 								    GenerateStreamResponse, GeneratedText, InferError, InferStreamResponse, Scheduler,
-												feat: support raise_exception, bos and eos tokens (#1450)

This PR adds support to handle the custom jinja function
`raise_exception` and passes the `bos` and `eos` tokens into the
template

Additionally this PR adds 3 tests to validate and show examples of what
can and cannot be parsed currently.

```bash
cargo test --package text-generation-router --lib -- infer::tests --nocapture
#     Finished test [unoptimized + debuginfo] target(s) in 7.82s
#      Running unittests src/lib.rs (target/debug/deps/text_generation_router-18a0bbf99c2ca1b4)

# running 3 tests
# test infer::tests::test_chat_template_valid_with_raise ... ok
# test infer::tests::test_chat_template ... ok
# test infer::tests::test_chat_template_invalid_with_raise ... ok

# test result: ok. 3 passed; 0 failed; 0 ignored; 0 measured; 15 filtered out; finished in 0.00s
```
											
										
										
											2024-01-18 11:31:56 +00:00
+								};
-												feat: add SchedulerV3  (#1996)

- Refactor code to allow supporting multiple versions of the
generate.proto at the same time
- Add v3/generate.proto (ISO to generate.proto for now but allow for
future changes without impacting v2 backends)
- Add Schedule trait to abstract queuing and batching mechanisms that
will be different in the future
- Add SchedulerV2/V3 impl
											
										
										
											2024-06-04 13:56:56 +00:00
+								use crate::validation::ValidGenerateRequest;
 								use crate::{FinishReason, PrefillToken, Token};
-												feat: Add token streaming using ServerSideEvents support (#41)


											
										
										
											2023-01-31 16:04:00 +00:00
+								use nohash_hasher::IntMap;
-												feat(router): new healthcheck that skips the queue (#244)

Co-authored-by: OlivierDehaene <23298448+OlivierDehaene@users.noreply.github.com>
Co-authored-by: OlivierDehaene <olivier@huggingface.co>
											
										
										
											2023-04-26 18:23:54 +00:00
+								use std::sync::{
 								    atomic::{AtomicBool, Ordering},
 								    Arc,
 								};
-												feat: add SchedulerV3  (#1996)

- Refactor code to allow supporting multiple versions of the
generate.proto at the same time
- Add v3/generate.proto (ISO to generate.proto for now but allow for
future changes without impacting v2 backends)
- Add Schedule trait to abstract queuing and batching mechanisms that
will be different in the future
- Add SchedulerV2/V3 impl
											
										
										
											2024-06-04 13:56:56 +00:00
+								use text_generation_client::v3::{Batch, CachedBatch, Generation, ShardedClient};
 								use text_generation_client::ClientError;
-												feat: remove flume (#1184)


											
										
										
											2023-10-23 13:51:12 +00:00
+								use tokio::sync::mpsc::error::SendError;
-												feat: add SchedulerV3  (#1996)

- Refactor code to allow supporting multiple versions of the
generate.proto at the same time
- Add v3/generate.proto (ISO to generate.proto for now but allow for
future changes without impacting v2 backends)
- Add Schedule trait to abstract queuing and batching mechanisms that
will be different in the future
- Add SchedulerV2/V3 impl
											
										
										
											2024-06-04 13:56:56 +00:00
+								use tokio::sync::{mpsc, Notify, OwnedSemaphorePermit};
-												feat: Add token streaming using ServerSideEvents support (#41)


											
										
										
											2023-01-31 16:04:00 +00:00
+								use tokio::time::Instant;
-												feat: remove flume (#1184)


											
										
										
											2023-10-23 13:51:12 +00:00
+								use tokio_stream::wrappers::UnboundedReceiverStream;
-												feat: add distributed tracing (#62)


											
										
										
											2023-02-13 12:02:45 +00:00
+								use tracing::{info_span, instrument, Instrument, Span};
-												feat: Add token streaming using ServerSideEvents support (#41)


											
										
										
											2023-01-31 16:04:00 +00:00
-												feat: add SchedulerV3  (#1996)

- Refactor code to allow supporting multiple versions of the
generate.proto at the same time
- Add v3/generate.proto (ISO to generate.proto for now but allow for
future changes without impacting v2 backends)
- Add Schedule trait to abstract queuing and batching mechanisms that
will be different in the future
- Add SchedulerV2/V3 impl
											
										
										
											2024-06-04 13:56:56 +00:00
+								pub(crate) struct SchedulerV3 {
-												feat(router): use background task to manage request queue (#52)

Co-authored-by: Nick Hill <nickhill@us.ibm.com>
											
										
										
											2023-02-02 13:59:27 +00:00
+								    /// Request queue
 								    queue: Queue,
-												feat: add SchedulerV3  (#1996)

- Refactor code to allow supporting multiple versions of the
generate.proto at the same time
- Add v3/generate.proto (ISO to generate.proto for now but allow for
future changes without impacting v2 backends)
- Add Schedule trait to abstract queuing and batching mechanisms that
will be different in the future
- Add SchedulerV2/V3 impl
											
										
										
											2024-06-04 13:56:56 +00:00
+								    /// Notify batcher on queue appends
 								    batching_task_notifier: Arc<Notify>,
-												feat: Add token streaming using ServerSideEvents support (#41)


											
										
										
											2023-01-31 16:04:00 +00:00
+								}
-												feat: add SchedulerV3  (#1996)

- Refactor code to allow supporting multiple versions of the
generate.proto at the same time
- Add v3/generate.proto (ISO to generate.proto for now but allow for
future changes without impacting v2 backends)
- Add Schedule trait to abstract queuing and batching mechanisms that
will be different in the future
- Add SchedulerV2/V3 impl
											
										
										
											2024-06-04 13:56:56 +00:00
+								impl SchedulerV3 {
-												feat(router): new healthcheck that skips the queue (#244)

Co-authored-by: OlivierDehaene <23298448+OlivierDehaene@users.noreply.github.com>
Co-authored-by: OlivierDehaene <olivier@huggingface.co>
											
										
										
											2023-04-26 18:23:54 +00:00
+								    #[allow(clippy::too_many_arguments)]
-												feat: Add token streaming using ServerSideEvents support (#41)


											
										
										
											2023-01-31 16:04:00 +00:00
+								    pub(crate) fn new(
 								        client: ShardedClient,
-												feat(router): use number of tokens in batch as input for dynamic batching (#226)

Co-authored-by: Nick Hill <nickhill@us.ibm.com>
											
										
										
											2023-04-24 15:59:00 +00:00
+								        waiting_served_ratio: f32,
-												feat(server): add paged attention to flash models (#516)

Closes #478
											
										
										
											2023-06-30 17:09:59 +00:00
+								        max_batch_prefill_tokens: u32,
-												feat(router): use number of tokens in batch as input for dynamic batching (#226)

Co-authored-by: Nick Hill <nickhill@us.ibm.com>
											
										
										
											2023-04-24 15:59:00 +00:00
+								        max_batch_total_tokens: u32,
-												feat: Add token streaming using ServerSideEvents support (#41)


											
										
										
											2023-01-31 16:04:00 +00:00
+								        max_waiting_tokens: usize,
-												feat(router): add max_batch_size (#1542)

Some hardware require a maximum batch size.
											
										
										
											2024-02-09 11:38:41 +00:00
+								        max_batch_size: Option<usize>,
-												feat(router): use number of tokens in batch as input for dynamic batching (#226)

Co-authored-by: Nick Hill <nickhill@us.ibm.com>
											
										
										
											2023-04-24 15:59:00 +00:00
+								        requires_padding: bool,
-												feat: add mistral model (#1071)


											
										
										
											2023-09-28 07:55:47 +00:00
+								        window_size: Option<u32>,
-												Speculative (#1308)


											
										
										
											2023-12-11 11:46:30 +00:00
+								        speculate: u32,
-												feat(router): new healthcheck that skips the queue (#244)

Co-authored-by: OlivierDehaene <23298448+OlivierDehaene@users.noreply.github.com>
Co-authored-by: OlivierDehaene <olivier@huggingface.co>
											
										
										
											2023-04-26 18:23:54 +00:00
+								        generation_health: Arc<AtomicBool>,
-												feat: Add token streaming using ServerSideEvents support (#41)


											
										
										
											2023-01-31 16:04:00 +00:00
+								    ) -> Self {
-												Speculative (#1308)


											
										
										
											2023-12-11 11:46:30 +00:00
+								        let queue = Queue::new(requires_padding, 16, window_size, speculate);
-												feat: add SchedulerV3  (#1996)

- Refactor code to allow supporting multiple versions of the
generate.proto at the same time
- Add v3/generate.proto (ISO to generate.proto for now but allow for
future changes without impacting v2 backends)
- Add Schedule trait to abstract queuing and batching mechanisms that
will be different in the future
- Add SchedulerV2/V3 impl
											
										
										
											2024-06-04 13:56:56 +00:00
+								        let batching_task_notifier = Arc::new(Notify::new());
-												feat: Add token streaming using ServerSideEvents support (#41)


											
										
										
											2023-01-31 16:04:00 +00:00
 								        // Spawn batching background task that contains all the inference logic
 								        tokio::spawn(batching_task(
 								            client,
-												feat(router): use number of tokens in batch as input for dynamic batching (#226)

Co-authored-by: Nick Hill <nickhill@us.ibm.com>
											
										
										
											2023-04-24 15:59:00 +00:00
+								            waiting_served_ratio,
-												feat(server): add paged attention to flash models (#516)

Closes #478
											
										
										
											2023-06-30 17:09:59 +00:00
+								            max_batch_prefill_tokens,
-												feat(router): use number of tokens in batch as input for dynamic batching (#226)

Co-authored-by: Nick Hill <nickhill@us.ibm.com>
											
										
										
											2023-04-24 15:59:00 +00:00
+								            max_batch_total_tokens,
-												feat: Add token streaming using ServerSideEvents support (#41)


											
										
										
											2023-01-31 16:04:00 +00:00
+								            max_waiting_tokens,
-												feat(router): add max_batch_size (#1542)

Some hardware require a maximum batch size.
											
										
										
											2024-02-09 11:38:41 +00:00
+								            max_batch_size,
-												feat(router): use background task to manage request queue (#52)

Co-authored-by: Nick Hill <nickhill@us.ibm.com>
											
										
										
											2023-02-02 13:59:27 +00:00
+								            queue.clone(),
-												feat: add SchedulerV3  (#1996)

- Refactor code to allow supporting multiple versions of the
generate.proto at the same time
- Add v3/generate.proto (ISO to generate.proto for now but allow for
future changes without impacting v2 backends)
- Add Schedule trait to abstract queuing and batching mechanisms that
will be different in the future
- Add SchedulerV2/V3 impl
											
										
										
											2024-06-04 13:56:56 +00:00
+								            batching_task_notifier.clone(),
-												feat(router): new healthcheck that skips the queue (#244)

Co-authored-by: OlivierDehaene <23298448+OlivierDehaene@users.noreply.github.com>
Co-authored-by: OlivierDehaene <olivier@huggingface.co>
											
										
										
											2023-04-26 18:23:54 +00:00
+								            generation_health,
-												feat: Add token streaming using ServerSideEvents support (#41)


											
										
										
											2023-01-31 16:04:00 +00:00
+								        ));
 								        Self {
-												feat(router): use background task to manage request queue (#52)

Co-authored-by: Nick Hill <nickhill@us.ibm.com>
											
										
										
											2023-02-02 13:59:27 +00:00
+								            queue,
-												feat: add SchedulerV3  (#1996)

- Refactor code to allow supporting multiple versions of the
generate.proto at the same time
- Add v3/generate.proto (ISO to generate.proto for now but allow for
future changes without impacting v2 backends)
- Add Schedule trait to abstract queuing and batching mechanisms that
will be different in the future
- Add SchedulerV2/V3 impl
											
										
										
											2024-06-04 13:56:56 +00:00
+								            batching_task_notifier,
-												feat: Add token streaming using ServerSideEvents support (#41)


											
										
										
											2023-01-31 16:04:00 +00:00
+								        }
 								    }
-												feat: add SchedulerV3  (#1996)

- Refactor code to allow supporting multiple versions of the
generate.proto at the same time
- Add v3/generate.proto (ISO to generate.proto for now but allow for
future changes without impacting v2 backends)
- Add Schedule trait to abstract queuing and batching mechanisms that
will be different in the future
- Add SchedulerV2/V3 impl
											
										
										
											2024-06-04 13:56:56 +00:00
+								}
-												feat: Add token streaming using ServerSideEvents support (#41)


											
										
										
											2023-01-31 16:04:00 +00:00
-												feat: add SchedulerV3  (#1996)

- Refactor code to allow supporting multiple versions of the
generate.proto at the same time
- Add v3/generate.proto (ISO to generate.proto for now but allow for
future changes without impacting v2 backends)
- Add Schedule trait to abstract queuing and batching mechanisms that
will be different in the future
- Add SchedulerV2/V3 impl
											
										
										
											2024-06-04 13:56:56 +00:00
+								impl Scheduler for SchedulerV3 {
-												fix: do not leak inputs on error (#1228)

Close #1225
											
										
										
											2023-11-20 09:33:44 +00:00
+								    #[instrument(skip_all)]
-												feat: add SchedulerV3  (#1996)

- Refactor code to allow supporting multiple versions of the
generate.proto at the same time
- Add v3/generate.proto (ISO to generate.proto for now but allow for
future changes without impacting v2 backends)
- Add Schedule trait to abstract queuing and batching mechanisms that
will be different in the future
- Add SchedulerV2/V3 impl
											
										
										
											2024-06-04 13:56:56 +00:00
+								    fn schedule(
-												feat: Add token streaming using ServerSideEvents support (#41)


											
										
										
											2023-01-31 16:04:00 +00:00
+								        &self,
-												feat: add SchedulerV3  (#1996)

- Refactor code to allow supporting multiple versions of the
generate.proto at the same time
- Add v3/generate.proto (ISO to generate.proto for now but allow for
future changes without impacting v2 backends)
- Add Schedule trait to abstract queuing and batching mechanisms that
will be different in the future
- Add SchedulerV2/V3 impl
											
										
										
											2024-06-04 13:56:56 +00:00
+								        request: ValidGenerateRequest,
 								        permit: OwnedSemaphorePermit,
-												feat: supports openai chat completions API (#1427)

This PR adds support to make TGI a drop in replacement for OpenAI
clients by exposing the same HTTP interface.

Notes
- TGI inits a single model at startup so the `model` field is unused in
HTTP requests.
- `max_tokens` and `stream` should work as expected but other params may
be (unimplemented or not supported)

General approach
- fetch the `tokenizer_config` at startup from the hub
- pass `tokenizer_config` into `Infer` so we have it at request time
- use the `chat_template` on the config to format chat request
- parse jinja template and render chat string
- pass inputs into existing generate function
- wrap generation output in expected structure before returning

# How to test

### Streaming curl
```bash
curl localhost:3000/v1/chat/completions \
    -X POST \
    -d '{
  "model": "tgi",
  "messages": [
    {
      "role": "system",
      "content": "You are a helpful assistant."
    },
    {
      "role": "user",
      "content": "What is deep learning?"
    }
  ],
  "stream": true,
  "max_tokens": 20
}' \
    -H 'Content-Type: application/json'
```


It is also possible to use the `openai` python library and change the
base url

###  🌊 STREAMING REQUEST
```python
from openai import OpenAI

# init the client but point it to TGI
client = OpenAI(
    base_url="http://localhost:3000/v1",
    api_key="not needed for a local LLM"
)

chat_completion = client.chat.completions.create(
    model="tgi",
    messages=[
        {"role": "system", "content": "You are a helpful assistant." },
        {"role": "user", "content": "What is deep learning?"}
    ],
    stream=True
)

# iterate and print stream
for message in chat_completion:
    print(message)

# ChatCompletionChunk(id='', choices=[Choice(delta=ChoiceDelta(content=' that', function_call=None, role='assistant', tool_calls=None), finish_reason=None, index=2, logprobs=None)], created=1704486761, model='', object='text_completion', system_fingerprint='')
```

### 🚗 SYNCHRONOUS REQUEST
```python
from openai import OpenAI

# init the client but point it to TGI
client = OpenAI(
    base_url="http://localhost:3000/v1",
    api_key="not needed for a local LLM"
)

chat_completion = client.chat.completions.create(
    model="tgi",
    messages=[
        {"role": "system", "content": "You are a helpful assistant." },
        {"role": "user", "content": "What is deep learning?"}
    ],
    stream=False
)

print(chat_completion)
# ChatCompletion(id='', choices=[Choice(finish_reason=None, index=0, logprobs=None, message=ChatCompletionMessage(content='\nDeep learning is a new field of research that has been gaining traction in the last ...', role='assistant', function_call=None, tool_calls=None))], created=1704486762, model='', object='text_completion', system_fingerprint='', usage=CompletionUsage(completion_tokens=100, prompt_tokens=76, total_tokens=176))
```


## How to run dev

```bash
cd text-generation-inference/server
MASTER_ADDR=127.0.0.1 MASTER_PORT=5555 text-generation-server serve --trust-remote-code gpt2
```

***note many of the existing `chat_templates` use non standard `jinja`
(ie. adding a `raise` to the template) which will throw an error when
parsing; hence using `upstage/SOLAR-10.7B-Instruct-v1.0` since it has a
valid template
```bash
cd text-generation-inference/router
cargo run -- --tokenizer-name upstage/SOLAR-10.7B-Instruct-v1.0
```

trigger
```bash
curl localhost:3000/v1/chat/completions \
    -X POST \
    -d '{ "model": "gpt-3.5-turbo", "messages": [ { "role": "system", "content": "You are a helpful assistant." }, { "role": "user", "content": "What is the IP address of the Google DNS servers?" } ], "stream": true, "max_tokens": 20, "logprobs": true }' \
    -H 'Content-Type: application/json'
```

^ supports `stream: true` and `stream: false` requests
											
										
										
											2024-01-16 10:07:41 +00:00
+								    ) -> Result<GenerateStreamResponse, InferError> {
-												feat: Add token streaming using ServerSideEvents support (#41)


											
										
										
											2023-01-31 16:04:00 +00:00
+								        // MPSC channel to communicate with the background batching task
-												feat: remove flume (#1184)


											
										
										
											2023-10-23 13:51:12 +00:00
+								        let (response_tx, response_rx) = mpsc::unbounded_channel();
-												feat: add SchedulerV3  (#1996)

- Refactor code to allow supporting multiple versions of the
generate.proto at the same time
- Add v3/generate.proto (ISO to generate.proto for now but allow for
future changes without impacting v2 backends)
- Add Schedule trait to abstract queuing and batching mechanisms that
will be different in the future
- Add SchedulerV2/V3 impl
											
										
										
											2024-06-04 13:56:56 +00:00
+								        let input_length = request.input_length;
-												feat: Add token streaming using ServerSideEvents support (#41)


											
										
										
											2023-01-31 16:04:00 +00:00
-												feat(router): use background task to manage request queue (#52)

Co-authored-by: Nick Hill <nickhill@us.ibm.com>
											
										
										
											2023-02-02 13:59:27 +00:00
+								        // Append the request to the queue
 								        self.queue.append(Entry {
-												feat: add SchedulerV3  (#1996)

- Refactor code to allow supporting multiple versions of the
generate.proto at the same time
- Add v3/generate.proto (ISO to generate.proto for now but allow for
future changes without impacting v2 backends)
- Add Schedule trait to abstract queuing and batching mechanisms that
will be different in the future
- Add SchedulerV2/V3 impl
											
										
										
											2024-06-04 13:56:56 +00:00
+								            request,
-												feat: Add token streaming using ServerSideEvents support (#41)


											
										
										
											2023-01-31 16:04:00 +00:00
+								            response_tx,
-												feat: add distributed tracing (#62)


											
										
										
											2023-02-13 12:02:45 +00:00
+								            span: Span::current(),
 								            temp_span: None,
 								            queue_time: Instant::now(),
-												feat: Add token streaming using ServerSideEvents support (#41)


											
										
										
											2023-01-31 16:04:00 +00:00
+								            batch_time: None,
 								        });
-												feat(router): use background task to manage request queue (#52)

Co-authored-by: Nick Hill <nickhill@us.ibm.com>
											
										
										
											2023-02-02 13:59:27 +00:00
+								        // Notify the background task that we have a new entry in the queue that needs
-												feat: Add token streaming using ServerSideEvents support (#41)


											
										
										
											2023-01-31 16:04:00 +00:00
+								        // to be batched
-												feat: add SchedulerV3  (#1996)

- Refactor code to allow supporting multiple versions of the
generate.proto at the same time
- Add v3/generate.proto (ISO to generate.proto for now but allow for
future changes without impacting v2 backends)
- Add Schedule trait to abstract queuing and batching mechanisms that
will be different in the future
- Add SchedulerV2/V3 impl
											
										
										
											2024-06-04 13:56:56 +00:00
+								        self.batching_task_notifier.notify_one();
-												feat: Add token streaming using ServerSideEvents support (#41)


											
										
										
											2023-01-31 16:04:00 +00:00
 								        // Return stream
-												Return prompt vs generated tokens. (#1436)

# What does this PR do?

Fixes #637 
 
<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
											
										
										
											2024-01-11 18:01:43 +00:00
+								        Ok((
 								            permit,
 								            input_length,
 								            UnboundedReceiverStream::new(response_rx),
 								        ))
-												feat: Add token streaming using ServerSideEvents support (#41)


											
										
										
											2023-01-31 16:04:00 +00:00
+								    }
-												feat: improve tools to include name and add tests (#1693)

This PR makes tool calling aware of the name of the function selected. 

Fixes:
https://github.com/huggingface/text-generation-inference/issues/1657

Thank you @puppetm4st3r for the helpful snippets, large parts of this PR
are simply refactors of the code shared 🙏

**opening draft PR because small tweaks are needed before merging
											
										
										
											2024-04-16 13:02:46 +00:00
+								}
-												feat: Add token streaming using ServerSideEvents support (#41)


											
										
										
											2023-01-31 16:04:00 +00:00
+								/// Batching logic
 								/// Will be launched in a background Tokio task
 								///
 								/// Batches requests and sends them to the inference server
-												feat(server): add paged attention to flash models (#516)

Closes #478
											
										
										
											2023-06-30 17:09:59 +00:00
+								#[allow(clippy::too_many_arguments)]
-												feat: add SchedulerV3  (#1996)

- Refactor code to allow supporting multiple versions of the
generate.proto at the same time
- Add v3/generate.proto (ISO to generate.proto for now but allow for
future changes without impacting v2 backends)
- Add Schedule trait to abstract queuing and batching mechanisms that
will be different in the future
- Add SchedulerV2/V3 impl
											
										
										
											2024-06-04 13:56:56 +00:00
+								pub(crate) async fn batching_task(
-												feat: Add token streaming using ServerSideEvents support (#41)


											
										
										
											2023-01-31 16:04:00 +00:00
+								    mut client: ShardedClient,
-												feat(router): use number of tokens in batch as input for dynamic batching (#226)

Co-authored-by: Nick Hill <nickhill@us.ibm.com>
											
										
										
											2023-04-24 15:59:00 +00:00
+								    waiting_served_ratio: f32,
-												feat(server): add paged attention to flash models (#516)

Closes #478
											
										
										
											2023-06-30 17:09:59 +00:00
+								    max_batch_prefill_tokens: u32,
-												feat(router): use number of tokens in batch as input for dynamic batching (#226)

Co-authored-by: Nick Hill <nickhill@us.ibm.com>
											
										
										
											2023-04-24 15:59:00 +00:00
+								    max_batch_total_tokens: u32,
-												feat: Add token streaming using ServerSideEvents support (#41)


											
										
										
											2023-01-31 16:04:00 +00:00
+								    max_waiting_tokens: usize,
-												feat(router): add max_batch_size (#1542)

Some hardware require a maximum batch size.
											
										
										
											2024-02-09 11:38:41 +00:00
+								    max_batch_size: Option<usize>,
-												feat(router): use background task to manage request queue (#52)

Co-authored-by: Nick Hill <nickhill@us.ibm.com>
											
										
										
											2023-02-02 13:59:27 +00:00
+								    queue: Queue,
-												feat: add SchedulerV3  (#1996)

- Refactor code to allow supporting multiple versions of the
generate.proto at the same time
- Add v3/generate.proto (ISO to generate.proto for now but allow for
future changes without impacting v2 backends)
- Add Schedule trait to abstract queuing and batching mechanisms that
will be different in the future
- Add SchedulerV2/V3 impl
											
										
										
											2024-06-04 13:56:56 +00:00
+								    notifier: Arc<Notify>,
-												feat(router): new healthcheck that skips the queue (#244)

Co-authored-by: OlivierDehaene <23298448+OlivierDehaene@users.noreply.github.com>
Co-authored-by: OlivierDehaene <olivier@huggingface.co>
											
										
										
											2023-04-26 18:23:54 +00:00
+								    generation_health: Arc<AtomicBool>,
-												feat: Add token streaming using ServerSideEvents support (#41)


											
										
										
											2023-01-31 16:04:00 +00:00
+								) {
 								    // Infinite loop
 								    loop {
 								        // Wait for a notification from the Infer struct
-												feat: add SchedulerV3  (#1996)

- Refactor code to allow supporting multiple versions of the
generate.proto at the same time
- Add v3/generate.proto (ISO to generate.proto for now but allow for
future changes without impacting v2 backends)
- Add Schedule trait to abstract queuing and batching mechanisms that
will be different in the future
- Add SchedulerV2/V3 impl
											
										
										
											2024-06-04 13:56:56 +00:00
+								        notifier.notified().await;
-												feat: Add token streaming using ServerSideEvents support (#41)


											
										
										
											2023-01-31 16:04:00 +00:00
-												feat(router): use background task to manage request queue (#52)

Co-authored-by: Nick Hill <nickhill@us.ibm.com>
											
										
										
											2023-02-02 13:59:27 +00:00
+								        // Get the next batch from the queue
-												feat: Add token streaming using ServerSideEvents support (#41)


											
										
										
											2023-01-31 16:04:00 +00:00
+								        // This batch might be smaller than the maximum batch size if there are not enough requests
-												feat(router): use background task to manage request queue (#52)

Co-authored-by: Nick Hill <nickhill@us.ibm.com>
											
										
										
											2023-02-02 13:59:27 +00:00
+								        // waiting in the queue
-												feat(server): add paged attention to flash models (#516)

Closes #478
											
										
										
											2023-06-30 17:09:59 +00:00
+								        while let Some((mut entries, batch, span)) = queue
-												feat(router): add max_batch_size (#1542)

Some hardware require a maximum batch size.
											
										
										
											2024-02-09 11:38:41 +00:00
+								            .next_batch(
 								                None,
 								                max_batch_size,
 								                max_batch_prefill_tokens,
 								                max_batch_total_tokens,
 								            )
-												feat(server): add paged attention to flash models (#516)

Closes #478
											
										
										
											2023-06-30 17:09:59 +00:00
+								            .await
-												feat(router): use number of tokens in batch as input for dynamic batching (#226)

Co-authored-by: Nick Hill <nickhill@us.ibm.com>
											
										
										
											2023-04-24 15:59:00 +00:00
+								        {
-												feat(router): new healthcheck that skips the queue (#244)

Co-authored-by: OlivierDehaene <23298448+OlivierDehaene@users.noreply.github.com>
Co-authored-by: OlivierDehaene <olivier@huggingface.co>
											
										
										
											2023-04-26 18:23:54 +00:00
+								            let mut cached_batch = prefill(&mut client, batch, &mut entries, &generation_health)
-												feat: add distributed tracing (#62)


											
										
										
											2023-02-13 12:02:45 +00:00
+								                .instrument(span)
 								                .await;
-												feat: Add token streaming using ServerSideEvents support (#41)


											
										
										
											2023-01-31 16:04:00 +00:00
+								            let mut waiting_tokens = 1;
 								            // We loop until we do not receive any cached batch from the inference server (== until
 								            // all requests have met their stopping criteria)
 								            while let Some(batch) = cached_batch {
 								                // Get current batch info
 								                let batch_size = batch.size;
-												feat(router): use number of tokens in batch as input for dynamic batching (#226)

Co-authored-by: Nick Hill <nickhill@us.ibm.com>
											
										
										
											2023-04-24 15:59:00 +00:00
+								                let batch_max_tokens = batch.max_tokens;
-												feat: Add token streaming using ServerSideEvents support (#41)


											
										
										
											2023-01-31 16:04:00 +00:00
+								                let mut batches = vec![batch];
-												feat(router): add prometheus metrics scrape endpoint (#71)


											
										
										
											2023-02-16 16:18:53 +00:00
+								                metrics::gauge!("tgi_batch_current_size", batch_size as f64);
-												feat(router): use number of tokens in batch as input for dynamic batching (#226)

Co-authored-by: Nick Hill <nickhill@us.ibm.com>
											
										
										
											2023-04-24 15:59:00 +00:00
+								                metrics::gauge!("tgi_batch_current_max_tokens", batch_max_tokens as f64);
 								                let min_size = if waiting_tokens >= max_waiting_tokens {
 								                    // If we didn't onboard any new requests since >= max_waiting_tokens, we try
 								                    // to add a new batch even though its size might be small
 								                    None
 								                } else {
 								                    // Minimum batch size
 								                    Some((batch_size as f32 * waiting_served_ratio).floor() as usize)
 								                };
-												feat(server): add paged attention to flash models (#516)

Closes #478
											
										
										
											2023-06-30 17:09:59 +00:00
+								                let token_budget = max_batch_total_tokens.saturating_sub(batch_max_tokens);
-												feat(router): add max_batch_size (#1542)

Some hardware require a maximum batch size.
											
										
										
											2024-02-09 11:38:41 +00:00
+								                let max_size = max_batch_size.map(|max_size| max_size - batch_size as usize);
-												feat(router): use number of tokens in batch as input for dynamic batching (#226)

Co-authored-by: Nick Hill <nickhill@us.ibm.com>
											
										
										
											2023-04-24 15:59:00 +00:00
 								                // Try to get a new batch
-												feat(server): add paged attention to flash models (#516)

Closes #478
											
										
										
											2023-06-30 17:09:59 +00:00
+								                if let Some((mut new_entries, new_batch, span)) = queue
-												feat(router): add max_batch_size (#1542)

Some hardware require a maximum batch size.
											
										
										
											2024-02-09 11:38:41 +00:00
+								                    .next_batch(min_size, max_size, max_batch_prefill_tokens, token_budget)
-												feat(server): add paged attention to flash models (#516)

Closes #478
											
										
										
											2023-06-30 17:09:59 +00:00
+								                    .await
-												feat(router): use number of tokens in batch as input for dynamic batching (#226)

Co-authored-by: Nick Hill <nickhill@us.ibm.com>
											
										
										
											2023-04-24 15:59:00 +00:00
+								                {
 								                    // Tracking metrics
 								                    if min_size.is_some() {
 								                        metrics::increment_counter!("tgi_batch_concat", "reason" => "backpressure");
 								                    } else {
 								                        metrics::increment_counter!("tgi_batch_concat", "reason" => "wait_exceeded");
 								                    }
-												feat: Add token streaming using ServerSideEvents support (#41)


											
										
										
											2023-01-31 16:04:00 +00:00
-												feat(router): use number of tokens in batch as input for dynamic batching (#226)

Co-authored-by: Nick Hill <nickhill@us.ibm.com>
											
										
										
											2023-04-24 15:59:00 +00:00
+								                    entries.iter_mut().for_each(|(_, entry)| {
 								                        // Create a new span to add the info that this entry is waiting
 								                        // because a new batch is being computed
 								                        let entry_waiting_span = info_span!(parent: &entry.span, "waiting");
 								                        // Add relationships
 								                        span.follows_from(&entry_waiting_span);
 								                        entry_waiting_span.follows_from(&span);
 								                        // Update entry
 								                        entry.temp_span = Some(entry_waiting_span);
 								                    });
 								                    // Generate one token for this new batch to have the attention past in cache
-												feat(router): new healthcheck that skips the queue (#244)

Co-authored-by: OlivierDehaene <23298448+OlivierDehaene@users.noreply.github.com>
Co-authored-by: OlivierDehaene <olivier@huggingface.co>
											
										
										
											2023-04-26 18:23:54 +00:00
+								                    let new_cached_batch =
 								                        prefill(&mut client, new_batch, &mut new_entries, &generation_health)
 								                            .instrument(span)
 								                            .await;
-												feat(router): use number of tokens in batch as input for dynamic batching (#226)

Co-authored-by: Nick Hill <nickhill@us.ibm.com>
											
										
										
											2023-04-24 15:59:00 +00:00
+								                    // Reset waiting counter
 								                    waiting_tokens = 1;
 								                    // Extend current batch with the new batch
 								                    if let Some(new_cached_batch) = new_cached_batch {
 								                        entries.extend(new_entries);
 								                        batches.push(new_cached_batch);
-												feat: Add token streaming using ServerSideEvents support (#41)


											
										
										
											2023-01-31 16:04:00 +00:00
+								                    }
 								                }
-												feat(router): use number of tokens in batch as input for dynamic batching (#226)

Co-authored-by: Nick Hill <nickhill@us.ibm.com>
											
										
										
											2023-04-24 15:59:00 +00:00
-												feat: add distributed tracing (#62)


											
										
										
											2023-02-13 12:02:45 +00:00
+								                // Create span for this batch to add context to inference calls
 								                let next_batch_size = entries.len();
 								                let next_batch_span =
 								                    info_span!(parent: None, "batch", batch_size = next_batch_size);
 								                entries.iter_mut().for_each(|(_, entry)| {
 								                    // Create a new span to link the batch back to this entry
-												feat(router): drop requests when client closes the channel (#202)


											
										
										
											2023-04-20 09:07:40 +00:00
+								                    let entry_batch_span = info_span!(parent: &entry.span, "infer");
-												fix(server): use server tokenizer as gt (#128)


											
										
										
											2023-03-16 11:12:26 +00:00
+								                    // Add relationships
 								                    next_batch_span.follows_from(&entry_batch_span);
-												feat: add distributed tracing (#62)


											
										
										
											2023-02-13 12:02:45 +00:00
+								                    entry_batch_span.follows_from(&next_batch_span);
 								                    // Update entry
 								                    entry.temp_span = Some(entry_batch_span);
 								                });
-												feat: Add token streaming using ServerSideEvents support (#41)


											
										
										
											2023-01-31 16:04:00 +00:00
-												feat(router): new healthcheck that skips the queue (#244)

Co-authored-by: OlivierDehaene <23298448+OlivierDehaene@users.noreply.github.com>
Co-authored-by: OlivierDehaene <olivier@huggingface.co>
											
										
										
											2023-04-26 18:23:54 +00:00
+								                cached_batch = decode(&mut client, batches, &mut entries, &generation_health)
-												feat: add distributed tracing (#62)


											
										
										
											2023-02-13 12:02:45 +00:00
+								                    .instrument(next_batch_span)
 								                    .await;
-												feat: Add token streaming using ServerSideEvents support (#41)


											
										
										
											2023-01-31 16:04:00 +00:00
+								                waiting_tokens += 1;
 								            }
-												feat(router): add prometheus metrics scrape endpoint (#71)


											
										
										
											2023-02-16 16:18:53 +00:00
+								            metrics::gauge!("tgi_batch_current_size", 0.0);
-												feat(router): use number of tokens in batch as input for dynamic batching (#226)

Co-authored-by: Nick Hill <nickhill@us.ibm.com>
											
										
										
											2023-04-24 15:59:00 +00:00
+								            metrics::gauge!("tgi_batch_current_max_tokens", 0.0);
-												feat: Add token streaming using ServerSideEvents support (#41)


											
										
										
											2023-01-31 16:04:00 +00:00
+								        }
 								    }
 								}
-												feat: add distributed tracing (#62)


											
										
										
											2023-02-13 12:02:45 +00:00
+								#[instrument(skip_all)]
-												feat(router): add prometheus metrics scrape endpoint (#71)


											
										
										
											2023-02-16 16:18:53 +00:00
+								async fn prefill(
 								    client: &mut ShardedClient,
 								    batch: Batch,
-												feat: Add token streaming using ServerSideEvents support (#41)


											
										
										
											2023-01-31 16:04:00 +00:00
+								    entries: &mut IntMap<u64, Entry>,
-												feat(router): new healthcheck that skips the queue (#244)

Co-authored-by: OlivierDehaene <23298448+OlivierDehaene@users.noreply.github.com>
Co-authored-by: OlivierDehaene <olivier@huggingface.co>
											
										
										
											2023-04-26 18:23:54 +00:00
+								    generation_health: &Arc<AtomicBool>,
-												feat: decrease IPC proto size (#367)

Closes #307 #308
											
										
										
											2023-05-24 17:19:57 +00:00
+								) -> Option<CachedBatch> {
-												feat(router): add prometheus metrics scrape endpoint (#71)


											
										
										
											2023-02-16 16:18:53 +00:00
+								    let start_time = Instant::now();
-												feat(server): clear cache on error (#143)


											
										
										
											2023-03-28 09:29:35 +00:00
+								    let batch_id = batch.id;
-												fix(router): use buckets for metrics histograms (#163)


											
										
										
											2023-04-09 18:13:28 +00:00
+								    metrics::increment_counter!("tgi_batch_inference_count", "method" => "prefill");
-												feat(router): add prometheus metrics scrape endpoint (#71)


											
										
										
											2023-02-16 16:18:53 +00:00
 								    match client.prefill(batch).await {
-												feat: add more latency metrics in forward (#1346)


											
										
										
											2023-12-14 14:59:38 +00:00
+								        Ok((generations, next_batch, timings)) => {
-												feat(router): new healthcheck that skips the queue (#244)

Co-authored-by: OlivierDehaene <23298448+OlivierDehaene@users.noreply.github.com>
Co-authored-by: OlivierDehaene <olivier@huggingface.co>
											
										
										
											2023-04-26 18:23:54 +00:00
+								            // Update health
 								            generation_health.store(true, Ordering::SeqCst);
-												feat: add more latency metrics in forward (#1346)


											
										
										
											2023-12-14 14:59:38 +00:00
 								            let start_filtering_time = Instant::now();
-												feat(router): use number of tokens in batch as input for dynamic batching (#226)

Co-authored-by: Nick Hill <nickhill@us.ibm.com>
											
										
										
											2023-04-24 15:59:00 +00:00
+								            // Send generated tokens and filter stopped entries
-												feat(router): drop requests when client closes the channel (#202)


											
										
										
											2023-04-20 09:07:40 +00:00
+								            filter_send_generations(generations, entries);
 								            // Filter next batch and remove requests that were stopped
-												feat(router): use number of tokens in batch as input for dynamic batching (#226)

Co-authored-by: Nick Hill <nickhill@us.ibm.com>
											
										
										
											2023-04-24 15:59:00 +00:00
+								            let next_batch = filter_batch(client, next_batch, entries).await;
-												feat(router): drop requests when client closes the channel (#202)


											
										
										
											2023-04-20 09:07:40 +00:00
-												feat: add more latency metrics in forward (#1346)


											
										
										
											2023-12-14 14:59:38 +00:00
+								            metrics::histogram!("tgi_batch_forward_duration", timings.forward.as_secs_f64(), "method" => "prefill");
 								            metrics::histogram!("tgi_batch_decode_duration", timings.decode.as_secs_f64(), "method" => "prefill");
 								            metrics::histogram!("tgi_batch_filter_duration", start_filtering_time.elapsed().as_secs_f64(), "method" => "prefill");
-												fix(router): use buckets for metrics histograms (#163)


											
										
										
											2023-04-09 18:13:28 +00:00
+								            metrics::histogram!("tgi_batch_inference_duration", start_time.elapsed().as_secs_f64(), "method" => "prefill");
-												feat(router): add prometheus metrics scrape endpoint (#71)


											
										
										
											2023-02-16 16:18:53 +00:00
+								            metrics::increment_counter!("tgi_batch_inference_success", "method" => "prefill");
 								            next_batch
 								        }
 								        // If we have an error, we discard the whole batch
 								        Err(err) => {
-												feat(router): new healthcheck that skips the queue (#244)

Co-authored-by: OlivierDehaene <23298448+OlivierDehaene@users.noreply.github.com>
Co-authored-by: OlivierDehaene <olivier@huggingface.co>
											
										
										
											2023-04-26 18:23:54 +00:00
+								            // Update health
 								            generation_health.store(false, Ordering::SeqCst);
-												feat(server): clear cache on error (#143)


											
										
										
											2023-03-28 09:29:35 +00:00
+								            let _ = client.clear_cache(Some(batch_id)).await;
-												feat(router): add prometheus metrics scrape endpoint (#71)


											
										
										
											2023-02-16 16:18:53 +00:00
+								            send_errors(err, entries);
 								            metrics::increment_counter!("tgi_batch_inference_failure", "method" => "prefill");
 								            None
 								        }
 								    }
 								}
 								#[instrument(skip_all)]
 								async fn decode(
 								    client: &mut ShardedClient,
-												feat: decrease IPC proto size (#367)

Closes #307 #308
											
										
										
											2023-05-24 17:19:57 +00:00
+								    batches: Vec<CachedBatch>,
-												feat(router): add prometheus metrics scrape endpoint (#71)


											
										
										
											2023-02-16 16:18:53 +00:00
+								    entries: &mut IntMap<u64, Entry>,
-												feat(router): new healthcheck that skips the queue (#244)

Co-authored-by: OlivierDehaene <23298448+OlivierDehaene@users.noreply.github.com>
Co-authored-by: OlivierDehaene <olivier@huggingface.co>
											
										
										
											2023-04-26 18:23:54 +00:00
+								    generation_health: &Arc<AtomicBool>,
-												feat: decrease IPC proto size (#367)

Closes #307 #308
											
										
										
											2023-05-24 17:19:57 +00:00
+								) -> Option<CachedBatch> {
-												feat(router): add prometheus metrics scrape endpoint (#71)


											
										
										
											2023-02-16 16:18:53 +00:00
+								    let start_time = Instant::now();
-												feat(router): drop requests when client closes the channel (#202)


											
										
										
											2023-04-20 09:07:40 +00:00
+								    let batch_ids: Vec<u64> = batches.iter().map(|b| b.id).collect();
-												fix(router): use buckets for metrics histograms (#163)


											
										
										
											2023-04-09 18:13:28 +00:00
+								    metrics::increment_counter!("tgi_batch_inference_count", "method" => "decode");
-												feat(router): add prometheus metrics scrape endpoint (#71)


											
										
										
											2023-02-16 16:18:53 +00:00
 								    match client.decode(batches).await {
-												feat: add more latency metrics in forward (#1346)


											
										
										
											2023-12-14 14:59:38 +00:00
+								        Ok((generations, next_batch, timings)) => {
-												feat(router): new healthcheck that skips the queue (#244)

Co-authored-by: OlivierDehaene <23298448+OlivierDehaene@users.noreply.github.com>
Co-authored-by: OlivierDehaene <olivier@huggingface.co>
											
										
										
											2023-04-26 18:23:54 +00:00
+								            // Update health
 								            generation_health.store(true, Ordering::SeqCst);
-												feat: add more latency metrics in forward (#1346)


											
										
										
											2023-12-14 14:59:38 +00:00
 								            let start_filtering_time = Instant::now();
-												feat(router): use number of tokens in batch as input for dynamic batching (#226)

Co-authored-by: Nick Hill <nickhill@us.ibm.com>
											
										
										
											2023-04-24 15:59:00 +00:00
+								            // Send generated tokens and filter stopped entries
-												feat(router): drop requests when client closes the channel (#202)


											
										
										
											2023-04-20 09:07:40 +00:00
+								            filter_send_generations(generations, entries);
 								            // Filter next batch and remove requests that were stopped
-												feat(router): use number of tokens in batch as input for dynamic batching (#226)

Co-authored-by: Nick Hill <nickhill@us.ibm.com>
											
										
										
											2023-04-24 15:59:00 +00:00
+								            let next_batch = filter_batch(client, next_batch, entries).await;
-												feat(router): drop requests when client closes the channel (#202)


											
										
										
											2023-04-20 09:07:40 +00:00
-												feat: add more latency metrics in forward (#1346)


											
										
										
											2023-12-14 14:59:38 +00:00
+								            if let Some(concat_duration) = timings.concat {
 								                metrics::histogram!("tgi_batch_concat_duration", concat_duration.as_secs_f64(), "method" => "decode");
 								            }
 								            metrics::histogram!("tgi_batch_forward_duration", timings.forward.as_secs_f64(), "method" => "decode");
 								            metrics::histogram!("tgi_batch_decode_duration", timings.decode.as_secs_f64(), "method" => "decode");
 								            metrics::histogram!("tgi_batch_filter_duration", start_filtering_time.elapsed().as_secs_f64(), "method" => "decode");
-												fix(router): use buckets for metrics histograms (#163)


											
										
										
											2023-04-09 18:13:28 +00:00
+								            metrics::histogram!("tgi_batch_inference_duration", start_time.elapsed().as_secs_f64(), "method" => "decode");
-												feat(router): add prometheus metrics scrape endpoint (#71)


											
										
										
											2023-02-16 16:18:53 +00:00
+								            metrics::increment_counter!("tgi_batch_inference_success", "method" => "decode");
-												feat: Add token streaming using ServerSideEvents support (#41)


											
										
										
											2023-01-31 16:04:00 +00:00
+								            next_batch
 								        }
 								        // If we have an error, we discard the whole batch
 								        Err(err) => {
-												feat(router): new healthcheck that skips the queue (#244)

Co-authored-by: OlivierDehaene <23298448+OlivierDehaene@users.noreply.github.com>
Co-authored-by: OlivierDehaene <olivier@huggingface.co>
											
										
										
											2023-04-26 18:23:54 +00:00
+								            generation_health.store(false, Ordering::SeqCst);
-												feat(router): drop requests when client closes the channel (#202)


											
										
										
											2023-04-20 09:07:40 +00:00
+								            for id in batch_ids {
 								                let _ = client.clear_cache(Some(id)).await;
 								            }
-												feat: add distributed tracing (#62)


											
										
										
											2023-02-13 12:02:45 +00:00
+								            send_errors(err, entries);
-												feat(router): add prometheus metrics scrape endpoint (#71)


											
										
										
											2023-02-16 16:18:53 +00:00
+								            metrics::increment_counter!("tgi_batch_inference_failure", "method" => "decode");
-												feat: Add token streaming using ServerSideEvents support (#41)


											
										
										
											2023-01-31 16:04:00 +00:00
+								            None
 								        }
 								    }
 								}
-												feat(router): drop requests when client closes the channel (#202)


											
										
										
											2023-04-20 09:07:40 +00:00
+								/// Filter a `batch` and remove all requests not present in `entries`
 								#[instrument(skip_all)]
-												feat(router): use number of tokens in batch as input for dynamic batching (#226)

Co-authored-by: Nick Hill <nickhill@us.ibm.com>
											
										
										
											2023-04-24 15:59:00 +00:00
+								async fn filter_batch(
 								    client: &mut ShardedClient,
-												feat: decrease IPC proto size (#367)

Closes #307 #308
											
										
										
											2023-05-24 17:19:57 +00:00
+								    next_batch: Option<CachedBatch>,
-												feat(router): use number of tokens in batch as input for dynamic batching (#226)

Co-authored-by: Nick Hill <nickhill@us.ibm.com>
											
										
										
											2023-04-24 15:59:00 +00:00
+								    entries: &IntMap<u64, Entry>,
-												feat: decrease IPC proto size (#367)

Closes #307 #308
											
										
										
											2023-05-24 17:19:57 +00:00
+								) -> Option<CachedBatch> {
-												feat(router): use number of tokens in batch as input for dynamic batching (#226)

Co-authored-by: Nick Hill <nickhill@us.ibm.com>
											
										
										
											2023-04-24 15:59:00 +00:00
+								    let mut batch = next_batch?;
 								    // No need to filter
 								    if batch.size as usize == entries.len() {
 								        return Some(batch);
 								    }
 								    let id = batch.id;
 								    // Retain only requests that are still in entries
-												feat: decrease IPC proto size (#367)

Closes #307 #308
											
										
										
											2023-05-24 17:19:57 +00:00
+								    batch.request_ids.retain(|id| entries.contains_key(id));
-												feat(router): use number of tokens in batch as input for dynamic batching (#226)

Co-authored-by: Nick Hill <nickhill@us.ibm.com>
											
										
										
											2023-04-24 15:59:00 +00:00
-												feat: decrease IPC proto size (#367)

Closes #307 #308
											
										
										
											2023-05-24 17:19:57 +00:00
+								    if batch.request_ids.is_empty() {
-												feat(router): use number of tokens in batch as input for dynamic batching (#226)

Co-authored-by: Nick Hill <nickhill@us.ibm.com>
											
										
										
											2023-04-24 15:59:00 +00:00
+								        // All requests have been filtered out
 								        // Next batch is now empty
 								        // Clear it from the Python shards cache
 								        // We unwrap here as we need to panic since we cannot recover if this method fails
 								        client.clear_cache(Some(id)).await.unwrap();
 								        None
 								    } else {
 								        // Filter Python shard cache
 								        // We unwrap here as we need to panic since we cannot recover if this method fails
-												feat: decrease IPC proto size (#367)

Closes #307 #308
											
										
										
											2023-05-24 17:19:57 +00:00
+								        client.filter_batch(id, batch.request_ids).await.unwrap()
-												feat(router): drop requests when client closes the channel (#202)


											
										
										
											2023-04-20 09:07:40 +00:00
+								    }
 								}
 								/// Send one or multiple `InferStreamResponse` to Infer for all `entries`
 								/// and filter entries
 								#[instrument(skip_all)]
 								fn filter_send_generations(generations: Vec<Generation>, entries: &mut IntMap<u64, Entry>) {
 								    generations.into_iter().for_each(|generation| {
 								        let id = generation.request_id;
 								        // Get entry
 								        // We can `expect` here as the request id should always be in the entries
 								        let entry = entries
 								            .get(&id)
 								            .expect("ID not found in entries. This is a bug.");
 								        // Create and enter a span to link this function back to the entry
 								        let _span = info_span!(parent: entry.temp_span.as_ref().expect("batch_span is None. This is a bug."), "send_generation", generation = ?generation).entered();
 								        // Send generation responses back to the infer task
 								        // If the receive an error from the Flume channel, it means that the client dropped the
 								        // request and we need to stop generating hence why we unwrap_or(true)
 								        let stopped = send_responses(generation, entry).map_err(|err| {
-												feat: remove flume (#1184)


											
										
										
											2023-10-23 13:51:12 +00:00
+								            tracing::error!("Entry response channel error.");
-												feat(router): drop requests when client closes the channel (#202)


											
										
										
											2023-04-20 09:07:40 +00:00
+								            metrics::increment_counter!("tgi_request_failure", "err" => "dropped");
 								            err
 								        }).unwrap_or(true);
 								        if stopped {
 								            entries.remove(&id).expect("ID not found in entries. This is a bug.");
 								        }
 								    });
 								}
 								/// Send responses through the `entry` response channel
 								fn send_responses(
 								    generation: Generation,
 								    entry: &Entry,
-												feat: remove flume (#1184)


											
										
										
											2023-10-23 13:51:12 +00:00
+								) -> Result<bool, Box<SendError<Result<InferStreamResponse, InferError>>>> {
-												fix(router): add timeout on flume sends (#488)


											
										
										
											2023-06-23 12:58:28 +00:00
+								    // Return directly if the channel is disconnected
-												feat: remove flume (#1184)


											
										
										
											2023-10-23 13:51:12 +00:00
+								    if entry.response_tx.is_closed() {
 								        metrics::increment_counter!("tgi_request_failure", "err" => "dropped");
-												fix(router): add timeout on flume sends (#488)


											
										
										
											2023-06-23 12:58:28 +00:00
+								        return Ok(true);
 								    }
-												feat(router): drop requests when client closes the channel (#202)


											
										
										
											2023-04-20 09:07:40 +00:00
+								    let mut stopped = false;
 								    if let Some(prefill_tokens) = generation.prefill_tokens {
-												feat: add SchedulerV3  (#1996)

- Refactor code to allow supporting multiple versions of the
generate.proto at the same time
- Add v3/generate.proto (ISO to generate.proto for now but allow for
future changes without impacting v2 backends)
- Add Schedule trait to abstract queuing and batching mechanisms that
will be different in the future
- Add SchedulerV2/V3 impl
											
										
										
											2024-06-04 13:56:56 +00:00
+								        // Create Token objects
 								        // We do that here instead of in the Python code as Rust for loops are faster
 								        let prefill_tokens = prefill_tokens
 								            .ids
 								            .into_iter()
 								            .zip(prefill_tokens.logprobs)
 								            .zip(prefill_tokens.texts)
 								            .map(|((id, logprob), text)| PrefillToken { id, text, logprob })
 								            .collect();
-												feat(router): drop requests when client closes the channel (#202)


											
										
										
											2023-04-20 09:07:40 +00:00
+								        // Send message
-												feat: remove flume (#1184)


											
										
										
											2023-10-23 13:51:12 +00:00
+								        entry
 								            .response_tx
 								            .send(Ok(InferStreamResponse::Prefill(prefill_tokens)))?;
-												feat(router): drop requests when client closes the channel (#202)


											
										
										
											2023-04-20 09:07:40 +00:00
+								    }
 								    // Create last Token
-												Speculative (#1308)


											
										
										
											2023-12-11 11:46:30 +00:00
+								    let tokens_ = generation.tokens.expect("Non empty tokens in generation");
 								    let n = tokens_.ids.len();
 								    metrics::histogram!("tgi_request_skipped_tokens", (n - 1) as f64);
 								    let mut iterator = tokens_
 								        .ids
 								        .into_iter()
-												feat: supports openai chat completions API (#1427)

This PR adds support to make TGI a drop in replacement for OpenAI
clients by exposing the same HTTP interface.

Notes
- TGI inits a single model at startup so the `model` field is unused in
HTTP requests.
- `max_tokens` and `stream` should work as expected but other params may
be (unimplemented or not supported)

General approach
- fetch the `tokenizer_config` at startup from the hub
- pass `tokenizer_config` into `Infer` so we have it at request time
- use the `chat_template` on the config to format chat request
- parse jinja template and render chat string
- pass inputs into existing generate function
- wrap generation output in expected structure before returning

# How to test

### Streaming curl
```bash
curl localhost:3000/v1/chat/completions \
    -X POST \
    -d '{
  "model": "tgi",
  "messages": [
    {
      "role": "system",
      "content": "You are a helpful assistant."
    },
    {
      "role": "user",
      "content": "What is deep learning?"
    }
  ],
  "stream": true,
  "max_tokens": 20
}' \
    -H 'Content-Type: application/json'
```


It is also possible to use the `openai` python library and change the
base url

###  🌊 STREAMING REQUEST
```python
from openai import OpenAI

# init the client but point it to TGI
client = OpenAI(
    base_url="http://localhost:3000/v1",
    api_key="not needed for a local LLM"
)

chat_completion = client.chat.completions.create(
    model="tgi",
    messages=[
        {"role": "system", "content": "You are a helpful assistant." },
        {"role": "user", "content": "What is deep learning?"}
    ],
    stream=True
)

# iterate and print stream
for message in chat_completion:
    print(message)

# ChatCompletionChunk(id='', choices=[Choice(delta=ChoiceDelta(content=' that', function_call=None, role='assistant', tool_calls=None), finish_reason=None, index=2, logprobs=None)], created=1704486761, model='', object='text_completion', system_fingerprint='')
```

### 🚗 SYNCHRONOUS REQUEST
```python
from openai import OpenAI

# init the client but point it to TGI
client = OpenAI(
    base_url="http://localhost:3000/v1",
    api_key="not needed for a local LLM"
)

chat_completion = client.chat.completions.create(
    model="tgi",
    messages=[
        {"role": "system", "content": "You are a helpful assistant." },
        {"role": "user", "content": "What is deep learning?"}
    ],
    stream=False
)

print(chat_completion)
# ChatCompletion(id='', choices=[Choice(finish_reason=None, index=0, logprobs=None, message=ChatCompletionMessage(content='\nDeep learning is a new field of research that has been gaining traction in the last ...', role='assistant', function_call=None, tool_calls=None))], created=1704486762, model='', object='text_completion', system_fingerprint='', usage=CompletionUsage(completion_tokens=100, prompt_tokens=76, total_tokens=176))
```


## How to run dev

```bash
cd text-generation-inference/server
MASTER_ADDR=127.0.0.1 MASTER_PORT=5555 text-generation-server serve --trust-remote-code gpt2
```

***note many of the existing `chat_templates` use non standard `jinja`
(ie. adding a `raise` to the template) which will throw an error when
parsing; hence using `upstage/SOLAR-10.7B-Instruct-v1.0` since it has a
valid template
```bash
cd text-generation-inference/router
cargo run -- --tokenizer-name upstage/SOLAR-10.7B-Instruct-v1.0
```

trigger
```bash
curl localhost:3000/v1/chat/completions \
    -X POST \
    -d '{ "model": "gpt-3.5-turbo", "messages": [ { "role": "system", "content": "You are a helpful assistant." }, { "role": "user", "content": "What is the IP address of the Google DNS servers?" } ], "stream": true, "max_tokens": 20, "logprobs": true }' \
    -H 'Content-Type: application/json'
```

^ supports `stream: true` and `stream: false` requests
											
										
										
											2024-01-16 10:07:41 +00:00
+								        .zip(tokens_.logprobs)
 								        .zip(tokens_.texts)
 								        .zip(tokens_.is_special)
-												Speculative (#1308)


											
										
										
											2023-12-11 11:46:30 +00:00
+								        .enumerate()
 								        .peekable();
 								    while let Some((i, (((id, logprob), text), special))) = iterator.next() {
 								        let token = Token {
 								            id,
 								            text,
 								            logprob,
 								            special,
 								        };
 								        let top_tokens = if let Some(top_tokens_) = generation.top_tokens.get(i) {
-												Rebased #617 (#868)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->

---------

Co-authored-by: Vincent Brouwers <vincent.brouwers@ing.com>
											
										
										
											2023-08-28 09:43:47 +00:00
+								            top_tokens_
 								                .ids
-												Speculative (#1308)


											
										
										
											2023-12-11 11:46:30 +00:00
+								                .iter()
 								                .zip(top_tokens_.logprobs.iter())
 								                .zip(top_tokens_.texts.iter())
 								                .zip(top_tokens_.is_special.iter())
 								                .map(|(((&id, &logprob), text), &special)| Token {
-												Rebased #617 (#868)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->

---------

Co-authored-by: Vincent Brouwers <vincent.brouwers@ing.com>
											
										
										
											2023-08-28 09:43:47 +00:00
+								                    id,
-												Speculative (#1308)


											
										
										
											2023-12-11 11:46:30 +00:00
+								                    text: text.to_string(),
-												Rebased #617 (#868)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->

---------

Co-authored-by: Vincent Brouwers <vincent.brouwers@ing.com>
											
										
										
											2023-08-28 09:43:47 +00:00
+								                    logprob,
 								                    special,
-												Speculative (#1308)


											
										
										
											2023-12-11 11:46:30 +00:00
+								                })
 								                .collect()
 								        } else {
 								            vec![]
 								        };
 								        match (&generation.generated_text, iterator.peek()) {
 								            (Some(generated_text), None) => {
 								                // Generation has ended
 								                stopped = true;
 								                // Send message
 								                entry.response_tx.send(Ok(InferStreamResponse::End {
 								                    token,
 								                    top_tokens,
-												feat: add SchedulerV3  (#1996)

- Refactor code to allow supporting multiple versions of the
generate.proto at the same time
- Add v3/generate.proto (ISO to generate.proto for now but allow for
future changes without impacting v2 backends)
- Add Schedule trait to abstract queuing and batching mechanisms that
will be different in the future
- Add SchedulerV2/V3 impl
											
										
										
											2024-06-04 13:56:56 +00:00
+								                    generated_text: GeneratedText::from(generated_text.clone()),
-												Speculative (#1308)


											
										
										
											2023-12-11 11:46:30 +00:00
+								                    queued: entry.queue_time,
 								                    start: entry.batch_time.unwrap(),
 								                }))?;
 								            }
 								            _ => {
 								                // Send message
 								                entry
 								                    .response_tx
 								                    .send(Ok(InferStreamResponse::Intermediate { token, top_tokens }))?;
 								            }
 								        }
-												Rebased #617 (#868)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->

---------

Co-authored-by: Vincent Brouwers <vincent.brouwers@ing.com>
											
										
										
											2023-08-28 09:43:47 +00:00
+								    }
-												feat(router): drop requests when client closes the channel (#202)


											
										
										
											2023-04-20 09:07:40 +00:00
+								    Ok(stopped)
 								}
-												feat: Add token streaming using ServerSideEvents support (#41)


											
										
										
											2023-01-31 16:04:00 +00:00
+								/// Send errors to Infer for all `entries`
-												feat: add distributed tracing (#62)


											
										
										
											2023-02-13 12:02:45 +00:00
+								#[instrument(skip_all)]
 								fn send_errors(error: ClientError, entries: &mut IntMap<u64, Entry>) {
-												feat: Add token streaming using ServerSideEvents support (#41)


											
										
										
											2023-01-31 16:04:00 +00:00
+								    entries.drain().for_each(|(_, entry)| {
-												feat: add distributed tracing (#62)


											
										
										
											2023-02-13 12:02:45 +00:00
+								        // Create and enter a span to link this function back to the entry
 								        let _send_error_span = info_span!(parent: entry.temp_span.as_ref().expect("batch_span is None. This is a bug."), "send_error").entered();
 								        let err = InferError::GenerationError(error.to_string());
-												feat(router): add prometheus metrics scrape endpoint (#71)


											
										
										
											2023-02-16 16:18:53 +00:00
+								        metrics::increment_counter!("tgi_request_failure", "err" => "generation");
-												feat: add distributed tracing (#62)


											
										
										
											2023-02-13 12:02:45 +00:00
+								        tracing::error!("{err}");
-												feat: Add token streaming using ServerSideEvents support (#41)


											
										
										
											2023-01-31 16:04:00 +00:00
+								        // unwrap_or is valid here as we don't care if the receiver is gone.
 								        entry
 								            .response_tx
-												feat: remove flume (#1184)


											
										
										
											2023-10-23 13:51:12 +00:00
+								            .send(Err(err))
-												feat: Add token streaming using ServerSideEvents support (#41)


											
										
										
											2023-01-31 16:04:00 +00:00
+								            .unwrap_or(());
 								    });
 								}
-												feat: add SchedulerV3  (#1996)

- Refactor code to allow supporting multiple versions of the
generate.proto at the same time
- Add v3/generate.proto (ISO to generate.proto for now but allow for
future changes without impacting v2 backends)
- Add Schedule trait to abstract queuing and batching mechanisms that
will be different in the future
- Add SchedulerV2/V3 impl
											
										
										
											2024-06-04 13:56:56 +00:00
+								impl From<text_generation_client::v3::GeneratedText> for GeneratedText {
 								    fn from(value: text_generation_client::v3::GeneratedText) -> Self {
 								        let v3_finish_reason =
 								            text_generation_client::v3::FinishReason::try_from(value.finish_reason).unwrap();
 								        let finish_reason = match v3_finish_reason {
 								            text_generation_client::v3::FinishReason::Length => FinishReason::Length,
 								            text_generation_client::v3::FinishReason::EosToken => FinishReason::EndOfSequenceToken,
 								            text_generation_client::v3::FinishReason::StopSequence => FinishReason::StopSequence,
 								        };
-												feat(clients): Python client (#103)


											
										
										
											2023-03-07 17:52:22 +00:00
-												feat: add SchedulerV3  (#1996)

- Refactor code to allow supporting multiple versions of the
generate.proto at the same time
- Add v3/generate.proto (ISO to generate.proto for now but allow for
future changes without impacting v2 backends)
- Add Schedule trait to abstract queuing and batching mechanisms that
will be different in the future
- Add SchedulerV2/V3 impl
											
										
										
											2024-06-04 13:56:56 +00:00
+								        Self {
 								            text: value.text,
 								            generated_tokens: value.generated_tokens,
 								            finish_reason,
 								            seed: value.seed,
-												feat(clients): Python client (#103)


											
										
										
											2023-03-07 17:52:22 +00:00
+								        }
 								    }
 								}
-												feat: support raise_exception, bos and eos tokens (#1450)

This PR adds support to handle the custom jinja function
`raise_exception` and passes the `bos` and `eos` tokens into the
template

Additionally this PR adds 3 tests to validate and show examples of what
can and cannot be parsed currently.

```bash
cargo test --package text-generation-router --lib -- infer::tests --nocapture
#     Finished test [unoptimized + debuginfo] target(s) in 7.82s
#      Running unittests src/lib.rs (target/debug/deps/text_generation_router-18a0bbf99c2ca1b4)

# running 3 tests
# test infer::tests::test_chat_template_valid_with_raise ... ok
# test infer::tests::test_chat_template ... ok
# test infer::tests::test_chat_template_invalid_with_raise ... ok

# test result: ok. 3 passed; 0 failed; 0 ignored; 0 measured; 15 filtered out; finished in 0.00s
```
											
										
										
											2024-01-18 11:31:56 +00:00
 								// tests
 								#[cfg(test)]
 								mod tests {
 								    use crate::infer::raise_exception;
-												Fixing types. (#1906)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
											
										
										
											2024-05-16 14:59:05 +00:00
+								    use crate::{ChatTemplateInputs, TextMessage};
-												feat: support raise_exception, bos and eos tokens (#1450)

This PR adds support to handle the custom jinja function
`raise_exception` and passes the `bos` and `eos` tokens into the
template

Additionally this PR adds 3 tests to validate and show examples of what
can and cannot be parsed currently.

```bash
cargo test --package text-generation-router --lib -- infer::tests --nocapture
#     Finished test [unoptimized + debuginfo] target(s) in 7.82s
#      Running unittests src/lib.rs (target/debug/deps/text_generation_router-18a0bbf99c2ca1b4)

# running 3 tests
# test infer::tests::test_chat_template_valid_with_raise ... ok
# test infer::tests::test_chat_template ... ok
# test infer::tests::test_chat_template_invalid_with_raise ... ok

# test result: ok. 3 passed; 0 failed; 0 ignored; 0 measured; 15 filtered out; finished in 0.00s
```
											
										
										
											2024-01-18 11:31:56 +00:00
+								    use minijinja::Environment;
 								    #[test]
 								    fn test_chat_template() {
 								        let env = Environment::new();
 								        let source = r#"
 								        {% for message in messages %}
 								            {% if message['role'] == 'system' %}
 								                {% if message['content']%}
 								                    {{'### System:\n' + message['content']+'\n\n'}}
 								                {% endif %}
 								            {% elif message['role'] == 'user' %}
 								                {{'### User:\n' + message['content']+'\n\n'}}
 								            {% elif message['role'] == 'assistant' %}
 								                {{'### Assistant:\n'  + message['content']}}
 								            {% endif %}
 								            {% if loop.last and add_generation_prompt %}
 								                {{ '### Assistant:\n' }}
 								            {% endif %}
 								        {% endfor %}"#;
 								        // trim all the whitespace
 								        let source = source
 								            .lines()
 								            .map(|line| line.trim())
 								            .collect::<Vec<&str>>()
 								            .join("");
 								        let tmpl = env.template_from_str(&source);
 								        let chat_template_inputs = ChatTemplateInputs {
 								            messages: vec![
-												Fixing types. (#1906)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
											
										
										
											2024-05-16 14:59:05 +00:00
+								                TextMessage {
-												feat: support raise_exception, bos and eos tokens (#1450)

This PR adds support to handle the custom jinja function
`raise_exception` and passes the `bos` and `eos` tokens into the
template

Additionally this PR adds 3 tests to validate and show examples of what
can and cannot be parsed currently.

```bash
cargo test --package text-generation-router --lib -- infer::tests --nocapture
#     Finished test [unoptimized + debuginfo] target(s) in 7.82s
#      Running unittests src/lib.rs (target/debug/deps/text_generation_router-18a0bbf99c2ca1b4)

# running 3 tests
# test infer::tests::test_chat_template_valid_with_raise ... ok
# test infer::tests::test_chat_template ... ok
# test infer::tests::test_chat_template_invalid_with_raise ... ok

# test result: ok. 3 passed; 0 failed; 0 ignored; 0 measured; 15 filtered out; finished in 0.00s
```
											
										
										
											2024-01-18 11:31:56 +00:00
+								                    role: "user".to_string(),
-												Fixing types. (#1906)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
											
										
										
											2024-05-16 14:59:05 +00:00
+								                    content: "Hi!".to_string(),
-												feat: support raise_exception, bos and eos tokens (#1450)

This PR adds support to handle the custom jinja function
`raise_exception` and passes the `bos` and `eos` tokens into the
template

Additionally this PR adds 3 tests to validate and show examples of what
can and cannot be parsed currently.

```bash
cargo test --package text-generation-router --lib -- infer::tests --nocapture
#     Finished test [unoptimized + debuginfo] target(s) in 7.82s
#      Running unittests src/lib.rs (target/debug/deps/text_generation_router-18a0bbf99c2ca1b4)

# running 3 tests
# test infer::tests::test_chat_template_valid_with_raise ... ok
# test infer::tests::test_chat_template ... ok
# test infer::tests::test_chat_template_invalid_with_raise ... ok

# test result: ok. 3 passed; 0 failed; 0 ignored; 0 measured; 15 filtered out; finished in 0.00s
```
											
										
										
											2024-01-18 11:31:56 +00:00
+								                },
-												Fixing types. (#1906)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
											
										
										
											2024-05-16 14:59:05 +00:00
+								                TextMessage {
-												feat: support raise_exception, bos and eos tokens (#1450)

This PR adds support to handle the custom jinja function
`raise_exception` and passes the `bos` and `eos` tokens into the
template

Additionally this PR adds 3 tests to validate and show examples of what
can and cannot be parsed currently.

```bash
cargo test --package text-generation-router --lib -- infer::tests --nocapture
#     Finished test [unoptimized + debuginfo] target(s) in 7.82s
#      Running unittests src/lib.rs (target/debug/deps/text_generation_router-18a0bbf99c2ca1b4)

# running 3 tests
# test infer::tests::test_chat_template_valid_with_raise ... ok
# test infer::tests::test_chat_template ... ok
# test infer::tests::test_chat_template_invalid_with_raise ... ok

# test result: ok. 3 passed; 0 failed; 0 ignored; 0 measured; 15 filtered out; finished in 0.00s
```
											
										
										
											2024-01-18 11:31:56 +00:00
+								                    role: "assistant".to_string(),
-												Fixing types. (#1906)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
											
										
										
											2024-05-16 14:59:05 +00:00
+								                    content: "Hello how can I help?".to_string(),
-												feat: support raise_exception, bos and eos tokens (#1450)

This PR adds support to handle the custom jinja function
`raise_exception` and passes the `bos` and `eos` tokens into the
template

Additionally this PR adds 3 tests to validate and show examples of what
can and cannot be parsed currently.

```bash
cargo test --package text-generation-router --lib -- infer::tests --nocapture
#     Finished test [unoptimized + debuginfo] target(s) in 7.82s
#      Running unittests src/lib.rs (target/debug/deps/text_generation_router-18a0bbf99c2ca1b4)

# running 3 tests
# test infer::tests::test_chat_template_valid_with_raise ... ok
# test infer::tests::test_chat_template ... ok
# test infer::tests::test_chat_template_invalid_with_raise ... ok

# test result: ok. 3 passed; 0 failed; 0 ignored; 0 measured; 15 filtered out; finished in 0.00s
```
											
										
										
											2024-01-18 11:31:56 +00:00
+								                },
-												Fixing types. (#1906)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
											
										
										
											2024-05-16 14:59:05 +00:00
+								                TextMessage {
-												feat: support raise_exception, bos and eos tokens (#1450)

This PR adds support to handle the custom jinja function
`raise_exception` and passes the `bos` and `eos` tokens into the
template

Additionally this PR adds 3 tests to validate and show examples of what
can and cannot be parsed currently.

```bash
cargo test --package text-generation-router --lib -- infer::tests --nocapture
#     Finished test [unoptimized + debuginfo] target(s) in 7.82s
#      Running unittests src/lib.rs (target/debug/deps/text_generation_router-18a0bbf99c2ca1b4)

# running 3 tests
# test infer::tests::test_chat_template_valid_with_raise ... ok
# test infer::tests::test_chat_template ... ok
# test infer::tests::test_chat_template_invalid_with_raise ... ok

# test result: ok. 3 passed; 0 failed; 0 ignored; 0 measured; 15 filtered out; finished in 0.00s
```
											
										
										
											2024-01-18 11:31:56 +00:00
+								                    role: "user".to_string(),
-												Fixing types. (#1906)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
											
										
										
											2024-05-16 14:59:05 +00:00
+								                    content: "What is Deep Learning?".to_string(),
-												feat: support raise_exception, bos and eos tokens (#1450)

This PR adds support to handle the custom jinja function
`raise_exception` and passes the `bos` and `eos` tokens into the
template

Additionally this PR adds 3 tests to validate and show examples of what
can and cannot be parsed currently.

```bash
cargo test --package text-generation-router --lib -- infer::tests --nocapture
#     Finished test [unoptimized + debuginfo] target(s) in 7.82s
#      Running unittests src/lib.rs (target/debug/deps/text_generation_router-18a0bbf99c2ca1b4)

# running 3 tests
# test infer::tests::test_chat_template_valid_with_raise ... ok
# test infer::tests::test_chat_template ... ok
# test infer::tests::test_chat_template_invalid_with_raise ... ok

# test result: ok. 3 passed; 0 failed; 0 ignored; 0 measured; 15 filtered out; finished in 0.00s
```
											
										
										
											2024-01-18 11:31:56 +00:00
+								                },
-												Fixing types. (#1906)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
											
										
										
											2024-05-16 14:59:05 +00:00
+								                TextMessage {
-												feat: support raise_exception, bos and eos tokens (#1450)

This PR adds support to handle the custom jinja function
`raise_exception` and passes the `bos` and `eos` tokens into the
template

Additionally this PR adds 3 tests to validate and show examples of what
can and cannot be parsed currently.

```bash
cargo test --package text-generation-router --lib -- infer::tests --nocapture
#     Finished test [unoptimized + debuginfo] target(s) in 7.82s
#      Running unittests src/lib.rs (target/debug/deps/text_generation_router-18a0bbf99c2ca1b4)

# running 3 tests
# test infer::tests::test_chat_template_valid_with_raise ... ok
# test infer::tests::test_chat_template ... ok
# test infer::tests::test_chat_template_invalid_with_raise ... ok

# test result: ok. 3 passed; 0 failed; 0 ignored; 0 measured; 15 filtered out; finished in 0.00s
```
											
										
										
											2024-01-18 11:31:56 +00:00
+								                    role: "assistant".to_string(),
-												Fixing types. (#1906)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
											
										
										
											2024-05-16 14:59:05 +00:00
+								                    content: "magic!".to_string(),
-												feat: support raise_exception, bos and eos tokens (#1450)

This PR adds support to handle the custom jinja function
`raise_exception` and passes the `bos` and `eos` tokens into the
template

Additionally this PR adds 3 tests to validate and show examples of what
can and cannot be parsed currently.

```bash
cargo test --package text-generation-router --lib -- infer::tests --nocapture
#     Finished test [unoptimized + debuginfo] target(s) in 7.82s
#      Running unittests src/lib.rs (target/debug/deps/text_generation_router-18a0bbf99c2ca1b4)

# running 3 tests
# test infer::tests::test_chat_template_valid_with_raise ... ok
# test infer::tests::test_chat_template ... ok
# test infer::tests::test_chat_template_invalid_with_raise ... ok

# test result: ok. 3 passed; 0 failed; 0 ignored; 0 measured; 15 filtered out; finished in 0.00s
```
											
										
										
											2024-01-18 11:31:56 +00:00
+								                },
 								            ],
 								            bos_token: Some("[BOS]"),
 								            eos_token: Some("[EOS]"),
-												feat: use existing add_generation_prompt variable from config in temp… (#1533)

This PR adds support to read the `add_generation_prompt` from the config
and use it in the chat template. If `add_generation_prompt` does not
exist we default to false
											
										
										
											2024-02-07 08:35:53 +00:00
+								            add_generation_prompt: true,
-												feat: improve tools to include name and add tests (#1693)

This PR makes tool calling aware of the name of the function selected. 

Fixes:
https://github.com/huggingface/text-generation-inference/issues/1657

Thank you @puppetm4st3r for the helpful snippets, large parts of this PR
are simply refactors of the code shared 🙏

**opening draft PR because small tweaks are needed before merging
											
										
										
											2024-04-16 13:02:46 +00:00
+								            ..Default::default()
-												feat: support raise_exception, bos and eos tokens (#1450)

This PR adds support to handle the custom jinja function
`raise_exception` and passes the `bos` and `eos` tokens into the
template

Additionally this PR adds 3 tests to validate and show examples of what
can and cannot be parsed currently.

```bash
cargo test --package text-generation-router --lib -- infer::tests --nocapture
#     Finished test [unoptimized + debuginfo] target(s) in 7.82s
#      Running unittests src/lib.rs (target/debug/deps/text_generation_router-18a0bbf99c2ca1b4)

# running 3 tests
# test infer::tests::test_chat_template_valid_with_raise ... ok
# test infer::tests::test_chat_template ... ok
# test infer::tests::test_chat_template_invalid_with_raise ... ok

# test result: ok. 3 passed; 0 failed; 0 ignored; 0 measured; 15 filtered out; finished in 0.00s
```
											
										
										
											2024-01-18 11:31:56 +00:00
+								        };
 								        let result = tmpl.unwrap().render(chat_template_inputs).unwrap();
 								        assert_eq!(
 								            result,
-												feat: use existing add_generation_prompt variable from config in temp… (#1533)

This PR adds support to read the `add_generation_prompt` from the config
and use it in the chat template. If `add_generation_prompt` does not
exist we default to false
											
										
										
											2024-02-07 08:35:53 +00:00
+								            "### User:\nHi!\n\n### Assistant:\nHello how can I help?### User:\nWhat is Deep Learning?\n\n### Assistant:\nmagic!### Assistant:\n"
-												feat: support raise_exception, bos and eos tokens (#1450)

This PR adds support to handle the custom jinja function
`raise_exception` and passes the `bos` and `eos` tokens into the
template

Additionally this PR adds 3 tests to validate and show examples of what
can and cannot be parsed currently.

```bash
cargo test --package text-generation-router --lib -- infer::tests --nocapture
#     Finished test [unoptimized + debuginfo] target(s) in 7.82s
#      Running unittests src/lib.rs (target/debug/deps/text_generation_router-18a0bbf99c2ca1b4)

# running 3 tests
# test infer::tests::test_chat_template_valid_with_raise ... ok
# test infer::tests::test_chat_template ... ok
# test infer::tests::test_chat_template_invalid_with_raise ... ok

# test result: ok. 3 passed; 0 failed; 0 ignored; 0 measured; 15 filtered out; finished in 0.00s
```
											
										
										
											2024-01-18 11:31:56 +00:00
+								        );
 								    }
 								    #[test]
 								    fn test_chat_template_invalid_with_raise() {
 								        let mut env = Environment::new();
 								        env.add_function("raise_exception", raise_exception);
 								        let source = r#"
 								        {{ bos_token }}
 								        {% for message in messages %}
 								        {% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}
 								        {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}
 								        {% endif %}
 								        {% if message['role'] == 'user' %}
 								        {{ '[INST] ' + message['content'] + ' [/INST]' }}
 								        {% elif message['role'] == 'assistant' %}
 								        {{ message['content'] + eos_token}}
 								        {% else %}
 								        {{ raise_exception('Only user and assistant roles are supported!') }}
 								        {% endif %}
 								        {% endfor %}"#;
 								        // trim all the whitespace
 								        let source = source
 								            .lines()
 								            .map(|line| line.trim())
 								            .collect::<Vec<&str>>()
 								            .join("");
 								        let tmpl = env.template_from_str(&source);
 								        let chat_template_inputs = ChatTemplateInputs {
 								            messages: vec![
-												Fixing types. (#1906)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
											
										
										
											2024-05-16 14:59:05 +00:00
+								                TextMessage {
-												feat: support raise_exception, bos and eos tokens (#1450)

This PR adds support to handle the custom jinja function
`raise_exception` and passes the `bos` and `eos` tokens into the
template

Additionally this PR adds 3 tests to validate and show examples of what
can and cannot be parsed currently.

```bash
cargo test --package text-generation-router --lib -- infer::tests --nocapture
#     Finished test [unoptimized + debuginfo] target(s) in 7.82s
#      Running unittests src/lib.rs (target/debug/deps/text_generation_router-18a0bbf99c2ca1b4)

# running 3 tests
# test infer::tests::test_chat_template_valid_with_raise ... ok
# test infer::tests::test_chat_template ... ok
# test infer::tests::test_chat_template_invalid_with_raise ... ok

# test result: ok. 3 passed; 0 failed; 0 ignored; 0 measured; 15 filtered out; finished in 0.00s
```
											
										
										
											2024-01-18 11:31:56 +00:00
+								                    role: "user".to_string(),
-												Fixing types. (#1906)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
											
										
										
											2024-05-16 14:59:05 +00:00
+								                    content: "Hi!".to_string(),
-												feat: support raise_exception, bos and eos tokens (#1450)

This PR adds support to handle the custom jinja function
`raise_exception` and passes the `bos` and `eos` tokens into the
template

Additionally this PR adds 3 tests to validate and show examples of what
can and cannot be parsed currently.

```bash
cargo test --package text-generation-router --lib -- infer::tests --nocapture
#     Finished test [unoptimized + debuginfo] target(s) in 7.82s
#      Running unittests src/lib.rs (target/debug/deps/text_generation_router-18a0bbf99c2ca1b4)

# running 3 tests
# test infer::tests::test_chat_template_valid_with_raise ... ok
# test infer::tests::test_chat_template ... ok
# test infer::tests::test_chat_template_invalid_with_raise ... ok

# test result: ok. 3 passed; 0 failed; 0 ignored; 0 measured; 15 filtered out; finished in 0.00s
```
											
										
										
											2024-01-18 11:31:56 +00:00
+								                },
-												Fixing types. (#1906)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
											
										
										
											2024-05-16 14:59:05 +00:00
+								                TextMessage {
-												feat: support raise_exception, bos and eos tokens (#1450)

This PR adds support to handle the custom jinja function
`raise_exception` and passes the `bos` and `eos` tokens into the
template

Additionally this PR adds 3 tests to validate and show examples of what
can and cannot be parsed currently.

```bash
cargo test --package text-generation-router --lib -- infer::tests --nocapture
#     Finished test [unoptimized + debuginfo] target(s) in 7.82s
#      Running unittests src/lib.rs (target/debug/deps/text_generation_router-18a0bbf99c2ca1b4)

# running 3 tests
# test infer::tests::test_chat_template_valid_with_raise ... ok
# test infer::tests::test_chat_template ... ok
# test infer::tests::test_chat_template_invalid_with_raise ... ok

# test result: ok. 3 passed; 0 failed; 0 ignored; 0 measured; 15 filtered out; finished in 0.00s
```
											
										
										
											2024-01-18 11:31:56 +00:00
+								                    role: "user".to_string(),
-												Fixing types. (#1906)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
											
										
										
											2024-05-16 14:59:05 +00:00
+								                    content: "Hi again!".to_string(),
-												feat: support raise_exception, bos and eos tokens (#1450)

This PR adds support to handle the custom jinja function
`raise_exception` and passes the `bos` and `eos` tokens into the
template

Additionally this PR adds 3 tests to validate and show examples of what
can and cannot be parsed currently.

```bash
cargo test --package text-generation-router --lib -- infer::tests --nocapture
#     Finished test [unoptimized + debuginfo] target(s) in 7.82s
#      Running unittests src/lib.rs (target/debug/deps/text_generation_router-18a0bbf99c2ca1b4)

# running 3 tests
# test infer::tests::test_chat_template_valid_with_raise ... ok
# test infer::tests::test_chat_template ... ok
# test infer::tests::test_chat_template_invalid_with_raise ... ok

# test result: ok. 3 passed; 0 failed; 0 ignored; 0 measured; 15 filtered out; finished in 0.00s
```
											
										
										
											2024-01-18 11:31:56 +00:00
+								                },
-												Fixing types. (#1906)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
											
										
										
											2024-05-16 14:59:05 +00:00
+								                TextMessage {
-												feat: support raise_exception, bos and eos tokens (#1450)

This PR adds support to handle the custom jinja function
`raise_exception` and passes the `bos` and `eos` tokens into the
template

Additionally this PR adds 3 tests to validate and show examples of what
can and cannot be parsed currently.

```bash
cargo test --package text-generation-router --lib -- infer::tests --nocapture
#     Finished test [unoptimized + debuginfo] target(s) in 7.82s
#      Running unittests src/lib.rs (target/debug/deps/text_generation_router-18a0bbf99c2ca1b4)

# running 3 tests
# test infer::tests::test_chat_template_valid_with_raise ... ok
# test infer::tests::test_chat_template ... ok
# test infer::tests::test_chat_template_invalid_with_raise ... ok

# test result: ok. 3 passed; 0 failed; 0 ignored; 0 measured; 15 filtered out; finished in 0.00s
```
											
										
										
											2024-01-18 11:31:56 +00:00
+								                    role: "assistant".to_string(),
-												Fixing types. (#1906)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
											
										
										
											2024-05-16 14:59:05 +00:00
+								                    content: "Hello how can I help?".to_string(),
-												feat: support raise_exception, bos and eos tokens (#1450)

This PR adds support to handle the custom jinja function
`raise_exception` and passes the `bos` and `eos` tokens into the
template

Additionally this PR adds 3 tests to validate and show examples of what
can and cannot be parsed currently.

```bash
cargo test --package text-generation-router --lib -- infer::tests --nocapture
#     Finished test [unoptimized + debuginfo] target(s) in 7.82s
#      Running unittests src/lib.rs (target/debug/deps/text_generation_router-18a0bbf99c2ca1b4)

# running 3 tests
# test infer::tests::test_chat_template_valid_with_raise ... ok
# test infer::tests::test_chat_template ... ok
# test infer::tests::test_chat_template_invalid_with_raise ... ok

# test result: ok. 3 passed; 0 failed; 0 ignored; 0 measured; 15 filtered out; finished in 0.00s
```
											
										
										
											2024-01-18 11:31:56 +00:00
+								                },
-												Fixing types. (#1906)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
											
										
										
											2024-05-16 14:59:05 +00:00
+								                TextMessage {
-												feat: support raise_exception, bos and eos tokens (#1450)

This PR adds support to handle the custom jinja function
`raise_exception` and passes the `bos` and `eos` tokens into the
template

Additionally this PR adds 3 tests to validate and show examples of what
can and cannot be parsed currently.

```bash
cargo test --package text-generation-router --lib -- infer::tests --nocapture
#     Finished test [unoptimized + debuginfo] target(s) in 7.82s
#      Running unittests src/lib.rs (target/debug/deps/text_generation_router-18a0bbf99c2ca1b4)

# running 3 tests
# test infer::tests::test_chat_template_valid_with_raise ... ok
# test infer::tests::test_chat_template ... ok
# test infer::tests::test_chat_template_invalid_with_raise ... ok

# test result: ok. 3 passed; 0 failed; 0 ignored; 0 measured; 15 filtered out; finished in 0.00s
```
											
										
										
											2024-01-18 11:31:56 +00:00
+								                    role: "user".to_string(),
-												Fixing types. (#1906)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
											
										
										
											2024-05-16 14:59:05 +00:00
+								                    content: "What is Deep Learning?".to_string(),
-												feat: support raise_exception, bos and eos tokens (#1450)

This PR adds support to handle the custom jinja function
`raise_exception` and passes the `bos` and `eos` tokens into the
template

Additionally this PR adds 3 tests to validate and show examples of what
can and cannot be parsed currently.

```bash
cargo test --package text-generation-router --lib -- infer::tests --nocapture
#     Finished test [unoptimized + debuginfo] target(s) in 7.82s
#      Running unittests src/lib.rs (target/debug/deps/text_generation_router-18a0bbf99c2ca1b4)

# running 3 tests
# test infer::tests::test_chat_template_valid_with_raise ... ok
# test infer::tests::test_chat_template ... ok
# test infer::tests::test_chat_template_invalid_with_raise ... ok

# test result: ok. 3 passed; 0 failed; 0 ignored; 0 measured; 15 filtered out; finished in 0.00s
```
											
										
										
											2024-01-18 11:31:56 +00:00
+								                },
-												Fixing types. (#1906)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
											
										
										
											2024-05-16 14:59:05 +00:00
+								                TextMessage {
-												feat: support raise_exception, bos and eos tokens (#1450)

This PR adds support to handle the custom jinja function
`raise_exception` and passes the `bos` and `eos` tokens into the
template

Additionally this PR adds 3 tests to validate and show examples of what
can and cannot be parsed currently.

```bash
cargo test --package text-generation-router --lib -- infer::tests --nocapture
#     Finished test [unoptimized + debuginfo] target(s) in 7.82s
#      Running unittests src/lib.rs (target/debug/deps/text_generation_router-18a0bbf99c2ca1b4)

# running 3 tests
# test infer::tests::test_chat_template_valid_with_raise ... ok
# test infer::tests::test_chat_template ... ok
# test infer::tests::test_chat_template_invalid_with_raise ... ok

# test result: ok. 3 passed; 0 failed; 0 ignored; 0 measured; 15 filtered out; finished in 0.00s
```
											
										
										
											2024-01-18 11:31:56 +00:00
+								                    role: "assistant".to_string(),
-												Fixing types. (#1906)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
											
										
										
											2024-05-16 14:59:05 +00:00
+								                    content: "magic!".to_string(),
-												feat: support raise_exception, bos and eos tokens (#1450)

This PR adds support to handle the custom jinja function
`raise_exception` and passes the `bos` and `eos` tokens into the
template

Additionally this PR adds 3 tests to validate and show examples of what
can and cannot be parsed currently.

```bash
cargo test --package text-generation-router --lib -- infer::tests --nocapture
#     Finished test [unoptimized + debuginfo] target(s) in 7.82s
#      Running unittests src/lib.rs (target/debug/deps/text_generation_router-18a0bbf99c2ca1b4)

# running 3 tests
# test infer::tests::test_chat_template_valid_with_raise ... ok
# test infer::tests::test_chat_template ... ok
# test infer::tests::test_chat_template_invalid_with_raise ... ok

# test result: ok. 3 passed; 0 failed; 0 ignored; 0 measured; 15 filtered out; finished in 0.00s
```
											
										
										
											2024-01-18 11:31:56 +00:00
+								                },
 								            ],
 								            bos_token: Some("[BOS]"),
 								            eos_token: Some("[EOS]"),
-												feat: use existing add_generation_prompt variable from config in temp… (#1533)

This PR adds support to read the `add_generation_prompt` from the config
and use it in the chat template. If `add_generation_prompt` does not
exist we default to false
											
										
										
											2024-02-07 08:35:53 +00:00
+								            add_generation_prompt: true,
-												feat: improve tools to include name and add tests (#1693)

This PR makes tool calling aware of the name of the function selected. 

Fixes:
https://github.com/huggingface/text-generation-inference/issues/1657

Thank you @puppetm4st3r for the helpful snippets, large parts of this PR
are simply refactors of the code shared 🙏

**opening draft PR because small tweaks are needed before merging
											
										
										
											2024-04-16 13:02:46 +00:00
+								            ..Default::default()
-												feat: support raise_exception, bos and eos tokens (#1450)

This PR adds support to handle the custom jinja function
`raise_exception` and passes the `bos` and `eos` tokens into the
template

Additionally this PR adds 3 tests to validate and show examples of what
can and cannot be parsed currently.

```bash
cargo test --package text-generation-router --lib -- infer::tests --nocapture
#     Finished test [unoptimized + debuginfo] target(s) in 7.82s
#      Running unittests src/lib.rs (target/debug/deps/text_generation_router-18a0bbf99c2ca1b4)

# running 3 tests
# test infer::tests::test_chat_template_valid_with_raise ... ok
# test infer::tests::test_chat_template ... ok
# test infer::tests::test_chat_template_invalid_with_raise ... ok

# test result: ok. 3 passed; 0 failed; 0 ignored; 0 measured; 15 filtered out; finished in 0.00s
```
											
										
										
											2024-01-18 11:31:56 +00:00
+								        };
 								        let result = tmpl.unwrap().render(chat_template_inputs); //.err().unwrap();
 								        match result {
 								            Ok(_) => panic!("Should have failed"),
 								            Err(e) => {
 								                assert_eq!(
 								                    e.detail().unwrap(),
 								                    "Conversation roles must alternate user/assistant/user/assistant/..."
 								                );
 								            }
 								        }
 								    }
 								    #[test]
 								    fn test_chat_template_valid_with_raise() {
 								        let mut env = Environment::new();
 								        env.add_function("raise_exception", raise_exception);
 								        let source = r#"
 								        {{ bos_token }}
 								        {% for message in messages %}
 								        {% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}
 								        {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}
 								        {% endif %}
 								        {% if message['role'] == 'user' %}
 								        {{ '[INST] ' + message['content'] + ' [/INST]' }}
 								        {% elif message['role'] == 'assistant' %}
 								        {{ message['content'] + eos_token}}
 								        {% else %}
 								        {{ raise_exception('Only user and assistant roles are supported!') }}
 								        {% endif %}
 								        {% endfor %}"#;
 								        // trim all the whitespace
 								        let source = source
 								            .lines()
 								            .map(|line| line.trim())
 								            .collect::<Vec<&str>>()
 								            .join("");
 								        let tmpl = env.template_from_str(&source);
 								        let chat_template_inputs = ChatTemplateInputs {
 								            messages: vec![
-												Fixing types. (#1906)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
											
										
										
											2024-05-16 14:59:05 +00:00
+								                TextMessage {
-												feat: support raise_exception, bos and eos tokens (#1450)

This PR adds support to handle the custom jinja function
`raise_exception` and passes the `bos` and `eos` tokens into the
template

Additionally this PR adds 3 tests to validate and show examples of what
can and cannot be parsed currently.

```bash
cargo test --package text-generation-router --lib -- infer::tests --nocapture
#     Finished test [unoptimized + debuginfo] target(s) in 7.82s
#      Running unittests src/lib.rs (target/debug/deps/text_generation_router-18a0bbf99c2ca1b4)

# running 3 tests
# test infer::tests::test_chat_template_valid_with_raise ... ok
# test infer::tests::test_chat_template ... ok
# test infer::tests::test_chat_template_invalid_with_raise ... ok

# test result: ok. 3 passed; 0 failed; 0 ignored; 0 measured; 15 filtered out; finished in 0.00s
```
											
										
										
											2024-01-18 11:31:56 +00:00
+								                    role: "user".to_string(),
-												Fixing types. (#1906)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
											
										
										
											2024-05-16 14:59:05 +00:00
+								                    content: "Hi!".to_string(),
-												feat: support raise_exception, bos and eos tokens (#1450)

This PR adds support to handle the custom jinja function
`raise_exception` and passes the `bos` and `eos` tokens into the
template

Additionally this PR adds 3 tests to validate and show examples of what
can and cannot be parsed currently.

```bash
cargo test --package text-generation-router --lib -- infer::tests --nocapture
#     Finished test [unoptimized + debuginfo] target(s) in 7.82s
#      Running unittests src/lib.rs (target/debug/deps/text_generation_router-18a0bbf99c2ca1b4)

# running 3 tests
# test infer::tests::test_chat_template_valid_with_raise ... ok
# test infer::tests::test_chat_template ... ok
# test infer::tests::test_chat_template_invalid_with_raise ... ok

# test result: ok. 3 passed; 0 failed; 0 ignored; 0 measured; 15 filtered out; finished in 0.00s
```
											
										
										
											2024-01-18 11:31:56 +00:00
+								                },
-												Fixing types. (#1906)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
											
										
										
											2024-05-16 14:59:05 +00:00
+								                TextMessage {
-												feat: support raise_exception, bos and eos tokens (#1450)

This PR adds support to handle the custom jinja function
`raise_exception` and passes the `bos` and `eos` tokens into the
template

Additionally this PR adds 3 tests to validate and show examples of what
can and cannot be parsed currently.

```bash
cargo test --package text-generation-router --lib -- infer::tests --nocapture
#     Finished test [unoptimized + debuginfo] target(s) in 7.82s
#      Running unittests src/lib.rs (target/debug/deps/text_generation_router-18a0bbf99c2ca1b4)

# running 3 tests
# test infer::tests::test_chat_template_valid_with_raise ... ok
# test infer::tests::test_chat_template ... ok
# test infer::tests::test_chat_template_invalid_with_raise ... ok

# test result: ok. 3 passed; 0 failed; 0 ignored; 0 measured; 15 filtered out; finished in 0.00s
```
											
										
										
											2024-01-18 11:31:56 +00:00
+								                    role: "assistant".to_string(),
-												Fixing types. (#1906)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
											
										
										
											2024-05-16 14:59:05 +00:00
+								                    content: "Hello how can I help?".to_string(),
-												feat: support raise_exception, bos and eos tokens (#1450)

This PR adds support to handle the custom jinja function
`raise_exception` and passes the `bos` and `eos` tokens into the
template

Additionally this PR adds 3 tests to validate and show examples of what
can and cannot be parsed currently.

```bash
cargo test --package text-generation-router --lib -- infer::tests --nocapture
#     Finished test [unoptimized + debuginfo] target(s) in 7.82s
#      Running unittests src/lib.rs (target/debug/deps/text_generation_router-18a0bbf99c2ca1b4)

# running 3 tests
# test infer::tests::test_chat_template_valid_with_raise ... ok
# test infer::tests::test_chat_template ... ok
# test infer::tests::test_chat_template_invalid_with_raise ... ok

# test result: ok. 3 passed; 0 failed; 0 ignored; 0 measured; 15 filtered out; finished in 0.00s
```
											
										
										
											2024-01-18 11:31:56 +00:00
+								                },
-												Fixing types. (#1906)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
											
										
										
											2024-05-16 14:59:05 +00:00
+								                TextMessage {
-												feat: support raise_exception, bos and eos tokens (#1450)

This PR adds support to handle the custom jinja function
`raise_exception` and passes the `bos` and `eos` tokens into the
template

Additionally this PR adds 3 tests to validate and show examples of what
can and cannot be parsed currently.

```bash
cargo test --package text-generation-router --lib -- infer::tests --nocapture
#     Finished test [unoptimized + debuginfo] target(s) in 7.82s
#      Running unittests src/lib.rs (target/debug/deps/text_generation_router-18a0bbf99c2ca1b4)

# running 3 tests
# test infer::tests::test_chat_template_valid_with_raise ... ok
# test infer::tests::test_chat_template ... ok
# test infer::tests::test_chat_template_invalid_with_raise ... ok

# test result: ok. 3 passed; 0 failed; 0 ignored; 0 measured; 15 filtered out; finished in 0.00s
```
											
										
										
											2024-01-18 11:31:56 +00:00
+								                    role: "user".to_string(),
-												Fixing types. (#1906)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
											
										
										
											2024-05-16 14:59:05 +00:00
+								                    content: "What is Deep Learning?".to_string(),
-												feat: support raise_exception, bos and eos tokens (#1450)

This PR adds support to handle the custom jinja function
`raise_exception` and passes the `bos` and `eos` tokens into the
template

Additionally this PR adds 3 tests to validate and show examples of what
can and cannot be parsed currently.

```bash
cargo test --package text-generation-router --lib -- infer::tests --nocapture
#     Finished test [unoptimized + debuginfo] target(s) in 7.82s
#      Running unittests src/lib.rs (target/debug/deps/text_generation_router-18a0bbf99c2ca1b4)

# running 3 tests
# test infer::tests::test_chat_template_valid_with_raise ... ok
# test infer::tests::test_chat_template ... ok
# test infer::tests::test_chat_template_invalid_with_raise ... ok

# test result: ok. 3 passed; 0 failed; 0 ignored; 0 measured; 15 filtered out; finished in 0.00s
```
											
										
										
											2024-01-18 11:31:56 +00:00
+								                },
-												Fixing types. (#1906)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
											
										
										
											2024-05-16 14:59:05 +00:00
+								                TextMessage {
-												feat: support raise_exception, bos and eos tokens (#1450)

This PR adds support to handle the custom jinja function
`raise_exception` and passes the `bos` and `eos` tokens into the
template

Additionally this PR adds 3 tests to validate and show examples of what
can and cannot be parsed currently.

```bash
cargo test --package text-generation-router --lib -- infer::tests --nocapture
#     Finished test [unoptimized + debuginfo] target(s) in 7.82s
#      Running unittests src/lib.rs (target/debug/deps/text_generation_router-18a0bbf99c2ca1b4)

# running 3 tests
# test infer::tests::test_chat_template_valid_with_raise ... ok
# test infer::tests::test_chat_template ... ok
# test infer::tests::test_chat_template_invalid_with_raise ... ok

# test result: ok. 3 passed; 0 failed; 0 ignored; 0 measured; 15 filtered out; finished in 0.00s
```
											
										
										
											2024-01-18 11:31:56 +00:00
+								                    role: "assistant".to_string(),
-												Fixing types. (#1906)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
											
										
										
											2024-05-16 14:59:05 +00:00
+								                    content: "magic!".to_string(),
-												feat: support raise_exception, bos and eos tokens (#1450)

This PR adds support to handle the custom jinja function
`raise_exception` and passes the `bos` and `eos` tokens into the
template

Additionally this PR adds 3 tests to validate and show examples of what
can and cannot be parsed currently.

```bash
cargo test --package text-generation-router --lib -- infer::tests --nocapture
#     Finished test [unoptimized + debuginfo] target(s) in 7.82s
#      Running unittests src/lib.rs (target/debug/deps/text_generation_router-18a0bbf99c2ca1b4)

# running 3 tests
# test infer::tests::test_chat_template_valid_with_raise ... ok
# test infer::tests::test_chat_template ... ok
# test infer::tests::test_chat_template_invalid_with_raise ... ok

# test result: ok. 3 passed; 0 failed; 0 ignored; 0 measured; 15 filtered out; finished in 0.00s
```
											
										
										
											2024-01-18 11:31:56 +00:00
+								                },
 								            ],
 								            bos_token: Some("[BOS]"),
 								            eos_token: Some("[EOS]"),
-												feat: use existing add_generation_prompt variable from config in temp… (#1533)

This PR adds support to read the `add_generation_prompt` from the config
and use it in the chat template. If `add_generation_prompt` does not
exist we default to false
											
										
										
											2024-02-07 08:35:53 +00:00
+								            add_generation_prompt: true,
-												feat: improve tools to include name and add tests (#1693)

This PR makes tool calling aware of the name of the function selected. 

Fixes:
https://github.com/huggingface/text-generation-inference/issues/1657

Thank you @puppetm4st3r for the helpful snippets, large parts of this PR
are simply refactors of the code shared 🙏

**opening draft PR because small tweaks are needed before merging
											
										
										
											2024-04-16 13:02:46 +00:00
+								            ..Default::default()
-												feat: support raise_exception, bos and eos tokens (#1450)

This PR adds support to handle the custom jinja function
`raise_exception` and passes the `bos` and `eos` tokens into the
template

Additionally this PR adds 3 tests to validate and show examples of what
can and cannot be parsed currently.

```bash
cargo test --package text-generation-router --lib -- infer::tests --nocapture
#     Finished test [unoptimized + debuginfo] target(s) in 7.82s
#      Running unittests src/lib.rs (target/debug/deps/text_generation_router-18a0bbf99c2ca1b4)

# running 3 tests
# test infer::tests::test_chat_template_valid_with_raise ... ok
# test infer::tests::test_chat_template ... ok
# test infer::tests::test_chat_template_invalid_with_raise ... ok

# test result: ok. 3 passed; 0 failed; 0 ignored; 0 measured; 15 filtered out; finished in 0.00s
```
											
										
										
											2024-01-18 11:31:56 +00:00
+								        };
 								        let result = tmpl.unwrap().render(chat_template_inputs).unwrap();
 								        assert_eq!(result, "[BOS][INST] Hi! [/INST]Hello how can I help?[EOS][INST] What is Deep Learning? [/INST]magic![EOS]");
 								    }
-												feat: use existing add_generation_prompt variable from config in temp… (#1533)

This PR adds support to read the `add_generation_prompt` from the config
and use it in the chat template. If `add_generation_prompt` does not
exist we default to false
											
										
										
											2024-02-07 08:35:53 +00:00
 								    #[test]
 								    fn test_chat_template_valid_with_add_generation_prompt() {
 								        let mut env = Environment::new();
 								        env.add_function("raise_exception", raise_exception);
 								        let source = r#"
 								        {% for message in messages %}
 								        {{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}
 								        {% endfor %}
 								        {% if add_generation_prompt %}
 								            {{ '<|im_start|>assistant\n' }}
 								        {% endif %}"#;
 								        // trim all the whitespace
 								        let source = source
 								            .lines()
 								            .map(|line| line.trim())
 								            .collect::<Vec<&str>>()
 								            .join("");
 								        let tmpl = env.template_from_str(&source);
 								        let chat_template_inputs = ChatTemplateInputs {
 								            messages: vec![
-												Fixing types. (#1906)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
											
										
										
											2024-05-16 14:59:05 +00:00
+								                TextMessage {
-												feat: use existing add_generation_prompt variable from config in temp… (#1533)

This PR adds support to read the `add_generation_prompt` from the config
and use it in the chat template. If `add_generation_prompt` does not
exist we default to false
											
										
										
											2024-02-07 08:35:53 +00:00
+								                    role: "user".to_string(),
-												Fixing types. (#1906)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
											
										
										
											2024-05-16 14:59:05 +00:00
+								                    content: "Hi!".to_string(),
-												feat: use existing add_generation_prompt variable from config in temp… (#1533)

This PR adds support to read the `add_generation_prompt` from the config
and use it in the chat template. If `add_generation_prompt` does not
exist we default to false
											
										
										
											2024-02-07 08:35:53 +00:00
+								                },
-												Fixing types. (#1906)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
											
										
										
											2024-05-16 14:59:05 +00:00
+								                TextMessage {
-												feat: use existing add_generation_prompt variable from config in temp… (#1533)

This PR adds support to read the `add_generation_prompt` from the config
and use it in the chat template. If `add_generation_prompt` does not
exist we default to false
											
										
										
											2024-02-07 08:35:53 +00:00
+								                    role: "assistant".to_string(),
-												Fixing types. (#1906)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
											
										
										
											2024-05-16 14:59:05 +00:00
+								                    content: "Hello how can I help?".to_string(),
-												feat: use existing add_generation_prompt variable from config in temp… (#1533)

This PR adds support to read the `add_generation_prompt` from the config
and use it in the chat template. If `add_generation_prompt` does not
exist we default to false
											
										
										
											2024-02-07 08:35:53 +00:00
+								                },
-												Fixing types. (#1906)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
											
										
										
											2024-05-16 14:59:05 +00:00
+								                TextMessage {
-												feat: use existing add_generation_prompt variable from config in temp… (#1533)

This PR adds support to read the `add_generation_prompt` from the config
and use it in the chat template. If `add_generation_prompt` does not
exist we default to false
											
										
										
											2024-02-07 08:35:53 +00:00
+								                    role: "user".to_string(),
-												Fixing types. (#1906)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
											
										
										
											2024-05-16 14:59:05 +00:00
+								                    content: "What is Deep Learning?".to_string(),
-												feat: use existing add_generation_prompt variable from config in temp… (#1533)

This PR adds support to read the `add_generation_prompt` from the config
and use it in the chat template. If `add_generation_prompt` does not
exist we default to false
											
										
										
											2024-02-07 08:35:53 +00:00
+								                },
-												Fixing types. (#1906)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
											
										
										
											2024-05-16 14:59:05 +00:00
+								                TextMessage {
-												feat: use existing add_generation_prompt variable from config in temp… (#1533)

This PR adds support to read the `add_generation_prompt` from the config
and use it in the chat template. If `add_generation_prompt` does not
exist we default to false
											
										
										
											2024-02-07 08:35:53 +00:00
+								                    role: "assistant".to_string(),
-												Fixing types. (#1906)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
											
										
										
											2024-05-16 14:59:05 +00:00
+								                    content: "magic!".to_string(),
-												feat: use existing add_generation_prompt variable from config in temp… (#1533)

This PR adds support to read the `add_generation_prompt` from the config
and use it in the chat template. If `add_generation_prompt` does not
exist we default to false
											
										
										
											2024-02-07 08:35:53 +00:00
+								                },
 								            ],
 								            bos_token: Some("[BOS]"),
 								            eos_token: Some("[EOS]"),
 								            add_generation_prompt: true,
-												feat: improve tools to include name and add tests (#1693)

This PR makes tool calling aware of the name of the function selected. 

Fixes:
https://github.com/huggingface/text-generation-inference/issues/1657

Thank you @puppetm4st3r for the helpful snippets, large parts of this PR
are simply refactors of the code shared 🙏

**opening draft PR because small tweaks are needed before merging
											
										
										
											2024-04-16 13:02:46 +00:00
+								            ..Default::default()
-												feat: use existing add_generation_prompt variable from config in temp… (#1533)

This PR adds support to read the `add_generation_prompt` from the config
and use it in the chat template. If `add_generation_prompt` does not
exist we default to false
											
										
										
											2024-02-07 08:35:53 +00:00
+								        };
 								        let result = tmpl.unwrap().render(chat_template_inputs).unwrap();
 								        assert_eq!(result, "<|im_start|>user\nHi!<|im_end|>\n<|im_start|>assistant\nHello how can I help?<|im_end|>\n<|im_start|>user\nWhat is Deep Learning?<|im_end|>\n<|im_start|>assistant\nmagic!<|im_end|>\n<|im_start|>assistant\n");
 								    }
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
 								    struct ChatTemplateTestItem {
 								        name: &'static str,
 								        chat_template: &'static str,
 								        input: ChatTemplateInputs<'static>,
 								        target: &'static str,
 								    }
 								    #[test]
 								    fn test_many_chat_templates() {
 								        let example_chat = vec![
-												Fixing types. (#1906)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
											
										
										
											2024-05-16 14:59:05 +00:00
+								            TextMessage {
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
+								                role: "user".to_string(),
-												Fixing types. (#1906)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
											
										
										
											2024-05-16 14:59:05 +00:00
+								                content: "Hello, how are you?".to_string(),
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
+								            },
-												Fixing types. (#1906)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
											
										
										
											2024-05-16 14:59:05 +00:00
+								            TextMessage {
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
+								                role: "assistant".to_string(),
-												Fixing types. (#1906)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
											
										
										
											2024-05-16 14:59:05 +00:00
+								                content: "I'm doing great. How can I help you today?".to_string(),
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
+								            },
-												Fixing types. (#1906)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
											
										
										
											2024-05-16 14:59:05 +00:00
+								            TextMessage {
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
+								                role: "user".to_string(),
-												Fixing types. (#1906)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
											
										
										
											2024-05-16 14:59:05 +00:00
+								                content: "I'd like to show off how chat templating works!".to_string(),
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
+								            },
 								        ];
-												Fixing types. (#1906)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
											
										
										
											2024-05-16 14:59:05 +00:00
+								        let example_chat_with_system = [TextMessage {
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
+								            role: "system".to_string(),
-												Fixing types. (#1906)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
											
										
										
											2024-05-16 14:59:05 +00:00
+								            content: "You are a friendly chatbot who always responds in the style of a pirate"
 								                .to_string(),
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
+								        }]
 								        .iter()
 								        .chain(&example_chat)
 								        .cloned()
 								        .collect::<Vec<_>>();
 								        let test_default_templates = vec![
 								            ChatTemplateTestItem {
 								                name: "_base",
 								                chat_template: "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\\n' + message['content'] + '<|im_end|>' + '\\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\\n' }}{% endif %}",
-												fix: fix CohereForAI/c4ai-command-r-plus (#1707)

@Narsil @drbh this will update flash attention v2 and vllm.
You will need to re-install them.
											
										
										
											2024-04-10 15:20:25 +00:00
+								                input: ChatTemplateInputs {
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
+								                    messages: example_chat.clone(),
 								                    add_generation_prompt: false,
 								                    bos_token: Some(""),
 								                    eos_token: Some(""),
-												feat: improve tools to include name and add tests (#1693)

This PR makes tool calling aware of the name of the function selected. 

Fixes:
https://github.com/huggingface/text-generation-inference/issues/1657

Thank you @puppetm4st3r for the helpful snippets, large parts of this PR
are simply refactors of the code shared 🙏

**opening draft PR because small tweaks are needed before merging
											
										
										
											2024-04-16 13:02:46 +00:00
+								                    ..Default::default()
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
+								                },
 								                target: "<|im_start|>user\nHello, how are you?<|im_end|>\n<|im_start|>assistant\nI'm doing great. How can I help you today?<|im_end|>\n<|im_start|>user\nI'd like to show off how chat templating works!<|im_end|>\n",
 								            },
 								            ChatTemplateTestItem {
 								                name: "blenderbot",
 								                chat_template: "{% for message in messages %}{% if message['role'] == 'user' %}{{ ' ' }}{% endif %}{{ message['content'] }}{% if not loop.last %}{{ '  ' }}{% endif %}{% endfor %}{{ eos_token }}",
-												fix: fix CohereForAI/c4ai-command-r-plus (#1707)

@Narsil @drbh this will update flash attention v2 and vllm.
You will need to re-install them.
											
										
										
											2024-04-10 15:20:25 +00:00
+								                input: ChatTemplateInputs {
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
+								                    messages: example_chat.clone(),
 								                    add_generation_prompt: false,
 								                    bos_token: Some(""),
 								                    eos_token: Some("</s>"),
-												feat: improve tools to include name and add tests (#1693)

This PR makes tool calling aware of the name of the function selected. 

Fixes:
https://github.com/huggingface/text-generation-inference/issues/1657

Thank you @puppetm4st3r for the helpful snippets, large parts of this PR
are simply refactors of the code shared 🙏

**opening draft PR because small tweaks are needed before merging
											
										
										
											2024-04-16 13:02:46 +00:00
+								                    ..Default::default()
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
+								                },
 								                target: " Hello, how are you?  I'm doing great. How can I help you today?   I'd like to show off how chat templating works!</s>",
 								            },
 								            ChatTemplateTestItem {
 								                name: "blenderbot_small",
 								                chat_template: "{% for message in messages %}{% if message['role'] == 'user' %}{{ ' ' }}{% endif %}{{ message['content'] }}{% if not loop.last %}{{ '  ' }}{% endif %}{% endfor %}{{ eos_token }}",
-												fix: fix CohereForAI/c4ai-command-r-plus (#1707)

@Narsil @drbh this will update flash attention v2 and vllm.
You will need to re-install them.
											
										
										
											2024-04-10 15:20:25 +00:00
+								                input: ChatTemplateInputs {
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
+								                    messages: example_chat.clone(),
 								                    add_generation_prompt: false,
 								                    bos_token: Some(""),
 								                    eos_token: Some("</s>"),
-												feat: improve tools to include name and add tests (#1693)

This PR makes tool calling aware of the name of the function selected. 

Fixes:
https://github.com/huggingface/text-generation-inference/issues/1657

Thank you @puppetm4st3r for the helpful snippets, large parts of this PR
are simply refactors of the code shared 🙏

**opening draft PR because small tweaks are needed before merging
											
										
										
											2024-04-16 13:02:46 +00:00
+								                    ..Default::default()
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
+								                },
 								                target: " Hello, how are you?  I'm doing great. How can I help you today?   I'd like to show off how chat templating works!</s>",
 								            },
 								            ChatTemplateTestItem {
 								                name: "bloom",
 								                chat_template: "{% for message in messages %}{{ message.content }}{{ eos_token }}{% endfor %}",
-												fix: fix CohereForAI/c4ai-command-r-plus (#1707)

@Narsil @drbh this will update flash attention v2 and vllm.
You will need to re-install them.
											
										
										
											2024-04-10 15:20:25 +00:00
+								                input: ChatTemplateInputs {
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
+								                    messages: example_chat.clone(),
 								                    add_generation_prompt: false,
 								                    bos_token: Some(""),
 								                    eos_token: Some("</s>"),
-												feat: improve tools to include name and add tests (#1693)

This PR makes tool calling aware of the name of the function selected. 

Fixes:
https://github.com/huggingface/text-generation-inference/issues/1657

Thank you @puppetm4st3r for the helpful snippets, large parts of this PR
are simply refactors of the code shared 🙏

**opening draft PR because small tweaks are needed before merging
											
										
										
											2024-04-16 13:02:46 +00:00
+								                    ..Default::default()
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
+								                },
 								                target: "Hello, how are you?</s>I'm doing great. How can I help you today?</s>I'd like to show off how chat templating works!</s>",
 								            },
 								            ChatTemplateTestItem {
 								                name: "gpt_neox",
 								                chat_template: "{% for message in messages %}{{ message.content }}{{ eos_token }}{% endfor %}",
-												fix: fix CohereForAI/c4ai-command-r-plus (#1707)

@Narsil @drbh this will update flash attention v2 and vllm.
You will need to re-install them.
											
										
										
											2024-04-10 15:20:25 +00:00
+								                input: ChatTemplateInputs {
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
+								                    messages: example_chat.clone(),
 								                    add_generation_prompt: false,
 								                    bos_token: Some(""),
 								                    eos_token: Some("<|endoftext|>"),
-												feat: improve tools to include name and add tests (#1693)

This PR makes tool calling aware of the name of the function selected. 

Fixes:
https://github.com/huggingface/text-generation-inference/issues/1657

Thank you @puppetm4st3r for the helpful snippets, large parts of this PR
are simply refactors of the code shared 🙏

**opening draft PR because small tweaks are needed before merging
											
										
										
											2024-04-16 13:02:46 +00:00
+								                    ..Default::default()
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
+								                },
 								                target: "Hello, how are you?<|endoftext|>I'm doing great. How can I help you today?<|endoftext|>I'd like to show off how chat templating works!<|endoftext|>",
 								            },
 								            ChatTemplateTestItem {
 								                name: "gpt2",
 								                chat_template: "{% for message in messages %}{{ message.content }}{{ eos_token }}{% endfor %}",
-												fix: fix CohereForAI/c4ai-command-r-plus (#1707)

@Narsil @drbh this will update flash attention v2 and vllm.
You will need to re-install them.
											
										
										
											2024-04-10 15:20:25 +00:00
+								                input: ChatTemplateInputs {
 								                    messages: example_chat.clone(),
 								                    add_generation_prompt: false,
 								                    bos_token: Some(""),
 								                    eos_token: Some("<|endoftext|>"),
-												feat: improve tools to include name and add tests (#1693)

This PR makes tool calling aware of the name of the function selected. 

Fixes:
https://github.com/huggingface/text-generation-inference/issues/1657

Thank you @puppetm4st3r for the helpful snippets, large parts of this PR
are simply refactors of the code shared 🙏

**opening draft PR because small tweaks are needed before merging
											
										
										
											2024-04-16 13:02:46 +00:00
+								                    ..Default::default()
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
+								                },
-												fix: fix CohereForAI/c4ai-command-r-plus (#1707)

@Narsil @drbh this will update flash attention v2 and vllm.
You will need to re-install them.
											
										
										
											2024-04-10 15:20:25 +00:00
+								                target: "Hello, how are you?<|endoftext|>I'm doing great. How can I help you today?<|endoftext|>I'd like to show off how chat templating works!<|endoftext|>",
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
+								            },
 								            ChatTemplateTestItem {
 								                name: "llama",
 								                // NOTE: the `.strip()` has been replaced with `| trim` in the following template
 								                chat_template: "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif USE_DEFAULT_PROMPT == true and not '<<SYS>>' in messages[0]['content'] %}{% set loop_messages = messages %}{% set system_message = 'DEFAULT_SYSTEM_MESSAGE' %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<<SYS>>\\n' + system_message + '\\n<</SYS>>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token +'[INST] ' + content | trim + ' [/INST]' }}{% elif message['role'] == 'system' %}{{ '<<SYS>>\\n' + content | trim + '\\n<</SYS>>\\n\\n' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content | trim + ' ' + eos_token }}{% endif %}{% endfor %}",
-												fix: fix CohereForAI/c4ai-command-r-plus (#1707)

@Narsil @drbh this will update flash attention v2 and vllm.
You will need to re-install them.
											
										
										
											2024-04-10 15:20:25 +00:00
+								                input: ChatTemplateInputs {
 								                    messages: example_chat_with_system.clone(),
 								                    add_generation_prompt: true,
 								                    bos_token: Some("<s>"),
 								                    eos_token: Some("</s>"),
-												feat: improve tools to include name and add tests (#1693)

This PR makes tool calling aware of the name of the function selected. 

Fixes:
https://github.com/huggingface/text-generation-inference/issues/1657

Thank you @puppetm4st3r for the helpful snippets, large parts of this PR
are simply refactors of the code shared 🙏

**opening draft PR because small tweaks are needed before merging
											
										
										
											2024-04-16 13:02:46 +00:00
+								                    ..Default::default()
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
+								                },
-												fix: fix CohereForAI/c4ai-command-r-plus (#1707)

@Narsil @drbh this will update flash attention v2 and vllm.
You will need to re-install them.
											
										
										
											2024-04-10 15:20:25 +00:00
+								                target: "<s>[INST] <<SYS>>\nYou are a friendly chatbot who always responds in the style of a pirate\n<</SYS>>\n\nHello, how are you? [/INST] I'm doing great. How can I help you today? </s><s>[INST] I'd like to show off how chat templating works! [/INST]",
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
+								            },
 								            ChatTemplateTestItem {
 								                name: "whisper",
 								                chat_template: "{% for message in messages %}{{ message.content }}{{ eos_token }}{% endfor %}",
-												fix: fix CohereForAI/c4ai-command-r-plus (#1707)

@Narsil @drbh this will update flash attention v2 and vllm.
You will need to re-install them.
											
										
										
											2024-04-10 15:20:25 +00:00
+								                input: ChatTemplateInputs {
 								                    messages: example_chat.clone(),
 								                    add_generation_prompt: true,
 								                    bos_token: Some(""),
 								                    eos_token: Some("<|endoftext|>"),
-												feat: improve tools to include name and add tests (#1693)

This PR makes tool calling aware of the name of the function selected. 

Fixes:
https://github.com/huggingface/text-generation-inference/issues/1657

Thank you @puppetm4st3r for the helpful snippets, large parts of this PR
are simply refactors of the code shared 🙏

**opening draft PR because small tweaks are needed before merging
											
										
										
											2024-04-16 13:02:46 +00:00
+								                    ..Default::default()
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
+								                },
-												fix: fix CohereForAI/c4ai-command-r-plus (#1707)

@Narsil @drbh this will update flash attention v2 and vllm.
You will need to re-install them.
											
										
										
											2024-04-10 15:20:25 +00:00
+								                target: "Hello, how are you?<|endoftext|>I'm doing great. How can I help you today?<|endoftext|>I'd like to show off how chat templating works!<|endoftext|>",
 								            },
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
+								        ];
 								        #[allow(unused_variables)] // name is unused
 								        for ChatTemplateTestItem {
 								            name,
 								            chat_template,
 								            input,
 								            target,
 								        } in test_default_templates
 								        {
 								            let mut env = Environment::new();
 								            env.add_function("raise_exception", raise_exception);
-												Upgrading to rust 1.78. (#1851)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
											
										
										
											2024-05-06 11:48:11 +00:00
+								            let tmpl = env.template_from_str(chat_template);
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
+								            let result = tmpl.unwrap().render(input).unwrap();
 								            assert_eq!(result, target);
 								        }
 								        let test_custom_templates = vec![
 								            ChatTemplateTestItem {
 								                name: "HuggingFaceH4/zephyr-7b-beta (add_generation_prompt=false)",
 								                chat_template: "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\\n'  + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
 								                input: ChatTemplateInputs {
 								                    messages: example_chat_with_system.clone(),
 								                    add_generation_prompt: false,
 								                    bos_token: Some(""),
-												fix: fix CohereForAI/c4ai-command-r-plus (#1707)

@Narsil @drbh this will update flash attention v2 and vllm.
You will need to re-install them.
											
										
										
											2024-04-10 15:20:25 +00:00
+								                    eos_token: Some("</s>"),
-												feat: improve tools to include name and add tests (#1693)

This PR makes tool calling aware of the name of the function selected. 

Fixes:
https://github.com/huggingface/text-generation-inference/issues/1657

Thank you @puppetm4st3r for the helpful snippets, large parts of this PR
are simply refactors of the code shared 🙏

**opening draft PR because small tweaks are needed before merging
											
										
										
											2024-04-16 13:02:46 +00:00
+								                    ..Default::default()
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
+								                },
 								                target: "<|system|>\nYou are a friendly chatbot who always responds in the style of a pirate</s><|user|>\nHello, how are you?</s><|assistant|>\nI'm doing great. How can I help you today?</s><|user|>\nI'd like to show off how chat templating works!</s>",
 								            },
 								            ChatTemplateTestItem {
 								                name: "HuggingFaceH4/zephyr-7b-beta (add_generation_prompt=true)",
 								                chat_template: "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\\n'  + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
 								                input: ChatTemplateInputs {
 								                    messages: vec![
-												feat: add SchedulerV3  (#1996)

- Refactor code to allow supporting multiple versions of the
generate.proto at the same time
- Add v3/generate.proto (ISO to generate.proto for now but allow for
future changes without impacting v2 backends)
- Add Schedule trait to abstract queuing and batching mechanisms that
will be different in the future
- Add SchedulerV2/V3 impl
											
										
										
											2024-06-04 13:56:56 +00:00
+								                        TextMessage {
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
+								                            role: "system".to_string(),
-												Fixing types. (#1906)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
											
										
										
											2024-05-16 14:59:05 +00:00
+								                            content: "You are a friendly chatbot who always responds in the style of a pirate".to_string(),
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
+								                        },
-												feat: add SchedulerV3  (#1996)

- Refactor code to allow supporting multiple versions of the
generate.proto at the same time
- Add v3/generate.proto (ISO to generate.proto for now but allow for
future changes without impacting v2 backends)
- Add Schedule trait to abstract queuing and batching mechanisms that
will be different in the future
- Add SchedulerV2/V3 impl
											
										
										
											2024-06-04 13:56:56 +00:00
+								                        TextMessage {
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
+								                            role: "user".to_string(),
-												Fixing types. (#1906)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
											
										
										
											2024-05-16 14:59:05 +00:00
+								                            content: "How many helicopters can a human eat in one sitting?".to_string(),
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
+								                        },
 								                    ],
 								                    add_generation_prompt: true,
 								                    bos_token: Some(""),
 								                    eos_token: Some("</s>"),
-												feat: improve tools to include name and add tests (#1693)

This PR makes tool calling aware of the name of the function selected. 

Fixes:
https://github.com/huggingface/text-generation-inference/issues/1657

Thank you @puppetm4st3r for the helpful snippets, large parts of this PR
are simply refactors of the code shared 🙏

**opening draft PR because small tweaks are needed before merging
											
										
										
											2024-04-16 13:02:46 +00:00
+								                    ..Default::default()
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
+								                },
-												fix: fix CohereForAI/c4ai-command-r-plus (#1707)

@Narsil @drbh this will update flash attention v2 and vllm.
You will need to re-install them.
											
										
										
											2024-04-10 15:20:25 +00:00
+								                target: "<|system|>\nYou are a friendly chatbot who always responds in the style of a pirate</s><|user|>\nHow many helicopters can a human eat in one sitting?</s><|assistant|>",
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
+								            },
 								            ChatTemplateTestItem {
 								                name: "HuggingFaceH4/zephyr-7b-gemma-v0.1",
 								                chat_template: "{% if messages[0]['role'] == 'user' or messages[0]['role'] == 'system' %}{{ bos_token }}{% endif %}{% for message in messages %}{{ '<|im_start|>' + message['role'] + '\\n' + message['content'] + '<|im_end|>' + '\\n' }}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% elif messages[-1]['role'] == 'assistant' %}{{ eos_token }}{% endif %}",
 								                input: ChatTemplateInputs {
 								                    messages: example_chat.clone(),
 								                    add_generation_prompt: false,
 								                    bos_token: Some("<bos>"),
 								                    eos_token: Some("<eos>"),
-												feat: improve tools to include name and add tests (#1693)

This PR makes tool calling aware of the name of the function selected. 

Fixes:
https://github.com/huggingface/text-generation-inference/issues/1657

Thank you @puppetm4st3r for the helpful snippets, large parts of this PR
are simply refactors of the code shared 🙏

**opening draft PR because small tweaks are needed before merging
											
										
										
											2024-04-16 13:02:46 +00:00
+								                    ..Default::default()
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
+								                },
 								                target: "<bos><|im_start|>user\nHello, how are you?<|im_end|>\n<|im_start|>assistant\nI'm doing great. How can I help you today?<|im_end|>\n<|im_start|>user\nI'd like to show off how chat templating works!<|im_end|>\n",
 								            },
 								            ChatTemplateTestItem {
 								                name: "mistralai/Mistral-7B-Instruct-v0.1",
 								                chat_template: "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token + ' ' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}",
 								                input: ChatTemplateInputs {
 								                    messages: example_chat.clone(),
 								                    add_generation_prompt: false,
 								                    bos_token: Some("<s>"),
 								                    eos_token: Some("</s>"),
-												feat: improve tools to include name and add tests (#1693)

This PR makes tool calling aware of the name of the function selected. 

Fixes:
https://github.com/huggingface/text-generation-inference/issues/1657

Thank you @puppetm4st3r for the helpful snippets, large parts of this PR
are simply refactors of the code shared 🙏

**opening draft PR because small tweaks are needed before merging
											
										
										
											2024-04-16 13:02:46 +00:00
+								                    ..Default::default()
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
+								                },
-												fix: fix CohereForAI/c4ai-command-r-plus (#1707)

@Narsil @drbh this will update flash attention v2 and vllm.
You will need to re-install them.
											
										
										
											2024-04-10 15:20:25 +00:00
+								                target: "<s>[INST] Hello, how are you? [/INST]I'm doing great. How can I help you today?</s> [INST] I'd like to show off how chat templating works! [/INST]",
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
+								            },
 								            ChatTemplateTestItem {
 								                name: "mistralai/Mixtral-8x7B-Instruct-v0.1",
 								                chat_template: "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}",
 								                input: ChatTemplateInputs {
 								                    messages: example_chat.clone(),
 								                    add_generation_prompt: false,
 								                    bos_token: Some("<s>"),
 								                    eos_token: Some("</s>"),
-												feat: improve tools to include name and add tests (#1693)

This PR makes tool calling aware of the name of the function selected. 

Fixes:
https://github.com/huggingface/text-generation-inference/issues/1657

Thank you @puppetm4st3r for the helpful snippets, large parts of this PR
are simply refactors of the code shared 🙏

**opening draft PR because small tweaks are needed before merging
											
										
										
											2024-04-16 13:02:46 +00:00
+								                    ..Default::default()
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
+								                },
 								                target: "<s>[INST] Hello, how are you? [/INST]I'm doing great. How can I help you today?</s>[INST] I'd like to show off how chat templating works! [/INST]",
 								            },
 								            ChatTemplateTestItem {
 								                name: "cognitivecomputations/dolphin-2.5-mixtral-8x7b",
 								                chat_template: "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\\n' + message['content'] + '<|im_end|>' + '\\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\\n' }}{% endif %}",
 								                input: ChatTemplateInputs {
-												fix: fix CohereForAI/c4ai-command-r-plus (#1707)

@Narsil @drbh this will update flash attention v2 and vllm.
You will need to re-install them.
											
										
										
											2024-04-10 15:20:25 +00:00
+								                    messages: example_chat.clone(),
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
+								                    add_generation_prompt: false,
 								                    bos_token: Some("<s>"),
 								                    eos_token: Some("</s>"),
-												feat: improve tools to include name and add tests (#1693)

This PR makes tool calling aware of the name of the function selected. 

Fixes:
https://github.com/huggingface/text-generation-inference/issues/1657

Thank you @puppetm4st3r for the helpful snippets, large parts of this PR
are simply refactors of the code shared 🙏

**opening draft PR because small tweaks are needed before merging
											
										
										
											2024-04-16 13:02:46 +00:00
+								                    ..Default::default()
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
+								                },
 								                target: "<|im_start|>user\nHello, how are you?<|im_end|>\n<|im_start|>assistant\nI'm doing great. How can I help you today?<|im_end|>\n<|im_start|>user\nI'd like to show off how chat templating works!<|im_end|>\n",
 								            },
 								            ChatTemplateTestItem {
 								                name: "openchat/openchat-3.5-0106",
 								                // `.title()` has been replaced with `| upper` in the following template
 								                chat_template: "{{ bos_token }}{% for message in messages %}{{ 'GPT4 Correct ' + (message['role'] | title) + ': ' + message['content'] + '<|end_of_turn|>'}}{% endfor %}{% if add_generation_prompt %}{{ 'GPT4 Correct Assistant:' }}{% endif %}",
 								                input: ChatTemplateInputs {
 								                    messages: example_chat.clone(),
 								                    add_generation_prompt: false,
 								                    bos_token: Some("<s>"),
 								                    eos_token: Some("</s>"),
-												feat: improve tools to include name and add tests (#1693)

This PR makes tool calling aware of the name of the function selected. 

Fixes:
https://github.com/huggingface/text-generation-inference/issues/1657

Thank you @puppetm4st3r for the helpful snippets, large parts of this PR
are simply refactors of the code shared 🙏

**opening draft PR because small tweaks are needed before merging
											
										
										
											2024-04-16 13:02:46 +00:00
+								                    ..Default::default()
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
+								                },
 								                target: "<s>GPT4 Correct User: Hello, how are you?<|end_of_turn|>GPT4 Correct Assistant: I'm doing great. How can I help you today?<|end_of_turn|>GPT4 Correct User: I'd like to show off how chat templating works!<|end_of_turn|>",
 								            },
 								            ChatTemplateTestItem {
 								                name: "upstage/SOLAR-10.7B-Instruct-v1.0",
 								                chat_template: "{% for message in messages %}{{ message.content }}{{ eos_token }}{% endfor %}",
 								                input: ChatTemplateInputs {
 								                    messages: example_chat.clone(),
 								                    add_generation_prompt: false,
 								                    bos_token: Some("<s>"),
 								                    eos_token: Some("</s>"),
-												feat: improve tools to include name and add tests (#1693)

This PR makes tool calling aware of the name of the function selected. 

Fixes:
https://github.com/huggingface/text-generation-inference/issues/1657

Thank you @puppetm4st3r for the helpful snippets, large parts of this PR
are simply refactors of the code shared 🙏

**opening draft PR because small tweaks are needed before merging
											
										
										
											2024-04-16 13:02:46 +00:00
+								                    ..Default::default()
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
+								                },
 								                target: "Hello, how are you?</s>I'm doing great. How can I help you today?</s>I'd like to show off how chat templating works!</s>",
 								            },
 								            ChatTemplateTestItem {
 								                name: "codellama/CodeLlama-70b-Instruct-hf",
 								                // NOTE: `.strip()` has been replaced with `| trim` in the following template
 								                chat_template: "{% if messages[0]['role'] == 'system' %}{% set user_index = 1 %}{% else %}{% set user_index = 0 %}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != ((loop.index0 + user_index) % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 %}{{ '<s>' }}{% endif %}{% set content = 'Source: ' + message['role'] + '\\n\\n ' + message['content'] | trim %}{{ content + ' <step> ' }}{% endfor %}{{'Source: assistant\\nDestination: user\\n\\n '}}",
 								                input: ChatTemplateInputs {
 								                    messages: example_chat.clone(),
 								                    add_generation_prompt: false,
 								                    bos_token: Some("<s>"),
 								                    eos_token: Some("</s>"),
-												feat: improve tools to include name and add tests (#1693)

This PR makes tool calling aware of the name of the function selected. 

Fixes:
https://github.com/huggingface/text-generation-inference/issues/1657

Thank you @puppetm4st3r for the helpful snippets, large parts of this PR
are simply refactors of the code shared 🙏

**opening draft PR because small tweaks are needed before merging
											
										
										
											2024-04-16 13:02:46 +00:00
+								                    ..Default::default()
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
+								                },
 								                target: "<s>Source: user\n\n Hello, how are you? <step> Source: assistant\n\n I'm doing great. How can I help you today? <step> Source: user\n\n I'd like to show off how chat templating works! <step> Source: assistant\nDestination: user\n\n ",
 								            },
 								            ChatTemplateTestItem {
 								                name: "Deci/DeciLM-7B-instruct",
 								                chat_template: "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '### User:\\n' + message['content'] }}\n{% elif message['role'] == 'system' %}\n{{ '### System:\\n' + message['content'] }}\n{% elif message['role'] == 'assistant' %}\n{{ '### Assistant:\\n'  + message['content'] }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '### Assistant:' }}\n{% endif %}\n{% endfor %}",
 								                input: ChatTemplateInputs {
 								                    messages: example_chat.clone(),
 								                    add_generation_prompt: false,
 								                    bos_token: Some("<s>"),
 								                    eos_token: Some("</s>"),
-												feat: improve tools to include name and add tests (#1693)

This PR makes tool calling aware of the name of the function selected. 

Fixes:
https://github.com/huggingface/text-generation-inference/issues/1657

Thank you @puppetm4st3r for the helpful snippets, large parts of this PR
are simply refactors of the code shared 🙏

**opening draft PR because small tweaks are needed before merging
											
										
										
											2024-04-16 13:02:46 +00:00
+								                    ..Default::default()
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
+								                },
 								                target: "### User:\nHello, how are you?### Assistant:\nI'm doing great. How can I help you today?### User:\nI'd like to show off how chat templating works!",
 								            },
 								            ChatTemplateTestItem {
 								                name: "Qwen/Qwen1.5-72B-Chat",
 								                chat_template: "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\\nYou are a helpful assistant<|im_end|>\\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\\n' + message['content']}}{% if (loop.last and add_generation_prompt) or not loop.last %}{{ '<|im_end|>' + '\\n'}}{% endif %}{% endfor %}{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{ '<|im_start|>assistant\\n' }}{% endif %}",
 								                input: ChatTemplateInputs {
 								                    messages: example_chat.clone(),
 								                    add_generation_prompt: false,
 								                    bos_token: Some("<s>"),
 								                    eos_token: Some("</s>"),
-												feat: improve tools to include name and add tests (#1693)

This PR makes tool calling aware of the name of the function selected. 

Fixes:
https://github.com/huggingface/text-generation-inference/issues/1657

Thank you @puppetm4st3r for the helpful snippets, large parts of this PR
are simply refactors of the code shared 🙏

**opening draft PR because small tweaks are needed before merging
											
										
										
											2024-04-16 13:02:46 +00:00
+								                    ..Default::default()
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
+								                },
 								                target: "<|im_start|>system\nYou are a helpful assistant<|im_end|>\n<|im_start|>user\nHello, how are you?<|im_end|>\n<|im_start|>assistant\nI'm doing great. How can I help you today?<|im_end|>\n<|im_start|>user\nI'd like to show off how chat templating works!",
 								            },
 								            ChatTemplateTestItem {
 								                name: "deepseek-ai/deepseek-llm-7b-chat",
 								                chat_template: "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{{ bos_token }}{% for message in messages %}{% if message['role'] == 'user' %}{{ 'User: ' + message['content'] + '\\n\\n' }}{% elif message['role'] == 'assistant' %}{{ 'Assistant: ' + message['content'] + eos_token }}{% elif message['role'] == 'system' %}{{ message['content'] + '\\n\\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'Assistant:' }}{% endif %}",
 								                input: ChatTemplateInputs {
 								                    messages: example_chat.clone(),
 								                    add_generation_prompt: false,
 								                    bos_token: Some("<｜begin▁of▁sentence｜>"),
 								                    eos_token: Some("<｜end▁of▁sentence｜>"),
-												feat: improve tools to include name and add tests (#1693)

This PR makes tool calling aware of the name of the function selected. 

Fixes:
https://github.com/huggingface/text-generation-inference/issues/1657

Thank you @puppetm4st3r for the helpful snippets, large parts of this PR
are simply refactors of the code shared 🙏

**opening draft PR because small tweaks are needed before merging
											
										
										
											2024-04-16 13:02:46 +00:00
+								                    ..Default::default()
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
+								                },
 								                target: "<｜begin▁of▁sentence｜>User: Hello, how are you?\n\nAssistant: I'm doing great. How can I help you today?<｜end▁of▁sentence｜>User: I'd like to show off how chat templating works!\n\n",
 								            },
 								            ChatTemplateTestItem {
 								                name: "h2oai/h2o-danube-1.8b-chat",
 								                chat_template: "{% for message in messages %}{% if message['role'] == 'user' %}{{ '<|prompt|>' + message['content'] + eos_token }}{% elif message['role'] == 'system' %}{{ '<|system|>' + message['content'] + eos_token }}{% elif message['role'] == 'assistant' %}{{ '<|answer|>'  + message['content'] + eos_token }}{% endif %}{% if loop.last and add_generation_prompt %}{{ '<|answer|>' }}{% endif %}{% endfor %}",
 								                input: ChatTemplateInputs {
 								                    messages: example_chat.clone(),
 								                    add_generation_prompt: false,
 								                    bos_token: Some("<s>"),
 								                    eos_token: Some("</s>"),
-												feat: improve tools to include name and add tests (#1693)

This PR makes tool calling aware of the name of the function selected. 

Fixes:
https://github.com/huggingface/text-generation-inference/issues/1657

Thank you @puppetm4st3r for the helpful snippets, large parts of this PR
are simply refactors of the code shared 🙏

**opening draft PR because small tweaks are needed before merging
											
										
										
											2024-04-16 13:02:46 +00:00
+								                    ..Default::default()
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
+								                },
-												fix: fix CohereForAI/c4ai-command-r-plus (#1707)

@Narsil @drbh this will update flash attention v2 and vllm.
You will need to re-install them.
											
										
										
											2024-04-10 15:20:25 +00:00
+								                target: "<|prompt|>Hello, how are you?</s><|answer|>I'm doing great. How can I help you today?</s><|prompt|>I'd like to show off how chat templating works!</s>",
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
+								            },
 								            ChatTemplateTestItem {
 								                name: "internlm/internlm2-chat-7b",
 								                chat_template: "{% if messages[0]['role'] == 'user' or messages[0]['role'] == 'system' %}{{ bos_token }}{% endif %}{% for message in messages %}{{ '<|im_start|>' + message['role'] + '\\n' + message['content'] + '<|im_end|>' + '\\n' }}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\\n' }}{% elif messages[-1]['role'] == 'assistant' %}{{ eos_token }}{% endif %}",
 								                input: ChatTemplateInputs {
 								                    messages: example_chat.clone(),
 								                    add_generation_prompt: false,
 								                    bos_token: Some("<s>"),
 								                    eos_token: Some("</s>"),
-												feat: improve tools to include name and add tests (#1693)

This PR makes tool calling aware of the name of the function selected. 

Fixes:
https://github.com/huggingface/text-generation-inference/issues/1657

Thank you @puppetm4st3r for the helpful snippets, large parts of this PR
are simply refactors of the code shared 🙏

**opening draft PR because small tweaks are needed before merging
											
										
										
											2024-04-16 13:02:46 +00:00
+								                    ..Default::default()
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
+								                },
 								                target: "<s><|im_start|>user\nHello, how are you?<|im_end|>\n<|im_start|>assistant\nI'm doing great. How can I help you today?<|im_end|>\n<|im_start|>user\nI'd like to show off how chat templating works!<|im_end|>\n",
 								            },
 								            ChatTemplateTestItem {
 								                name: "TheBloke/deepseek-coder-33B-instruct-AWQ",
 								                chat_template: "{%- set found_item = false -%}\n{%- for message in messages -%}\n    {%- if message['role'] == 'system' -%}\n        {%- set found_item = true -%}\n    {%- endif -%}\n{%- endfor -%}\n{%- if not found_item -%}\n{{'You are an AI programming assistant, utilizing the Deepseek Coder model, developed by Deepseek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer.\\n'}}\n{%- endif %}\n{%- for message in messages %}\n    {%- if message['role'] == 'system' %}\n{{ message['content'] }}\n    {%- else %}\n        {%- if message['role'] == 'user' %}\n{{'### Instruction:\\n' + message['content'] + '\\n'}}\n        {%- else %}\n{{'### Response:\\n' + message['content'] + '\\n<|EOT|>\\n'}}\n        {%- endif %}\n    {%- endif %}\n{%- endfor %}\n{{'### Response:\\n'}}\n",
 								                input: ChatTemplateInputs {
 								                    messages: example_chat.clone(),
 								                    add_generation_prompt: false,
 								                    bos_token: Some("<｜begin▁of▁sentence｜>"),
 								                    eos_token: Some("<|EOT|>"),
-												feat: improve tools to include name and add tests (#1693)

This PR makes tool calling aware of the name of the function selected. 

Fixes:
https://github.com/huggingface/text-generation-inference/issues/1657

Thank you @puppetm4st3r for the helpful snippets, large parts of this PR
are simply refactors of the code shared 🙏

**opening draft PR because small tweaks are needed before merging
											
										
										
											2024-04-16 13:02:46 +00:00
+								                    ..Default::default()
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
+								                },
 								                target: "You are an AI programming assistant, utilizing the Deepseek Coder model, developed by Deepseek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer.\n### Instruction:\nHello, how are you?\n### Response:\nI'm doing great. How can I help you today?\n<|EOT|>\n### Instruction:\nI'd like to show off how chat templating works!\n### Response:\n",
 								            },
 								            ChatTemplateTestItem {
 								                name: "ericzzz/falcon-rw-1b-chat",
 								                // `.strip()` has been replaced with `| trim` in the following template
 								                chat_template: "{% for message in messages %}{% if loop.index > 1 and loop.previtem['role'] != 'assistant' %}{{ ' ' }}{% endif %}{% if message['role'] == 'system' %}{{ '[SYS] ' + message['content'] | trim }}{% elif message['role'] == 'user' %}{{ '[INST] ' + message['content'] | trim }}{% elif message['role'] == 'assistant' %}{{ '[RESP] '  + message['content'] + eos_token }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ ' [RESP] ' }}{% endif %}",
 								                input: ChatTemplateInputs {
 								                    messages: example_chat.clone(),
 								                    add_generation_prompt: false,
 								                    bos_token: Some("<|endoftext|>"),
 								                    eos_token: Some("<|endoftext|>"),
-												feat: improve tools to include name and add tests (#1693)

This PR makes tool calling aware of the name of the function selected. 

Fixes:
https://github.com/huggingface/text-generation-inference/issues/1657

Thank you @puppetm4st3r for the helpful snippets, large parts of this PR
are simply refactors of the code shared 🙏

**opening draft PR because small tweaks are needed before merging
											
										
										
											2024-04-16 13:02:46 +00:00
+								                    ..Default::default()
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
+								                },
 								                target: "[INST] Hello, how are you? [RESP] I'm doing great. How can I help you today?<|endoftext|>[INST] I'd like to show off how chat templating works!",
 								            },
 								            ChatTemplateTestItem {
 								                name: "abacusai/Smaug-34B-v0.1",
 								                chat_template: "{%- for idx in range(0, messages|length) -%}\n{%- if messages[idx]['role'] == 'user' -%}\n{%- if idx > 1 -%}\n{{- bos_token + '[INST] ' + messages[idx]['content'] + ' [/INST]' -}}\n{%- else -%}\n{{- messages[idx]['content'] + ' [/INST]' -}}\n{%- endif -%}\n{% elif messages[idx]['role'] == 'system' %}\n{{- '[INST] <<SYS>>\\n' + messages[idx]['content'] + '\\n<</SYS>>\\n\\n' -}}\n{%- elif messages[idx]['role'] == 'assistant' -%}\n{{- ' '  + messages[idx]['content'] + ' ' + eos_token -}}\n{% endif %}\n{% endfor %}",
 								                input: ChatTemplateInputs {
 								                    messages: example_chat.clone(),
 								                    add_generation_prompt: false,
 								                    bos_token: Some("<s>"),
 								                    eos_token: Some("</s>"),
-												feat: improve tools to include name and add tests (#1693)

This PR makes tool calling aware of the name of the function selected. 

Fixes:
https://github.com/huggingface/text-generation-inference/issues/1657

Thank you @puppetm4st3r for the helpful snippets, large parts of this PR
are simply refactors of the code shared 🙏

**opening draft PR because small tweaks are needed before merging
											
										
										
											2024-04-16 13:02:46 +00:00
+								                    ..Default::default()
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
+								                },
 								                target: "Hello, how are you? [/INST] I'm doing great. How can I help you today? </s><s>[INST] I'd like to show off how chat templating works! [/INST]",
 								            },
 								            ChatTemplateTestItem {
 								                name: "maywell/Synatra-Mixtral-8x7B",
 								                chat_template: "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n{% for message in messages %}{% if message['role'] == 'user' %}### Instruction:\n{{ message['content']|trim -}}{% if not loop.last %}{% endif %}\n{% elif message['role'] == 'assistant' %}### Response:\n{{ message['content']|trim -}}{% if not loop.last %}{% endif %}\n{% elif message['role'] == 'system' %}{{ message['content']|trim -}}{% if not loop.last %}{% endif %}\n{% endif %}\n{% endfor %}\n{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}\n### Response:\n{% endif %}",
 								                input: ChatTemplateInputs {
 								                    messages: example_chat.clone(),
 								                    add_generation_prompt: false,
 								                    bos_token: Some("<s>"),
 								                    eos_token: Some("</s>"),
-												feat: improve tools to include name and add tests (#1693)

This PR makes tool calling aware of the name of the function selected. 

Fixes:
https://github.com/huggingface/text-generation-inference/issues/1657

Thank you @puppetm4st3r for the helpful snippets, large parts of this PR
are simply refactors of the code shared 🙏

**opening draft PR because small tweaks are needed before merging
											
										
										
											2024-04-16 13:02:46 +00:00
+								                    ..Default::default()
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
+								                },
 								                target: "Below is an instruction that describes a task. Write a response that appropriately completes the request.### Instruction:Hello, how are you?### Response:I'm doing great. How can I help you today?### Instruction:I'd like to show off how chat templating works!",
 								            },
 								            ChatTemplateTestItem {
 								                name: "deepseek-ai/deepseek-coder-33b-instruct",
 								                chat_template: "{% if not add_generation_prompt is defined %}\n{% set add_generation_prompt = false %}\n{% endif %}\n{%- set ns = namespace(found=false) -%}\n{%- for message in messages -%}\n    {%- if message['role'] == 'system' -%}\n        {%- set ns.found = true -%}\n    {%- endif -%}\n{%- endfor -%}\n{{bos_token}}{%- if not ns.found -%}\n{{'You are an AI programming assistant, utilizing the Deepseek Coder model, developed by Deepseek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer\\n'}}\n{%- endif %}\n{%- for message in messages %}\n    {%- if message['role'] == 'system' %}\n{{ message['content'] }}\n    {%- else %}\n        {%- if message['role'] == 'user' %}\n{{'### Instruction:\\n' + message['content'] + '\\n'}}\n        {%- else %}\n{{'### Response:\\n' + message['content'] + '\\n<|EOT|>\\n'}}\n        {%- endif %}\n    {%- endif %}\n{%- endfor %}\n{% if add_generation_prompt %}\n{{'### Response:'}}\n{% endif %}",
 								                input: ChatTemplateInputs {
 								                    messages: example_chat.clone(),
 								                    add_generation_prompt: false,
 								                    bos_token: Some("<｜begin▁of▁sentence｜>"),
 								                    eos_token: Some("</EOT>"),
-												feat: improve tools to include name and add tests (#1693)

This PR makes tool calling aware of the name of the function selected. 

Fixes:
https://github.com/huggingface/text-generation-inference/issues/1657

Thank you @puppetm4st3r for the helpful snippets, large parts of this PR
are simply refactors of the code shared 🙏

**opening draft PR because small tweaks are needed before merging
											
										
										
											2024-04-16 13:02:46 +00:00
+								                    ..Default::default()
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
+								                },
 								                target: "<｜begin▁of▁sentence｜>You are an AI programming assistant, utilizing the Deepseek Coder model, developed by Deepseek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer\n### Instruction:\nHello, how are you?\n### Response:\nI'm doing great. How can I help you today?\n<|EOT|>\n### Instruction:\nI'd like to show off how chat templating works!\n",
 								            },
 								            // NOT INCLUDED
-												feat: add SchedulerV3  (#1996)

- Refactor code to allow supporting multiple versions of the
generate.proto at the same time
- Add v3/generate.proto (ISO to generate.proto for now but allow for
future changes without impacting v2 backends)
- Add Schedule trait to abstract queuing and batching mechanisms that
will be different in the future
- Add SchedulerV2/V3 impl
											
										
										
											2024-06-04 13:56:56 +00:00
+								            // - meetkai/functionary-medium-v3.2
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
+								            // - fireworks-ai/firefunction-v1
 								            // https://github
 								            ChatTemplateTestItem {
 								                name: "maywell/PiVoT-MoE",
 								                chat_template: "{{ (messages|selectattr('role', 'equalto', 'system')|list|last).content|trim if (messages|selectattr('role', 'equalto', 'system')|list) else '' }}{% for message in messages %}{% if message['role'] == 'system' %}{{ message['content']|trim }}{% elif message['role'] == 'user' %}### Instruction: {{ message['content']|trim }}{% elif message['role'] == 'assistant' %}### Response: {{ message['content']|trim }}{% elif message['role'] == 'user_context' %}### Input: {{ message['content']|trim }}{% endif %}{% if not loop.last %}\n{% endif %}{% endfor %}{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}### Response:{% endif %}",
 								                input: ChatTemplateInputs {
 								                    messages: example_chat_with_system.clone(),
 								                    add_generation_prompt: false,
 								                    bos_token: Some("<s>"),
 								                    eos_token: Some("</s>"),
-												feat: improve tools to include name and add tests (#1693)

This PR makes tool calling aware of the name of the function selected. 

Fixes:
https://github.com/huggingface/text-generation-inference/issues/1657

Thank you @puppetm4st3r for the helpful snippets, large parts of this PR
are simply refactors of the code shared 🙏

**opening draft PR because small tweaks are needed before merging
											
										
										
											2024-04-16 13:02:46 +00:00
+								                    ..Default::default()
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
+								                },
 								                target: "You are a friendly chatbot who always responds in the style of a pirateYou are a friendly chatbot who always responds in the style of a pirate### Instruction: Hello, how are you?### Response: I'm doing great. How can I help you today?### Instruction: I'd like to show off how chat templating works!",
-												fix: fix CohereForAI/c4ai-command-r-plus (#1707)

@Narsil @drbh this will update flash attention v2 and vllm.
You will need to re-install them.
											
										
										
											2024-04-10 15:20:25 +00:00
+								            },
-												feat: bump minijina and add test for core templates (#1626)

This PR bumps `minijinja` and adds tests for all core models as
identified by @xenova 🙏

Inspiration:
https://github.com/huggingface/huggingface.js/blob/main/packages/jinja/test/e2e.test.js

TODO:
- [X] add new test to iterate over known templates
- [X] add default templates
- [x] add custom templates
											
										
										
											2024-03-20 13:13:46 +00:00
+								        ];
 								        #[allow(unused_variables)] // name is unused
 								        for ChatTemplateTestItem {
 								            name,
 								            chat_template,
 								            input,
 								            target,
 								        } in test_custom_templates
 								        {
 								            let mut env = Environment::new();
 								            env.add_function("raise_exception", raise_exception);
 								            // trim all the whitespace
 								            let chat_template = chat_template
 								                .lines()
 								                .map(|line| line.trim())
 								                .collect::<Vec<&str>>()
 								                .join("");
 								            let tmpl = env.template_from_str(&chat_template);
 								            let result = tmpl.unwrap().render(input).unwrap();
 								            assert_eq!(result, target);
 								        }
 								    }
-												feat: support raise_exception, bos and eos tokens (#1450)

This PR adds support to handle the custom jinja function
`raise_exception` and passes the `bos` and `eos` tokens into the
template

Additionally this PR adds 3 tests to validate and show examples of what
can and cannot be parsed currently.

```bash
cargo test --package text-generation-router --lib -- infer::tests --nocapture
#     Finished test [unoptimized + debuginfo] target(s) in 7.82s
#      Running unittests src/lib.rs (target/debug/deps/text_generation_router-18a0bbf99c2ca1b4)

# running 3 tests
# test infer::tests::test_chat_template_valid_with_raise ... ok
# test infer::tests::test_chat_template ... ok
# test infer::tests::test_chat_template_invalid_with_raise ... ok

# test result: ok. 3 passed; 0 failed; 0 ignored; 0 measured; 15 filtered out; finished in 0.00s
```
											
										
										
											2024-01-18 11:31:56 +00:00
+								}