fix: add missing stop parameter for chat request (#1619)

This PR adds the missing `stop` parameter to the `ChatRequest` struct, which allows callers to specify a list of stop sequences.
2025-09-16 23:04:52 +00:00 · 2024-03-01 12:08:11 -05:00 · 2024-03-01 12:08:11 -05:00 · dc7c69e887
commit dc7c69e887
parent 5a2a0ca0c0
2 changed files with 7 additions and 1 deletion
--- a/router/src/lib.rs
+++ b/router/src/lib.rs
@ -611,6 +611,11 @@ pub(crate) struct ChatRequest {
    #[schema(nullable = true, example = 0.1)]
    pub presence_penalty: Option<f32>,

+    /// Up to 4 sequences where the API will stop generating further tokens.
+    #[serde(default)]
+    #[schema(nullable = true, example = "null")]
+    pub stop: Option<Vec<String>>,
+
    #[serde(default = "bool::default")]
    pub stream: bool,

--- a/router/src/server.rs
+++ b/router/src/server.rs
@ -765,6 +765,7 @@ async fn chat_completions(
        .map(|x| x + 2.0);
    let logprobs = req.logprobs.unwrap_or(false);
    let seed = req.seed;
+    let stop = req.stop.unwrap_or_default();

    // apply chat template to flatten the request into a single input
    let mut inputs = match infer.apply_chat_template(req.messages) {
@ -852,7 +853,7 @@ async fn chat_completions(
            do_sample: true,
            max_new_tokens,
            return_full_text: None,
-            stop: Vec::new(),
+            stop,
            truncate: None,
            watermark: false,
            details: true,