From f3c5d7d92f005c3cd6a801a33334fb9ba32f55f8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?= <me@danieldk.eu>
Date: Fri, 23 Aug 2024 22:06:22 +0200
Subject: [PATCH 01/37] nix: add default package (#2453)

The default package wraps the launcher and puts the server/router in the
path.

As a result, TGI can be started using something like:

```
nix run .# -- \
  --model-id hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4 \
  --port 8080
```
---
 flake.nix | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/flake.nix b/flake.nix
index 1c9c77a9..83feb26a 100644
--- a/flake.nix
+++ b/flake.nix
@@ -99,6 +99,17 @@
             '';
           };
         };
+
+        packages.default = pkgs.writeShellApplication {
+          name = "text-generation-inference";
+          runtimeInputs = [
+            server
+            router
+          ];
+          text = ''
+            ${launcher}/bin/text-generation-launcher "$@"
+          '';
+        };
       }
     );
 }

From 30be188400d27b6fedd88cb3dfd88de45639703c Mon Sep 17 00:00:00 2001
From: drbh <david.richard.holtz@gmail.com>
Date: Mon, 26 Aug 2024 17:04:46 -0400
Subject: [PATCH 02/37] Fix: don't apply post layernorm in
 SiglipVisionTransformer (#2459)

* Fix: don't apply post layernorm in SiglipVisionTransformer

This fixes a bug with LLaVA Next when using Siglip as the vision model. LLaVA Next expects the output of the vision model to be the encoder outputs before layernorm (see original transformers implementation here: https://github.com/huggingface/transformers/blob/main/src/transformers/models/llava_next/modeling_llava_next.py#L813).

This also makes Siglip consistent with the existing Clip implementation:

https://github.com/huggingface/text-generation-inference/blob/main/server/text_generation_server/models/custom_modeling/clip.py#L613

* fix: adjust pali gemma for post layer norm and small refactors

---------

Co-authored-by: Travis Addair <tgaddair@gmail.com>
---
 .../custom_modeling/flash_pali_gemma_modeling.py    | 10 +++++++++-
 .../models/custom_modeling/siglip.py                | 13 +------------
 2 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/server/text_generation_server/models/custom_modeling/flash_pali_gemma_modeling.py b/server/text_generation_server/models/custom_modeling/flash_pali_gemma_modeling.py
index d10efb41..e08a2aad 100644
--- a/server/text_generation_server/models/custom_modeling/flash_pali_gemma_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_pali_gemma_modeling.py
@@ -34,6 +34,11 @@ class PaliGemmaForConditionalGeneration(nn.Module):
             config=config.vision_config,
             weights=weights,
         )
+        self.post_vision_tower_layernorm = nn.LayerNorm.load(
+            prefix="vision_tower.vision_model.post_layernorm",
+            weights=weights,
+            eps=config.vision_config.layer_norm_eps,
+        )
 
         self.multi_modal_projector = TensorParallelColumnLinear.load(
             config,
@@ -84,7 +89,10 @@ class PaliGemmaForConditionalGeneration(nn.Module):
         if pixel_values is not None:
             pixel_values = pixel_values.to(dtype=inputs_embeds.dtype)
             image_outputs = self.vision_tower(pixel_values)
-            image_features = self.multi_modal_projector(image_outputs.last_hidden_state)
+            last_hidden_state = self.post_vision_tower_layernorm(
+                image_outputs.last_hidden_state
+            )
+            image_features = self.multi_modal_projector(last_hidden_state)
 
             # mask where image or padding tokens
             mask = input_ids == self.config.image_token_index
diff --git a/server/text_generation_server/models/custom_modeling/siglip.py b/server/text_generation_server/models/custom_modeling/siglip.py
index 480d0f9f..95ac9ede 100644
--- a/server/text_generation_server/models/custom_modeling/siglip.py
+++ b/server/text_generation_server/models/custom_modeling/siglip.py
@@ -364,7 +364,6 @@ class SiglipEncoder(nn.Module):
         inputs_embeds,
         attention_mask: Optional[torch.Tensor] = None,
     ):
-
         hidden_states = inputs_embeds
         for idx, encoder_layer in enumerate(self.layers):
             hidden_states, _ = encoder_layer(
@@ -386,20 +385,11 @@ class SiglipVisionTransformer(nn.Module):
         self.encoder = SiglipEncoder(
             prefix=f"{prefix}.encoder", config=config, weights=weights
         )
-        self.post_layernorm = nn.LayerNorm.load(
-            prefix=f"{prefix}.post_layernorm",
-            weights=weights,
-            eps=config.layer_norm_eps,
-        )
 
     def forward(
         self,
         pixel_values: Optional[torch.FloatTensor] = None,
     ):
-        r"""
-        Returns:
-
-        """
         if pixel_values is None:
             raise ValueError("You have to specify pixel_values")
 
@@ -412,10 +402,9 @@ class SiglipVisionTransformer(nn.Module):
             inputs_embeds=hidden_states,
         )
         last_hidden_state = encoder_outputs
-        post_last_hidden_state = self.post_layernorm(last_hidden_state)
 
         return BaseModelOutputWithPooling(
-            last_hidden_state=post_last_hidden_state,
+            last_hidden_state=last_hidden_state,
             # pooler_output=pooled_output,
             # hidden_states=encoder_outputs,
         )

From cfa73b5c99bc009903fbc340f8b77a6d4674455d Mon Sep 17 00:00:00 2001
From: drbh <david.richard.holtz@gmail.com>
Date: Mon, 26 Aug 2024 20:19:38 -0400
Subject: [PATCH 03/37] Pr 2451 ci branch (#2454)

* fix[router]: Fix tools not passed in chat template

Signed-off-by: GitHub <noreply@github.com>

* feat: improve default tool serialization and lints

* feat: refactor tool logic to include notify_error in prompt and adjust typing

* fix: adjust non tool template apply

* fix: simplify tool grammar logic and improve schema

* feat: avoid skip tool test and avoid empty tool prompts

* fix: increase test client timeout for grammar compilation tests

---------

Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: Simone Rossi <simone.rossi.93@gmail.com>
---
 Cargo.lock                                   |   1 +
 clients/python/text_generation/client.py     |   7 +-
 docs/openapi.json                            |   2 +-
 integration-tests/conftest.py                |   2 +-
 integration-tests/models/test_tools_llama.py |  50 +++---
 router/Cargo.toml                            |   2 +-
 router/src/infer/chat_template.rs            |  57 ++++---
 router/src/infer/mod.rs                      |   6 +-
 router/src/infer/tool_grammar.rs             | 121 +++++++-------
 router/src/lib.rs                            |  15 +-
 router/src/server.rs                         | 160 ++++++++++++++++---
 11 files changed, 268 insertions(+), 155 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index d298c379..aa5cb642 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2174,6 +2174,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "45f7e8e35b6c7b169bf40b0176d2c79291ab8ee53290b84e0668ab21d841aa9d"
 dependencies = [
  "serde",
+ "serde_json",
 ]
 
 [[package]]
diff --git a/clients/python/text_generation/client.py b/clients/python/text_generation/client.py
index 12966747..45301b63 100644
--- a/clients/python/text_generation/client.py
+++ b/clients/python/text_generation/client.py
@@ -757,7 +757,12 @@ class AsyncClient:
                         continue
                     payload = byte_payload.decode("utf-8")
                     if payload.startswith("data:"):
-                        json_payload = json.loads(payload.lstrip("data:").rstrip("\n"))
+                        payload_data = (
+                            payload.lstrip("data:").rstrip("\n").removeprefix(" ")
+                        )
+                        if payload_data == "[DONE]":
+                            break
+                        json_payload = json.loads(payload_data)
                         try:
                             response = ChatCompletionChunk(**json_payload)
                             yield response
diff --git a/docs/openapi.json b/docs/openapi.json
index df21e19d..fd64a3ab 100644
--- a/docs/openapi.json
+++ b/docs/openapi.json
@@ -924,7 +924,7 @@
           "tool_prompt": {
             "type": "string",
             "description": "A prompt to be appended before the tools",
-            "example": "\"You will be presented with a JSON schema representing a set of tools.\nIf the user request lacks of sufficient information to make a precise tool selection: Do not invent any tool's properties, instead notify with an error message.\n\nJSON Schema:\n\"",
+            "example": "Given the functions available, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. Respond in the format {name: function name, parameters: dictionary of argument name and its value}.Do not use variables.",
             "nullable": true
           },
           "tools": {
diff --git a/integration-tests/conftest.py b/integration-tests/conftest.py
index 15af1cad..a8a77cd2 100644
--- a/integration-tests/conftest.py
+++ b/integration-tests/conftest.py
@@ -257,7 +257,7 @@ class IgnoreLogProbResponseComparator(ResponseComparator):
 
 class LauncherHandle:
     def __init__(self, port: int):
-        self.client = AsyncClient(f"http://localhost:{port}")
+        self.client = AsyncClient(f"http://localhost:{port}", timeout=30)
 
     def _inner_health(self):
         raise NotImplementedError
diff --git a/integration-tests/models/test_tools_llama.py b/integration-tests/models/test_tools_llama.py
index f831990a..9855cfda 100644
--- a/integration-tests/models/test_tools_llama.py
+++ b/integration-tests/models/test_tools_llama.py
@@ -36,6 +36,7 @@ tools = [
                     },
                 },
                 "required": ["location", "format"],
+                "additionalProperties": False,
             },
         },
     },
@@ -62,13 +63,13 @@ tools = [
                     },
                 },
                 "required": ["location", "format", "num_days"],
+                "additionalProperties": False,
             },
         },
     },
 ]
 
 
-@pytest.mark.skip(reason="Takes too long to run")
 @pytest.mark.asyncio
 @pytest.mark.private
 async def test_flash_llama_grammar_tools(flash_llama_grammar_tools, response_snapshot):
@@ -76,7 +77,7 @@ async def test_flash_llama_grammar_tools(flash_llama_grammar_tools, response_sna
         max_tokens=100,
         seed=1,
         tools=tools,
-        presence_penalty=-1.1,
+        temperature=0.0,
         messages=[
             {
                 "role": "system",
@@ -91,19 +92,18 @@ async def test_flash_llama_grammar_tools(flash_llama_grammar_tools, response_sna
     assert response.choices[0].message.content is None
     assert response.choices[0].message.tool_calls == [
         {
-            "id": 0,
+            "id": "0",
             "type": "function",
             "function": {
                 "description": None,
                 "name": "get_current_weather",
-                "arguments": {"format": "celsius", "location": "New York, NY"},
+                "arguments": {"format": "celsius", "location": "Brooklyn, NY"},
             },
         }
     ]
     assert response == response_snapshot
 
 
-@pytest.mark.skip(reason="Takes too long to run")
 @pytest.mark.asyncio
 @pytest.mark.private
 async def test_flash_llama_grammar_tools_auto(
@@ -113,8 +113,8 @@ async def test_flash_llama_grammar_tools_auto(
         max_tokens=100,
         seed=1,
         tools=tools,
+        temperature=0.0,
         tool_choice="auto",
-        presence_penalty=-1.1,
         messages=[
             {
                 "role": "system",
@@ -129,12 +129,12 @@ async def test_flash_llama_grammar_tools_auto(
     assert response.choices[0].message.content is None
     assert response.choices[0].message.tool_calls == [
         {
-            "id": 0,
+            "id": "0",
             "type": "function",
             "function": {
                 "description": None,
                 "name": "get_current_weather",
-                "arguments": {"format": "celsius", "location": "New York, NY"},
+                "arguments": {"format": "celsius", "location": "Brooklyn, NY"},
             },
         }
     ]
@@ -142,7 +142,6 @@ async def test_flash_llama_grammar_tools_auto(
     assert response == response_snapshot
 
 
-@pytest.mark.skip(reason="Takes too long to run")
 @pytest.mark.asyncio
 @pytest.mark.private
 async def test_flash_llama_grammar_tools_choice(
@@ -152,8 +151,8 @@ async def test_flash_llama_grammar_tools_choice(
         max_tokens=100,
         seed=1,
         tools=tools,
+        temperature=0.0,
         tool_choice="get_current_weather",
-        presence_penalty=-1.1,
         messages=[
             {
                 "role": "system",
@@ -168,12 +167,12 @@ async def test_flash_llama_grammar_tools_choice(
     assert response.choices[0].message.content is None
     assert response.choices[0].message.tool_calls == [
         {
-            "id": 0,
+            "id": "0",
             "type": "function",
             "function": {
                 "description": None,
                 "name": "get_current_weather",
-                "arguments": {"format": "celsius", "location": "New York, NY"},
+                "arguments": {"format": "celsius", "location": "Brooklyn, NY"},
             },
         }
     ]
@@ -181,7 +180,6 @@ async def test_flash_llama_grammar_tools_choice(
     assert response == response_snapshot
 
 
-@pytest.mark.skip(reason="Takes too long to run")
 @pytest.mark.asyncio
 @pytest.mark.private
 async def test_flash_llama_grammar_tools_stream(
@@ -191,8 +189,8 @@ async def test_flash_llama_grammar_tools_stream(
         max_tokens=100,
         seed=1,
         tools=tools,
+        temperature=0.0,
         tool_choice="get_current_weather",
-        presence_penalty=-1.1,
         messages=[
             {
                 "role": "system",
@@ -210,11 +208,10 @@ async def test_flash_llama_grammar_tools_stream(
     async for response in responses:
         count += 1
 
-    assert count == 38
+    assert count == 48
     assert response == response_snapshot
 
 
-@pytest.mark.skip(reason="Takes too long to run")
 @pytest.mark.asyncio
 @pytest.mark.private
 async def test_flash_llama_grammar_tools_insufficient_information(
@@ -222,13 +219,13 @@ async def test_flash_llama_grammar_tools_insufficient_information(
 ):
     responses = await flash_llama_grammar_tools.chat(
         max_tokens=100,
-        seed=8,
+        seed=24,
         tools=tools,
         tool_choice="auto",
         messages=[
             {
                 "role": "system",
-                "content": "ONLY RESPOND IF THE USER ASKS A WEATHER RELATED QUESTION",
+                "content": "STRICTLY ONLY RESPOND IF THE USER ASKS A WEATHER RELATED QUESTION",
             },
             {
                 "role": "user",
@@ -239,18 +236,7 @@ async def test_flash_llama_grammar_tools_insufficient_information(
     )
 
     assert responses.choices[0].message.content is None
-    assert responses.choices[0].message.tool_calls == [
-        {
-            "function": {
-                "arguments": {
-                    "error": "Cannot get current weather forecast from specified location and temperature unit. Please try again with different options."
-                },
-                "description": None,
-                "name": "notify_error",
-            },
-            "id": 0,
-            "type": "function",
-        }
-    ]
-
+    assert (
+        responses.choices[0].message.tool_calls[0]["function"]["name"] == "notify_error"
+    )
     assert responses == response_snapshot
diff --git a/router/Cargo.toml b/router/Cargo.toml
index 7773e212..45acab8e 100644
--- a/router/Cargo.toml
+++ b/router/Cargo.toml
@@ -46,7 +46,7 @@ ngrok = { version = "0.13.1", features = ["axum"], optional = true }
 init-tracing-opentelemetry = { version = "0.14.1", features = [
   "opentelemetry-otlp",
 ] }
-minijinja = { version = "2.0.2" }
+minijinja = { version = "2.0.2", features = ["json"] }
 minijinja-contrib = { version = "2.0.2", features = ["pycompat"] }
 futures-util = "0.3.30"
 regex = "1.10.3"
diff --git a/router/src/infer/chat_template.rs b/router/src/infer/chat_template.rs
index a8537818..bfa9421c 100644
--- a/router/src/infer/chat_template.rs
+++ b/router/src/infer/chat_template.rs
@@ -1,9 +1,7 @@
 use std::collections::HashSet;
 
 use crate::infer::InferError;
-use crate::{
-    ChatTemplateInputs, GrammarType, Message, MessageChunk, TextMessage, TokenizerConfigToken,
-};
+use crate::{ChatTemplateInputs, Message, MessageChunk, TextMessage, TokenizerConfigToken, Tool};
 use minijinja::{Environment, ErrorKind, Template};
 use minijinja_contrib::pycompat;
 
@@ -32,6 +30,7 @@ impl ChatTemplate {
         env.set_unknown_method_callback(pycompat::unknown_method_callback);
         let template_str = template.into_boxed_str();
         env.add_function("raise_exception", raise_exception);
+        tracing::debug!("Loading template: {:#?}", template_str);
 
         // leaking env and template_str as read-only, static resources for performance.
         let template = Box::leak(env)
@@ -42,6 +41,7 @@ impl ChatTemplate {
         let variables = template.undeclared_variables(true);
         // check if the `tools` variable is used in the template
         let use_default_tool_template = !variables.contains("tools");
+        tracing::debug!("Use default tool template: {}", use_default_tool_template);
 
         Self {
             template,
@@ -56,25 +56,36 @@ impl ChatTemplate {
         &self,
         guideline: Option<&str>,
         mut messages: Vec<Message>,
-        grammar_with_prompt: Option<(GrammarType, String)>,
+        tools_and_prompt: Option<(Vec<Tool>, String)>,
     ) -> Result<String, InferError> {
-        if self.use_default_tool_template {
-            if let Some(last_message) = messages.last_mut() {
-                if let Some((GrammarType::Json(tools), tool_prompt)) = grammar_with_prompt {
-                    last_message.content.push(MessageChunk::Text {
-                        text: format!("\n---\n{}\n{}", tool_prompt, tools),
-                    });
-                }
-            }
-        }
-
-        let messages: Vec<TextMessage> = messages.into_iter().map(|c| c.into()).collect();
-
         // check if guideline is expected but not provided
         if self.variables.contains("guideline") && guideline.is_none() {
             return Err(InferError::MissingTemplateVariable("guideline".to_string()));
         }
 
+        let tools = match tools_and_prompt {
+            Some((tools, tool_prompt)) => {
+                // check if the `tools` variable is used in the template
+                // if not, we need to append the tools to the last message
+                let text = if self.use_default_tool_template {
+                    match serde_json::to_string(&tools) {
+                        Ok(tools_str) => format!("\n---\n{}\n{}", tools_str, tool_prompt),
+                        Err(e) => return Err(InferError::ToolError(e.to_string())),
+                    }
+                } else {
+                    // if the `tools` variable is used in the template, we just append the tool_prompt
+                    format!("\n---\n{}", tool_prompt)
+                };
+                if let Some(last_message) = messages.last_mut() {
+                    last_message.content.push(MessageChunk::Text { text });
+                }
+                Some(tools)
+            }
+            None => None,
+        };
+
+        let messages: Vec<TextMessage> = messages.into_iter().map(|c| c.into()).collect();
+
         self.template
             .render(ChatTemplateInputs {
                 guideline,
@@ -82,8 +93,7 @@ impl ChatTemplate {
                 bos_token: self.bos_token.as_deref(),
                 eos_token: self.eos_token.as_deref(),
                 add_generation_prompt: true,
-                tools: None,
-                tools_prompt: None,
+                tools,
             })
             .map_err(InferError::TemplateError)
     }
@@ -95,7 +105,7 @@ mod tests {
     use crate::infer::chat_template::raise_exception;
     use crate::infer::ChatTemplate;
     use crate::{
-        ChatTemplateInputs, GrammarType, Message, MessageContent, TextMessage, TokenizerConfigToken,
+        ChatTemplateInputs, Message, MessageContent, TextMessage, TokenizerConfigToken, Tool,
     };
     use minijinja::Environment;
 
@@ -854,11 +864,12 @@ mod tests {
                 content: MessageContent::SingleText("Just testing".to_string()),
             },
         ];
-        let tools = serde_json::json!("[]");
+        let tools_string = r#"[{"type": "function","function": {"name": "get_current_weather","description": "Get the current weather","parameters": {"type": "object","properties": {"location": {"type": "string","description": "The city and state, e.g. San Francisco, CA"},"format": {"type": "string","enum": ["celsius", "fahrenheit"],"description": "The temperature unit to use. Infer this from the users location."}},"required": ["location", "format"]}}}]"#.to_string();
+        let tools: Vec<Tool> = serde_json::from_str(&tools_string).unwrap();
         let tool_prompt = "This default prompt will be used".to_string();
-        let grammer_with_prompt = (GrammarType::Json(tools), tool_prompt);
-        let result = ct.apply(None, msgs, Some(grammer_with_prompt));
-        let expected = "<s>[INST] I'd like to show off how chat templating works! [/INST]Great! How can I help you today?</s> [INST] Just testing\n---\nThis default prompt will be used\n\"[]\" [/INST]".to_string();
+        let tools_and_prompt = Some((tools, tool_prompt));
+        let result = ct.apply(None, msgs, tools_and_prompt);
+        let expected = "<s>[INST] I'd like to show off how chat templating works! [/INST]Great! How can I help you today?</s> [INST] Just testing\n---\n[{\"type\":\"function\",\"function\":{\"description\":\"Get the current weather\",\"name\":\"get_current_weather\",\"arguments\":{\"properties\":{\"format\":{\"description\":\"The temperature unit to use. Infer this from the users location.\",\"enum\":[\"celsius\",\"fahrenheit\"],\"type\":\"string\"},\"location\":{\"description\":\"The city and state, e.g. San Francisco, CA\",\"type\":\"string\"}},\"required\":[\"location\",\"format\"],\"type\":\"object\"}}}]\nThis default prompt will be used [/INST]".to_string();
         assert_eq!(result.unwrap(), expected);
     }
 }
diff --git a/router/src/infer/mod.rs b/router/src/infer/mod.rs
index c9354d9a..81c0d38f 100644
--- a/router/src/infer/mod.rs
+++ b/router/src/infer/mod.rs
@@ -3,7 +3,7 @@ mod chat_template;
 pub mod tool_grammar;
 
 use crate::validation::{ValidGenerateRequest, Validation, ValidationError};
-use crate::GrammarType;
+use crate::Tool;
 use crate::{
     ChatTemplateVersions, FinishReason, GenerateRequest, HubProcessorConfig, HubTokenizerConfig,
     Message, PrefillToken, Token,
@@ -140,12 +140,12 @@ impl Infer {
         &self,
         guideline: Option<String>,
         messages: Vec<Message>,
-        grammar_with_prompt: Option<(GrammarType, String)>,
+        tools_and_prompt: Option<(Vec<Tool>, String)>,
     ) -> Result<String, InferError> {
         self.chat_template
             .as_ref()
             .ok_or_else(|| InferError::TemplateError(ErrorKind::TemplateNotFound.into()))?
-            .apply(guideline.as_deref(), messages, grammar_with_prompt)
+            .apply(guideline.as_deref(), messages, tools_and_prompt)
             .map_err(|e| {
                 metrics::counter!("tgi_request_failure", "err" => "template").increment(1);
                 tracing::error!("{e}");
diff --git a/router/src/infer/tool_grammar.rs b/router/src/infer/tool_grammar.rs
index 05027f30..4fe15720 100644
--- a/router/src/infer/tool_grammar.rs
+++ b/router/src/infer/tool_grammar.rs
@@ -1,5 +1,8 @@
 use crate::infer::InferError;
-use crate::{FunctionRef, FunctionsMap, Properties, Tool, ToolChoice, ToolType, Tools};
+use crate::{
+    FunctionDefinition, FunctionRef, FunctionsMap, JsonSchemaTool, Properties, Tool, ToolChoice,
+    ToolType,
+};
 use serde_json::{json, Map, Value};
 use std::collections::HashMap;
 
@@ -16,17 +19,38 @@ impl ToolGrammar {
     }
 
     pub fn apply(
-        tools: Option<Vec<Tool>>,
+        tools: Vec<Tool>,
         tool_choice: ToolChoice,
-    ) -> Result<Option<Tools>, InferError> {
+    ) -> Result<(Vec<Tool>, Option<JsonSchemaTool>), InferError> {
         // if no tools are provided, we return None
-        let tools = match tools {
-            Some(tools) if !tools.is_empty() => tools,
-            _ => return Ok(None),
-        };
+        if tools.is_empty() {
+            return Ok((tools, None));
+        }
 
         let tool_choice = tool_choice.0.unwrap_or(ToolType::OneOf);
 
+        let mut tools = tools.clone();
+
+        // add the notify_error function to the tools
+        let notify_error = Tool {
+            r#type: "function".to_string(),
+            function: FunctionDefinition {
+                name: "notify_error".to_string(),
+                description: Some("Notify an error or issue".to_string()),
+                arguments: json!({
+                    "type": "object",
+                    "properties": {
+                        "error": {
+                            "type": "string",
+                            "description": "The error or issue to notify"
+                        }
+                    },
+                    "required": ["error"]
+                }),
+            },
+        };
+        tools.push(notify_error);
+
         // if tools are provided and no tool_choice we default to the OneOf
         let tools_to_use = match tool_choice {
             ToolType::FunctionName(name) => {
@@ -35,87 +59,57 @@ impl ToolGrammar {
             ToolType::Function { function } => {
                 vec![Self::find_tool_by_name(&tools, &function.name)?]
             }
-            ToolType::OneOf => tools,
-            ToolType::NoTool => return Ok(None),
+            ToolType::OneOf => tools.clone(),
+            ToolType::NoTool => return Ok((tools, None)),
         };
 
-        // adds the error notification function for LLM feedback if required
-        let mut text_response_properties = Map::new();
-        text_response_properties.insert(
-            "error".to_string(),
-            serde_json::json!({
-                "type": "string",
-                "description": "The error or issue to notify"
-            }),
-        );
-        text_response_properties.insert(
-            "_name".to_string(),
-            serde_json::json!({
-                "type": "string",
-                "const": "notify_error"
-            }),
-        );
-
         let functions: HashMap<String, serde_json::Value> = tools_to_use
             .iter()
             .map(|tool| {
                 let func = tool.function.clone();
 
-                // Clone the existing parameters, which are expected to be a JSON object
-                let mut params = if let Value::Object(params) = &func.arguments {
-                    params.clone()
-                } else {
-                    Map::new()
-                };
+                let mut params = Map::new();
 
-                // Insert the function's description at the top level, outside of properties
                 params.insert(
                     "description".to_string(),
-                    Value::String(func.description.clone().unwrap_or_default()),
+                    Value::String(func.description.unwrap_or_default()),
                 );
 
-                // Ensure 'properties' exists and is an object
-                let properties = params
-                    .entry("properties".to_string())
-                    .or_insert_with(|| json!({}))
-                    .as_object_mut()
-                    .unwrap();
+                let mut properties = Map::new();
+                let mut required = vec![Value::String("_name".to_string())];
 
-                // Insert the constant for the function name inside 'properties'
                 properties.insert(
                     "_name".to_string(),
                     json!({
                         "type": "string",
                         "const": func.name.clone(),
-                        // "description": "The name of the function"
                     }),
                 );
 
-                // Check if 'required' exists, and it is an array. If not, create an empty array.
-                let required = params
-                    .entry("required".to_string())
-                    .or_insert_with(|| json!([]))
-                    .as_array_mut()
-                    .unwrap();
-
-                // Add 'name' to the 'required' array if it is not already present
-                if !required.iter().any(|r| r == "_name") {
-                    required.push(json!("_name"));
+                if let Value::Object(args) = func.arguments {
+                    if let Some(Value::Object(props)) = args.get("properties") {
+                        properties.extend(props.clone());
+                    }
+                    if let Some(Value::Array(reqs)) = args.get("required") {
+                        required.extend(reqs.clone());
+                    }
+                    params.insert(
+                        "additionalProperties".to_string(),
+                        Value::Bool(
+                            args.get("additionalProperties").and_then(|v| v.as_str())
+                                == Some("true"),
+                        ),
+                    );
                 }
 
+                params.insert("properties".to_string(), Value::Object(properties));
+                params.insert("required".to_string(), Value::Array(required));
+
                 (func.name, Value::Object(params))
             })
-            .chain([(
-                "notify_error".to_string(),
-                serde_json::json!({
-                    "properties": text_response_properties,
-                    "required": ["error", "_name"],
-                    "type": "object"
-                }),
-            )])
             .collect();
 
-        let tools = Tools {
+        let tool_schema = JsonSchemaTool {
             functions_map: FunctionsMap { functions },
             properties: Properties {
                 function: tools_to_use
@@ -123,13 +117,10 @@ impl ToolGrammar {
                     .map(|tool| FunctionRef {
                         ref_path: format!("#/$functions/{}", tool.function.name.clone()),
                     })
-                    .chain(std::iter::once(FunctionRef {
-                        ref_path: "#/$functions/notify_error".to_string(),
-                    }))
                     .collect(),
             },
         };
 
-        Ok(Some(tools))
+        Ok((tools, Some(tool_schema)))
     }
 }
diff --git a/router/src/lib.rs b/router/src/lib.rs
index 1b2ff153..ce4f7c46 100644
--- a/router/src/lib.rs
+++ b/router/src/lib.rs
@@ -840,10 +840,10 @@ pub(crate) struct ChatRequest {
     pub tools: Option<Vec<Tool>>,
 
     /// A prompt to be appended before the tools
-    #[serde(default = "default_tool_prompt")]
+    #[serde(default)]
     #[schema(
         nullable = true,
-        example = "\"You will be presented with a JSON schema representing a set of tools.\nIf the user request lacks of sufficient information to make a precise tool selection: Do not invent any tool's properties, instead notify with an error message.\n\nJSON Schema:\n\""
+        example = "Given the functions available, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. Respond in the format {name: function name, parameters: dictionary of argument name and its value}.Do not use variables."
     )]
     pub tool_prompt: Option<String>,
 
@@ -865,10 +865,8 @@ pub(crate) struct ChatRequest {
     pub guideline: Option<String>,
 }
 
-fn default_tool_prompt() -> Option<String> {
-    Some(
-        "\nYou will be presented with a JSON schema representing a set of tools.\nIf the user request lacks of sufficient information to make a precise tool selection: Do not invent any tool's properties, instead notify with an error message.\n\nJSON Schema:\n".to_string(),
-    )
+pub fn default_tool_prompt() -> String {
+    "\nGiven the functions available, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. Respond in the format {name: function name, parameters: dictionary of argument name and its value}.Do not use variables.\n".to_string()
 }
 
 #[derive(Clone, Debug, Deserialize, PartialEq, Serialize, ToSchema)]
@@ -910,7 +908,7 @@ impl From<ToolTypeDeserializer> for ToolChoice {
 }
 
 #[derive(Debug, Deserialize, Serialize, ToSchema, PartialEq)]
-pub struct Tools {
+pub struct JsonSchemaTool {
     #[serde(flatten)]
     functions_map: FunctionsMap,
     properties: Properties,
@@ -968,8 +966,7 @@ pub(crate) struct ChatTemplateInputs<'a> {
     bos_token: Option<&'a str>,
     eos_token: Option<&'a str>,
     add_generation_prompt: bool,
-    tools: Option<&'a str>,
-    tools_prompt: Option<&'a str>,
+    tools: Option<Vec<Tool>>,
     guideline: Option<&'a str>,
 }
 
diff --git a/router/src/server.rs b/router/src/server.rs
index 8ec7a871..8ebd1a33 100644
--- a/router/src/server.rs
+++ b/router/src/server.rs
@@ -8,7 +8,7 @@ use crate::kserve::{
     kserve_model_metadata, kserve_model_metadata_ready,
 };
 use crate::validation::ValidationError;
-use crate::ChatTokenizeResponse;
+use crate::{default_tool_prompt, ChatTokenizeResponse};
 use crate::{
     usage_stats, BestOfSequence, Details, ErrorResponse, FinishReason, FunctionName,
     GenerateParameters, GenerateRequest, GenerateResponse, GrammarType, HubModelInfo,
@@ -23,7 +23,7 @@ use crate::{
     CompletionRequest, CompletionType, DeltaToolCall, Function, Prompt, Tool, VertexRequest,
     VertexResponse,
 };
-use crate::{FunctionDefinition, HubPreprocessorConfig, ToolCall, ToolChoice, ToolType, Tools};
+use crate::{FunctionDefinition, HubPreprocessorConfig, ToolCall, ToolChoice, ToolType};
 use async_stream::__private::AsyncStream;
 use axum::extract::Extension;
 use axum::http::{HeaderMap, HeaderValue, Method, StatusCode};
@@ -146,7 +146,7 @@ async fn get_chat_tokenize(
     } = req;
 
     let tool_prompt = tool_prompt.unwrap_or_default();
-    let (inputs, _grammar, _tool_grammar) = prepare_chat_input(
+    let (inputs, _grammar, _using_tools) = prepare_chat_input(
         &infer,
         response_format,
         tools,
@@ -1158,14 +1158,16 @@ async fn chat_completions(
     let repetition_penalty = presence_penalty.map(|x| x + 2.0);
     let max_new_tokens = max_tokens.or(Some(100));
     let logprobs = logprobs.unwrap_or(false);
-    let tool_prompt = tool_prompt.unwrap_or_default();
+    let tool_prompt = tool_prompt
+        .filter(|s| !s.is_empty())
+        .unwrap_or_else(default_tool_prompt);
     let stop = stop.unwrap_or_default();
     // enable greedy only when temperature is 0
     let (do_sample, temperature) = match temperature {
         Some(temperature) if temperature == 0.0 => (false, None),
         other => (true, other),
     };
-    let (inputs, grammar, tool_grammar) = prepare_chat_input(
+    let (inputs, grammar, using_tools) = prepare_chat_input(
         &infer,
         response_format,
         tools,
@@ -1221,7 +1223,7 @@ async fn chat_completions(
             });
 
             // replace the content with the tool calls if grammar is present
-            let (content, tool_calls) = if tool_grammar.is_some() {
+            let (content, tool_calls) = if using_tools {
                 (None, Some(vec![stream_token.token.text]))
             } else {
                 let content = if !stream_token.token.special {
@@ -1275,7 +1277,7 @@ async fn chat_completions(
             .unwrap_or_else(|_| std::time::Duration::from_secs(0))
             .as_secs();
 
-        let (tool_calls, output) = if tool_grammar.is_some() {
+        let (tool_calls, output) = if using_tools {
             let gen_text_value: Value =
                 serde_json::from_str(&generation.generated_text).map_err(|e| {
                     InferError::ToolError(format!(
@@ -2539,7 +2541,7 @@ fn create_post_processor(
     Ok(post_processor)
 }
 
-type PreparedInput = (String, Option<GrammarType>, Option<Tools>);
+type PreparedInput = (String, Option<GrammarType>, bool);
 
 fn prepare_chat_input(
     infer: &Infer,
@@ -2556,19 +2558,139 @@ fn prepare_chat_input(
         ));
     }
 
+    // when response_format is set, tools are not included when applying the chat template to generate inputs
     if let Some(format) = response_format {
         let inputs = infer.apply_chat_template(guideline, messages, None)?;
-        return Ok((inputs, Some(format), None));
+        return Ok((inputs, Some(format), false));
     }
 
-    // if tools are set, apply the tool grammar and then the chat template
-    let tool_grammar: Option<Tools> = ToolGrammar::apply(tools, tool_choice)?;
-    let grammar = tool_grammar
-        .as_ref()
-        .map(|t| GrammarType::Json(serde_json::json!(t)));
-    let tools_grammar_prompt = tool_grammar
-        .as_ref()
-        .map(|t| (GrammarType::Json(serde_json::json!(t)), tool_prompt.into()));
-    let inputs = infer.apply_chat_template(guideline, messages, tools_grammar_prompt)?;
-    Ok((inputs, grammar, tool_grammar))
+    // when no response_format is set and tools are included, apply the chat template with the tools
+    // to generate inputs
+    if let Some(tools) = tools {
+        let (updated_tools, tool_schema) = ToolGrammar::apply(tools, tool_choice)?;
+
+        let grammar = tool_schema
+            .as_ref()
+            .map(|t| GrammarType::Json(serde_json::json!(t)));
+
+        let inputs: String = infer.apply_chat_template(
+            guideline,
+            messages,
+            Some((updated_tools, tool_prompt.into())),
+        )?;
+        return Ok((inputs, grammar, tool_schema.is_some()));
+    }
+
+    // if no response_format or tools are set simply apply the chat template to generate inputs
+    let inputs = infer.apply_chat_template(guideline, messages, None)?;
+    Ok((inputs, None, false))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::ChatTemplateVersions;
+    use crate::HubTokenizerConfig;
+    use crate::TokenizerConfigToken;
+    use crate::Tool;
+
+    use serde_json::json;
+
+    #[test]
+    fn test_prepare_chat_input() {
+        // Mock Backend to avoid network requests
+        struct MockBackend;
+
+        impl Backend for MockBackend {
+            fn schedule(
+                &self,
+                _request: crate::validation::ValidGenerateRequest,
+            ) -> Result<
+                tokio_stream::wrappers::UnboundedReceiverStream<
+                    Result<InferStreamResponse, InferError>,
+                >,
+                InferError,
+            > {
+                unimplemented!("Never called in this test");
+            }
+            fn health<'a, 'async_trait>(
+                &'a self,
+                _current_health: bool,
+            ) -> core::pin::Pin<
+                Box<dyn core::future::Future<Output = bool> + core::marker::Send + 'async_trait>,
+            >
+            where
+                'a: 'async_trait,
+                Self: 'async_trait,
+            {
+                unimplemented!("Never called in this test");
+            }
+        }
+
+        let backend = MockBackend {};
+
+        let mut tokenizer_config = HubTokenizerConfig::default();
+
+        // mock tokenizer config values
+        tokenizer_config.bos_token = Some(TokenizerConfigToken::String("<s>".to_string()));
+        tokenizer_config.eos_token = Some(TokenizerConfigToken::String("</s>".to_string()));
+        tokenizer_config.chat_template = Some(
+            ChatTemplateVersions::Single("{%- if messages[0][\"role\"] == \"system\" %}\n    {%- set system_message = messages[0][\"content\"] %}\n    {%- set loop_messages = messages[1:] %}\n{%- else %}\n    {%- set loop_messages = messages %}\n{%- endif %}\n{%- if not tools is defined %}\n    {%- set tools = none %}\n{%- endif %}\n{%- set user_messages = loop_messages | selectattr(\"role\", \"equalto\", \"user\") | list %}\n\n{#- This block checks for alternating user/assistant messages, skipping tool calling messages #}\n{%- set ns = namespace() %}\n{%- set ns.index = 0 %}\n{%- for message in loop_messages %}\n    {%- if not (message.role == \"tool\" or message.role == \"tool_results\" or (message.tool_calls is defined and message.tool_calls is not none)) %}\n        {%- if (message[\"role\"] == \"user\") != (ns.index % 2 == 0) %}\n            {{- raise_exception(\"After the optional system message, conversation roles must alternate user/assistant/user/assistant/...\") }}\n        {%- endif %}\n        {%- set ns.index = ns.index + 1 %}\n    {%- endif %}\n{%- endfor %}\n\n{{- bos_token }}\n{%- for message in loop_messages %}\n    {%- if message[\"role\"] == \"user\" %}\n        {%- if tools is not none and (message == user_messages[-1]) %}\n            {{- \"[AVAILABLE_TOOLS] [\" }}\n            {%- for tool in tools %}\n                {%- set tool = tool.function %}\n                {{- '{\"type\": \"function\", \"function\": {' }}\n                {%- for key, val in tool.items() if key != \"return\" %}\n                    {%- if val is string %}\n                        {{- '\"' + key + '\": \"' + val + '\"' }}\n                    {%- else %}\n                        {{- '\"' + key + '\": ' + val|tojson }}\n                    {%- endif %}\n                    {%- if not loop.last %}\n                        {{- \", \" }}\n                    {%- endif %}\n                {%- endfor %}\n                {{- \"}}\" }}\n                {%- if not loop.last %}\n                    {{- \", \" }}\n                {%- else %}\n                    {{- \"]\" }}\n                {%- endif %}\n            {%- endfor %}\n            {{- \"[/AVAILABLE_TOOLS]\" }}\n            {%- endif %}\n        {%- if loop.last and system_message is defined %}\n            {{- \"[INST] \" + system_message + \"\\n\\n\" + message[\"content\"] + \"[/INST]\" }}\n        {%- else %}\n            {{- \"[INST] \" + message[\"content\"] + \"[/INST]\" }}\n        {%- endif %}\n    {%- elif message.tool_calls is defined and message.tool_calls is not none %}\n        {{- \"[TOOL_CALLS] [\" }}\n        {%- for tool_call in message.tool_calls %}\n            {%- set out = tool_call.function|tojson %}\n            {{- out[:-1] }}\n            {%- if not tool_call.id is defined or tool_call.id|length != 9 %}\n                {{- raise_exception(\"Tool call IDs should be alphanumeric strings with length 9!\") }}\n            {%- endif %}\n            {{- ', \"id\": \"' + tool_call.id + '\"}' }}\n            {%- if not loop.last %}\n                {{- \", \" }}\n            {%- else %}\n                {{- \"]\" + eos_token }}\n            {%- endif %}\n        {%- endfor %}\n    {%- elif message[\"role\"] == \"assistant\" %}\n        {{- \" \" + message[\"content\"]|trim + eos_token}}\n    {%- elif message[\"role\"] == \"tool_results\" or message[\"role\"] == \"tool\" %}\n        {%- if message.content is defined and message.content.content is defined %}\n            {%- set content = message.content.content %}\n        {%- else %}\n            {%- set content = message.content %}\n        {%- endif %}\n        {{- '[TOOL_RESULTS] {\"content\": ' + content|string + \", \" }}\n        {%- if not message.tool_call_id is defined or message.tool_call_id|length != 9 %}\n            {{- raise_exception(\"Tool call IDs should be alphanumeric strings with length 9!\") }}\n        {%- endif %}\n        {{- '\"call_id\": \"' + message.tool_call_id + '\"}[/TOOL_RESULTS]' }}\n    {%- else %}\n        {{- raise_exception(\"Only user and assistant roles are supported, with the exception of an initial optional system message!\") }}\n    {%- endif %}\n{%- endfor %}\n".to_string())
+        );
+
+        let infer = Infer::new(
+            backend,
+            Validation::new(1, None, None, None, 1, 1, 1, 1, 1, false),
+            1,
+            tokenizer_config,
+            HubProcessorConfig::default(),
+        );
+        let response_format = None;
+        let tools = Some(vec![Tool {
+            r#type: "function".to_string(),
+            function: FunctionDefinition {
+                name: "get_current_weather".to_string(),
+                description: Some("Get the current weather".to_string()),
+                arguments: json!({
+                    "type": "object",
+                    "properties": {
+                        "location": {
+                            "type": "string",
+                            "description": "The city and state, e.g. San Francisco, CA"
+                        },
+                        "format": {
+                            "type": "string",
+                            "enum": ["celsius", "fahrenheit"],
+                            "description": "The temperature unit to use. Infer this from the users location."
+                        }
+                    },
+                    "required": ["location", "format"]
+                }),
+            },
+        }]);
+        let tool_prompt = "Given the functions available, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. Respond in the format {name: function name, parameters: dictionary of argument name and its value}.Do not use variables.";
+        let guideline = None;
+        let messages = vec![Message {
+            name: None,
+            role: "user".to_string(),
+            content: MessageContent::SingleText(
+                "What is the weather like in New York?".to_string(),
+            ),
+        }];
+
+        let result = prepare_chat_input(
+            &infer,
+            response_format,
+            tools,
+            ToolChoice(None),
+            tool_prompt,
+            guideline,
+            messages,
+        );
+
+        assert!(result.is_ok());
+        let (inputs, _grammar, using_tools) = result.unwrap();
+        assert_eq!(using_tools, true);
+        assert_eq!(inputs, "<s>[AVAILABLE_TOOLS] [{\"type\": \"function\", \"function\": {\"arguments\": {\"properties\":{\"format\":{\"description\":\"The temperature unit to use. Infer this from the users location.\",\"enum\":[\"celsius\",\"fahrenheit\"],\"type\":\"string\"},\"location\":{\"description\":\"The city and state, e.g. San Francisco, CA\",\"type\":\"string\"}},\"required\":[\"location\",\"format\"],\"type\":\"object\"}, \"description\": \"Get the current weather\", \"name\": \"get_current_weather\"}}, {\"type\": \"function\", \"function\": {\"arguments\": {\"properties\":{\"error\":{\"description\":\"The error or issue to notify\",\"type\":\"string\"}},\"required\":[\"error\"],\"type\":\"object\"}, \"description\": \"Notify an error or issue\", \"name\": \"notify_error\"}}][/AVAILABLE_TOOLS][INST] What is the weather like in New York?\n---\nGiven the functions available, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. Respond in the format {name: function name, parameters: dictionary of argument name and its value}.Do not use variables.[/INST]".to_string());
+    }
 }

From 2788d41a76c193d4de7055dc5ef38a97f25c38b5 Mon Sep 17 00:00:00 2001
From: Nicolas Patry <patry.nicolas@protonmail.com>
Date: Tue, 27 Aug 2024 15:33:02 +0200
Subject: [PATCH 04/37] Fixing CI. (#2462)

---
 .github/workflows/build.yaml    | 4 ----
 .github/workflows/ci_build.yaml | 3 +++
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index 1f72c46d..d415f369 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -32,10 +32,6 @@ jobs:
     permissions:
       contents: write
       packages: write
-      # This is used to complete the identity challenge
-      # with sigstore/fulcio when running outside of PRs.
-      id-token: write
-      security-events: write
     steps:
       - name: Checkout repository
         uses: actions/checkout@v4
diff --git a/.github/workflows/ci_build.yaml b/.github/workflows/ci_build.yaml
index 6000cec3..5190f321 100644
--- a/.github/workflows/ci_build.yaml
+++ b/.github/workflows/ci_build.yaml
@@ -39,6 +39,9 @@ jobs:
       matrix:
         hardware: ["cuda", "rocm", "intel-xpu", "intel-cpu"]
     uses: ./.github/workflows/build.yaml # calls the one above ^
+    permissions:
+      contents: write
+      packages: write
     with:
       hardware: ${{ matrix.hardware }}
       # https://github.com/actions/runner/issues/2206

From 21187c27c90acbec7f912b8af4feaec154de960f Mon Sep 17 00:00:00 2001
From: drbh <david.richard.holtz@gmail.com>
Date: Tue, 27 Aug 2024 13:31:08 -0400
Subject: [PATCH 05/37] fix: bump minijinja version and add test for llama 3.1
 tools (#2463)

* fix: support tojson and avoid message indexing issue in template

* fix: prefer minijinja native methods and prefer workspace level dependency

* fix: adjust comment typo
---
 Cargo.lock                        |  4 ++--
 Cargo.toml                        |  2 ++
 backends/v3/Cargo.toml            |  4 ++--
 router/Cargo.toml                 |  4 ++--
 router/src/infer/chat_template.rs | 37 +++++++++++++++++++++++++++++--
 5 files changed, 43 insertions(+), 8 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index aa5cb642..02e91bc1 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2169,9 +2169,9 @@ dependencies = [
 
 [[package]]
 name = "minijinja"
-version = "2.1.0"
+version = "2.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "45f7e8e35b6c7b169bf40b0176d2c79291ab8ee53290b84e0668ab21d841aa9d"
+checksum = "6d7d3e3a3eece1fa4618237ad41e1de855ced47eab705cec1c9a920e1d1c5aad"
 dependencies = [
  "serde",
  "serde_json",
diff --git a/Cargo.toml b/Cargo.toml
index 8bf75b90..79fda15d 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -29,6 +29,8 @@ tokenizers = { version = "0.19.1", features = ["http"] }
 hf-hub = { version = "0.3.1", features = ["tokio"] }
 metrics = { version = "0.23.0" }
 metrics-exporter-prometheus = { version = "0.15.1", features = [] }
+minijinja = { version = "2.2.0", features = ["json"] }
+minijinja-contrib = { version = "2.0.2", features = ["pycompat"] }
 
 [profile.release]
 incremental = true
diff --git a/backends/v3/Cargo.toml b/backends/v3/Cargo.toml
index 06a44bec..69dad072 100644
--- a/backends/v3/Cargo.toml
+++ b/backends/v3/Cargo.toml
@@ -53,8 +53,8 @@ utoipa-swagger-ui = { version = "6.0.0", features = ["axum"] }
 init-tracing-opentelemetry = { version = "0.14.1", features = [
   "opentelemetry-otlp",
 ] }
-minijinja = { version = "2.0.2" }
-minijinja-contrib = { version = "2.0.2", features = ["pycompat"] }
+minijinja = { workspace = true }
+minijinja-contrib = { workspace = true }
 futures-util = "0.3.30"
 regex = "1.10.3"
 once_cell = "1.19.0"
diff --git a/router/Cargo.toml b/router/Cargo.toml
index 45acab8e..5c328e8a 100644
--- a/router/Cargo.toml
+++ b/router/Cargo.toml
@@ -46,8 +46,8 @@ ngrok = { version = "0.13.1", features = ["axum"], optional = true }
 init-tracing-opentelemetry = { version = "0.14.1", features = [
   "opentelemetry-otlp",
 ] }
-minijinja = { version = "2.0.2", features = ["json"] }
-minijinja-contrib = { version = "2.0.2", features = ["pycompat"] }
+minijinja = { workspace = true }
+minijinja-contrib = { workspace = true }
 futures-util = "0.3.30"
 regex = "1.10.3"
 once_cell = "1.19.0"
diff --git a/router/src/infer/chat_template.rs b/router/src/infer/chat_template.rs
index bfa9421c..a736fc12 100644
--- a/router/src/infer/chat_template.rs
+++ b/router/src/infer/chat_template.rs
@@ -1,9 +1,8 @@
-use std::collections::HashSet;
-
 use crate::infer::InferError;
 use crate::{ChatTemplateInputs, Message, MessageChunk, TextMessage, TokenizerConfigToken, Tool};
 use minijinja::{Environment, ErrorKind, Template};
 use minijinja_contrib::pycompat;
+use std::collections::HashSet;
 
 /// Raise a exception (custom function) used in the chat templates
 pub(crate) fn raise_exception(err_text: String) -> Result<String, minijinja::Error> {
@@ -872,4 +871,38 @@ mod tests {
         let expected = "<s>[INST] I'd like to show off how chat templating works! [/INST]Great! How can I help you today?</s> [INST] Just testing\n---\n[{\"type\":\"function\",\"function\":{\"description\":\"Get the current weather\",\"name\":\"get_current_weather\",\"arguments\":{\"properties\":{\"format\":{\"description\":\"The temperature unit to use. Infer this from the users location.\",\"enum\":[\"celsius\",\"fahrenheit\"],\"type\":\"string\"},\"location\":{\"description\":\"The city and state, e.g. San Francisco, CA\",\"type\":\"string\"}},\"required\":[\"location\",\"format\"],\"type\":\"object\"}}}]\nThis default prompt will be used [/INST]".to_string();
         assert_eq!(result.unwrap(), expected);
     }
+
+    #[test]
+    fn test_chat_template_with_custom_tool_template() {
+        // chat template from meta-llama/Meta-Llama-3.1-8B-Instruct
+        let ct = ChatTemplate::new(
+            "{{- bos_token }}\n{%- if not tools_in_user_message is defined %}\n    {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n    {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n    {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n    {%- set system_message = messages[0]['content']|trim %}\n    {%- set messages = messages[1:] %}\n{%- else %}\n    {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n    {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n    {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n    {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n    {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n    {{- \"Do not use variables.\\n\\n\" }}\n    {%- for t in tools %}\n        {{- t | tojson(indent=4) }}\n        {{- \"\\n\\n\" }}\n    {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n    {#- Extract the first user message so we can plug it in here #}\n    {%- if messages | length != 0 %}\n        {%- set first_user_message = messages[0]['content']|trim %}\n        {%- set messages = messages[1:] %}\n    {%- else %}\n        {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n    {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n    {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n    {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n    {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n    {{- \"Do not use variables.\\n\\n\" }}\n    {%- for t in tools %}\n        {{- t | tojson(indent=4) }}\n        {{- \"\\n\\n\" }}\n    {%- endfor %}\n    {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n    {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n        {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n    {%- elif 'tool_calls' in message %}\n        {%- if not message.tool_calls|length == 1 %}\n            {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n        {%- endif %}\n        {%- set tool_call = message.tool_calls[0].function %}\n        {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n            {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n            {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n            {%- for arg_name, arg_val in tool_call.arguments | items %}\n                {{- arg_name + '=\"' + arg_val + '\"' }}\n                {%- if not loop.last %}\n                    {{- \", \" }}\n                {%- endif %}\n                {%- endfor %}\n            {{- \")\" }}\n        {%- else  %}\n            {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n            {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n            {{- '\"parameters\": ' }}\n            {{- tool_call.arguments | tojson }}\n            {{- \"}\" }}\n        {%- endif %}\n        {%- if builtin_tools is defined %}\n            {#- This means we're in ipython mode #}\n            {{- \"<|eom_id|>\" }}\n        {%- else %}\n            {{- \"<|eot_id|>\" }}\n        {%- endif %}\n    {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n        {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n        {%- if message.content is mapping or message.content is iterable %}\n            {{- message.content | tojson }}\n        {%- else %}\n            {{- message.content }}\n        {%- endif %}\n        {{- \"<|eot_id|>\" }}\n    {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n    {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n".to_string(),
+            Some(TokenizerConfigToken::String("<s>".to_string())),
+            Some(TokenizerConfigToken::String("</s>".to_string())),
+        );
+        let msgs: Vec<Message> = vec![
+            Message {
+                name: None,
+                role: "system".to_string(),
+                content: MessageContent::SingleText(
+                    "Youre a helpful assistant! Answer the users question best you can."
+                        .to_string(),
+                ),
+            },
+            Message {
+                name: None,
+                role: "user".to_string(),
+                content: MessageContent::SingleText(
+                    "What is the weather like in Brooklyn, New York?".to_string(),
+                ),
+            },
+        ];
+        let tools_string = r#"[{"type": "function","function": {"name": "get_current_weather","description": "Get the current weather","parameters": {"type": "object","properties": {"location": {"type": "string","description": "The city and state, e.g. San Francisco, CA"},"format": {"type": "string","enum": ["celsius", "fahrenheit"],"description": "The temperature unit to use. Infer this from the users location."}},"required": ["location", "format"]}}}]"#.to_string();
+        let tools: Vec<Tool> = serde_json::from_str(&tools_string).unwrap();
+        let tool_prompt = "This default prompt will be used".to_string();
+        let tools_and_prompt = Some((tools, tool_prompt));
+        let result = ct.apply(None, msgs, tools_and_prompt);
+        let expected = "<s><|start_header_id|>system<|end_header_id|>\n\nEnvironment: ipython\nCutting Knowledge Date: December 2023\nToday Date: 26 Jul 2024\n\nYoure a helpful assistant! Answer the users question best you can.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nGiven the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt.\n\nRespond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.Do not use variables.\n\n{\n    \"function\": {\n        \"arguments\": {\n            \"properties\": {\n                \"format\": {\n                    \"description\": \"The temperature unit to use. Infer this from the users location.\",\n                    \"enum\": [\n                        \"celsius\",\n                        \"fahrenheit\"\n                    ],\n                    \"type\": \"string\"\n                },\n                \"location\": {\n                    \"description\": \"The city and state, e.g. San Francisco, CA\",\n                    \"type\": \"string\"\n                }\n            },\n            \"required\": [\n                \"location\",\n                \"format\"\n            ],\n            \"type\": \"object\"\n        },\n        \"description\": \"Get the current weather\",\n        \"name\": \"get_current_weather\"\n    },\n    \"type\": \"function\"\n}\n\nWhat is the weather like in Brooklyn, New York?\n---\nThis default prompt will be used<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n".to_string();
+        assert_eq!(result.unwrap(), expected);
+    }
 }

From 8f99f165ce1a261c89ea2edef437ef23c03a0716 Mon Sep 17 00:00:00 2001
From: drbh <david.richard.holtz@gmail.com>
Date: Wed, 28 Aug 2024 13:44:44 -0400
Subject: [PATCH 06/37] fix: improve regex expression (#2468)

---
 docs/source/basic_tutorials/using_guidance.md | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/docs/source/basic_tutorials/using_guidance.md b/docs/source/basic_tutorials/using_guidance.md
index f09ef348..dfa3f0e4 100644
--- a/docs/source/basic_tutorials/using_guidance.md
+++ b/docs/source/basic_tutorials/using_guidance.md
@@ -157,7 +157,12 @@ from huggingface_hub import InferenceClient
 
 client = InferenceClient("http://localhost:3000")
 
-regexp = "((25[0-5]|2[0-4]\\d|[01]?\\d\\d?)\\.){3}(25[0-5]|2[0-4]\\d|[01]?\\d\\d?)"
+section_regex = "(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)"
+regexp = f"HELLO\.{section_regex}\.WORLD\.{section_regex}"
+
+# This is a more realistic example of an ip address regex
+# regexp = f"{section_regex}\.{section_regex}\.{section_regex}\.{section_regex}"
+
 
 resp = client.text_generation(
     f"Whats Googles DNS? Please use the following regex: {regexp}",
@@ -170,7 +175,7 @@ resp = client.text_generation(
 
 
 print(resp)
-# 7.1.1.1
+# HELLO.255.WORLD.255
 
 ```
 

From 4e821c003a7cb055a358cf142dbf01a2f4c1916f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?= <me@danieldk.eu>
Date: Thu, 29 Aug 2024 16:25:25 +0200
Subject: [PATCH 07/37] nix: build Torch against MKL and various other
 improvements (#2469)

Updates tgi-nix input:

- Move Torch closer to upstream by building against MKL.
- Remove compute capability 8.7 from Torch (Jetson).
- Sync nixpkgs cumpute capabilities with Torch (avoids
  compiling too mana capabilities for MAGMA).
- Use nixpkgs configuration passed through by `tgi-nix`.
---
 flake.lock     | 6 +++---
 flake.nix      | 9 +++------
 nix/server.nix | 1 +
 3 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/flake.lock b/flake.lock
index b40f51b3..14011768 100644
--- a/flake.lock
+++ b/flake.lock
@@ -944,11 +944,11 @@
         "nixpkgs": "nixpkgs_6"
       },
       "locked": {
-        "lastModified": 1724270760,
-        "narHash": "sha256-KX566x0+3HZcB20HPdvdwyMm7ZJg21M+iqVrs/HCimA=",
+        "lastModified": 1724784743,
+        "narHash": "sha256-NdEoWeNwR/ZstYnHaiQWIYZvr7VsrAh7g3+ZHUPrxuI=",
         "owner": "danieldk",
         "repo": "tgi-nix",
-        "rev": "12cbaa76ff258351741d3b5afb7161f617fe7b4c",
+        "rev": "c9580c3e39a855246bb87b584bbea1885b44f524",
         "type": "github"
       },
       "original": {
diff --git a/flake.nix b/flake.nix
index 83feb26a..0739c90a 100644
--- a/flake.nix
+++ b/flake.nix
@@ -31,15 +31,12 @@
           src = ./.;
           additionalCargoNixArgs = [ "--all-features" ];
         };
-        config = {
-          allowUnfree = true;
-          cudaSupport = true;
-        };
         pkgs = import nixpkgs {
-          inherit config system;
+          inherit system;
+          inherit (tgi-nix.lib) config;
           overlays = [
             rust-overlay.overlays.default
-            tgi-nix.overlay
+            tgi-nix.overlays.default
           ];
         };
         crateOverrides = import ./nix/crate-overrides.nix { inherit pkgs nix-filter; };
diff --git a/nix/server.nix b/nix/server.nix
index 4e0fdaa1..6ee088e0 100644
--- a/nix/server.nix
+++ b/nix/server.nix
@@ -28,6 +28,7 @@
   peft,
   safetensors,
   tokenizers,
+  torch,
   sentencepiece,
   transformers,
   typer,

From e415b690a68d7a0e149c996e46def41c867ff421 Mon Sep 17 00:00:00 2001
From: Nicolas Patry <patry.nicolas@protonmail.com>
Date: Thu, 29 Aug 2024 16:29:01 +0200
Subject: [PATCH 08/37] Lots of improvements (Still 2 allocators) (#2449)

* Making prefix/flashinfer the default and testing the full release tests.

* Include flashinfer in the docker.

* Using prebuilt.

* Allowing window_left_size (dummy version).

* Disabling flashinfer/prefix caching on odd head_dim

* Disable prefix caching for lora.

* More specific codes.

* Update lock

* Updating integration tests with new values with FI/FD.

Remove paged as a default too, and using FD everywhere.

* Update cargo lock ?

* Upgrade to 1.80 because of bitstream...

* Everywhere 1.80

* Forgot last default place.

* Apply suggestions from code review

Co-authored-by: drbh <david.richard.holtz@gmail.com>

* Updated flake lock

* Tmp

* Upgrade resolution system for less errors in resolution.

* Remove lambda for cleaner function.

* Handling debugger.

* OVerride the env in server tests.

* Is this enough to make it work ?

* This seems to be working.

* Downgrade some logs.

* Fixing the default for vlm.

* Don't enable prefix caching on VLM just yet.

* Change `add_special_tokens` in order to have the correct tokens for chat
input and not (since it's super important with the prefixing now)

* Fixing prefix caching for flashdecoding.

* Update all models.

* Fixed flashinfer version.

* add_special_tokens is internal only

* Fixing seqlen with the new vlms.

* Fixing the issue with `add_special_tokens` not being passed around.

* Fixing the test.

* Removing encoder_decoder (seq2seq).

* Update the chat test.

* Fixing the batching tokenization in flash causal lm.

* Truncating left for radix purposes.

* Oops this doesn't belong here.

* Put back default pure shell.

* Update server tests

- Default to throughput test in k6
- Use TGI_WIGGLE_ROOM to adjust wiggle room

* Only n_heads / process_group.size() are necessary.

* Revert the integrationt tests change (seem linked to head_size
modification).

* Adding error message when assert is violated.

* Fixing the free algorithm to handle times where the common prefix is
smaller.

* Apply suggestions from code review

Co-authored-by: OlivierDehaene <olivier@huggingface.co>

* Update server/text_generation_server/layers/attention/common.py

Co-authored-by: OlivierDehaene <olivier@huggingface.co>

* Fix disabling prefix caching - Fix windowing checks.

* Revert the Cohere tokenizer change (for now using a revision instead).

* Fmt.

---------

Co-authored-by: drbh <david.richard.holtz@gmail.com>
Co-authored-by: OlivierDehaene <olivier@huggingface.co>
---
 .github/workflows/tests.yaml                  |   2 +-
 Cargo.lock                                    | 437 +++++++++---------
 Dockerfile                                    |   9 +-
 Dockerfile_amd                                |   2 +-
 Dockerfile_intel                              |   2 +-
 backends/client/src/v3/client.rs              |   2 +
 backends/client/src/v3/sharded_client.rs      |   1 +
 backends/v3/src/backend.rs                    |  30 +-
 backends/v3/src/block_allocator.rs            |   5 +-
 backends/v3/src/client/grpc_client.rs         |   1 +
 backends/v3/src/client/sharded_client.rs      |   1 +
 backends/v3/src/queue.rs                      |   2 +
 backends/v3/src/radix.rs                      | 206 ++++++---
 benchmark/src/generation.rs                   |   1 +
 flake.lock                                    |   6 +-
 .../test_flash_llama_simple.json              |  12 +-
 ..._llama_completion_many_prompts_stream.json | 172 +++----
 .../test_flash_deepseek_v2.json               |  44 +-
 .../test_flash_deepseek_v2_load.json          | 152 +++---
 .../test_flash_llama_fp8_all_params.json      |  58 +--
 .../test_flash_starcoder2_default_params.json |  16 +-
 .../test_flash_idefics2_next_all_params.json  |   8 +-
 integration-tests/models/test_chat_llama.py   |   2 +-
 launcher/src/main.rs                          | 265 ++++++++---
 load_tests/common.js                          |  26 +-
 proto/v3/generate.proto                       |   2 +
 router/src/infer/mod.rs                       |   3 +-
 router/src/lib.rs                             |  21 +
 router/src/server.rs                          |   4 +
 router/src/validation.rs                      |  49 +-
 rust-toolchain.toml                           |   2 +-
 server/Makefile                               |   1 +
 server/Makefile-flashinfer                    |   2 +
 server/tests/conftest.py                      |   5 +-
 .../layers/attention/common.py                |  39 +-
 .../layers/attention/cuda.py                  |  28 +-
 .../text_generation_server/models/__init__.py |  15 +-
 .../custom_modeling/flash_cohere_modeling.py  |  23 +-
 .../custom_modeling/flash_dbrx_modeling.py    |  27 +-
 .../flash_deepseek_v2_modeling.py             |  24 +-
 .../custom_modeling/flash_gemma2_modeling.py  |  23 +-
 .../custom_modeling/flash_gemma_modeling.py   |  23 +-
 .../custom_modeling/flash_gpt2_modeling.py    |  23 +-
 .../custom_modeling/flash_gptj_modeling.py    |  25 +-
 .../custom_modeling/flash_llama_modeling.py   |  23 +-
 .../custom_modeling/flash_mistral_modeling.py |  25 +-
 .../custom_modeling/flash_mixtral_modeling.py |  25 +-
 .../custom_modeling/flash_neox_modeling.py    |  25 +-
 .../flash_pali_gemma_modeling.py              |   5 +-
 .../custom_modeling/flash_phi_modeling.py     |  23 +-
 .../custom_modeling/flash_qwen2_modeling.py   |  25 +-
 .../custom_modeling/flash_rw_modeling.py      |  33 +-
 .../flash_santacoder_modeling.py              |  23 +-
 .../flash_starcoder2_modeling.py              |  25 +-
 .../models/custom_modeling/idefics2.py        |   5 +-
 .../models/custom_modeling/llava_next.py      |   5 +-
 .../models/flash_causal_lm.py                 | 105 +++--
 .../text_generation_server/models/globals.py  |   9 +-
 .../models/vlm_causal_lm.py                   |  11 +-
 59 files changed, 1234 insertions(+), 934 deletions(-)
 create mode 100644 server/Makefile-flashinfer

diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
index f983b6ed..6faabe3b 100644
--- a/.github/workflows/tests.yaml
+++ b/.github/workflows/tests.yaml
@@ -35,7 +35,7 @@ jobs:
         with:
           # Released on: 02 May, 2024
           # https://releases.rs/docs/1.78.0/
-          toolchain: 1.79.0
+          toolchain: 1.80.0
           override: true
           components: rustfmt, clippy
       - name: Install Protoc
diff --git a/Cargo.lock b/Cargo.lock
index 02e91bc1..00c7f005 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -17,6 +17,12 @@ version = "1.0.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
 
+[[package]]
+name = "adler2"
+version = "2.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627"
+
 [[package]]
 name = "ahash"
 version = "0.8.11"
@@ -28,7 +34,7 @@ dependencies = [
  "once_cell",
  "serde",
  "version_check",
- "zerocopy 0.7.35",
+ "zerocopy",
 ]
 
 [[package]]
@@ -121,14 +127,14 @@ checksum = "0ae92a5119aa49cdbcf6b9f893fe4e1d98b04ccbf82ee0584ad948a44a734dea"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.72",
+ "syn 2.0.76",
 ]
 
 [[package]]
 name = "arrayvec"
-version = "0.7.4"
+version = "0.7.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711"
+checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
 
 [[package]]
 name = "async-rustls"
@@ -160,7 +166,7 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.72",
+ "syn 2.0.76",
 ]
 
 [[package]]
@@ -171,7 +177,7 @@ checksum = "6e0c28dcc82d7c8ead5cb13beb15405b57b8546e93215673ff8ca0349a028107"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.72",
+ "syn 2.0.76",
 ]
 
 [[package]]
@@ -257,9 +263,9 @@ dependencies = [
 
 [[package]]
 name = "aws-lc-sys"
-version = "0.20.0"
+version = "0.20.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2e89b6941c2d1a7045538884d6e760ccfffdf8e1ffc2613d8efa74305e1f3752"
+checksum = "0f0e249228c6ad2d240c2dc94b714d711629d52bad946075d8e9b2f5391f0703"
 dependencies = [
  "bindgen",
  "cc",
@@ -402,7 +408,7 @@ dependencies = [
  "cc",
  "cfg-if",
  "libc",
- "miniz_oxide",
+ "miniz_oxide 0.7.4",
  "object",
  "rustc-demangle",
 ]
@@ -444,7 +450,7 @@ dependencies = [
  "regex",
  "rustc-hash",
  "shlex",
- "syn 2.0.72",
+ "syn 2.0.76",
  "which",
 ]
 
@@ -483,9 +489,9 @@ checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de"
 
 [[package]]
 name = "bitstream-io"
-version = "2.5.0"
+version = "2.5.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3dcde5f311c85b8ca30c2e4198d4326bc342c76541590106f5fa4a50946ea499"
+checksum = "b81e1519b0d82120d2fd469d5bfb2919a9361c48b02d82d04befc1cdd2002452"
 
 [[package]]
 name = "block-buffer"
@@ -516,9 +522,9 @@ checksum = "5ce89b21cab1437276d2650d57e971f9d548a2d9037cc231abdc0562b97498ce"
 
 [[package]]
 name = "bytemuck"
-version = "1.16.1"
+version = "1.17.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b236fc92302c97ed75b38da1f4917b5cdda4984745740f153a5d3059e48d725e"
+checksum = "773d90827bc3feecfb67fab12e24de0749aad83c74b9504ecde46237b5cd24e2"
 
 [[package]]
 name = "byteorder"
@@ -534,15 +540,15 @@ checksum = "8f1fe948ff07f4bd06c30984e69f5b4899c516a3ef74f34df92a2df2ab535495"
 
 [[package]]
 name = "bytes"
-version = "1.6.1"
+version = "1.7.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a12916984aab3fa6e39d655a33e09c0071eb36d6ab3aea5c2d78551f1df6d952"
+checksum = "8318a53db07bb3f8dca91a600466bdb3f2eaadeedfdbcf02e1accbad9271ba50"
 
 [[package]]
 name = "camino"
-version = "1.1.7"
+version = "1.1.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e0ec6b951b160caa93cc0c7b209e5a3bff7aae9062213451ac99493cd844c239"
+checksum = "8b96ec4966b5813e2c0507c1f86115c8c5abaadc3980879c3424042a02fd1ad3"
 dependencies = [
  "serde",
 ]
@@ -584,12 +590,13 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
 
 [[package]]
 name = "cc"
-version = "1.1.7"
+version = "1.1.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "26a5c3fd7bfa1ce3897a3a3501d362b2d87b7f2583ebcb4a949ec25911025cbc"
+checksum = "57b6a275aa2903740dc87da01c62040406b8812552e97129a63ea8850a17c6e6"
 dependencies = [
  "jobserver",
  "libc",
+ "shlex",
 ]
 
 [[package]]
@@ -623,6 +630,12 @@ version = "0.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "fd16c4719339c4530435d38e511904438d07cce7950afa3718a84ac36c10e89e"
 
+[[package]]
+name = "cfg_aliases"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724"
+
 [[package]]
 name = "clang-sys"
 version = "1.8.1"
@@ -647,9 +660,9 @@ dependencies = [
 
 [[package]]
 name = "clap"
-version = "4.5.11"
+version = "4.5.16"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "35723e6a11662c2afb578bcf0b88bf6ea8e21282a953428f240574fcc3a2b5b3"
+checksum = "ed6719fffa43d0d87e5fd8caeab59be1554fb028cd30edc88fc4369b17971019"
 dependencies = [
  "clap_builder",
  "clap_derive",
@@ -657,9 +670,9 @@ dependencies = [
 
 [[package]]
 name = "clap_builder"
-version = "4.5.11"
+version = "4.5.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "49eb96cbfa7cfa35017b7cd548c75b14c3118c98b423041d70562665e07fb0fa"
+checksum = "216aec2b177652e3846684cbfe25c9964d18ec45234f0f5da5157b207ed1aab6"
 dependencies = [
  "anstream",
  "anstyle",
@@ -669,14 +682,14 @@ dependencies = [
 
 [[package]]
 name = "clap_derive"
-version = "4.5.11"
+version = "4.5.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5d029b67f89d30bbb547c89fd5161293c0aec155fc691d7924b64550662db93e"
+checksum = "501d359d5f3dcaf6ecdeee48833ae73ec6e42723a1e52419c79abf9507eec0a0"
 dependencies = [
  "heck 0.5.0",
  "proc-macro2",
  "quote",
- "syn 2.0.72",
+ "syn 2.0.76",
 ]
 
 [[package]]
@@ -687,9 +700,9 @@ checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97"
 
 [[package]]
 name = "cmake"
-version = "0.1.50"
+version = "0.1.51"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a31c789563b815f77f4250caee12365734369f942439b7defd71e18a48197130"
+checksum = "fb1e43aa7fd152b1f968787f7dbcdeb306d1867ff373c69955211876c053f91a"
 dependencies = [
  "cc",
 ]
@@ -741,15 +754,15 @@ dependencies = [
 
 [[package]]
 name = "core-foundation-sys"
-version = "0.8.6"
+version = "0.8.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f"
+checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
 
 [[package]]
 name = "cpufeatures"
-version = "0.2.12"
+version = "0.2.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "53fe5e26ff1b7aef8bca9c6080520cfb8d9333c7568e1829cef191a9723e5504"
+checksum = "51e852e6dc9a5bed1fae92dd2375037bf2b768725bf3be87811edee3249d09ad"
 dependencies = [
  "libc",
 ]
@@ -897,19 +910,19 @@ dependencies = [
 
 [[package]]
 name = "ctrlc"
-version = "3.4.4"
+version = "3.4.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "672465ae37dc1bc6380a6547a8883d5dd397b0f1faaad4f265726cc7042a5345"
+checksum = "90eeab0aa92f3f9b4e87f258c72b139c207d251f9cbc1080a0086b86a8870dd3"
 dependencies = [
- "nix",
- "windows-sys 0.52.0",
+ "nix 0.29.0",
+ "windows-sys 0.59.0",
 ]
 
 [[package]]
 name = "cxx"
-version = "1.0.124"
+version = "1.0.126"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "273dcfd3acd4e1e276af13ed2a43eea7001318823e7a726a6b3ed39b4acc0b82"
+checksum = "3c4eae4b7fc8dcb0032eb3b1beee46b38d371cdeaf2d0c64b9944f6f69ad7755"
 dependencies = [
  "cc",
  "cxxbridge-flags",
@@ -919,9 +932,9 @@ dependencies = [
 
 [[package]]
 name = "cxx-build"
-version = "1.0.124"
+version = "1.0.126"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d8b2766fbd92be34e9ed143898fce6c572dc009de39506ed6903e5a05b68914e"
+checksum = "6c822bf7fb755d97328d6c337120b6f843678178751cba33c9da25cf522272e0"
 dependencies = [
  "cc",
  "codespan-reporting",
@@ -929,24 +942,24 @@ dependencies = [
  "proc-macro2",
  "quote",
  "scratch",
- "syn 2.0.72",
+ "syn 2.0.76",
 ]
 
 [[package]]
 name = "cxxbridge-flags"
-version = "1.0.124"
+version = "1.0.126"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "839fcd5e43464614ffaa989eaf1c139ef1f0c51672a1ed08023307fa1b909ccd"
+checksum = "719d6197dc016c88744aff3c0d0340a01ecce12e8939fc282e7c8f583ee64bc6"
 
 [[package]]
 name = "cxxbridge-macro"
-version = "1.0.124"
+version = "1.0.126"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4b2c1c1776b986979be68bb2285da855f8d8a35851a769fca8740df7c3d07877"
+checksum = "35de3b547387863c8f82013c4f79f1c2162edee956383e4089e1d04c18c4f16c"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.72",
+ "syn 2.0.76",
 ]
 
 [[package]]
@@ -970,7 +983,7 @@ dependencies = [
  "proc-macro2",
  "quote",
  "strsim",
- "syn 2.0.72",
+ "syn 2.0.76",
 ]
 
 [[package]]
@@ -981,7 +994,7 @@ checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806"
 dependencies = [
  "darling_core",
  "quote",
- "syn 2.0.72",
+ "syn 2.0.76",
 ]
 
 [[package]]
@@ -1011,7 +1024,7 @@ dependencies = [
  "darling",
  "proc-macro2",
  "quote",
- "syn 2.0.72",
+ "syn 2.0.76",
 ]
 
 [[package]]
@@ -1021,7 +1034,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "206868b8242f27cecce124c19fd88157fbd0dd334df2587f36417bafbc85097b"
 dependencies = [
  "derive_builder_core",
- "syn 2.0.72",
+ "syn 2.0.76",
 ]
 
 [[package]]
@@ -1057,9 +1070,9 @@ dependencies = [
 
 [[package]]
 name = "dunce"
-version = "1.0.4"
+version = "1.0.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "56ce8c6da7551ec6c462cbaf3bfbc75131ebbfa1c944aeaa9dab51ca1c5f0c3b"
+checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813"
 
 [[package]]
 name = "easy-cast"
@@ -1126,7 +1139,7 @@ dependencies = [
  "flume",
  "half 2.4.1",
  "lebe",
- "miniz_oxide",
+ "miniz_oxide 0.7.4",
  "rayon-core",
  "smallvec",
  "zune-inflate",
@@ -1144,9 +1157,9 @@ dependencies = [
 
 [[package]]
 name = "fastrand"
-version = "2.1.0"
+version = "2.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a"
+checksum = "e8c02a5121d4ea3eb16a80748c74f5549a5665e4c21333c6098f283870fbdea6"
 
 [[package]]
 name = "fdeflate"
@@ -1165,12 +1178,12 @@ checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80"
 
 [[package]]
 name = "flate2"
-version = "1.0.30"
+version = "1.0.33"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5f54427cfd1c7829e2a139fcefea601bf088ebca651d2bf53ebc600eac295dae"
+checksum = "324a1be68054ef05ad64b861cc9eaf1d623d2d8cb25b4bf2cb9cdd902b4bf253"
 dependencies = [
  "crc32fast",
- "miniz_oxide",
+ "miniz_oxide 0.8.0",
 ]
 
 [[package]]
@@ -1296,7 +1309,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.72",
+ "syn 2.0.76",
 ]
 
 [[package]]
@@ -1405,7 +1418,7 @@ dependencies = [
  "futures-sink",
  "futures-util",
  "http 0.2.12",
- "indexmap 2.2.6",
+ "indexmap 2.4.0",
  "slab",
  "tokio",
  "tokio-util",
@@ -1414,9 +1427,9 @@ dependencies = [
 
 [[package]]
 name = "h2"
-version = "0.4.5"
+version = "0.4.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fa82e28a107a8cc405f0839610bdc9b15f1e25ec7d696aa5cf173edbcb1486ab"
+checksum = "524e8ac6999421f49a846c2d4411f337e53497d8ec55d67753beffa43c5d9205"
 dependencies = [
  "atomic-waker",
  "bytes",
@@ -1424,7 +1437,7 @@ dependencies = [
  "futures-core",
  "futures-sink",
  "http 1.1.0",
- "indexmap 2.2.6",
+ "indexmap 2.4.0",
  "slab",
  "tokio",
  "tokio-util",
@@ -1631,7 +1644,7 @@ dependencies = [
  "bytes",
  "futures-channel",
  "futures-util",
- "h2 0.4.5",
+ "h2 0.4.6",
  "http 1.1.0",
  "http-body 1.0.1",
  "httparse",
@@ -1689,9 +1702,9 @@ dependencies = [
 
 [[package]]
 name = "hyper-util"
-version = "0.1.6"
+version = "0.1.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3ab92f4f49ee4fb4f997c784b7a2e0fa70050211e0b6a287f898c3c9785ca956"
+checksum = "cde7055719c54e36e95e8719f95883f22072a48ede39db7fc17a4e1d5281e9b9"
 dependencies = [
  "bytes",
  "futures-channel",
@@ -1774,9 +1787,9 @@ dependencies = [
 
 [[package]]
 name = "indexmap"
-version = "2.2.6"
+version = "2.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26"
+checksum = "93ead53efc7ea8ed3cfb0c79fc8023fbb782a5432b52830b6518941cebe6505c"
 dependencies = [
  "equivalent",
  "hashbrown 0.14.5",
@@ -1832,7 +1845,7 @@ checksum = "c34819042dc3d3971c46c2190835914dfbe0c3c13f61449b2997f4e9722dfa60"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.72",
+ "syn 2.0.76",
 ]
 
 [[package]]
@@ -1915,9 +1928,9 @@ checksum = "f5d4a7da358eff58addd2877a45865158f0d78c911d43a5784ceb7bbf52833b0"
 
 [[package]]
 name = "js-sys"
-version = "0.3.69"
+version = "0.3.70"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d"
+checksum = "1868808506b929d7b0cfa8f75951347aa71bb21144b7791bae35d9bccfcfe37a"
 dependencies = [
  "wasm-bindgen",
 ]
@@ -1932,7 +1945,7 @@ dependencies = [
  "anyhow",
  "base64 0.21.7",
  "bytecount",
- "clap 4.5.11",
+ "clap 4.5.16",
  "fancy-regex",
  "fraction",
  "getrandom",
@@ -1972,9 +1985,9 @@ checksum = "03087c2bad5e1034e8cace5926dec053fb3790248370865f5117a7d0213354c8"
 
 [[package]]
 name = "libc"
-version = "0.2.155"
+version = "0.2.158"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c"
+checksum = "d8adc4bb1803a324070e64a98ae98f38934d91957a99cfb3a43dcbc01bc56439"
 
 [[package]]
 name = "libfuzzer-sys"
@@ -2126,7 +2139,7 @@ dependencies = [
  "hyper 1.4.1",
  "hyper-rustls",
  "hyper-util",
- "indexmap 2.2.6",
+ "indexmap 2.4.0",
  "ipnet",
  "metrics",
  "metrics-util",
@@ -2179,9 +2192,9 @@ dependencies = [
 
 [[package]]
 name = "minijinja-contrib"
-version = "2.1.0"
+version = "2.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6853ef2340c668281c5ea86b04da2ebb2fc9e98a7185a887591de4cac945d5b5"
+checksum = "744a2b84dbd22398e347594ed2aef9d3f1b948934e3e6e94ef69ecd39d597f4b"
 dependencies = [
  "minijinja",
  "serde",
@@ -2203,6 +2216,15 @@ dependencies = [
  "simd-adler32",
 ]
 
+[[package]]
+name = "miniz_oxide"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1"
+dependencies = [
+ "adler2",
+]
+
 [[package]]
 name = "mio"
 version = "0.8.11"
@@ -2217,9 +2239,9 @@ dependencies = [
 
 [[package]]
 name = "mio"
-version = "1.0.1"
+version = "1.0.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4569e456d394deccd22ce1c1913e6ea0e54519f577285001215d33557431afe4"
+checksum = "80e04d1dcff3aae0704555fe5fee3bcfaf3d1fdf8a7e521d5b9d2b42acb52cec"
 dependencies = [
  "hermit-abi 0.3.9",
  "libc",
@@ -2251,7 +2273,7 @@ checksum = "a7ce64b975ed4f123575d11afd9491f2e37bbd5813fbfbc0f09ae1fbddea74e0"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.72",
+ "syn 2.0.76",
 ]
 
 [[package]]
@@ -2341,7 +2363,19 @@ checksum = "ab2156c4fce2f8df6c499cc1c763e4394b7482525bf2a9701c9d79d215f519e4"
 dependencies = [
  "bitflags 2.6.0",
  "cfg-if",
- "cfg_aliases",
+ "cfg_aliases 0.1.1",
+ "libc",
+]
+
+[[package]]
+name = "nix"
+version = "0.29.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "71e2746dc3a24dd78b3cfcb7be93368c6de9963d30f43a6a73998a9cf4b17b46"
+dependencies = [
+ "bitflags 2.6.0",
+ "cfg-if",
+ "cfg_aliases 0.2.1",
  "libc",
 ]
 
@@ -2439,7 +2473,7 @@ checksum = "ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.72",
+ "syn 2.0.76",
 ]
 
 [[package]]
@@ -2510,9 +2544,9 @@ checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3"
 
 [[package]]
 name = "object"
-version = "0.36.2"
+version = "0.36.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3f203fa8daa7bb185f760ae12bd8e097f63d17041dcdcaf675ac54cdf863170e"
+checksum = "27b64972346851a39438c60b341ebc01bba47464ae329e55cf343eb93964efd9"
 dependencies = [
  "memchr",
 ]
@@ -2574,7 +2608,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.72",
+ "syn 2.0.76",
 ]
 
 [[package]]
@@ -2613,7 +2647,7 @@ checksum = "1e32339a5dc40459130b3bd269e9892439f55b33e772d2a9d402a789baaf4e8a"
 dependencies = [
  "futures-core",
  "futures-sink",
- "indexmap 2.2.6",
+ "indexmap 2.4.0",
  "js-sys",
  "once_cell",
  "pin-project-lite",
@@ -2837,7 +2871,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db"
 dependencies = [
  "fixedbitset",
- "indexmap 2.2.6",
+ "indexmap 2.4.0",
 ]
 
 [[package]]
@@ -2857,7 +2891,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.72",
+ "syn 2.0.76",
 ]
 
 [[package]]
@@ -2916,7 +2950,7 @@ dependencies = [
  "crc32fast",
  "fdeflate",
  "flate2",
- "miniz_oxide",
+ "miniz_oxide 0.7.4",
 ]
 
 [[package]]
@@ -2933,21 +2967,21 @@ checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391"
 
 [[package]]
 name = "ppv-lite86"
-version = "0.2.18"
+version = "0.2.20"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dee4364d9f3b902ef14fab8a1ddffb783a1cb6b4bba3bfc1fa3922732c7de97f"
+checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04"
 dependencies = [
- "zerocopy 0.6.6",
+ "zerocopy",
 ]
 
 [[package]]
 name = "prettyplease"
-version = "0.2.20"
+version = "0.2.22"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5f12335488a2f3b0a83b14edad48dca9879ce89b2edd10e80237e4e852dd645e"
+checksum = "479cf940fbbb3426c32c5d5176f62ad57549a0bb84773423ba8be9d089f5faba"
 dependencies = [
  "proc-macro2",
- "syn 2.0.72",
+ "syn 2.0.76",
 ]
 
 [[package]]
@@ -2999,7 +3033,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8021cf59c8ec9c432cfc2526ac6b8aa508ecaf29cd415f271b8406c1b851c3fd"
 dependencies = [
  "quote",
- "syn 2.0.72",
+ "syn 2.0.76",
 ]
 
 [[package]]
@@ -3039,7 +3073,7 @@ dependencies = [
  "prost 0.12.6",
  "prost-types",
  "regex",
- "syn 2.0.72",
+ "syn 2.0.76",
  "tempfile",
 ]
 
@@ -3066,7 +3100,7 @@ dependencies = [
  "itertools 0.12.1",
  "proc-macro2",
  "quote",
- "syn 2.0.72",
+ "syn 2.0.76",
 ]
 
 [[package]]
@@ -3110,9 +3144,9 @@ checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3"
 
 [[package]]
 name = "quote"
-version = "1.0.36"
+version = "1.0.37"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7"
+checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af"
 dependencies = [
  "proc-macro2",
 ]
@@ -3201,9 +3235,9 @@ dependencies = [
 
 [[package]]
 name = "ravif"
-version = "0.11.9"
+version = "0.11.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5797d09f9bd33604689e87e8380df4951d4912f01b63f71205e2abd4ae25e6b6"
+checksum = "a8f0bfd976333248de2078d350bfdf182ff96e168a24d23d2436cef320dd4bdd"
 dependencies = [
  "avif-serialize",
  "imgref",
@@ -3264,9 +3298,9 @@ dependencies = [
 
 [[package]]
 name = "redox_users"
-version = "0.4.5"
+version = "0.4.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bd283d9651eeda4b2a83a43c1c91b266c40fd76ecd39a50a8c630ae69dc72891"
+checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43"
 dependencies = [
  "getrandom",
  "libredox",
@@ -3275,9 +3309,9 @@ dependencies = [
 
 [[package]]
 name = "regex"
-version = "1.10.5"
+version = "1.10.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f"
+checksum = "4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619"
 dependencies = [
  "aho-corasick",
  "memchr",
@@ -3359,9 +3393,9 @@ dependencies = [
 
 [[package]]
 name = "rgb"
-version = "0.8.45"
+version = "0.8.48"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ade4539f42266ded9e755c605bdddf546242b2c961b03b06a7375260788a0523"
+checksum = "0f86ae463694029097b846d8f99fd5536740602ae00022c0c50c5600720b2f71"
 dependencies = [
  "bytemuck",
 ]
@@ -3416,7 +3450,7 @@ dependencies = [
  "proc-macro2",
  "quote",
  "rust-embed-utils",
- "syn 2.0.72",
+ "syn 2.0.76",
  "walkdir",
 ]
 
@@ -3507,12 +3541,12 @@ dependencies = [
 
 [[package]]
 name = "rustls-native-certs"
-version = "0.7.1"
+version = "0.7.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a88d6d420651b496bdd98684116959239430022a115c1240e6c3993be0b15fba"
+checksum = "04182dffc9091a404e0fc069ea5cd60e5b866c3adf881eff99a32d048242dffa"
 dependencies = [
  "openssl-probe",
- "rustls-pemfile 2.1.2",
+ "rustls-pemfile 2.1.3",
  "rustls-pki-types",
  "schannel",
  "security-framework",
@@ -3529,9 +3563,9 @@ dependencies = [
 
 [[package]]
 name = "rustls-pemfile"
-version = "2.1.2"
+version = "2.1.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "29993a25686778eb88d4189742cd713c9bce943bc54251a33509dc63cbacf73d"
+checksum = "196fe16b00e106300d3e45ecfcb764fa292a535d7326a29a5875c579c7417425"
 dependencies = [
  "base64 0.22.1",
  "rustls-pki-types",
@@ -3539,15 +3573,15 @@ dependencies = [
 
 [[package]]
 name = "rustls-pki-types"
-version = "1.7.0"
+version = "1.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "976295e77ce332211c0d24d92c0e83e50f5c5f046d11082cea19f3df13a3562d"
+checksum = "fc0a2ce646f8655401bb81e7927b812614bd5d91dbc968696be50603510fcaf0"
 
 [[package]]
 name = "rustls-webpki"
-version = "0.102.6"
+version = "0.102.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8e6b52d4fda176fd835fdc55a835d4a89b8499cad995885a21149d5ad62f852e"
+checksum = "84678086bd54edf2b415183ed7a94d0efb049f1b646a33e22a36f3794be6ae56"
 dependencies = [
  "aws-lc-rs",
  "ring 0.17.8",
@@ -3641,9 +3675,9 @@ dependencies = [
 
 [[package]]
 name = "serde"
-version = "1.0.204"
+version = "1.0.209"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bc76f558e0cbb2a839d37354c575f1dc3fdc6546b5be373ba43d95f231bf7c12"
+checksum = "99fce0ffe7310761ca6bf9faf5115afbc19688edd00171d81b1bb1b116c63e09"
 dependencies = [
  "serde_derive",
 ]
@@ -3660,20 +3694,20 @@ dependencies = [
 
 [[package]]
 name = "serde_derive"
-version = "1.0.204"
+version = "1.0.209"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222"
+checksum = "a5831b979fd7b5439637af1752d535ff49f4860c0f341d1baeb6faf0f4242170"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.72",
+ "syn 2.0.76",
 ]
 
 [[package]]
 name = "serde_json"
-version = "1.0.121"
+version = "1.0.127"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4ab380d7d9f22ef3f21ad3e6c1ebe8e4fc7a2000ccba2e4d71fc96f15b2cb609"
+checksum = "8043c06d9f82bd7271361ed64f415fe5e12a77fdb52e573e7f06a516dea329ad"
 dependencies = [
  "itoa",
  "memchr",
@@ -3875,7 +3909,7 @@ dependencies = [
  "proc-macro2",
  "quote",
  "rustversion",
- "syn 2.0.72",
+ "syn 2.0.76",
 ]
 
 [[package]]
@@ -3897,9 +3931,9 @@ dependencies = [
 
 [[package]]
 name = "syn"
-version = "2.0.72"
+version = "2.0.76"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dc4b9b9bf2add8093d3f2c0204471e951b2285580335de42f9d2534f3ae7a8af"
+checksum = "578e081a14e0cefc3279b0472138c513f37b41a08d5a3cca9b6e4e8ceb6cd525"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -3993,20 +4027,21 @@ dependencies = [
 
 [[package]]
 name = "target-lexicon"
-version = "0.12.15"
+version = "0.12.16"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4873307b7c257eddcb50c9bedf158eb669578359fb28428bef438fec8e6ba7c2"
+checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1"
 
 [[package]]
 name = "tempfile"
-version = "3.10.1"
+version = "3.12.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1"
+checksum = "04cbcdd0c794ebb0d4cf35e88edd2f7d2c4c3e9a5a6dab322839b321c6a87a64"
 dependencies = [
  "cfg-if",
  "fastrand",
+ "once_cell",
  "rustix",
- "windows-sys 0.52.0",
+ "windows-sys 0.59.0",
 ]
 
 [[package]]
@@ -4024,7 +4059,7 @@ version = "2.2.1-dev0"
 dependencies = [
  "async-stream",
  "async-trait",
- "clap 4.5.11",
+ "clap 4.5.16",
  "cmake",
  "cxx",
  "cxx-build",
@@ -4046,7 +4081,7 @@ name = "text-generation-benchmark"
 version = "2.2.1-dev0"
 dependencies = [
  "average",
- "clap 4.5.11",
+ "clap 4.5.16",
  "crossterm",
  "float-ord",
  "hf-hub",
@@ -4084,11 +4119,11 @@ dependencies = [
 name = "text-generation-launcher"
 version = "2.2.1-dev0"
 dependencies = [
- "clap 4.5.11",
+ "clap 4.5.16",
  "ctrlc",
  "float_eq",
  "hf-hub",
- "nix",
+ "nix 0.28.0",
  "once_cell",
  "reqwest",
  "serde",
@@ -4108,7 +4143,7 @@ dependencies = [
  "axum 0.7.5",
  "axum-tracing-opentelemetry",
  "base64 0.22.1",
- "clap 4.5.11",
+ "clap 4.5.16",
  "csv",
  "futures",
  "futures-util",
@@ -4156,7 +4191,7 @@ dependencies = [
  "axum 0.7.5",
  "axum-tracing-opentelemetry",
  "base64 0.22.1",
- "clap 4.5.11",
+ "clap 4.5.16",
  "criterion",
  "futures",
  "futures-util",
@@ -4224,7 +4259,7 @@ checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.72",
+ "syn 2.0.76",
 ]
 
 [[package]]
@@ -4341,14 +4376,14 @@ dependencies = [
 
 [[package]]
 name = "tokio"
-version = "1.39.2"
+version = "1.39.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "daa4fb1bc778bd6f04cbfc4bb2d06a7396a8f299dc33ea1900cedaa316f467b1"
+checksum = "9babc99b9923bfa4804bd74722ff02c0381021eafa4db9949217e3be8e84fff5"
 dependencies = [
  "backtrace",
  "bytes",
  "libc",
- "mio 1.0.1",
+ "mio 1.0.2",
  "parking_lot",
  "pin-project-lite",
  "signal-hook-registry",
@@ -4375,7 +4410,7 @@ checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.72",
+ "syn 2.0.76",
 ]
 
 [[package]]
@@ -4437,9 +4472,9 @@ dependencies = [
 
 [[package]]
 name = "toml"
-version = "0.8.16"
+version = "0.8.19"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "81967dd0dd2c1ab0bc3468bd7caecc32b8a4aa47d0c8c695d8c2b2108168d62c"
+checksum = "a1ed1f98e3fdc28d6d910e6737ae6ab1a93bf1985935a1193e68f93eeb68d24e"
 dependencies = [
  "serde",
  "serde_spanned",
@@ -4449,20 +4484,20 @@ dependencies = [
 
 [[package]]
 name = "toml_datetime"
-version = "0.6.7"
+version = "0.6.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f8fb9f64314842840f1d940ac544da178732128f1c78c21772e876579e0da1db"
+checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41"
 dependencies = [
  "serde",
 ]
 
 [[package]]
 name = "toml_edit"
-version = "0.22.17"
+version = "0.22.20"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8d9f8729f5aea9562aac1cc0441f5d6de3cff1ee0c5d67293eeca5eb36ee7c16"
+checksum = "583c44c02ad26b0c3f3066fe629275e50627026c51ac2e595cca4c230ce1ce1d"
 dependencies = [
- "indexmap 2.2.6",
+ "indexmap 2.4.0",
  "serde",
  "serde_spanned",
  "toml_datetime",
@@ -4534,7 +4569,7 @@ dependencies = [
  "proc-macro2",
  "prost-build",
  "quote",
- "syn 2.0.72",
+ "syn 2.0.76",
 ]
 
 [[package]]
@@ -4575,15 +4610,15 @@ dependencies = [
 
 [[package]]
 name = "tower-layer"
-version = "0.3.2"
+version = "0.3.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c20c8dbed6283a09604c3e69b4b7eeb54e298b8a600d4d5ecb5ad39de609f1d0"
+checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e"
 
 [[package]]
 name = "tower-service"
-version = "0.3.2"
+version = "0.3.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52"
+checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3"
 
 [[package]]
 name = "tracing"
@@ -4605,7 +4640,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.72",
+ "syn 2.0.76",
 ]
 
 [[package]]
@@ -4865,7 +4900,7 @@ version = "4.2.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c5afb1a60e207dca502682537fefcfd9921e71d0b83e9576060f09abc6efab23"
 dependencies = [
- "indexmap 2.2.6",
+ "indexmap 2.4.0",
  "serde",
  "serde_json",
  "utoipa-gen",
@@ -4881,7 +4916,7 @@ dependencies = [
  "proc-macro2",
  "quote",
  "regex",
- "syn 2.0.72",
+ "syn 2.0.76",
 ]
 
 [[package]]
@@ -4919,7 +4954,7 @@ checksum = "ee1cd046f83ea2c4e920d6ee9f7c3537ef928d75dce5d84a87c2c5d6b3999a3a"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.72",
+ "syn 2.0.76",
 ]
 
 [[package]]
@@ -5000,34 +5035,35 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
 
 [[package]]
 name = "wasm-bindgen"
-version = "0.2.92"
+version = "0.2.93"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8"
+checksum = "a82edfc16a6c469f5f44dc7b571814045d60404b55a0ee849f9bcfa2e63dd9b5"
 dependencies = [
  "cfg-if",
+ "once_cell",
  "wasm-bindgen-macro",
 ]
 
 [[package]]
 name = "wasm-bindgen-backend"
-version = "0.2.92"
+version = "0.2.93"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da"
+checksum = "9de396da306523044d3302746f1208fa71d7532227f15e347e2d93e4145dd77b"
 dependencies = [
  "bumpalo",
  "log",
  "once_cell",
  "proc-macro2",
  "quote",
- "syn 2.0.72",
+ "syn 2.0.76",
  "wasm-bindgen-shared",
 ]
 
 [[package]]
 name = "wasm-bindgen-futures"
-version = "0.4.42"
+version = "0.4.43"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "76bc14366121efc8dbb487ab05bcc9d346b3b5ec0eaa76e46594cabbe51762c0"
+checksum = "61e9300f63a621e96ed275155c108eb6f843b6a26d053f122ab69724559dc8ed"
 dependencies = [
  "cfg-if",
  "js-sys",
@@ -5037,9 +5073,9 @@ dependencies = [
 
 [[package]]
 name = "wasm-bindgen-macro"
-version = "0.2.92"
+version = "0.2.93"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726"
+checksum = "585c4c91a46b072c92e908d99cb1dcdf95c5218eeb6f3bf1efa991ee7a68cccf"
 dependencies = [
  "quote",
  "wasm-bindgen-macro-support",
@@ -5047,28 +5083,28 @@ dependencies = [
 
 [[package]]
 name = "wasm-bindgen-macro-support"
-version = "0.2.92"
+version = "0.2.93"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7"
+checksum = "afc340c74d9005395cf9dd098506f7f44e38f2b4a21c6aaacf9a105ea5e1e836"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.72",
+ "syn 2.0.76",
  "wasm-bindgen-backend",
  "wasm-bindgen-shared",
 ]
 
 [[package]]
 name = "wasm-bindgen-shared"
-version = "0.2.92"
+version = "0.2.93"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96"
+checksum = "c62a0a307cb4a311d3a07867860911ca130c3494e8c2719593806c08bc5d0484"
 
 [[package]]
 name = "web-sys"
-version = "0.3.69"
+version = "0.3.70"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "77afa9a11836342370f4817622a2f0f418b134426d91a82dfb48f532d2ec13ef"
+checksum = "26fdeaafd9bd129f65e7c031593c24d62186301e0c72c8978fa1678be7d532c0"
 dependencies = [
  "js-sys",
  "wasm-bindgen",
@@ -5149,11 +5185,11 @@ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
 
 [[package]]
 name = "winapi-util"
-version = "0.1.8"
+version = "0.1.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b"
+checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
 dependencies = [
- "windows-sys 0.52.0",
+ "windows-sys 0.59.0",
 ]
 
 [[package]]
@@ -5208,6 +5244,15 @@ dependencies = [
  "windows-targets 0.52.6",
 ]
 
+[[package]]
+name = "windows-sys"
+version = "0.59.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
+dependencies = [
+ "windows-targets 0.52.6",
+]
+
 [[package]]
 name = "windows-targets"
 version = "0.42.2"
@@ -5388,9 +5433,9 @@ checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
 
 [[package]]
 name = "winnow"
-version = "0.6.16"
+version = "0.6.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b480ae9340fc261e6be3e95a1ba86d54ae3f9171132a73ce8d4bbaf68339507c"
+checksum = "68a9bda4691f099d435ad181000724da8e5899daa10713c2d432552b9ccd3a6f"
 dependencies = [
  "memchr",
 ]
@@ -5405,34 +5450,14 @@ dependencies = [
  "windows-sys 0.48.0",
 ]
 
-[[package]]
-name = "zerocopy"
-version = "0.6.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "854e949ac82d619ee9a14c66a1b674ac730422372ccb759ce0c39cabcf2bf8e6"
-dependencies = [
- "byteorder",
- "zerocopy-derive 0.6.6",
-]
-
 [[package]]
 name = "zerocopy"
 version = "0.7.35"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0"
 dependencies = [
- "zerocopy-derive 0.7.35",
-]
-
-[[package]]
-name = "zerocopy-derive"
-version = "0.6.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "125139de3f6b9d625c39e2efdd73d41bdac468ccd556556440e322be0e1bbd91"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn 2.0.72",
+ "byteorder",
+ "zerocopy-derive",
 ]
 
 [[package]]
@@ -5443,7 +5468,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.72",
+ "syn 2.0.76",
 ]
 
 [[package]]
@@ -5463,7 +5488,7 @@ checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.72",
+ "syn 2.0.76",
 ]
 
 [[package]]
diff --git a/Dockerfile b/Dockerfile
index 4c64a643..0d0e89b1 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,5 +1,5 @@
 # Rust builder
-FROM lukemathwalker/cargo-chef:latest-rust-1.79 AS chef
+FROM lukemathwalker/cargo-chef:latest-rust-1.80 AS chef
 WORKDIR /usr/src
 
 ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
@@ -184,6 +184,12 @@ WORKDIR /usr/src
 COPY server/Makefile-selective-scan Makefile
 RUN make build-all
 
+# Build flashinfer
+FROM kernel-builder AS flashinfer-builder
+WORKDIR /usr/src
+COPY server/Makefile-flashinfer Makefile
+RUN make install-flashinfer
+
 # Text Generation Inference base image
 FROM nvidia/cuda:12.1.0-base-ubuntu22.04 AS base
 
@@ -236,6 +242,7 @@ COPY --from=vllm-builder /usr/src/vllm/build/lib.linux-x86_64-cpython-310 /opt/c
 # Copy build artifacts from mamba builder
 COPY --from=mamba-builder /usr/src/mamba/build/lib.linux-x86_64-cpython-310/ /opt/conda/lib/python3.10/site-packages
 COPY --from=mamba-builder /usr/src/causal-conv1d/build/lib.linux-x86_64-cpython-310/ /opt/conda/lib/python3.10/site-packages
+COPY --from=flashinfer-builder /opt/conda/lib/python3.10/site-packages/flashinfer/ /opt/conda/lib/python3.10/site-packages/flashinfer/
 
 # Install flash-attention dependencies
 RUN pip install einops --no-cache-dir
diff --git a/Dockerfile_amd b/Dockerfile_amd
index cdad0d28..8cb699dd 100644
--- a/Dockerfile_amd
+++ b/Dockerfile_amd
@@ -1,5 +1,5 @@
 # Rust builder
-FROM lukemathwalker/cargo-chef:latest-rust-1.79 AS chef
+FROM lukemathwalker/cargo-chef:latest-rust-1.80 AS chef
 WORKDIR /usr/src
 
 ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
diff --git a/Dockerfile_intel b/Dockerfile_intel
index 12480c70..9af6422c 100644
--- a/Dockerfile_intel
+++ b/Dockerfile_intel
@@ -1,6 +1,6 @@
 ARG PLATFORM=xpu
 
-FROM lukemathwalker/cargo-chef:latest-rust-1.79 AS chef
+FROM lukemathwalker/cargo-chef:latest-rust-1.80 AS chef
 WORKDIR /usr/src
 
 ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
diff --git a/backends/client/src/v3/client.rs b/backends/client/src/v3/client.rs
index b321278c..479d31bf 100644
--- a/backends/client/src/v3/client.rs
+++ b/backends/client/src/v3/client.rs
@@ -153,6 +153,8 @@ impl Client {
                 }),
                 // We truncate the input on the server side to be sure that it has the correct size
                 truncate,
+                // Most request will have that
+                add_special_tokens: true,
                 // Blocks and slots will be set on the server side if we use paged attention
                 blocks: vec![],
                 slots: vec![],
diff --git a/backends/client/src/v3/sharded_client.rs b/backends/client/src/v3/sharded_client.rs
index 1cc173e3..645c076a 100644
--- a/backends/client/src/v3/sharded_client.rs
+++ b/backends/client/src/v3/sharded_client.rs
@@ -221,6 +221,7 @@ impl Health for ShardedClient {
                 chunks: vec![Chunk::Text("liveness".into()).into()],
             }),
             truncate: 10,
+            add_special_tokens: true,
             prefill_logprobs: false,
             parameters: Some(NextTokenChooserParameters {
                 temperature: 1.0,
diff --git a/backends/v3/src/backend.rs b/backends/v3/src/backend.rs
index cbcbff72..05a26370 100644
--- a/backends/v3/src/backend.rs
+++ b/backends/v3/src/backend.rs
@@ -35,27 +35,15 @@ impl BackendV3 {
         window_size: Option<u32>,
         speculate: u32,
     ) -> Self {
-        let prefix_caching = if let Ok(prefix_caching) = std::env::var("USE_PREFIX_CACHING") {
-            matches!(prefix_caching.as_str(), "true" | "1")
-        } else {
-            false
-        };
-        let attention = if let Ok(attention) = std::env::var("ATTENTION") {
-            attention
-                .parse()
-                .unwrap_or_else(|_| panic!("Invalid attention was specified :`{attention}`"))
-        } else if prefix_caching {
-            Attention::FlashInfer
-        } else {
-            Attention::Paged
-        };
-        let block_size = if attention == Attention::FlashDecoding {
-            256
-        } else if attention == Attention::FlashInfer {
-            1
-        } else {
-            16
-        };
+        let prefix_caching =
+            std::env::var("USE_PREFIX_CACHING").expect("Expect prefix caching env var");
+        let prefix_caching = matches!(prefix_caching.as_str(), "true" | "1");
+        let attention: String = std::env::var("ATTENTION").expect("attention env var");
+
+        let attention: Attention = attention
+            .parse()
+            .unwrap_or_else(|_| panic!("Invalid attention was specified :`{attention}`"));
+        let block_size = attention.block_size();
 
         let queue = Queue::new(
             requires_padding,
diff --git a/backends/v3/src/block_allocator.rs b/backends/v3/src/block_allocator.rs
index c5503b8c..4fea172b 100644
--- a/backends/v3/src/block_allocator.rs
+++ b/backends/v3/src/block_allocator.rs
@@ -1,4 +1,4 @@
-use std::{cmp::min, sync::Arc};
+use std::sync::Arc;
 use tokio::sync::{mpsc, oneshot};
 
 use crate::radix::RadixAllocator;
@@ -137,7 +137,6 @@ pub trait Allocator {
 
     fn free(&mut self, blocks: Vec<u32>, allocation_id: u64);
 }
-
 pub struct SimpleAllocator {
     free_blocks: Vec<u32>,
     block_size: u32,
@@ -167,7 +166,7 @@ impl Allocator for SimpleAllocator {
                 None => (tokens, 1),
                 Some(window_size) => {
                     let repeats = (tokens + window_size - 1) / window_size;
-                    let tokens = min(tokens, window_size);
+                    let tokens = core::cmp::min(tokens, window_size);
                     (tokens, repeats as usize)
                 }
             };
diff --git a/backends/v3/src/client/grpc_client.rs b/backends/v3/src/client/grpc_client.rs
index 6282759e..648662db 100644
--- a/backends/v3/src/client/grpc_client.rs
+++ b/backends/v3/src/client/grpc_client.rs
@@ -149,6 +149,7 @@ impl Client {
             requests.push(Request {
                 id: 0,
                 inputs,
+                add_special_tokens: true,
                 input_chunks: Some(Input {
                     chunks: input_chunks,
                 }),
diff --git a/backends/v3/src/client/sharded_client.rs b/backends/v3/src/client/sharded_client.rs
index 2f78da03..ea77a696 100644
--- a/backends/v3/src/client/sharded_client.rs
+++ b/backends/v3/src/client/sharded_client.rs
@@ -222,6 +222,7 @@ impl Health for ShardedClient {
                 chunks: vec![Chunk::Text("liveness".into()).into()],
             }),
             truncate: 10,
+            add_special_tokens: true,
             prefill_logprobs: false,
             parameters: Some(NextTokenChooserParameters {
                 temperature: 1.0,
diff --git a/backends/v3/src/queue.rs b/backends/v3/src/queue.rs
index faa57c11..2a8c4c53 100644
--- a/backends/v3/src/queue.rs
+++ b/backends/v3/src/queue.rs
@@ -383,6 +383,7 @@ impl State {
                 }),
                 inputs: entry.request.inputs.chunks_to_string(),
                 truncate: entry.request.truncate,
+                add_special_tokens: entry.request.add_special_tokens,
                 parameters: Some(NextTokenChooserParameters::from(
                     entry.request.parameters.clone(),
                 )),
@@ -517,6 +518,7 @@ mod tests {
                 inputs: vec![],
                 input_ids: Some(Arc::new(vec![])),
                 input_length: 0,
+                add_special_tokens: true,
                 truncate: 0,
                 decoder_input_details: false,
                 parameters: ValidParameters {
diff --git a/backends/v3/src/radix.rs b/backends/v3/src/radix.rs
index 5bac1a31..b85be00b 100644
--- a/backends/v3/src/radix.rs
+++ b/backends/v3/src/radix.rs
@@ -1,12 +1,10 @@
+use crate::block_allocator::{Allocator, BlockAllocation};
+use slotmap::{DefaultKey, SlotMap};
 use std::{
     collections::{BTreeSet, HashMap},
     sync::Arc,
 };
 
-use slotmap::{DefaultKey, SlotMap};
-
-use crate::block_allocator::{Allocator, BlockAllocation};
-
 pub struct RadixAllocator {
     allocation_id: u64,
 
@@ -16,26 +14,26 @@ pub struct RadixAllocator {
 
     /// Blocks that are immediately available for allocation.
     free_blocks: Vec<u32>,
+
+    #[allow(dead_code)]
+    // This isn't used because the prefix need to match without the windowing
+    // mecanism. This at worst is overallocating, not necessarily being wrong.
+    window_size: Option<u32>,
+
+    block_size: u32,
 }
 
 impl RadixAllocator {
     pub fn new(block_size: u32, n_blocks: u32, window_size: Option<u32>) -> Self {
-        assert_eq!(
-            block_size, 1,
-            "Radix tree allocator only works with block_size=1, was: {}",
-            block_size
-        );
-        if window_size.is_some() {
-            unimplemented!("Window size not supported in the prefix-caching block allocator yet");
-        }
-
         RadixAllocator {
             allocation_id: 0,
             allocations: HashMap::new(),
-            cache_blocks: RadixTrie::new(),
+            cache_blocks: RadixTrie::new(block_size as usize),
 
             // Block 0 is reserved for health checks.
             free_blocks: (1..n_blocks).collect(),
+            window_size,
+            block_size,
         }
     }
 
@@ -63,6 +61,7 @@ impl RadixAllocator {
     }
 }
 
+// Allocator trait
 impl Allocator for RadixAllocator {
     fn allocate(
         &mut self,
@@ -86,10 +85,12 @@ impl Allocator for RadixAllocator {
             .incref(prefix_node)
             .expect("Failed to increment refcount");
 
-        let prefix_len = blocks.len();
+        let prefix_len = blocks.len() * self.block_size as usize;
         let suffix_len = tokens - prefix_len as u32;
 
-        match self.alloc_or_reclaim(suffix_len as usize) {
+        let suffix_blocks = (suffix_len + self.block_size - 1) / self.block_size;
+
+        match self.alloc_or_reclaim(suffix_blocks as usize) {
             Some(suffix_blocks) => blocks.extend(suffix_blocks),
             None => {
                 self.cache_blocks
@@ -100,7 +101,20 @@ impl Allocator for RadixAllocator {
         }
 
         // 1:1 mapping of blocks and slots.
-        let slots = blocks.clone();
+        let slots = if self.block_size == 1 {
+            blocks.clone()
+        } else {
+            let mut slots = Vec::with_capacity(blocks.len() * self.block_size as usize);
+            'slots: for block_id in &blocks {
+                for s in (block_id * self.block_size)..((block_id + 1) * self.block_size) {
+                    slots.push(s);
+                    if slots.len() as u32 == tokens {
+                        break 'slots;
+                    }
+                }
+            }
+            slots
+        };
 
         let allocation = RadixAllocation {
             prefix_node,
@@ -108,6 +122,8 @@ impl Allocator for RadixAllocator {
             prefill_tokens: prefill_tokens.clone(),
         };
 
+        tracing::debug!("Blocks {blocks:?}");
+
         self.allocation_id += 1;
         self.allocations.insert(self.allocation_id, allocation);
 
@@ -136,27 +152,38 @@ impl Allocator for RadixAllocator {
             // If there are prefill tokens that did not come from the cache,
             // add them to the cache.
             if prefill_tokens.len() > allocation.cached_prefix_len {
-                let prefix_len = self
-                    .cache_blocks
-                    .insert(prefill_tokens, &blocks[..prefill_tokens.len()])
-                    // Unwrap, failing is a programming error.
-                    .expect("Failed to store prefill tokens");
-
-                // We can have a prefill with the following structure:
-                //
-                // |---| From the prefix cache.
-                // A B C D E F G
-                //|--------| Found in the trie during insertion.
-                //
-                // This means that while processing this request there was a
-                // partially overlapping request that had A..=E in its
-                // prefill. In this case we need to free the blocks D E.
-                self.free_blocks
-                    .extend(&blocks[allocation.cached_prefix_len..prefix_len]);
+                let aligned =
+                    (prefill_tokens.len() / self.block_size as usize) * self.block_size as usize;
+                if aligned > 0 {
+                    let prefix_len = self
+                        .cache_blocks
+                        .insert(
+                            &prefill_tokens[..aligned],
+                            &blocks[..aligned / self.block_size as usize],
+                        )
+                        // Unwrap, failing is a programming error.
+                        .expect("Failed to store prefill tokens");
+                    // We can have a prefill with the following structure:
+                    //
+                    // |---| From the prefix cache.
+                    // A B C D E F G
+                    //|--------| Found in the trie during insertion.
+                    //
+                    // This means that while processing this request there was a
+                    // partially overlapping request that had A..=E in its
+                    // prefill. In this case we need to free the blocks D E.
+                    if prefix_len > allocation.cached_prefix_len {
+                        self.free_blocks.extend(
+                            &blocks[allocation.cached_prefix_len / self.block_size as usize
+                                ..prefix_len / self.block_size as usize],
+                        );
+                    }
+                }
             }
 
             // Free non-prefill blocks.
-            self.free_blocks.extend(&blocks[prefill_tokens.len()..]);
+            self.free_blocks
+                .extend(&blocks[prefill_tokens.len() / self.block_size as usize..]);
         } else {
             self.free_blocks.extend(blocks);
         }
@@ -204,17 +231,14 @@ pub struct RadixTrie {
     /// Time as a monotonically increating counter to avoid the system
     /// call that a real time lookup would require.
     time: u64,
-}
 
-impl Default for RadixTrie {
-    fn default() -> Self {
-        Self::new()
-    }
+    /// All blocks need to be aligned with this
+    block_size: usize,
 }
 
 impl RadixTrie {
     /// Construct a new radix trie.
-    pub fn new() -> Self {
+    pub fn new(block_size: usize) -> Self {
         let root = TrieNode::new(vec![], vec![], 0, None);
         let mut nodes = SlotMap::new();
         let root = nodes.insert(root);
@@ -223,13 +247,14 @@ impl RadixTrie {
             nodes,
             root,
             time: 0,
+            block_size,
         }
     }
 
     /// Find the prefix of the given tokens.
     ///
     /// The blocks corresponding to the part of the prefix that could be found
-    /// are writteng to `blocks`. The number of blocks is in `0..=tokens.len()`.
+    /// are written to `blocks`. The number of blocks is in `0..=tokens.len()`.
     /// Returns the identifier of the trie node that contains the longest
     /// prefix. The node identifier can be used by callers to e.g. increase its
     /// reference count.
@@ -247,8 +272,9 @@ impl RadixTrie {
         if let Some(&child_id) = node.children.get(&key[0]) {
             self.update_access_time(child_id);
             let child = self.nodes.get(child_id).expect("Invalid child identifier");
-            let shared_prefix_len = child.key.shared_prefix_len(key);
-            blocks.extend(&child.blocks[..shared_prefix_len]);
+            let shared_prefix_len = shared_prefix(&child.key, key, self.block_size);
+            assert_eq!(shared_prefix_len % self.block_size, 0);
+            blocks.extend(&child.blocks[..shared_prefix_len / self.block_size]);
 
             let key = &key[shared_prefix_len..];
             if !key.is_empty() {
@@ -349,7 +375,8 @@ impl RadixTrie {
     /// the first 10 elements of the tree **the blocks are not updated**.
     pub fn insert(&mut self, tokens: &[u32], blocks: &[u32]) -> Result<usize, TrieError> {
         self.time += 1;
-        self.insert_(self.root, tokens, blocks)
+        let common = self.insert_(self.root, tokens, blocks)?;
+        Ok(common)
     }
 
     /// Insertion worker.
@@ -363,7 +390,7 @@ impl RadixTrie {
         // the part of the prefix that is already in the trie to detect
         // mismatches.
 
-        if tokens.len() != blocks.len() {
+        if tokens.len() != blocks.len() * self.block_size {
             return Err(TrieError::BlockTokenCountMismatch);
         }
 
@@ -374,10 +401,10 @@ impl RadixTrie {
                 .get_mut(child_id)
                 // Unwrap here, since failure is a bug.
                 .expect("Child node does not exist");
-            let shared_prefix_len = child.key.shared_prefix_len(tokens);
+            let shared_prefix_len = shared_prefix(&child.key, tokens, self.block_size);
 
             // We are done, the prefix is already in the trie.
-            if shared_prefix_len == tokens.len() {
+            if shared_prefix_len == tokens.len() || shared_prefix_len == 0 {
                 return Ok(shared_prefix_len);
             }
 
@@ -387,7 +414,7 @@ impl RadixTrie {
                     + self.insert_(
                         child_id,
                         &tokens[shared_prefix_len..],
-                        &blocks[shared_prefix_len..],
+                        &blocks[shared_prefix_len / self.block_size..],
                     )?);
             }
 
@@ -396,7 +423,7 @@ impl RadixTrie {
             // remainder of the prefix into the node again
             let child_id = self.split_node(child_id, shared_prefix_len);
             let key = &tokens[shared_prefix_len..];
-            let blocks = &blocks[shared_prefix_len..];
+            let blocks = &blocks[shared_prefix_len / self.block_size..];
             Ok(shared_prefix_len + self.insert_(child_id, key, blocks)?)
         } else {
             self.add_node(node_id, tokens, blocks);
@@ -550,34 +577,53 @@ impl TrieNode {
     }
 }
 
-/// Helper trait to get the length of the shared prefix of two sequences.
-trait SharedPrefixLen {
-    fn shared_prefix_len(&self, other: &Self) -> usize;
-}
-
-impl<T> SharedPrefixLen for [T]
-where
-    T: PartialEq,
-{
-    fn shared_prefix_len(&self, other: &Self) -> usize {
-        self.iter().zip(other).take_while(|(a, b)| a == b).count()
-    }
+fn shared_prefix(left: &[u32], right: &[u32], block_size: usize) -> usize {
+    let full = left.iter().zip(right).take_while(|(a, b)| a == b).count();
+    (full / block_size) * block_size
 }
 
 #[cfg(test)]
 mod tests {
     use std::sync::Arc;
 
-    use crate::block_allocator::Allocator;
+    use super::*;
 
-    use super::RadixAllocator;
+    #[test]
+    fn allocator_block_size() {
+        let mut cache = RadixAllocator::new(2, 12, None);
+        let allocation = cache.allocate(8, Some(Arc::new(vec![0, 1, 2, 3]))).unwrap();
+        assert_eq!(allocation.blocks, vec![8, 9, 10, 11]);
+        assert_eq!(allocation.slots, vec![16, 17, 18, 19, 20, 21, 22, 23]);
+        assert_eq!(allocation.prefix_len, 0);
+        cache.free(allocation.blocks.clone(), allocation.allocation_id);
+
+        let allocation = cache.allocate(8, Some(Arc::new(vec![0, 1, 2, 3]))).unwrap();
+        assert_eq!(allocation.blocks, vec![8, 9, 10, 11]);
+        assert_eq!(allocation.slots, vec![16, 17, 18, 19, 20, 21, 22, 23]);
+        assert_eq!(allocation.prefix_len, 4);
+    }
+
+    #[test]
+    fn allocator_block_size_non_aligned() {
+        let mut cache = RadixAllocator::new(2, 12, None);
+        let allocation = cache.allocate(7, Some(Arc::new(vec![0, 1, 2]))).unwrap();
+        assert_eq!(allocation.blocks, vec![8, 9, 10, 11]);
+        assert_eq!(allocation.slots, vec![16, 17, 18, 19, 20, 21, 22]);
+        assert_eq!(allocation.prefix_len, 0);
+        cache.free(allocation.blocks.clone(), allocation.allocation_id);
+
+        let allocation = cache.allocate(7, Some(Arc::new(vec![0, 1, 2]))).unwrap();
+        assert_eq!(allocation.blocks, vec![8, 9, 10, 11]);
+        assert_eq!(allocation.slots, vec![16, 17, 18, 19, 20, 21, 22]);
+        assert_eq!(allocation.prefix_len, 2);
+    }
 
     #[test]
     fn allocator_reuses_prefixes() {
         let mut cache = RadixAllocator::new(1, 12, None);
         let allocation = cache.allocate(8, Some(Arc::new(vec![0, 1, 2, 3]))).unwrap();
         assert_eq!(allocation.blocks, vec![4, 5, 6, 7, 8, 9, 10, 11]);
-        assert_eq!(allocation.slots, allocation.slots);
+        assert_eq!(allocation.blocks, allocation.slots);
         assert_eq!(allocation.prefix_len, 0);
         cache.free(allocation.blocks.clone(), allocation.allocation_id);
 
@@ -666,7 +712,7 @@ mod tests {
 
     #[test]
     fn trie_insertions_have_correct_prefix_len() {
-        let mut trie = super::RadixTrie::new();
+        let mut trie = RadixTrie::new(1);
 
         assert_eq!(trie.insert(&[0, 1, 2], &[0, 1, 2]).unwrap(), 0);
 
@@ -687,9 +733,33 @@ mod tests {
         );
     }
 
+    #[test]
+    fn trie_insertions_block_size() {
+        let mut trie = RadixTrie::new(2);
+
+        assert_eq!(trie.insert(&[0, 1, 2, 3], &[0, 1]).unwrap(), 0);
+
+        // Already exists.
+        // But needs to be block_size aligned
+        assert_eq!(trie.insert(&[0, 1, 2, 3], &[0, 1]).unwrap(), 4);
+
+        // Completely new at root-level
+        assert_eq!(trie.insert(&[1, 2, 3, 4], &[1, 2]).unwrap(), 0);
+
+        // Contains full prefix, but longer.
+        assert_eq!(trie.insert(&[0, 1, 2, 3, 4, 5], &[0, 1, 2]).unwrap(), 4);
+
+        // Shares partial prefix, we need a split.
+        assert_eq!(
+            trie.insert(&[0, 1, 3, 4, 5, 6, 7, 8], &[0, 1, 2, 3])
+                .unwrap(),
+            2
+        );
+    }
+
     #[test]
     fn trie_get_returns_correct_blocks() {
-        let mut trie = super::RadixTrie::new();
+        let mut trie = RadixTrie::new(1);
         trie.insert(&[0, 1, 2], &[0, 1, 2]).unwrap();
         trie.insert(&[1, 2, 3], &[1, 2, 3]).unwrap();
         trie.insert(&[0, 1, 2, 3, 4], &[0, 1, 2, 3, 4]).unwrap();
@@ -723,7 +793,7 @@ mod tests {
 
     #[test]
     fn trie_evict_removes_correct_blocks() {
-        let mut trie = super::RadixTrie::new();
+        let mut trie = RadixTrie::new(1);
         trie.insert(&[0, 1, 2], &[0, 1, 2]).unwrap();
         trie.insert(&[0, 1, 2, 3, 5, 6, 7], &[0, 1, 2, 3, 5, 6, 7])
             .unwrap();
diff --git a/benchmark/src/generation.rs b/benchmark/src/generation.rs
index 7494d5b5..789c7b51 100644
--- a/benchmark/src/generation.rs
+++ b/benchmark/src/generation.rs
@@ -148,6 +148,7 @@ async fn prefill(
             }),
             inputs: sequence.clone(),
             truncate: sequence_length,
+            add_special_tokens: true,
             parameters: Some(parameters.clone()),
             stopping_parameters: Some(StoppingCriteriaParameters {
                 max_new_tokens: decode_length,
diff --git a/flake.lock b/flake.lock
index 14011768..c0a696b1 100644
--- a/flake.lock
+++ b/flake.lock
@@ -835,11 +835,11 @@
         ]
       },
       "locked": {
-        "lastModified": 1724206841,
-        "narHash": "sha256-L8dKaX4T3k+TR2fEHCfGbH4UXdspovz/pj87iai9qmc=",
+        "lastModified": 1724638882,
+        "narHash": "sha256-ap2jIQi/FuUHR6HCht6ASWhoz8EiB99XmI8Esot38VE=",
         "owner": "oxalica",
         "repo": "rust-overlay",
-        "rev": "45e98fbd62c32e5927e952d2833fa1ba4fb35a61",
+        "rev": "19b70f147b9c67a759e35824b241f1ed92e46694",
         "type": "github"
       },
       "original": {
diff --git a/integration-tests/models/__snapshots__/test_chat_llama/test_flash_llama_simple.json b/integration-tests/models/__snapshots__/test_chat_llama/test_flash_llama_simple.json
index 8631c076..5553e17d 100644
--- a/integration-tests/models/__snapshots__/test_chat_llama/test_flash_llama_simple.json
+++ b/integration-tests/models/__snapshots__/test_chat_llama/test_flash_llama_simple.json
@@ -5,7 +5,7 @@
       "index": 0,
       "logprobs": null,
       "message": {
-        "content": "As of your last question, the weather in Brooklyn, New York, is typically hot and humid throughout the year. The suburbs around New York City are jealously sheltered, and at least in the Lower Bronx, there are very few outdoor environments to explore in the middle of urban confines. In fact, typical times for humidity levels in Brooklyn include:\n\n- Early morning: 80-85% humidity, with occas",
+        "content": "As of your last question, the weather in Brooklyn, New York, is typically hot and humid throughout the year. The suburbs around New York City are jealously sheltered, and at least in the Lower Bronx, there are very few outdoor environments to appreciate nature.\n\nIn terms of temperature, the warmest times of the year are from June to August, when average high temperatures typically range from around 73°F or 23°C",
         "name": null,
         "role": "assistant",
         "tool_calls": null
@@ -13,14 +13,14 @@
       "usage": null
     }
   ],
-  "created": 1716553098,
+  "created": 1724792495,
   "id": "",
   "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-  "object": "text_completion",
-  "system_fingerprint": "2.0.5-dev0-native",
+  "object": "chat.completion",
+  "system_fingerprint": "2.2.1-dev0-native",
   "usage": {
     "completion_tokens": 100,
-    "prompt_tokens": 62,
-    "total_tokens": 162
+    "prompt_tokens": 61,
+    "total_tokens": 161
   }
 }
diff --git a/integration-tests/models/__snapshots__/test_completion_prompts/test_flash_llama_completion_many_prompts_stream.json b/integration-tests/models/__snapshots__/test_completion_prompts/test_flash_llama_completion_many_prompts_stream.json
index d87071cf..e7fb5740 100644
--- a/integration-tests/models/__snapshots__/test_completion_prompts/test_flash_llama_completion_many_prompts_stream.json
+++ b/integration-tests/models/__snapshots__/test_completion_prompts/test_flash_llama_completion_many_prompts_stream.json
@@ -8,11 +8,11 @@
         "text": "\n"
       }
     ],
-    "created": 1713284431,
+    "created": 1724833943,
     "id": "",
     "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
     "object": "text_completion",
-    "system_fingerprint": "2.0.1-native"
+    "system_fingerprint": "2.2.1-dev0-native"
   },
   {
     "choices": [
@@ -23,11 +23,11 @@
         "text": "\n"
       }
     ],
-    "created": 1713284431,
+    "created": 1724833943,
     "id": "",
     "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
     "object": "text_completion",
-    "system_fingerprint": "2.0.1-native"
+    "system_fingerprint": "2.2.1-dev0-native"
   },
   {
     "choices": [
@@ -38,11 +38,11 @@
         "text": "\n"
       }
     ],
-    "created": 1713284431,
+    "created": 1724833943,
     "id": "",
     "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
     "object": "text_completion",
-    "system_fingerprint": "2.0.1-native"
+    "system_fingerprint": "2.2.1-dev0-native"
   },
   {
     "choices": [
@@ -53,11 +53,11 @@
         "text": "hd"
       }
     ],
-    "created": 1713284431,
+    "created": 1724833943,
     "id": "",
     "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
     "object": "text_completion",
-    "system_fingerprint": "2.0.1-native"
+    "system_fingerprint": "2.2.1-dev0-native"
   },
   {
     "choices": [
@@ -68,11 +68,11 @@
         "text": "\n"
       }
     ],
-    "created": 1713284431,
+    "created": 1724833943,
     "id": "",
     "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
     "object": "text_completion",
-    "system_fingerprint": "2.0.1-native"
+    "system_fingerprint": "2.2.1-dev0-native"
   },
   {
     "choices": [
@@ -83,11 +83,11 @@
         "text": "\n"
       }
     ],
-    "created": 1713284431,
+    "created": 1724833943,
     "id": "",
     "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
     "object": "text_completion",
-    "system_fingerprint": "2.0.1-native"
+    "system_fingerprint": "2.2.1-dev0-native"
   },
   {
     "choices": [
@@ -98,11 +98,11 @@
         "text": "\n"
       }
     ],
-    "created": 1713284431,
+    "created": 1724833943,
     "id": "",
     "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
     "object": "text_completion",
-    "system_fingerprint": "2.0.1-native"
+    "system_fingerprint": "2.2.1-dev0-native"
   },
   {
     "choices": [
@@ -113,11 +113,11 @@
         "text": "aho"
       }
     ],
-    "created": 1713284431,
+    "created": 1724833943,
     "id": "",
     "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
     "object": "text_completion",
-    "system_fingerprint": "2.0.1-native"
+    "system_fingerprint": "2.2.1-dev0-native"
   },
   {
     "choices": [
@@ -128,11 +128,11 @@
         "text": "2"
       }
     ],
-    "created": 1713284431,
+    "created": 1724833943,
     "id": "",
     "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
     "object": "text_completion",
-    "system_fingerprint": "2.0.1-native"
+    "system_fingerprint": "2.2.1-dev0-native"
   },
   {
     "choices": [
@@ -143,11 +143,11 @@
         "text": "2"
       }
     ],
-    "created": 1713284431,
+    "created": 1724833943,
     "id": "",
     "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
     "object": "text_completion",
-    "system_fingerprint": "2.0.1-native"
+    "system_fingerprint": "2.2.1-dev0-native"
   },
   {
     "choices": [
@@ -158,11 +158,11 @@
         "text": "2"
       }
     ],
-    "created": 1713284431,
+    "created": 1724833943,
     "id": "",
     "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
     "object": "text_completion",
-    "system_fingerprint": "2.0.1-native"
+    "system_fingerprint": "2.2.1-dev0-native"
   },
   {
     "choices": [
@@ -173,11 +173,11 @@
         "text": "ima"
       }
     ],
-    "created": 1713284431,
+    "created": 1724833943,
     "id": "",
     "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
     "object": "text_completion",
-    "system_fingerprint": "2.0.1-native"
+    "system_fingerprint": "2.2.1-dev0-native"
   },
   {
     "choices": [
@@ -188,11 +188,11 @@
         "text": "."
       }
     ],
-    "created": 1713284431,
+    "created": 1724833943,
     "id": "",
     "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
     "object": "text_completion",
-    "system_fingerprint": "2.0.1-native"
+    "system_fingerprint": "2.2.1-dev0-native"
   },
   {
     "choices": [
@@ -203,11 +203,11 @@
         "text": "."
       }
     ],
-    "created": 1713284431,
+    "created": 1724833943,
     "id": "",
     "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
     "object": "text_completion",
-    "system_fingerprint": "2.0.1-native"
+    "system_fingerprint": "2.2.1-dev0-native"
   },
   {
     "choices": [
@@ -218,11 +218,11 @@
         "text": "."
       }
     ],
-    "created": 1713284431,
+    "created": 1724833943,
     "id": "",
     "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
     "object": "text_completion",
-    "system_fingerprint": "2.0.1-native"
+    "system_fingerprint": "2.2.1-dev0-native"
   },
   {
     "choices": [
@@ -233,11 +233,11 @@
         "text": "\n"
       }
     ],
-    "created": 1713284431,
+    "created": 1724833943,
     "id": "",
     "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
     "object": "text_completion",
-    "system_fingerprint": "2.0.1-native"
+    "system_fingerprint": "2.2.1-dev0-native"
   },
   {
     "choices": [
@@ -248,11 +248,11 @@
         "text": " Sarah"
       }
     ],
-    "created": 1713284431,
+    "created": 1724833943,
     "id": "",
     "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
     "object": "text_completion",
-    "system_fingerprint": "2.0.1-native"
+    "system_fingerprint": "2.2.1-dev0-native"
   },
   {
     "choices": [
@@ -263,11 +263,11 @@
         "text": " Yes"
       }
     ],
-    "created": 1713284431,
+    "created": 1724833943,
     "id": "",
     "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
     "object": "text_completion",
-    "system_fingerprint": "2.0.1-native"
+    "system_fingerprint": "2.2.1-dev0-native"
   },
   {
     "choices": [
@@ -278,11 +278,11 @@
         "text": " And"
       }
     ],
-    "created": 1713284431,
+    "created": 1724833943,
     "id": "",
     "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
     "object": "text_completion",
-    "system_fingerprint": "2.0.1-native"
+    "system_fingerprint": "2.2.1-dev0-native"
   },
   {
     "choices": [
@@ -293,11 +293,11 @@
         "text": "i"
       }
     ],
-    "created": 1713284431,
+    "created": 1724833943,
     "id": "",
     "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
     "object": "text_completion",
-    "system_fingerprint": "2.0.1-native"
+    "system_fingerprint": "2.2.1-dev0-native"
   },
   {
     "choices": [
@@ -308,11 +308,11 @@
         "text": "'"
       }
     ],
-    "created": 1713284431,
+    "created": 1724833943,
     "id": "",
     "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
     "object": "text_completion",
-    "system_fingerprint": "2.0.1-native"
+    "system_fingerprint": "2.2.1-dev0-native"
   },
   {
     "choices": [
@@ -323,11 +323,11 @@
         "text": ","
       }
     ],
-    "created": 1713284431,
+    "created": 1724833943,
     "id": "",
     "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
     "object": "text_completion",
-    "system_fingerprint": "2.0.1-native"
+    "system_fingerprint": "2.2.1-dev0-native"
   },
   {
     "choices": [
@@ -338,11 +338,11 @@
         "text": " what"
       }
     ],
-    "created": 1713284431,
+    "created": 1724833943,
     "id": "",
     "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
     "object": "text_completion",
-    "system_fingerprint": "2.0.1-native"
+    "system_fingerprint": "2.2.1-dev0-native"
   },
   {
     "choices": [
@@ -353,11 +353,11 @@
         "text": "'"
       }
     ],
-    "created": 1713284431,
+    "created": 1724833943,
     "id": "",
     "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
     "object": "text_completion",
-    "system_fingerprint": "2.0.1-native"
+    "system_fingerprint": "2.2.1-dev0-native"
   },
   {
     "choices": [
@@ -368,11 +368,11 @@
         "text": "s"
       }
     ],
-    "created": 1713284431,
+    "created": 1724833943,
     "id": "",
     "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
     "object": "text_completion",
-    "system_fingerprint": "2.0.1-native"
+    "system_fingerprint": "2.2.1-dev0-native"
   },
   {
     "choices": [
@@ -383,11 +383,11 @@
         "text": " Moh"
       }
     ],
-    "created": 1713284431,
+    "created": 1724833943,
     "id": "",
     "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
     "object": "text_completion",
-    "system_fingerprint": "2.0.1-native"
+    "system_fingerprint": "2.2.1-dev0-native"
   },
   {
     "choices": [
@@ -398,11 +398,11 @@
         "text": " is"
       }
     ],
-    "created": 1713284431,
+    "created": 1724833943,
     "id": "",
     "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
     "object": "text_completion",
-    "system_fingerprint": "2.0.1-native"
+    "system_fingerprint": "2.2.1-dev0-native"
   },
   {
     "choices": [
@@ -413,11 +413,11 @@
         "text": "m"
       }
     ],
-    "created": 1713284431,
+    "created": 1724833943,
     "id": "",
     "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
     "object": "text_completion",
-    "system_fingerprint": "2.0.1-native"
+    "system_fingerprint": "2.2.1-dev0-native"
   },
   {
     "choices": [
@@ -428,11 +428,11 @@
         "text": " Room"
       }
     ],
-    "created": 1713284431,
+    "created": 1724833943,
     "id": "",
     "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
     "object": "text_completion",
-    "system_fingerprint": "2.0.1-native"
+    "system_fingerprint": "2.2.1-dev0-native"
   },
   {
     "choices": [
@@ -443,11 +443,11 @@
         "text": "s"
       }
     ],
-    "created": 1713284431,
+    "created": 1724833943,
     "id": "",
     "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
     "object": "text_completion",
-    "system_fingerprint": "2.0.1-native"
+    "system_fingerprint": "2.2.1-dev0-native"
   },
   {
     "choices": [
@@ -458,11 +458,11 @@
         "text": " the"
       }
     ],
-    "created": 1713284431,
+    "created": 1724833943,
     "id": "",
     "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
     "object": "text_completion",
-    "system_fingerprint": "2.0.1-native"
+    "system_fingerprint": "2.2.1-dev0-native"
   },
   {
     "choices": [
@@ -473,11 +473,11 @@
         "text": " tired"
       }
     ],
-    "created": 1713284431,
+    "created": 1724833943,
     "id": "",
     "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
     "object": "text_completion",
-    "system_fingerprint": "2.0.1-native"
+    "system_fingerprint": "2.2.1-dev0-native"
   },
   {
     "choices": [
@@ -488,11 +488,11 @@
         "text": ":"
       }
     ],
-    "created": 1713284431,
+    "created": 1724833943,
     "id": "",
     "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
     "object": "text_completion",
-    "system_fingerprint": "2.0.1-native"
+    "system_fingerprint": "2.2.1-dev0-native"
   },
   {
     "choices": [
@@ -503,11 +503,11 @@
         "text": "'"
       }
     ],
-    "created": 1713284431,
+    "created": 1724833943,
     "id": "",
     "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
     "object": "text_completion",
-    "system_fingerprint": "2.0.1-native"
+    "system_fingerprint": "2.2.1-dev0-native"
   },
   {
     "choices": [
@@ -518,11 +518,11 @@
         "text": " capital"
       }
     ],
-    "created": 1713284431,
+    "created": 1724833943,
     "id": "",
     "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
     "object": "text_completion",
-    "system_fingerprint": "2.0.1-native"
+    "system_fingerprint": "2.2.1-dev0-native"
   },
   {
     "choices": [
@@ -530,73 +530,73 @@
         "finish_reason": "",
         "index": 3,
         "logprobs": null,
-        "text": " of"
+        "text": ","
       }
     ],
-    "created": 1713284431,
+    "created": 1724833943,
     "id": "",
     "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
     "object": "text_completion",
-    "system_fingerprint": "2.0.1-native"
+    "system_fingerprint": "2.2.1-dev0-native"
   },
   {
     "choices": [
       {
-        "finish_reason": "",
+        "finish_reason": "length",
         "index": 0,
         "logprobs": null,
         "text": " She"
       }
     ],
-    "created": 1713284431,
+    "created": 1724833943,
     "id": "",
     "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
     "object": "text_completion",
-    "system_fingerprint": "2.0.1-native"
+    "system_fingerprint": "2.2.1-dev0-native"
   },
   {
     "choices": [
       {
-        "finish_reason": "",
+        "finish_reason": "length",
         "index": 1,
         "logprobs": null,
         "text": " scale"
       }
     ],
-    "created": 1713284431,
+    "created": 1724833943,
     "id": "",
     "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
     "object": "text_completion",
-    "system_fingerprint": "2.0.1-native"
+    "system_fingerprint": "2.2.1-dev0-native"
   },
   {
     "choices": [
       {
-        "finish_reason": "",
+        "finish_reason": "length",
         "index": 2,
         "logprobs": null,
         "text": " of"
       }
     ],
-    "created": 1713284431,
+    "created": 1724833943,
     "id": "",
     "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
     "object": "text_completion",
-    "system_fingerprint": "2.0.1-native"
+    "system_fingerprint": "2.2.1-dev0-native"
   },
   {
     "choices": [
       {
-        "finish_reason": "",
+        "finish_reason": "length",
         "index": 3,
         "logprobs": null,
-        "text": " being"
+        "text": " its"
       }
     ],
-    "created": 1713284431,
+    "created": 1724833943,
     "id": "",
     "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
     "object": "text_completion",
-    "system_fingerprint": "2.0.1-native"
+    "system_fingerprint": "2.2.1-dev0-native"
   }
 ]
diff --git a/integration-tests/models/__snapshots__/test_flash_deepseek_v2/test_flash_deepseek_v2.json b/integration-tests/models/__snapshots__/test_flash_deepseek_v2/test_flash_deepseek_v2.json
index 03f90367..732b0c49 100644
--- a/integration-tests/models/__snapshots__/test_flash_deepseek_v2/test_flash_deepseek_v2.json
+++ b/integration-tests/models/__snapshots__/test_flash_deepseek_v2/test_flash_deepseek_v2.json
@@ -16,7 +16,7 @@
       },
       {
         "id": 3102,
-        "logprob": -11.1875,
+        "logprob": -11.25,
         "text": " request"
       }
     ],
@@ -24,66 +24,66 @@
     "tokens": [
       {
         "id": 185,
-        "logprob": -1.5546875,
+        "logprob": -1.546875,
         "special": false,
         "text": "\n"
       },
       {
         "id": 549,
-        "logprob": -2.84375,
+        "logprob": -2.859375,
         "special": false,
         "text": "The"
       },
       {
         "id": 1727,
-        "logprob": -2.34375,
+        "logprob": -2.484375,
         "special": false,
         "text": " test"
       },
       {
         "id": 3102,
-        "logprob": -0.8359375,
+        "logprob": -0.83203125,
         "special": false,
         "text": " request"
       },
       {
         "id": 317,
-        "logprob": -1.0859375,
+        "logprob": -1.1484375,
         "special": false,
         "text": " is"
       },
       {
-        "id": 254,
-        "logprob": -1.5390625,
+        "id": 245,
+        "logprob": -1.578125,
         "special": false,
-        "text": " the"
+        "text": " a"
       },
       {
-        "id": 1022,
-        "logprob": -1.1875,
+        "id": 3412,
+        "logprob": -2.578125,
         "special": false,
-        "text": " first"
+        "text": " document"
       },
       {
-        "id": 3458,
-        "logprob": -0.35546875,
+        "id": 344,
+        "logprob": -1.125,
         "special": false,
-        "text": " step"
+        "text": " that"
       },
       {
-        "id": 279,
-        "logprob": -0.8828125,
+        "id": 317,
+        "logprob": -1.6953125,
         "special": false,
-        "text": " in"
+        "text": " is"
       },
       {
-        "id": 254,
-        "logprob": -0.71484375,
+        "id": 1222,
+        "logprob": -1.71875,
         "special": false,
-        "text": " the"
+        "text": " used"
       }
     ],
     "top_tokens": null
   },
-  "generated_text": "\nThe test request is the first step in the"
+  "generated_text": "\nThe test request is a document that is used"
 }
diff --git a/integration-tests/models/__snapshots__/test_flash_deepseek_v2/test_flash_deepseek_v2_load.json b/integration-tests/models/__snapshots__/test_flash_deepseek_v2/test_flash_deepseek_v2_load.json
index e365829a..f1eeab25 100644
--- a/integration-tests/models/__snapshots__/test_flash_deepseek_v2/test_flash_deepseek_v2_load.json
+++ b/integration-tests/models/__snapshots__/test_flash_deepseek_v2/test_flash_deepseek_v2_load.json
@@ -37,56 +37,56 @@
         },
         {
           "id": 1727,
-          "logprob": -2.359375,
+          "logprob": -2.4375,
           "special": false,
           "text": " test"
         },
         {
           "id": 3102,
-          "logprob": -0.83203125,
+          "logprob": -0.83984375,
           "special": false,
           "text": " request"
         },
         {
           "id": 317,
-          "logprob": -1.125,
+          "logprob": -1.1328125,
           "special": false,
           "text": " is"
         },
         {
-          "id": 245,
-          "logprob": -1.5703125,
+          "id": 254,
+          "logprob": -1.515625,
           "special": false,
-          "text": " a"
+          "text": " the"
         },
         {
-          "id": 3412,
-          "logprob": -2.578125,
+          "id": 1022,
+          "logprob": -1.15625,
           "special": false,
-          "text": " document"
+          "text": " first"
         },
         {
-          "id": 344,
-          "logprob": -1.125,
+          "id": 3458,
+          "logprob": -0.3671875,
           "special": false,
-          "text": " that"
+          "text": " step"
         },
         {
-          "id": 317,
-          "logprob": -1.6953125,
+          "id": 279,
+          "logprob": -0.88671875,
           "special": false,
-          "text": " is"
+          "text": " in"
         },
         {
-          "id": 1222,
-          "logprob": -1.75,
+          "id": 254,
+          "logprob": -0.69140625,
           "special": false,
-          "text": " used"
+          "text": " the"
         }
       ],
       "top_tokens": null
     },
-    "generated_text": "\nThe test request is a document that is used"
+    "generated_text": "\nThe test request is the first step in the"
   },
   {
     "details": {
@@ -126,56 +126,56 @@
         },
         {
           "id": 1727,
-          "logprob": -2.359375,
+          "logprob": -2.4375,
           "special": false,
           "text": " test"
         },
         {
           "id": 3102,
-          "logprob": -0.83203125,
+          "logprob": -0.83984375,
           "special": false,
           "text": " request"
         },
         {
           "id": 317,
-          "logprob": -1.125,
+          "logprob": -1.1328125,
           "special": false,
           "text": " is"
         },
         {
-          "id": 245,
-          "logprob": -1.5703125,
+          "id": 254,
+          "logprob": -1.515625,
           "special": false,
-          "text": " a"
+          "text": " the"
         },
         {
-          "id": 3412,
-          "logprob": -2.578125,
+          "id": 1022,
+          "logprob": -1.15625,
           "special": false,
-          "text": " document"
+          "text": " first"
         },
         {
-          "id": 344,
-          "logprob": -1.125,
+          "id": 3458,
+          "logprob": -0.3671875,
           "special": false,
-          "text": " that"
+          "text": " step"
         },
         {
-          "id": 317,
-          "logprob": -1.6953125,
+          "id": 279,
+          "logprob": -0.88671875,
           "special": false,
-          "text": " is"
+          "text": " in"
         },
         {
-          "id": 1222,
-          "logprob": -1.75,
+          "id": 254,
+          "logprob": -0.69140625,
           "special": false,
-          "text": " used"
+          "text": " the"
         }
       ],
       "top_tokens": null
     },
-    "generated_text": "\nThe test request is a document that is used"
+    "generated_text": "\nThe test request is the first step in the"
   },
   {
     "details": {
@@ -215,56 +215,56 @@
         },
         {
           "id": 1727,
-          "logprob": -2.359375,
+          "logprob": -2.4375,
           "special": false,
           "text": " test"
         },
         {
           "id": 3102,
-          "logprob": -0.83203125,
+          "logprob": -0.83984375,
           "special": false,
           "text": " request"
         },
         {
           "id": 317,
-          "logprob": -1.125,
+          "logprob": -1.1328125,
           "special": false,
           "text": " is"
         },
         {
-          "id": 245,
-          "logprob": -1.5703125,
+          "id": 254,
+          "logprob": -1.515625,
           "special": false,
-          "text": " a"
+          "text": " the"
         },
         {
-          "id": 3412,
-          "logprob": -2.578125,
+          "id": 1022,
+          "logprob": -1.15625,
           "special": false,
-          "text": " document"
+          "text": " first"
         },
         {
-          "id": 344,
-          "logprob": -1.125,
+          "id": 3458,
+          "logprob": -0.3671875,
           "special": false,
-          "text": " that"
+          "text": " step"
         },
         {
-          "id": 317,
-          "logprob": -1.6953125,
+          "id": 279,
+          "logprob": -0.88671875,
           "special": false,
-          "text": " is"
+          "text": " in"
         },
         {
-          "id": 1222,
-          "logprob": -1.75,
+          "id": 254,
+          "logprob": -0.69140625,
           "special": false,
-          "text": " used"
+          "text": " the"
         }
       ],
       "top_tokens": null
     },
-    "generated_text": "\nThe test request is a document that is used"
+    "generated_text": "\nThe test request is the first step in the"
   },
   {
     "details": {
@@ -304,55 +304,55 @@
         },
         {
           "id": 1727,
-          "logprob": -2.359375,
+          "logprob": -2.4375,
           "special": false,
           "text": " test"
         },
         {
           "id": 3102,
-          "logprob": -0.83203125,
+          "logprob": -0.83984375,
           "special": false,
           "text": " request"
         },
         {
           "id": 317,
-          "logprob": -1.125,
+          "logprob": -1.1328125,
           "special": false,
           "text": " is"
         },
         {
-          "id": 245,
-          "logprob": -1.5703125,
+          "id": 254,
+          "logprob": -1.515625,
           "special": false,
-          "text": " a"
+          "text": " the"
         },
         {
-          "id": 3412,
-          "logprob": -2.578125,
+          "id": 1022,
+          "logprob": -1.15625,
           "special": false,
-          "text": " document"
+          "text": " first"
         },
         {
-          "id": 344,
-          "logprob": -1.125,
+          "id": 3458,
+          "logprob": -0.3671875,
           "special": false,
-          "text": " that"
+          "text": " step"
         },
         {
-          "id": 317,
-          "logprob": -1.6953125,
+          "id": 279,
+          "logprob": -0.88671875,
           "special": false,
-          "text": " is"
+          "text": " in"
         },
         {
-          "id": 1222,
-          "logprob": -1.75,
+          "id": 254,
+          "logprob": -0.69140625,
           "special": false,
-          "text": " used"
+          "text": " the"
         }
       ],
       "top_tokens": null
     },
-    "generated_text": "\nThe test request is a document that is used"
+    "generated_text": "\nThe test request is the first step in the"
   }
 ]
diff --git a/integration-tests/models/__snapshots__/test_flash_llama_fp8/test_flash_llama_fp8_all_params.json b/integration-tests/models/__snapshots__/test_flash_llama_fp8/test_flash_llama_fp8_all_params.json
index bf981e4f..e39829ec 100644
--- a/integration-tests/models/__snapshots__/test_flash_llama_fp8/test_flash_llama_fp8_all_params.json
+++ b/integration-tests/models/__snapshots__/test_flash_llama_fp8/test_flash_llama_fp8_all_params.json
@@ -1,8 +1,8 @@
 {
   "details": {
     "best_of_sequences": null,
-    "finish_reason": "length",
-    "generated_tokens": 10,
+    "finish_reason": "stop_sequence",
+    "generated_tokens": 5,
     "prefill": [
       {
         "id": 128000,
@@ -16,7 +16,7 @@
       },
       {
         "id": 1715,
-        "logprob": -10.375,
+        "logprob": -10.4375,
         "text": " request"
       }
     ],
@@ -29,61 +29,31 @@
         "text": ":"
       },
       {
-        "id": 2209,
-        "logprob": -2.78125,
+        "id": 923,
+        "logprob": -2.84375,
         "special": false,
-        "text": " Is"
+        "text": " add"
       },
       {
-        "id": 279,
-        "logprob": -0.6328125,
+        "id": 264,
+        "logprob": 0.0,
         "special": false,
-        "text": " the"
-      },
-      {
-        "id": 734,
-        "logprob": -2.703125,
-        "special": false,
-        "text": " function"
+        "text": " a"
       },
       {
         "id": 330,
-        "logprob": -0.34179688,
+        "logprob": -0.31640625,
         "special": false,
         "text": " \""
       },
       {
-        "id": 4110,
-        "logprob": -2.359375,
+        "id": 1985,
+        "logprob": 0.0,
         "special": false,
-        "text": "Create"
-      },
-      {
-        "id": 7575,
-        "logprob": -2.1875,
-        "special": false,
-        "text": "Process"
-      },
-      {
-        "id": 1,
-        "logprob": -0.07910156,
-        "special": false,
-        "text": "\""
-      },
-      {
-        "id": 304,
-        "logprob": -0.83203125,
-        "special": false,
-        "text": " in"
-      },
-      {
-        "id": 12468,
-        "logprob": -1.8203125,
-        "special": false,
-        "text": " Win"
+        "text": "test"
       }
     ],
     "top_tokens": null
   },
-  "generated_text": "Test request: Is the function \"CreateProcess\" in Win"
+  "generated_text": "Test request: add a \"test"
 }
diff --git a/integration-tests/models/__snapshots__/test_flash_starcoder2/test_flash_starcoder2_default_params.json b/integration-tests/models/__snapshots__/test_flash_starcoder2/test_flash_starcoder2_default_params.json
index d882b82a..412b19b4 100644
--- a/integration-tests/models/__snapshots__/test_flash_starcoder2/test_flash_starcoder2_default_params.json
+++ b/integration-tests/models/__snapshots__/test_flash_starcoder2/test_flash_starcoder2_default_params.json
@@ -16,7 +16,7 @@
       },
       {
         "id": 100,
-        "logprob": -0.38549805,
+        "logprob": -0.38305664,
         "text": "_"
       },
       {
@@ -29,7 +29,7 @@
     "tokens": [
       {
         "id": 2284,
-        "logprob": -0.31323242,
+        "logprob": -0.296875,
         "special": false,
         "text": "():"
       },
@@ -59,19 +59,19 @@
       },
       {
         "id": 10914,
-        "logprob": -0.7817383,
+        "logprob": -0.7734375,
         "special": false,
         "text": " World"
       },
       {
         "id": 16013,
-        "logprob": -0.6328125,
+        "logprob": -0.61816406,
         "special": false,
         "text": "!\")"
       },
       {
         "id": 222,
-        "logprob": -0.0619812,
+        "logprob": -0.054870605,
         "special": false,
         "text": "\n"
       },
@@ -83,7 +83,7 @@
       },
       {
         "id": 610,
-        "logprob": -0.4086914,
+        "logprob": -0.4152832,
         "special": false,
         "text": "def"
       },
@@ -113,7 +113,7 @@
       },
       {
         "id": 444,
-        "logprob": -0.21826172,
+        "logprob": -0.21618652,
         "special": false,
         "text": "name"
       },
@@ -173,7 +173,7 @@
       },
       {
         "id": 11571,
-        "logprob": -0.10021973,
+        "logprob": -0.08892822,
         "special": false,
         "text": "!\""
       },
diff --git a/integration-tests/models/__snapshots__/test_idefics2/test_flash_idefics2_next_all_params.json b/integration-tests/models/__snapshots__/test_idefics2/test_flash_idefics2_next_all_params.json
index 1fad0b96..dab437b9 100644
--- a/integration-tests/models/__snapshots__/test_idefics2/test_flash_idefics2_next_all_params.json
+++ b/integration-tests/models/__snapshots__/test_idefics2/test_flash_idefics2_next_all_params.json
@@ -30,19 +30,19 @@
       },
       {
         "id": 264,
-        "logprob": -0.37573242,
+        "logprob": -0.38061523,
         "special": false,
         "text": " a"
       },
       {
         "id": 633,
-        "logprob": -0.09161377,
+        "logprob": -0.09301758,
         "special": false,
         "text": " new"
       },
       {
         "id": 4480,
-        "logprob": -0.26171875,
+        "logprob": -0.26782227,
         "special": false,
         "text": " feature"
       },
@@ -78,7 +78,7 @@
       },
       {
         "id": 13,
-        "logprob": 0.0,
+        "logprob": -0.10632324,
         "special": false,
         "text": "\n"
       }
diff --git a/integration-tests/models/test_chat_llama.py b/integration-tests/models/test_chat_llama.py
index 1f7a4a59..7d24add3 100644
--- a/integration-tests/models/test_chat_llama.py
+++ b/integration-tests/models/test_chat_llama.py
@@ -35,6 +35,6 @@ async def test_flash_llama_simple(flash_llama_chat, response_snapshot):
     print(repr(response.choices[0].message.content))
     assert (
         response.choices[0].message.content
-        == "As of your last question, the weather in Brooklyn, New York, is typically hot and humid throughout the year. The suburbs around New York City are jealously sheltered, and at least in the Lower Bronx, there are very few outdoor environments to explore in the middle of urban confines. In fact, typical times for humidity levels in Brooklyn include:\n\n- Early morning: 80-85% humidity, with occas"
+        == "As of your last question, the weather in Brooklyn, New York, is typically hot and humid throughout the year. The suburbs around New York City are jealously sheltered, and at least in the Lower Bronx, there are very few outdoor environments to appreciate nature.\n\nIn terms of temperature, the warmest times of the year are from June to August, when average high temperatures typically range from around 73°F or 23°C"
     )
     assert response == response_snapshot
diff --git a/launcher/src/main.rs b/launcher/src/main.rs
index 9a90a673..8e5c9dcd 100644
--- a/launcher/src/main.rs
+++ b/launcher/src/main.rs
@@ -8,7 +8,7 @@ use nix::unistd::Pid;
 use serde::Deserialize;
 use std::env;
 use std::ffi::OsString;
-use std::io::{BufRead, BufReader, Lines};
+use std::io::{BufRead, BufReader};
 use std::os::unix::process::{CommandExt, ExitStatusExt};
 use std::path::Path;
 use std::process::{Child, Command, ExitStatus, Stdio};
@@ -18,12 +18,103 @@ use std::sync::{mpsc, Arc};
 use std::thread;
 use std::thread::sleep;
 use std::time::{Duration, Instant};
-use std::{fs, io};
+use std::{
+    fs, io,
+    io::{Read, Write},
+};
 use thiserror::Error;
 use tracing_subscriber::{filter::LevelFilter, EnvFilter};
 
 mod env_runtime;
 
+fn get_config(
+    model_id: &str,
+    revision: &Option<String>,
+) -> Result<Config, Box<dyn std::error::Error>> {
+    let mut path = std::path::Path::new(model_id).to_path_buf();
+    let model_id = model_id.to_string();
+    let filename = if !path.exists() {
+        // Assume it's a hub id
+
+        let api = if let Ok(token) = std::env::var("HF_TOKEN") {
+            // env variable has precedence over on file token.
+            ApiBuilder::new().with_token(Some(token)).build()?
+        } else {
+            Api::new()?
+        };
+        let repo = if let Some(ref revision) = revision {
+            api.repo(Repo::with_revision(
+                model_id,
+                RepoType::Model,
+                revision.to_string(),
+            ))
+        } else {
+            api.model(model_id)
+        };
+        repo.get("config.json")?
+    } else {
+        path.push("config.json");
+        path
+    };
+
+    let content = std::fs::read_to_string(filename)?;
+    let config: RawConfig = serde_json::from_str(&content)?;
+
+    let config: Config = config.into();
+    Ok(config)
+}
+
+fn resolve_attention(config: &Option<Config>, lora_adapters: &Option<String>) -> (String, String) {
+    let mut prefix_caching: Option<String> = std::env::var("USE_PREFIX_CACHING").ok();
+    let mut attention: Option<String> = std::env::var("ATTENTION").ok();
+    if let Some(config) = config {
+        if prefix_caching.is_none() {
+            if config.vision_config.is_some() {
+                tracing::info!("Disabling prefix caching because of VLM model");
+                prefix_caching = Some("0".to_string());
+            } else if config.is_encoder_decoder {
+                tracing::info!("Disabling prefix caching because of seq2seq model");
+                prefix_caching = Some("0".to_string());
+            }
+        }
+        match config.head_dim {
+            Some(h) if h == 64 || h == 128 || h == 256 => {
+                if lora_adapters.is_some() && prefix_caching.is_none() {
+                    tracing::info!("Disabling prefix caching because of lora adapters");
+                    prefix_caching = Some("0".to_string());
+                }
+                match config.model_type.as_deref() {
+                    Some("gemma2") | Some("falcon") | Some("deepseek_v2") => {
+                        // Required because gemma2 needs bfloat16 which is not supported by
+                        // flashinfer ?
+                        if attention.is_none() {
+                            tracing::info!(
+                                "Forcing flash decoding because model {} requires it",
+                                config.model_type.as_ref().unwrap()
+                            );
+                            attention = Some("flashdecoding".to_string());
+                        }
+                    }
+                    Some("t5") => {}
+                    _ => {}
+                }
+            }
+            _ => {
+                if attention.is_none() {
+                    tracing::info!("Forcing flash decoding because head dim is not supported by flashinfer, also disabling prefix caching");
+                    attention = Some("flashdecoding".to_string());
+                }
+                if prefix_caching.is_none() {
+                    prefix_caching = Some("0".to_string());
+                }
+            }
+        }
+    }
+    let prefix_caching = prefix_caching.unwrap_or("true".to_string());
+    let attention = attention.unwrap_or("flashinfer".to_string());
+    (prefix_caching, attention)
+}
+
 #[derive(Deserialize)]
 struct RawConfig {
     max_position_embeddings: Option<usize>,
@@ -31,6 +122,12 @@ struct RawConfig {
     model_type: Option<String>,
     max_seq_len: Option<usize>,
     quantization_config: Option<QuantizationConfig>,
+    n_embd: Option<usize>,
+    hidden_size: Option<usize>,
+    num_attention_heads: Option<usize>,
+    head_dim: Option<usize>,
+    vision_config: Option<VisionConfig>,
+    is_encoder_decoder: Option<bool>,
 }
 
 #[derive(Deserialize)]
@@ -38,10 +135,17 @@ struct QuantizationConfig {
     quant_method: Option<Quantization>,
 }
 
+#[derive(Deserialize)]
+struct VisionConfig {}
+
 #[derive(Deserialize)]
 struct Config {
     max_position_embeddings: Option<usize>,
     quantize: Option<Quantization>,
+    head_dim: Option<usize>,
+    model_type: Option<String>,
+    vision_config: Option<VisionConfig>,
+    is_encoder_decoder: bool,
 }
 
 impl From<RawConfig> for Config {
@@ -51,9 +155,32 @@ impl From<RawConfig> for Config {
             .or(other.max_seq_len)
             .or(other.n_positions);
         let quantize = other.quantization_config.and_then(|q| q.quant_method);
+        let head_dim = other.head_dim.or_else(|| {
+            match (other.hidden_size, other.n_embd, other.num_attention_heads) {
+                (Some(hidden_size), _, Some(num_attention_heads))
+                    if hidden_size % num_attention_heads == 0 =>
+                {
+                    Some(hidden_size / num_attention_heads)
+                }
+                // Legacy
+                (_, Some(hidden_size), Some(num_attention_heads))
+                    if hidden_size % num_attention_heads == 0 =>
+                {
+                    Some(hidden_size / num_attention_heads)
+                }
+                _ => None,
+            }
+        });
+        let model_type = other.model_type;
+        let vision_config = other.vision_config;
+        let is_encoder_decoder = other.is_encoder_decoder.unwrap_or(false);
         Config {
             max_position_embeddings,
             quantize,
+            head_dim,
+            model_type,
+            vision_config,
+            is_encoder_decoder,
         }
     }
 }
@@ -731,6 +858,7 @@ fn shard_manager(
         .args(shard_args)
         .env_clear()
         .envs(envs)
+        .stdin(Stdio::piped())
         .stdout(Stdio::piped())
         .stderr(Stdio::piped())
         .process_group(0)
@@ -752,12 +880,13 @@ fn shard_manager(
     };
 
     // Redirect STDOUT to the console
+    let mut pstdin = p.stdin.take().unwrap();
     let shard_stdout_reader = BufReader::new(p.stdout.take().unwrap());
     let shard_stderr_reader = BufReader::new(p.stderr.take().unwrap());
 
     //stdout tracing thread
     thread::spawn(move || {
-        log_lines(shard_stdout_reader.lines());
+        log_lines(shard_stdout_reader);
     });
     // We read stderr in another thread as it seems that lines() can block in some cases
     let (err_sender, err_receiver) = mpsc::channel();
@@ -766,6 +895,18 @@ fn shard_manager(
             err_sender.send(line).unwrap_or(());
         }
     });
+    // We read stdin in another thread as it seems that lines() can block in some cases
+    thread::spawn(move || {
+        let mut stdin = io::stdin(); // We get `Stdin` here.
+        loop {
+            let mut buffer = vec![0; 4096];
+            if let Ok(n) = stdin.read(&mut buffer) {
+                if n > 0 {
+                    let _ = pstdin.write_all(&buffer[..n]);
+                }
+            }
+        }
+    });
 
     let mut ready = false;
     let start_time = Instant::now();
@@ -872,19 +1013,36 @@ impl PythonLogMessage {
     }
 }
 
-impl TryFrom<&String> for PythonLogMessage {
+impl TryFrom<&[u8]> for PythonLogMessage {
     type Error = serde_json::Error;
 
-    fn try_from(value: &String) -> Result<Self, Self::Error> {
-        serde_json::from_str::<Self>(value)
+    fn try_from(value: &[u8]) -> Result<Self, Self::Error> {
+        serde_json::from_slice::<Self>(value)
     }
 }
 
-fn log_lines<S: Sized + BufRead>(lines: Lines<S>) {
-    for line in lines.map_while(Result::ok) {
-        match PythonLogMessage::try_from(&line) {
-            Ok(log) => log.trace(),
-            Err(_) => tracing::debug!("{line}"),
+fn log_lines<R: Sized + Read>(mut bufread: BufReader<R>) {
+    let mut buffer = vec![0u8; 8 * 4096];
+    let mut stdout = std::io::stdout();
+    loop {
+        let n = bufread.read(&mut buffer);
+        if let Ok(n) = n {
+            if n > 0 {
+                let mut lines = buffer[..n].split(|i| *i == b'\n').peekable();
+                while let Some(line) = lines.next() {
+                    match PythonLogMessage::try_from(line) {
+                        Ok(log) => log.trace(),
+                        // For interactive debugging ?
+                        Err(_) => {
+                            stdout.write_all(line).unwrap();
+                            if lines.peek().is_some() {
+                                stdout.write_all(b"\n").unwrap();
+                            }
+                            stdout.flush().unwrap();
+                        }
+                    }
+                }
+            }
         }
     }
 }
@@ -1044,7 +1202,7 @@ fn download_convert_model(
     let download_stdout = BufReader::new(download_process.stdout.take().unwrap());
 
     thread::spawn(move || {
-        log_lines(download_stdout.lines());
+        log_lines(download_stdout);
     });
 
     let download_stderr = BufReader::new(download_process.stderr.take().unwrap());
@@ -1439,68 +1597,35 @@ fn main() -> Result<(), LauncherError> {
 
     tracing::info!("{:#?}", args);
 
-    let get_max_positions_quantize =
-        || -> Result<(usize, Option<Quantization>), Box<dyn std::error::Error>> {
-            let model_id = args.model_id.clone();
-            let mut path = std::path::Path::new(&args.model_id).to_path_buf();
-            let filename = if !path.exists() {
-                // Assume it's a hub id
+    let config: Option<Config> = get_config(&args.model_id, &args.revision).ok();
+    let quantize = config.as_ref().and_then(|c| c.quantize);
+    // Quantization usually means you're even more RAM constrained.
+    let max_default = 4096;
 
-                let api = if let Ok(token) = std::env::var("HF_TOKEN") {
-                    // env variable has precedence over on file token.
-                    ApiBuilder::new().with_token(Some(token)).build()?
-                } else {
-                    Api::new()?
-                };
-                let repo = if let Some(ref revision) = args.revision {
-                    api.repo(Repo::with_revision(
-                        model_id,
-                        RepoType::Model,
-                        revision.to_string(),
-                    ))
-                } else {
-                    api.model(model_id)
-                };
-                repo.get("config.json")?
-            } else {
-                path.push("config.json");
-                path
-            };
-
-            let content = std::fs::read_to_string(filename)?;
-            let config: RawConfig = serde_json::from_str(&content)?;
-
-            if config.model_type == Some("gemma2".to_string()) {
-                tracing::info!("Forcing flash decoding because of softcap usage");
-                std::env::set_var("ATTENTION", "flashdecoding");
-            }
-            let config: Config = config.into();
-            let quantize = config.quantize;
-
-            // Quantization usually means you're even more RAM constrained.
-            let max_default = 4096;
-
-            if let Some(max_position_embeddings) = config.max_position_embeddings {
-                if max_position_embeddings > max_default {
-                    let max = max_position_embeddings;
-                    if args.max_input_tokens.is_none()
-                        && args.max_total_tokens.is_none()
-                        && args.max_batch_prefill_tokens.is_none()
-                    {
-                        tracing::info!("Model supports up to {max} but tgi will now set its default to {max_default} instead. This is to save VRAM by refusing large prompts in order to allow more users on the same hardware. You can increase that size using `--max-batch-prefill-tokens={} --max-total-tokens={max} --max-input-tokens={}`.", max + 50, max - 1);
-                    }
-                    Ok((max_default, quantize))
-                } else {
-                    Ok((max_position_embeddings, quantize))
+    let max_position_embeddings = if let Some(config) = &config {
+        if let Some(max_position_embeddings) = config.max_position_embeddings {
+            if max_position_embeddings > max_default {
+                let max = max_position_embeddings;
+                if args.max_input_tokens.is_none()
+                    && args.max_total_tokens.is_none()
+                    && args.max_batch_prefill_tokens.is_none()
+                {
+                    tracing::info!("Model supports up to {max} but tgi will now set its default to {max_default} instead. This is to save VRAM by refusing large prompts in order to allow more users on the same hardware. You can increase that size using `--max-batch-prefill-tokens={} --max-total-tokens={max} --max-input-tokens={}`.", max + 50, max - 1);
                 }
+                max_default
             } else {
-                Err(Box::new(LauncherError::ArgumentValidation(
-                    "no max defined".to_string(),
-                )))
+                max_position_embeddings
             }
-        };
-    let (max_position_embeddings, quantize): (usize, Option<Quantization>) =
-        get_max_positions_quantize().unwrap_or((4096, None));
+        } else {
+            max_default
+        }
+    } else {
+        max_default
+    };
+    let (prefix_caching, attention) = resolve_attention(&config, &args.lora_adapters);
+    tracing::info!("Using attention {attention} - Prefix caching {prefix_caching}");
+    std::env::set_var("USE_PREFIX_CACHING", prefix_caching);
+    std::env::set_var("ATTENTION", attention);
 
     let max_input_tokens = {
         match (args.max_input_tokens, args.max_input_length) {
diff --git a/load_tests/common.js b/load_tests/common.js
index e0a10595..d890bf67 100644
--- a/load_tests/common.js
+++ b/load_tests/common.js
@@ -33,13 +33,13 @@ export function get_options() {
             //     rate: 20,
             //     timeUnit: '1s',
             // },
-            load_test: {
-                executor: 'constant-arrival-rate',
-                duration: '60s',
-                preAllocatedVUs: 100,
-                rate: 1,
-                timeUnit: '1s',
-            },
+            // load_test: {
+            //     executor: 'constant-arrival-rate',
+            //     duration: '60s',
+            //     preAllocatedVUs: 100,
+            //     rate: 1,
+            //     timeUnit: '1s',
+            // },
             // breakpoint: {
             //     executor: 'ramping-arrival-rate', //Assure load increase if the system slows
             //     preAllocatedVUs: 300,
@@ -47,12 +47,12 @@ export function get_options() {
             //         { duration: '60s', target: 100 }, // just slowly ramp-up to a HUGE load
             //     ],
             // },
-            // throughput: {
-            //     executor: 'shared-iterations',
-            //     vus: 100,
-            //     iterations: 200,
-            //     maxDuration: '40s',
-            // },
+            throughput: {
+                executor: 'shared-iterations',
+                vus: 100,
+                iterations: 200,
+                maxDuration: '40s',
+            },
         },
     };
 }
diff --git a/proto/v3/generate.proto b/proto/v3/generate.proto
index 68eea7ac..34894bda 100644
--- a/proto/v3/generate.proto
+++ b/proto/v3/generate.proto
@@ -137,6 +137,8 @@ message Request {
   optional string adapter_id = 11;
   /// Prefix length that can be retrieved from the KV cache.
   uint32 prefix_len = 12;
+  /// Context truncation
+  bool add_special_tokens = 13;
 }
 
 message Batch {
diff --git a/router/src/infer/mod.rs b/router/src/infer/mod.rs
index 81c0d38f..240282d9 100644
--- a/router/src/infer/mod.rs
+++ b/router/src/infer/mod.rs
@@ -120,10 +120,11 @@ impl Infer {
     ) -> Result<Option<tokenizers::Encoding>, InferError> {
         // Tokenize request
         let inputs = request.inputs;
+        let add_special_tokens = request.add_special_tokens;
         let truncate = request.parameters.truncate;
         let encoding = self
             .validation
-            .tokenize(inputs, truncate)
+            .tokenize(inputs, add_special_tokens, truncate)
             .await
             .map_err(|err| {
                 tracing::error!("Tokenization {err}");
diff --git a/router/src/lib.rs b/router/src/lib.rs
index ce4f7c46..979f6dd1 100644
--- a/router/src/lib.rs
+++ b/router/src/lib.rs
@@ -22,6 +22,16 @@ pub enum Attention {
     FlashInfer,
 }
 
+impl Attention {
+    pub fn block_size(&self) -> u32 {
+        match self {
+            Attention::FlashDecoding => 256,
+            Attention::FlashInfer => 1,
+            Attention::Paged => 16,
+        }
+    }
+}
+
 #[derive(Debug)]
 pub struct ParseError;
 
@@ -1072,6 +1082,16 @@ pub(crate) struct GenerateRequest {
     pub inputs: String,
     #[serde(default = "default_parameters")]
     pub parameters: GenerateParameters,
+
+    /// This is used internally because some requests
+    /// already contain the templated input therefore
+    /// we shouldn't add the special tokens.
+    #[serde(default = "default_true", skip)]
+    pub add_special_tokens: bool,
+}
+
+fn default_true() -> bool {
+    true
 }
 
 #[derive(Clone, Debug, Deserialize, ToSchema)]
@@ -1089,6 +1109,7 @@ impl From<CompatGenerateRequest> for GenerateRequest {
     fn from(req: CompatGenerateRequest) -> Self {
         Self {
             inputs: req.inputs,
+            add_special_tokens: true,
             parameters: req.parameters,
         }
     }
diff --git a/router/src/server.rs b/router/src/server.rs
index 8ebd1a33..f273a786 100644
--- a/router/src/server.rs
+++ b/router/src/server.rs
@@ -158,6 +158,7 @@ async fn get_chat_tokenize(
 
     let generate_request = GenerateRequest {
         inputs,
+        add_special_tokens: false,
         parameters: GenerateParameters {
             best_of: None,
             temperature,
@@ -754,6 +755,7 @@ async fn completions(
         .iter()
         .map(|prompt| GenerateRequest {
             inputs: prompt.to_string(),
+            add_special_tokens: true,
             parameters: GenerateParameters {
                 best_of: None,
                 temperature,
@@ -1180,6 +1182,7 @@ async fn chat_completions(
     // build the request passing some parameters
     let generate_request = GenerateRequest {
         inputs: inputs.to_string(),
+        add_special_tokens: false,
         parameters: GenerateParameters {
             best_of: None,
             temperature,
@@ -1386,6 +1389,7 @@ async fn vertex_compatibility(
         .map(|instance| {
             let generate_request = GenerateRequest {
                 inputs: instance.inputs.clone(),
+                add_special_tokens: true,
                 parameters: GenerateParameters {
                     do_sample: true,
                     max_new_tokens: instance.parameters.as_ref().and_then(|p| p.max_new_tokens),
diff --git a/router/src/validation.rs b/router/src/validation.rs
index 0024723c..92491d88 100644
--- a/router/src/validation.rs
+++ b/router/src/validation.rs
@@ -95,6 +95,7 @@ impl Validation {
     pub async fn tokenize(
         &self,
         inputs: String,
+        add_special_tokens: bool,
         truncate: Option<usize>,
     ) -> Result<Option<(tokenizers::Encoding, Vec<Chunk>)>, ValidationError> {
         // If we have a fast tokenizer
@@ -104,7 +105,11 @@ impl Validation {
             // Send request to the background validation task
             // Unwrap is safe here
             sender
-                .send(((inputs, truncate), response_sender, Span::current()))
+                .send((
+                    (inputs, add_special_tokens, truncate),
+                    response_sender,
+                    Span::current(),
+                ))
                 .unwrap();
 
             // Await on response channel
@@ -121,11 +126,15 @@ impl Validation {
     async fn validate_input(
         &self,
         inputs: String,
+        add_special_tokens: bool,
         truncate: Option<usize>,
         max_new_tokens: Option<u32>,
     ) -> Result<(Vec<Chunk>, Option<Vec<u32>>, usize, u32), ValidationError> {
         // If we have a fast tokenizer
-        if let Some((encoding, inputs)) = self.tokenize(inputs.clone(), truncate).await? {
+        if let Some((encoding, inputs)) = self
+            .tokenize(inputs.clone(), add_special_tokens, truncate)
+            .await?
+        {
             // Create response channel
             let input_length = if let Some(truncate) = truncate {
                 std::cmp::min(encoding.len(), truncate)
@@ -158,7 +167,8 @@ impl Validation {
                 ));
             }
 
-            let input_ids = encoding.get_ids()[..input_length].to_owned();
+            let ids = encoding.get_ids();
+            let input_ids = ids[ids.len().saturating_sub(input_length)..].to_owned();
 
             metrics::histogram!("tgi_request_input_length").record(input_length as f64);
             Ok((inputs, Some(input_ids), input_length, max_new_tokens))
@@ -324,7 +334,12 @@ impl Validation {
 
         // Validate inputs
         let (inputs, input_ids, input_length, max_new_tokens) = self
-            .validate_input(request.inputs, truncate, max_new_tokens)
+            .validate_input(
+                request.inputs,
+                request.add_special_tokens,
+                truncate,
+                max_new_tokens,
+            )
             .await?;
 
         // TODO: we should build the FSM here and pass the compiled FSM instead of the grammar
@@ -401,6 +416,7 @@ impl Validation {
         Ok(ValidGenerateRequest {
             inputs,
             input_ids: input_ids.map(Arc::new),
+            add_special_tokens: request.add_special_tokens,
             decoder_input_details,
             input_length: input_length as u32,
             truncate: truncate.unwrap_or(self.max_input_length) as u32,
@@ -449,12 +465,15 @@ fn tokenizer_worker(
     mut receiver: mpsc::UnboundedReceiver<TokenizerRequest>,
 ) {
     // Loop over requests
-    while let Some(((inputs, truncate), response_tx, parent_span)) = receiver.blocking_recv() {
+    while let Some(((inputs, add_special_tokens, truncate), response_tx, parent_span)) =
+        receiver.blocking_recv()
+    {
         parent_span.in_scope(|| {
             response_tx
                 .send(prepare_input(
                     inputs,
                     truncate,
+                    add_special_tokens,
                     &tokenizer,
                     config.as_ref(),
                     preprocessor_config.as_ref(),
@@ -591,6 +610,7 @@ fn image_tokens_fixup(config: &Config, text: String) -> String {
 fn prepare_input(
     inputs: String,
     _truncate: Option<usize>,
+    add_special_tokens: bool,
     tokenizer: &Tokenizer,
     config: Option<&Config>,
     preprocessor_config: Option<&HubPreprocessorConfig>,
@@ -628,14 +648,14 @@ fn prepare_input(
 
     // Get the number of tokens in the input
     let encoding = tokenizer
-        .encode(tokenizer_query, true)
+        .encode(tokenizer_query, add_special_tokens)
         .map_err(|err| ValidationError::Tokenizer(err.to_string()))?;
 
     Ok((encoding, input_chunks))
 }
 
 type TokenizerRequest = (
-    (String, Option<usize>),
+    (String, bool, Option<usize>),
     oneshot::Sender<Result<(tokenizers::Encoding, Vec<Chunk>), ValidationError>>,
     Span,
 );
@@ -720,6 +740,7 @@ pub struct ValidGenerateRequest {
     pub input_ids: Option<Arc<Vec<u32>>>,
     pub input_length: u32,
     pub truncate: u32,
+    pub add_special_tokens: bool,
     pub decoder_input_details: bool,
     pub parameters: ValidParameters,
     pub stopping_parameters: ValidStoppingParameters,
@@ -826,7 +847,7 @@ mod tests {
 
         let max_new_tokens = 10;
         match validation
-            .validate_input("Hello".to_string(), None, Some(max_new_tokens))
+            .validate_input("Hello".to_string(), true, None, Some(max_new_tokens))
             .await
         {
             // Err(ValidationError::MaxNewTokens(1, 10)) => (),
@@ -861,7 +882,7 @@ mod tests {
 
         let max_new_tokens = 10;
         match validation
-            .validate_input("Hello".to_string(), None, Some(max_new_tokens))
+            .validate_input("Hello".to_string(), true, None, Some(max_new_tokens))
             .await
         {
             Err(ValidationError::MaxTotalTokens(6, 1, 10)) => (),
@@ -895,6 +916,7 @@ mod tests {
         match validation
             .validate(GenerateRequest {
                 inputs: "Hello".to_string(),
+                add_special_tokens: true,
                 parameters: GenerateParameters {
                     best_of: Some(2),
                     do_sample: false,
@@ -934,6 +956,7 @@ mod tests {
         match validation
             .validate(GenerateRequest {
                 inputs: "Hello".to_string(),
+                add_special_tokens: true,
                 parameters: GenerateParameters {
                     top_p: Some(1.0),
                     max_new_tokens: Some(5),
@@ -949,6 +972,7 @@ mod tests {
         match validation
             .validate(GenerateRequest {
                 inputs: "Hello".to_string(),
+                add_special_tokens: true,
                 parameters: GenerateParameters {
                     top_p: Some(0.99),
                     max_new_tokens: Some(5),
@@ -964,6 +988,7 @@ mod tests {
         let valid_request = validation
             .validate(GenerateRequest {
                 inputs: "Hello".to_string(),
+                add_special_tokens: true,
                 parameters: GenerateParameters {
                     top_p: None,
                     max_new_tokens: Some(5),
@@ -1002,6 +1027,7 @@ mod tests {
         match validation
             .validate(GenerateRequest {
                 inputs: "Hello".to_string(),
+                add_special_tokens: true,
                 parameters: GenerateParameters {
                     top_n_tokens: Some(5),
                     max_new_tokens: Some(5),
@@ -1017,6 +1043,7 @@ mod tests {
         validation
             .validate(GenerateRequest {
                 inputs: "Hello".to_string(),
+                add_special_tokens: true,
                 parameters: GenerateParameters {
                     top_n_tokens: Some(4),
                     max_new_tokens: Some(5),
@@ -1029,6 +1056,7 @@ mod tests {
         validation
             .validate(GenerateRequest {
                 inputs: "Hello".to_string(),
+                add_special_tokens: true,
                 parameters: GenerateParameters {
                     top_n_tokens: Some(0),
                     max_new_tokens: Some(5),
@@ -1041,6 +1069,7 @@ mod tests {
         let valid_request = validation
             .validate(GenerateRequest {
                 inputs: "Hello".to_string(),
+                add_special_tokens: true,
                 parameters: GenerateParameters {
                     top_n_tokens: None,
                     max_new_tokens: Some(5),
@@ -1089,6 +1118,7 @@ mod tests {
         let chunks = match validation
             .tokenize(
                 format!("test![](data:image/gif;base64,{})", PIXEL_GIF),
+                true,
                 None,
             )
             .await
@@ -1148,6 +1178,7 @@ mod tests {
                     "test![](data:image/gif;base64,{})![](data:image/gif;base64,{})",
                     PIXEL_GIF, PIXEL_GIF
                 ),
+                true,
                 None,
             )
             .await
diff --git a/rust-toolchain.toml b/rust-toolchain.toml
index 8c77896e..f392b161 100644
--- a/rust-toolchain.toml
+++ b/rust-toolchain.toml
@@ -1,5 +1,5 @@
 [toolchain]
 # Released on: June 13, 2024
 # https://releases.rs/docs/1.79.0/
-channel = "1.79.0"
+channel = "1.80.0"
 components = ["rustfmt", "clippy"]
diff --git a/server/Makefile b/server/Makefile
index 51ea8b32..9338b299 100644
--- a/server/Makefile
+++ b/server/Makefile
@@ -7,6 +7,7 @@ include Makefile-selective-scan
 include Makefile-lorax-punica
 include Makefile-fbgemm
 include Makefile-exllamav2
+include Makefile-flashinfer
 
 unit-tests:
 	pytest -s -vv -m "not private" tests
diff --git a/server/Makefile-flashinfer b/server/Makefile-flashinfer
new file mode 100644
index 00000000..3abb0491
--- /dev/null
+++ b/server/Makefile-flashinfer
@@ -0,0 +1,2 @@
+install-flashinfer:
+	pip install flashinfer==0.1.5 -i https://flashinfer.ai/whl/cu124/torch2.4
diff --git a/server/tests/conftest.py b/server/tests/conftest.py
index 16d2c408..d99771f8 100644
--- a/server/tests/conftest.py
+++ b/server/tests/conftest.py
@@ -1,7 +1,10 @@
 import pytest
-
+import os
 from text_generation_server.pb import generate_pb2
 
+os.environ["USE_PREFIX_CACHING"] = "1"
+os.environ["ATTENTION"] = "flashinfer"
+
 
 @pytest.fixture
 def default_pb_parameters():
diff --git a/server/text_generation_server/layers/attention/common.py b/server/text_generation_server/layers/attention/common.py
index f162230c..855f4dfc 100644
--- a/server/text_generation_server/layers/attention/common.py
+++ b/server/text_generation_server/layers/attention/common.py
@@ -9,26 +9,46 @@ if ATTENTION in {"flashinfer", "flashdecoding"}:
     @dataclass
     class Seqlen:
         input_lengths: torch.Tensor
+        prefix_lengths: torch.Tensor
         cu_seqlen_q: Optional[torch.Tensor]
         cu_seqlen_k: Optional[torch.Tensor]
+        max_q: int
+        max_k: int
 
-        def __init__(self, input_lengths):
+        def __init__(
+            self,
+            input_lengths,
+            prefix_lengths,
+            cu_seqlen_q=None,
+            max_q=None,
+            max_k=None,
+        ):
             self.input_lengths = input_lengths
+            self.prefix_lengths = prefix_lengths
             device = self.input_lengths.device
             shape = self.input_lengths.shape
-            cu_seqlen_q = torch.arange(
-                shape[0] + 1,
-                device=device,
-                dtype=torch.int32,
-            )
+            if cu_seqlen_q is None:
+                cu_seqlen_q = torch.arange(
+                    shape[0] + 1,
+                    device=device,
+                    dtype=torch.int32,
+                )
+                max_q = 1
+            else:
+                assert max_q is not None
+            assert max_k is not None
             cu_seqlen_k = torch.zeros(shape[-1] + 1, device=device, dtype=torch.int32)
+
             # cuda graphs don't like this and this is necessary to clamp within mistral
             # Although FA2 might not want the clamping
             # cu_seqlen_k[0] = 0
-            torch.cumsum(self.input_lengths, -1, out=cu_seqlen_k[1:])
+            total = self.input_lengths + self.prefix_lengths
+            torch.cumsum(total, -1, out=cu_seqlen_k[1:])
 
             self.cu_seqlen_q = cu_seqlen_q
             self.cu_seqlen_k = cu_seqlen_k
+            self.max_q = max_q
+            self.max_k = max_k
 
         def clamp(self, max):
             # Flash decoding doesn't need to clamp
@@ -39,6 +59,11 @@ else:
     @dataclass
     class Seqlen:
         input_lengths: torch.Tensor
+        prefix_lengths: torch.Tensor
+        cu_seqlen_q: torch.Tensor
+        max_q: int
+        max_k: int
 
         def clamp(self, max):
+            raise NotImplementedError("Not implemented seqlen for paged")
             return Seqlen(torch.clamp(self.input_lengths, max=max))
diff --git a/server/text_generation_server/layers/attention/cuda.py b/server/text_generation_server/layers/attention/cuda.py
index b3b7ea4f..4b588b5c 100644
--- a/server/text_generation_server/layers/attention/cuda.py
+++ b/server/text_generation_server/layers/attention/cuda.py
@@ -222,18 +222,15 @@ if ATTENTION == "flashinfer":
 
     def attention(
         q: torch.Tensor,
-        k: torch.Tensor,
-        v: torch.Tensor,
         key_cache: torch.Tensor,
         value_cache: torch.Tensor,
-        cu_seqlens,
-        max_s,
+        seqlen: Seqlen,
+        block_tables: torch.Tensor,
         softmax_scale,
         window_size_left=-1,
         causal=True,
         softcap=0.0,
     ):
-        assert window_size_left == -1, "Windowing is not supported with flash infer"
         from text_generation_server.layers.attention.flashinfer import (
             prefill_with_paged_kv_state,
         )
@@ -244,18 +241,17 @@ if ATTENTION == "flashinfer":
             paged_kv_cache=(key_cache, value_cache),
             logits_soft_cap=softcap,
             sm_scale=softmax_scale,
+            window_left=window_size_left,
         )
 
 elif V2:
 
     def attention(
         q,
-        k,
-        v,
         key_cache: torch.Tensor,
         value_cache: torch.Tensor,
-        cu_seqlens,
-        max_s,
+        seqlen: Seqlen,
+        block_tables: torch.Tensor,
         softmax_scale,
         window_size_left=-1,
         causal=True,
@@ -266,17 +262,17 @@ elif V2:
             raise ValueError("`window_size_left` must be > 0 or -1")
         return flash_attn_2_cuda.varlen_fwd(
             q,
-            k,
-            v,
+            key_cache,
+            value_cache,
             out,
-            cu_seqlens,
-            cu_seqlens,
+            seqlen.cu_seqlen_q,
+            seqlen.cu_seqlen_k,
             None,
             None,
+            block_tables,
             None,
-            None,
-            max_s,
-            max_s,
+            seqlen.max_q,
+            seqlen.max_k,
             0.0,
             softmax_scale,
             False,
diff --git a/server/text_generation_server/models/__init__.py b/server/text_generation_server/models/__init__.py
index 4fa9e66d..e03cc30d 100644
--- a/server/text_generation_server/models/__init__.py
+++ b/server/text_generation_server/models/__init__.py
@@ -497,15 +497,14 @@ def get_model(
         else -1
     )
 
-    should_use_sliding_window = (
-        sliding_window is not None and sliding_window != -1 and SUPPORTS_WINDOWING
+    use_sliding_window = sliding_window is not None and sliding_window != -1
+    needs_sliding_window = (
+        max_input_tokens is not None and max_input_tokens > sliding_window
     )
-
-    if should_use_sliding_window:
-        if max_input_tokens is not None and max_input_tokens > sliding_window:
-            raise ValueError(
-                f"The backend {SYSTEM} does not support sliding window attention that is used by the model type {model_type}. To use this model nonetheless with the {SYSTEM} backend, please launch TGI with the argument `--max-input-tokens` smaller than sliding_window={sliding_window} (got here max_input_tokens={max_input_tokens})."
-            )
+    if use_sliding_window and needs_sliding_window and not SUPPORTS_WINDOWING:
+        raise ValueError(
+            f"The backend {SYSTEM} does not support sliding window attention that is used by the model type {model_type}. To use this model nonetheless with the {SYSTEM} backend, please launch TGI with the argument `--max-input-tokens` smaller than sliding_window={sliding_window} (got here max_input_tokens={max_input_tokens})."
+        )
 
     if model_type == DEEPSEEK_V2:
         if FLASH_ATTENTION:
diff --git a/server/text_generation_server/models/custom_modeling/flash_cohere_modeling.py b/server/text_generation_server/models/custom_modeling/flash_cohere_modeling.py
index 1eb8c6c3..fe19180a 100644
--- a/server/text_generation_server/models/custom_modeling/flash_cohere_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_cohere_modeling.py
@@ -29,6 +29,7 @@ from text_generation_server.layers.attention import (
     paged_attention,
     attention,
     reshape_and_cache,
+    Seqlen,
 )
 from text_generation_server.utils.import_utils import SYSTEM
 from text_generation_server.layers import (
@@ -264,7 +265,7 @@ class FlashCohereAttention(torch.nn.Module):
         kv_cache,
         block_tables,
         slots,
-        input_lengths,
+        seqlen,
         max_s,
     ):
         qkv = self.query_key_value(hidden_states)
@@ -296,12 +297,10 @@ class FlashCohereAttention(torch.nn.Module):
             # flash attention
             attn_output = attention(
                 query,
-                key,
-                value,
                 kv_cache[0],
                 kv_cache[1],
-                cu_seqlen_prefill,
-                max_s,
+                seqlen,
+                block_tables,
                 self.softmax_scale,
             )
         # Decode
@@ -313,7 +312,7 @@ class FlashCohereAttention(torch.nn.Module):
                 self.kv_head_mapping,
                 self.softmax_scale,
                 block_tables,
-                input_lengths,
+                seqlen,
                 max_s,
             )
 
@@ -388,7 +387,7 @@ class FlashCohereLayer(nn.Module):
         kv_cache,
         block_tables,
         slots,
-        input_lengths,
+        seqlen,
         max_s,
     ):
         normed_hidden_states, res = self.input_layernorm(hidden_states, residual)
@@ -402,7 +401,7 @@ class FlashCohereLayer(nn.Module):
             kv_cache,
             block_tables,
             slots,
-            input_lengths,
+            seqlen,
             max_s,
         )
 
@@ -454,7 +453,7 @@ class FlashCohereModel(torch.nn.Module):
         kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
         block_tables: torch.Tensor,
         slots: torch.Tensor,
-        input_lengths: torch.Tensor,
+        seqlen: torch.Tensor,
         max_s: int,
     ) -> torch.Tensor:
         hidden_states = self.embed_tokens(input_ids)
@@ -477,7 +476,7 @@ class FlashCohereModel(torch.nn.Module):
                 kv_cache[i],
                 block_tables,
                 slots,
-                input_lengths,
+                seqlen,
                 max_s,
             )
 
@@ -518,7 +517,7 @@ class FlashCohereForCausalLM(torch.nn.Module):
         kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
         block_tables: torch.Tensor,
         slots: torch.Tensor,
-        input_lengths: torch.Tensor,
+        seqlen: Seqlen,
         max_s: int,
         prefill_cache_indices: Optional[torch.Tensor],
         lm_head_indices: Optional[torch.Tensor] = None,
@@ -531,7 +530,7 @@ class FlashCohereForCausalLM(torch.nn.Module):
             kv_cache,
             block_tables,
             slots,
-            input_lengths,
+            seqlen,
             max_s,
         )
         if lm_head_indices is not None:
diff --git a/server/text_generation_server/models/custom_modeling/flash_dbrx_modeling.py b/server/text_generation_server/models/custom_modeling/flash_dbrx_modeling.py
index fc0dca5b..b82b5473 100644
--- a/server/text_generation_server/models/custom_modeling/flash_dbrx_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_dbrx_modeling.py
@@ -29,6 +29,7 @@ from text_generation_server.layers.attention import (
     paged_attention,
     attention,
     reshape_and_cache,
+    Seqlen,
 )
 from text_generation_server.layers import (
     FastLinear,
@@ -309,7 +310,7 @@ class DbrxAttention(torch.nn.Module):
         kv_cache,
         block_tables,
         slots,
-        input_lengths,
+        seqlen,
         max_s,
     ):
         qkv = self.query_key_value(hidden_states)
@@ -335,12 +336,10 @@ class DbrxAttention(torch.nn.Module):
             # flash attention
             attn_output = attention(
                 query,
-                torch.select(kv, dim=1, index=0),
-                torch.select(kv, dim=1, index=1),
                 kv_cache[0],
                 kv_cache[1],
-                cu_seqlen_prefill,
-                max_s,
+                seqlen,
+                block_tables,
                 self.softmax_scale,
             )
         # Decode
@@ -352,7 +351,7 @@ class DbrxAttention(torch.nn.Module):
                 self.kv_head_mapping,
                 self.softmax_scale,
                 block_tables,
-                input_lengths,
+                seqlen,
                 max_s,
             )
 
@@ -389,7 +388,7 @@ class DbrxNormAttentionNorm(nn.Module):
         kv_cache,
         block_tables,
         slots,
-        input_lengths,
+        seqlen,
         max_s,
     ):
         normed_hidden_states, res = self.norm_1(hidden_states, residual)
@@ -403,7 +402,7 @@ class DbrxNormAttentionNorm(nn.Module):
             kv_cache,
             block_tables,
             slots,
-            input_lengths,
+            seqlen,
             max_s,
         )
 
@@ -622,7 +621,7 @@ class DbrxLayer(nn.Module):
         kv_cache,
         block_tables,
         slots,
-        input_lengths,
+        seqlen,
         max_s,
     ):
         # Self Attention
@@ -635,7 +634,7 @@ class DbrxLayer(nn.Module):
             kv_cache,
             block_tables,
             slots,
-            input_lengths,
+            seqlen,
             max_s,
         )
 
@@ -679,7 +678,7 @@ class DbrxModel(torch.nn.Module):
         kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
         block_tables: torch.Tensor,
         slots: torch.Tensor,
-        input_lengths: torch.Tensor,
+        seqlen: Seqlen,
         max_s: int,
     ) -> torch.Tensor:
         hidden_states = self.embed_tokens(input_ids)
@@ -701,7 +700,7 @@ class DbrxModel(torch.nn.Module):
                 kv_cache[i],
                 block_tables,
                 slots,
-                input_lengths,
+                seqlen,
                 max_s,
             )
 
@@ -734,7 +733,7 @@ class FlashDbrxForCausalLM(torch.nn.Module):
         kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
         block_tables: torch.Tensor,
         slots: torch.Tensor,
-        input_lengths: torch.Tensor,
+        seqlen: Seqlen,
         max_s: int,
         prefill_cache_indices: Optional[torch.Tensor],
         lm_head_indices: Optional[torch.Tensor] = None,
@@ -747,7 +746,7 @@ class FlashDbrxForCausalLM(torch.nn.Module):
             kv_cache,
             block_tables,
             slots,
-            input_lengths,
+            seqlen,
             max_s,
         )
         if lm_head_indices is not None:
diff --git a/server/text_generation_server/models/custom_modeling/flash_deepseek_v2_modeling.py b/server/text_generation_server/models/custom_modeling/flash_deepseek_v2_modeling.py
index b25becd5..0585b40e 100644
--- a/server/text_generation_server/models/custom_modeling/flash_deepseek_v2_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_deepseek_v2_modeling.py
@@ -29,8 +29,8 @@ from text_generation_server.layers.attention import (
     attention,
     paged_attention,
     reshape_and_cache,
+    Seqlen,
 )
-from text_generation_server.layers.attention.common import Seqlen
 from text_generation_server.layers.layernorm import FastRMSNorm
 from text_generation_server.layers.rotary import PositionRotaryEmbedding, get_mscale
 from text_generation_server.utils.import_utils import SYSTEM
@@ -298,7 +298,7 @@ class DeepseekV2Attention(torch.nn.Module):
         kv_cache: Tuple[torch.Tensor, torch.Tensor],
         block_tables: torch.Tensor,
         slots: torch.Tensor,
-        input_lengths: Seqlen,
+        seqlen: Seqlen,
         max_s: int,
     ):
         if self.q_lora_rank is None:
@@ -363,12 +363,10 @@ class DeepseekV2Attention(torch.nn.Module):
             # flash attention
             attn_output = attention(
                 query,
-                key,
-                value,
                 kv_cache[0],
                 kv_cache[1],
-                cu_seqlen_prefill,
-                max_s,
+                seqlen,
+                block_tables,
                 self.softmax_scale,
             )
         # Decode
@@ -380,7 +378,7 @@ class DeepseekV2Attention(torch.nn.Module):
                 self.kv_head_mapping,
                 self.softmax_scale,
                 block_tables,
-                input_lengths,
+                seqlen,
                 max_s,
             )
 
@@ -666,7 +664,7 @@ class DeepseekV2Layer(nn.Module):
         kv_cache,
         block_tables: torch.Tensor,
         slots: torch.Tensor,
-        input_lengths: Seqlen,
+        seqlen: Seqlen,
         max_s: int,
     ):
         normed_hidden_states, residual = self.input_layernorm(hidden_states, residual)
@@ -680,7 +678,7 @@ class DeepseekV2Layer(nn.Module):
             kv_cache,
             block_tables,
             slots,
-            input_lengths,
+            seqlen,
             max_s,
         )
 
@@ -729,7 +727,7 @@ class DeepseekV2Model(torch.nn.Module):
         kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
         block_tables: torch.Tensor,
         slots: torch.Tensor,
-        input_lengths: torch.Tensor,
+        seqlen: Seqlen,
         max_s: int,
     ) -> torch.Tensor:
         hidden_states = self.embed_tokens(input_ids)
@@ -751,7 +749,7 @@ class DeepseekV2Model(torch.nn.Module):
                 kv_cache[i],
                 block_tables,
                 slots,
-                input_lengths,
+                seqlen,
                 max_s,
             )
 
@@ -781,7 +779,7 @@ class FlashDeepseekV2ForCausalLM(torch.nn.Module):
         kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
         block_tables: torch.Tensor,
         slots: torch.Tensor,
-        input_lengths: torch.Tensor,
+        seqlen: Seqlen,
         max_s: int,
         prefill_cache_indices: Optional[torch.Tensor],
         lm_head_indices: Optional[torch.Tensor] = None,
@@ -794,7 +792,7 @@ class FlashDeepseekV2ForCausalLM(torch.nn.Module):
             kv_cache,
             block_tables,
             slots,
-            input_lengths,
+            seqlen,
             max_s,
         )
         if lm_head_indices is not None:
diff --git a/server/text_generation_server/models/custom_modeling/flash_gemma2_modeling.py b/server/text_generation_server/models/custom_modeling/flash_gemma2_modeling.py
index faf0f325..d16e805f 100644
--- a/server/text_generation_server/models/custom_modeling/flash_gemma2_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_gemma2_modeling.py
@@ -30,6 +30,7 @@ from text_generation_server.layers.attention import (
     paged_attention,
     attention,
     reshape_and_cache,
+    Seqlen,
 )
 from text_generation_server.layers import (
     TensorParallelRowLinear,
@@ -213,7 +214,7 @@ class FlashGemma2Attention(torch.nn.Module):
         kv_cache,
         block_tables,
         slots,
-        input_lengths,
+        seqlen,
         max_s,
     ):
         qkv = self.query_key_value(hidden_states)
@@ -236,12 +237,10 @@ class FlashGemma2Attention(torch.nn.Module):
             # flash attention
             attn_output = attention(
                 query,
-                torch.select(kv, dim=1, index=0),
-                torch.select(kv, dim=1, index=1),
                 kv_cache[0],
                 kv_cache[1],
-                cu_seqlen_prefill,
-                max_s,
+                seqlen,
+                block_tables,
                 self.softmax_scale,
                 causal=self.causal,
                 window_size_left=self.window_size,
@@ -256,7 +255,7 @@ class FlashGemma2Attention(torch.nn.Module):
                 self.kv_head_mapping,
                 self.softmax_scale,
                 block_tables,
-                input_lengths,
+                seqlen,
                 max_s,
                 softcap=self.softcap,
             )
@@ -343,7 +342,7 @@ class FlashGemma2Layer(nn.Module):
         kv_cache,
         block_tables,
         slots,
-        input_lengths,
+        seqlen,
         max_s,
     ):
         normed_hidden_states, res = self.input_layernorm(hidden_states, residual)
@@ -357,7 +356,7 @@ class FlashGemma2Layer(nn.Module):
             kv_cache,
             block_tables,
             slots,
-            input_lengths,
+            seqlen,
             max_s,
         )
 
@@ -408,7 +407,7 @@ class FlashGemma2Model(torch.nn.Module):
         kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
         block_tables: torch.Tensor,
         slots: torch.Tensor,
-        input_lengths: torch.Tensor,
+        seqlen: Seqlen,
         max_s: int,
     ) -> torch.Tensor:
         hidden_states = inputs_embeds
@@ -430,7 +429,7 @@ class FlashGemma2Model(torch.nn.Module):
                 kv_cache[i],
                 block_tables,
                 slots,
-                input_lengths,
+                seqlen,
                 max_s,
             )
 
@@ -477,7 +476,7 @@ class FlashGemma2ForCausalLM(torch.nn.Module):
         kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
         block_tables: torch.Tensor,
         slots: torch.Tensor,
-        input_lengths: torch.Tensor,
+        seqlen: Seqlen,
         max_s: int,
         prefill_cache_indices: Optional[torch.Tensor],
         lm_head_indices: Optional[torch.Tensor] = None,
@@ -491,7 +490,7 @@ class FlashGemma2ForCausalLM(torch.nn.Module):
             kv_cache,
             block_tables,
             slots,
-            input_lengths,
+            seqlen,
             max_s,
         )
         if lm_head_indices is not None:
diff --git a/server/text_generation_server/models/custom_modeling/flash_gemma_modeling.py b/server/text_generation_server/models/custom_modeling/flash_gemma_modeling.py
index 33738a59..34be4cb8 100644
--- a/server/text_generation_server/models/custom_modeling/flash_gemma_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_gemma_modeling.py
@@ -30,6 +30,7 @@ from text_generation_server.layers.attention import (
     paged_attention,
     attention,
     reshape_and_cache,
+    Seqlen,
 )
 from text_generation_server.layers import (
     TensorParallelRowLinear,
@@ -207,7 +208,7 @@ class FlashGemmaAttention(torch.nn.Module):
         kv_cache,
         block_tables,
         slots,
-        input_lengths,
+        seqlen,
         max_s,
     ):
         qkv = self.query_key_value(hidden_states)
@@ -230,12 +231,10 @@ class FlashGemmaAttention(torch.nn.Module):
             # flash attention
             attn_output = attention(
                 query,
-                torch.select(kv, dim=1, index=0),
-                torch.select(kv, dim=1, index=1),
                 kv_cache[0],
                 kv_cache[1],
-                cu_seqlen_prefill,
-                max_s,
+                seqlen,
+                block_tables,
                 self.softmax_scale,
                 causal=self.causal,
             )
@@ -248,7 +247,7 @@ class FlashGemmaAttention(torch.nn.Module):
                 self.kv_head_mapping,
                 self.softmax_scale,
                 block_tables,
-                input_lengths,
+                seqlen,
                 max_s,
             )
 
@@ -320,7 +319,7 @@ class FlashGemmaLayer(nn.Module):
         kv_cache,
         block_tables,
         slots,
-        input_lengths,
+        seqlen,
         max_s,
     ):
         normed_hidden_states, res = self.input_layernorm(hidden_states, residual)
@@ -334,7 +333,7 @@ class FlashGemmaLayer(nn.Module):
             kv_cache,
             block_tables,
             slots,
-            input_lengths,
+            seqlen,
             max_s,
         )
 
@@ -382,7 +381,7 @@ class FlashGemmaModel(torch.nn.Module):
         kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
         block_tables: torch.Tensor,
         slots: torch.Tensor,
-        input_lengths: torch.Tensor,
+        seqlen: Seqlen,
         max_s: int,
     ) -> torch.Tensor:
         hidden_states = inputs_embeds
@@ -404,7 +403,7 @@ class FlashGemmaModel(torch.nn.Module):
                 kv_cache[i],
                 block_tables,
                 slots,
-                input_lengths,
+                seqlen,
                 max_s,
             )
 
@@ -449,7 +448,7 @@ class FlashGemmaForCausalLM(torch.nn.Module):
         kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
         block_tables: torch.Tensor,
         slots: torch.Tensor,
-        input_lengths: torch.Tensor,
+        seqlen: Seqlen,
         max_s: int,
         prefill_cache_indices: Optional[torch.Tensor],
         lm_head_indices: Optional[torch.Tensor] = None,
@@ -463,7 +462,7 @@ class FlashGemmaForCausalLM(torch.nn.Module):
             kv_cache,
             block_tables,
             slots,
-            input_lengths,
+            seqlen,
             max_s,
         )
         if lm_head_indices is not None:
diff --git a/server/text_generation_server/models/custom_modeling/flash_gpt2_modeling.py b/server/text_generation_server/models/custom_modeling/flash_gpt2_modeling.py
index d30b5a0a..403fa908 100644
--- a/server/text_generation_server/models/custom_modeling/flash_gpt2_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_gpt2_modeling.py
@@ -29,6 +29,7 @@ from text_generation_server.layers.attention import (
     paged_attention,
     attention,
     reshape_and_cache,
+    Seqlen,
 )
 from text_generation_server.layers import (
     TensorParallelRowLinear,
@@ -213,7 +214,7 @@ class FlashGPT2Attention(torch.nn.Module):
         kv_cache,
         block_tables,
         slots,
-        input_lengths,
+        seqlen,
         max_s,
     ):
         query, key, value = self.query_key_value(hidden_states).split(
@@ -230,12 +231,10 @@ class FlashGPT2Attention(torch.nn.Module):
             # flash attention
             attn_output = attention(
                 query,
-                key,
-                value,
                 kv_cache[0],
                 kv_cache[1],
-                cu_seqlen_prefill,
-                max_s,
+                seqlen,
+                block_tables,
                 self.softmax_scale,
             )
         # Decode
@@ -247,7 +246,7 @@ class FlashGPT2Attention(torch.nn.Module):
                 self.kv_head_mapping,
                 self.softmax_scale,
                 block_tables,
-                input_lengths,
+                seqlen,
                 max_s,
             )
 
@@ -316,7 +315,7 @@ class FlashGPT2Layer(nn.Module):
         kv_cache,
         block_tables,
         slots,
-        input_lengths,
+        seqlen,
         max_s,
     ):
         residual = hidden_states
@@ -329,7 +328,7 @@ class FlashGPT2Layer(nn.Module):
             kv_cache,
             block_tables,
             slots,
-            input_lengths,
+            seqlen,
             max_s,
         )
 
@@ -382,7 +381,7 @@ class FlashGPT2Model(torch.nn.Module):
         kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
         block_tables: torch.Tensor,
         slots: torch.Tensor,
-        input_lengths: torch.Tensor,
+        seqlen: Seqlen,
         max_s: int,
         true_max_s: int,
         prefill_cache_indices: Optional[torch.Tensor],
@@ -398,7 +397,7 @@ class FlashGPT2Model(torch.nn.Module):
                 kv_cache[i],
                 block_tables,
                 slots,
-                input_lengths,
+                seqlen,
                 max_s,
             )
 
@@ -435,7 +434,7 @@ class FlashGPT2ForCausalLM(torch.nn.Module):
         kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
         block_tables: torch.Tensor,
         slots: torch.Tensor,
-        input_lengths: torch.Tensor,
+        seqlen: Seqlen,
         max_s: int,
         prefill_cache_indices: Optional[torch.Tensor] = None,
         lm_head_indices: Optional[torch.Tensor] = None,
@@ -451,7 +450,7 @@ class FlashGPT2ForCausalLM(torch.nn.Module):
             kv_cache,
             block_tables,
             slots,
-            input_lengths,
+            seqlen,
             max_s,
             true_max_s=max_s,
             prefill_cache_indices=prefill_cache_indices,
diff --git a/server/text_generation_server/models/custom_modeling/flash_gptj_modeling.py b/server/text_generation_server/models/custom_modeling/flash_gptj_modeling.py
index eb667384..35ab2791 100644
--- a/server/text_generation_server/models/custom_modeling/flash_gptj_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_gptj_modeling.py
@@ -29,6 +29,7 @@ from text_generation_server.layers.attention import (
     paged_attention,
     attention,
     reshape_and_cache,
+    Seqlen,
 )
 from text_generation_server.layers import (
     TensorParallelRowLinear,
@@ -167,7 +168,7 @@ class FlashGPTJAttention(torch.nn.Module):
         kv_cache,
         block_tables,
         slots,
-        input_lengths,
+        seqlen,
         max_s,
     ):
         query, key, value = self.query_key_value(hidden_states).split(
@@ -192,10 +193,10 @@ class FlashGPTJAttention(torch.nn.Module):
             # flash attention
             attn_output = attention(
                 query,
-                key,
-                value,
-                cu_seqlen_prefill,
-                max_s,
+                kv_cache[0],
+                kv_cache[1],
+                seqlen,
+                block_tables,
                 self.softmax_scale,
             )
         # Decode
@@ -207,7 +208,7 @@ class FlashGPTJAttention(torch.nn.Module):
                 self.kv_head_mapping,
                 self.softmax_scale,
                 block_tables,
-                input_lengths,
+                seqlen,
                 max_s,
             )
 
@@ -268,7 +269,7 @@ class FlashGPTJLayer(nn.Module):
         kv_cache,
         block_tables,
         slots,
-        input_lengths,
+        seqlen,
         max_s,
     ):
         hidden_states, residual = self.input_layernorm(hidden_states, residual)
@@ -281,7 +282,7 @@ class FlashGPTJLayer(nn.Module):
             kv_cache,
             block_tables,
             slots,
-            input_lengths,
+            seqlen,
             max_s,
         )
 
@@ -328,7 +329,7 @@ class FlashGPTJModel(torch.nn.Module):
         kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
         block_tables: torch.Tensor,
         slots: torch.Tensor,
-        input_lengths: torch.Tensor,
+        seqlen: Seqlen,
         max_s: int,
         prefill_cache_indices: Optional[torch.Tensor],
     ) -> torch.Tensor:
@@ -351,7 +352,7 @@ class FlashGPTJModel(torch.nn.Module):
                 kv_cache[i],
                 block_tables,
                 slots,
-                input_lengths,
+                seqlen,
                 max_s,
             )
 
@@ -382,7 +383,7 @@ class FlashGPTJForCausalLM(torch.nn.Module):
         kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
         block_tables: torch.Tensor,
         slots: torch.Tensor,
-        input_lengths: torch.Tensor,
+        seqlen: Seqlen,
         max_s: int,
         prefill_cache_indices: Optional[torch.Tensor] = None,
         lm_head_indices: Optional[torch.Tensor] = None,
@@ -395,7 +396,7 @@ class FlashGPTJForCausalLM(torch.nn.Module):
             kv_cache,
             block_tables,
             slots,
-            input_lengths,
+            seqlen,
             max_s,
             prefill_cache_indices=prefill_cache_indices,
         )
diff --git a/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py b/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py
index 3253d2dc..5b228f9f 100644
--- a/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py
@@ -32,6 +32,7 @@ from text_generation_server.layers.attention import (
     paged_attention,
     attention,
     reshape_and_cache,
+    Seqlen,
 )
 from text_generation_server.layers import (
     TensorParallelRowLinear,
@@ -194,7 +195,7 @@ class FlashLlamaAttention(torch.nn.Module):
         kv_cache,
         block_tables,
         slots,
-        input_lengths,
+        seqlen,
         max_s,
         adapter_data,
     ):
@@ -218,12 +219,10 @@ class FlashLlamaAttention(torch.nn.Module):
             # flash attention
             attn_output = attention(
                 query,
-                torch.select(kv, dim=1, index=0),
-                torch.select(kv, dim=1, index=1),
                 kv_cache[0],
                 kv_cache[1],
-                cu_seqlen_prefill,
-                max_s,
+                seqlen,
+                block_tables,
                 self.softmax_scale,
             )
         # Decode
@@ -235,7 +234,7 @@ class FlashLlamaAttention(torch.nn.Module):
                 self.kv_head_mapping,
                 self.softmax_scale,
                 block_tables,
-                input_lengths,
+                seqlen,
                 max_s,
             )
 
@@ -375,7 +374,7 @@ class FlashLlamaLayer(nn.Module):
         kv_cache,
         block_tables,
         slots,
-        input_lengths,
+        seqlen,
         max_s,
         adapter_data,
     ):
@@ -390,7 +389,7 @@ class FlashLlamaLayer(nn.Module):
             kv_cache,
             block_tables,
             slots,
-            input_lengths,
+            seqlen,
             max_s,
             adapter_data,
         )
@@ -479,7 +478,7 @@ class FlashLlamaModel(torch.nn.Module):
         kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
         block_tables: torch.Tensor,
         slots: torch.Tensor,
-        input_lengths: torch.Tensor,
+        seqlen: Seqlen,
         max_s: int,
         true_max_s: int,
         prefill_cache_indices: Optional[torch.Tensor],
@@ -504,7 +503,7 @@ class FlashLlamaModel(torch.nn.Module):
                 kv_cache[i],
                 block_tables,
                 slots,
-                input_lengths,
+                seqlen,
                 max_s,
                 adapter_data,
             )
@@ -548,7 +547,7 @@ class FlashLlamaForCausalLM(torch.nn.Module):
         kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
         block_tables: torch.Tensor,
         slots: torch.Tensor,
-        input_lengths: torch.Tensor,
+        seqlen: Seqlen,
         max_s: int,
         prefill_cache_indices: Optional[torch.Tensor] = None,
         lm_head_indices: Optional[torch.Tensor] = None,
@@ -562,7 +561,7 @@ class FlashLlamaForCausalLM(torch.nn.Module):
             kv_cache,
             block_tables,
             slots,
-            input_lengths,
+            seqlen,
             max_s,
             true_max_s=max_s,
             prefill_cache_indices=prefill_cache_indices,
diff --git a/server/text_generation_server/models/custom_modeling/flash_mistral_modeling.py b/server/text_generation_server/models/custom_modeling/flash_mistral_modeling.py
index 5a150267..30ca3faf 100644
--- a/server/text_generation_server/models/custom_modeling/flash_mistral_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_mistral_modeling.py
@@ -31,6 +31,7 @@ from text_generation_server.layers.attention import (
     paged_attention,
     attention,
     reshape_and_cache,
+    Seqlen,
 )
 from text_generation_server.layers import (
     TensorParallelRowLinear,
@@ -185,7 +186,7 @@ class MistralAttention(torch.nn.Module):
         kv_cache,
         block_tables,
         slots,
-        input_lengths,
+        seqlen,
         max_s,
         prefill_cache_indices,
         adapter_data,
@@ -217,12 +218,10 @@ class MistralAttention(torch.nn.Module):
             # flash attention
             attn_output = attention(
                 query,
-                torch.select(kv, dim=1, index=0),
-                torch.select(kv, dim=1, index=1),
                 kv_cache[0],
                 kv_cache[1],
-                cu_seqlen_prefill,
-                max_s,
+                seqlen,
+                block_tables,
                 self.softmax_scale,
                 window_size_left=self.max_past,
             )
@@ -235,7 +234,7 @@ class MistralAttention(torch.nn.Module):
                 self.kv_head_mapping,
                 self.softmax_scale,
                 block_tables,
-                input_lengths,
+                seqlen,
                 max_s,
             )
 
@@ -356,7 +355,7 @@ class MistralLayer(nn.Module):
         kv_cache,
         block_tables,
         slots,
-        input_lengths,
+        seqlen,
         max_s,
         prefill_cache_indices,
         adapter_data,
@@ -372,7 +371,7 @@ class MistralLayer(nn.Module):
             kv_cache,
             block_tables,
             slots,
-            input_lengths,
+            seqlen,
             max_s,
             prefill_cache_indices,
             adapter_data,
@@ -424,7 +423,7 @@ class MistralModel(torch.nn.Module):
         kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
         block_tables: torch.Tensor,
         slots: torch.Tensor,
-        input_lengths: torch.Tensor,
+        seqlen: Seqlen,
         max_s: int,
         true_max_s: int,
         prefill_cache_indices: Optional[torch.Tensor],
@@ -448,7 +447,7 @@ class MistralModel(torch.nn.Module):
                 kv_cache[i],
                 block_tables,
                 slots,
-                input_lengths,
+                seqlen,
                 max_s,
                 prefill_cache_indices,
                 adapter_data,
@@ -499,7 +498,7 @@ class FlashMistralForCausalLM(torch.nn.Module):
         kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
         block_tables: torch.Tensor,
         slots: torch.Tensor,
-        input_lengths: torch.Tensor,
+        seqlen: Seqlen,
         max_s: int,
         prefill_cache_indices: Optional[torch.Tensor],
         lm_head_indices: Optional[torch.Tensor] = None,
@@ -512,7 +511,7 @@ class FlashMistralForCausalLM(torch.nn.Module):
         elif self.max_past is not None:
             # Clamp in decode mode as paged attention requires clamped values whereas the flash attention
             # kernel requires the true values
-            input_lengths = input_lengths.clamp(max=self.max_past_tensor)
+            seqlen = seqlen.clamp(max=self.max_past_tensor)
 
         inputs_embeds = self.embed_tokens(input_ids)
         hidden_states = self.model(
@@ -522,7 +521,7 @@ class FlashMistralForCausalLM(torch.nn.Module):
             kv_cache,
             block_tables,
             slots,
-            input_lengths,
+            seqlen,
             max_s,
             true_max_s,
             prefill_cache_indices,
diff --git a/server/text_generation_server/models/custom_modeling/flash_mixtral_modeling.py b/server/text_generation_server/models/custom_modeling/flash_mixtral_modeling.py
index ad426ffe..c5d60af1 100644
--- a/server/text_generation_server/models/custom_modeling/flash_mixtral_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_mixtral_modeling.py
@@ -35,6 +35,7 @@ from text_generation_server.layers.attention import (
     paged_attention,
     attention,
     reshape_and_cache,
+    Seqlen,
 )
 from text_generation_server.layers import (
     FastLinear,
@@ -243,7 +244,7 @@ class MixtralAttention(torch.nn.Module):
         kv_cache,
         block_tables,
         slots,
-        input_lengths,
+        seqlen,
         max_s,
         prefill_cache_indices,
     ):
@@ -274,12 +275,10 @@ class MixtralAttention(torch.nn.Module):
             # flash attention
             attn_output = attention(
                 query,
-                torch.select(kv, dim=1, index=0),
-                torch.select(kv, dim=1, index=1),
                 kv_cache[0],
                 kv_cache[1],
-                cu_seqlen_prefill,
-                max_s,
+                seqlen,
+                block_tables,
                 self.softmax_scale,
                 window_size_left=self.max_past,
             )
@@ -292,7 +291,7 @@ class MixtralAttention(torch.nn.Module):
                 self.kv_head_mapping,
                 self.softmax_scale,
                 block_tables,
-                input_lengths,
+                seqlen,
                 max_s,
             )
 
@@ -498,7 +497,7 @@ class MixtralLayer(nn.Module):
         kv_cache,
         block_tables,
         slots,
-        input_lengths,
+        seqlen,
         max_s,
         prefill_cache_indices,
     ):
@@ -513,7 +512,7 @@ class MixtralLayer(nn.Module):
             kv_cache,
             block_tables,
             slots,
-            input_lengths,
+            seqlen,
             max_s,
             prefill_cache_indices,
         )
@@ -568,7 +567,7 @@ class MixtralModel(torch.nn.Module):
         kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
         block_tables: torch.Tensor,
         slots: torch.Tensor,
-        input_lengths: torch.Tensor,
+        seqlen: Seqlen,
         max_s: int,
         true_max_s: int,
         prefill_cache_indices: Optional[torch.Tensor],
@@ -592,7 +591,7 @@ class MixtralModel(torch.nn.Module):
                 kv_cache[i],
                 block_tables,
                 slots,
-                input_lengths,
+                seqlen,
                 max_s,
                 prefill_cache_indices,
             )
@@ -627,7 +626,7 @@ class FlashMixtralForCausalLM(torch.nn.Module):
         kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
         block_tables: torch.Tensor,
         slots: torch.Tensor,
-        input_lengths: torch.Tensor,
+        seqlen: Seqlen,
         max_s: int,
         prefill_cache_indices: Optional[torch.Tensor],
         lm_head_indices: Optional[torch.Tensor] = None,
@@ -640,7 +639,7 @@ class FlashMixtralForCausalLM(torch.nn.Module):
         elif self.max_past is not None:
             # Clamp in decode mode as paged attention requires clamped values whereas the flash attention
             # kernel requires the true values
-            input_lengths = input_lengths.clamp(max=self.max_past_tensor)
+            seqlen = seqlen.clamp(max=self.max_past_tensor)
 
         hidden_states = self.model(
             input_ids,
@@ -649,7 +648,7 @@ class FlashMixtralForCausalLM(torch.nn.Module):
             kv_cache,
             block_tables,
             slots,
-            input_lengths,
+            seqlen,
             max_s,
             true_max_s,
             prefill_cache_indices,
diff --git a/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py b/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py
index b684e035..fda648f9 100644
--- a/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py
@@ -31,6 +31,7 @@ from text_generation_server.layers.attention import (
     paged_attention,
     attention,
     reshape_and_cache,
+    Seqlen,
 )
 from text_generation_server.layers import (
     TensorParallelRowLinear,
@@ -147,7 +148,7 @@ class FlashNeoxAttention(torch.nn.Module):
         kv_cache,
         block_tables,
         slots,
-        input_lengths,
+        seqlen,
         max_s,
     ):
         qkv = self.query_key_value(hidden_states)
@@ -171,12 +172,10 @@ class FlashNeoxAttention(torch.nn.Module):
             # flash attention
             attn_output = attention(
                 qkv[:, 0],
-                qkv[:, 1],
-                qkv[:, 2],
                 kv_cache[0],
                 kv_cache[1],
-                cu_seqlen_prefill,
-                max_s,
+                seqlen,
+                block_tables,
                 self.softmax_scale,
             )
         # Decode
@@ -188,7 +187,7 @@ class FlashNeoxAttention(torch.nn.Module):
                 self.kv_head_mapping,
                 self.softmax_scale,
                 block_tables,
-                input_lengths,
+                seqlen,
                 max_s,
             )
 
@@ -258,7 +257,7 @@ class FlashNeoXLayer(nn.Module):
         kv_cache,
         block_tables,
         slots,
-        input_lengths,
+        seqlen,
         max_s,
     ):
         if self.use_parallel_residual:
@@ -272,7 +271,7 @@ class FlashNeoXLayer(nn.Module):
                 kv_cache,
                 block_tables,
                 slots,
-                input_lengths,
+                seqlen,
                 max_s,
             )
 
@@ -296,7 +295,7 @@ class FlashNeoXLayer(nn.Module):
                 kv_cache,
                 block_tables,
                 slots,
-                input_lengths,
+                seqlen,
                 max_s,
             )
 
@@ -350,7 +349,7 @@ class FlashGPTNeoXModel(FlashGPTNeoXPreTrainedModel):
         kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
         block_tables: torch.Tensor,
         slots: torch.Tensor,
-        input_lengths: torch.Tensor,
+        seqlen: Seqlen,
         max_s: int,
     ) -> torch.Tensor:
         hidden_states = self.embed_in(input_ids)
@@ -372,7 +371,7 @@ class FlashGPTNeoXModel(FlashGPTNeoXPreTrainedModel):
                 kv_cache[i],
                 block_tables,
                 slots,
-                input_lengths,
+                seqlen,
                 max_s,
             )
 
@@ -404,7 +403,7 @@ class FlashGPTNeoXForCausalLM(FlashGPTNeoXPreTrainedModel):
         kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
         block_tables: torch.Tensor,
         slots: torch.Tensor,
-        input_lengths: torch.Tensor,
+        seqlen: Seqlen,
         max_s: int,
         prefill_cache_indices: Optional[torch.Tensor],
         lm_head_indices: Optional[torch.Tensor] = None,
@@ -417,7 +416,7 @@ class FlashGPTNeoXForCausalLM(FlashGPTNeoXPreTrainedModel):
             kv_cache,
             block_tables,
             slots,
-            input_lengths,
+            seqlen,
             max_s,
         )
         if lm_head_indices is not None:
diff --git a/server/text_generation_server/models/custom_modeling/flash_pali_gemma_modeling.py b/server/text_generation_server/models/custom_modeling/flash_pali_gemma_modeling.py
index e08a2aad..d044b492 100644
--- a/server/text_generation_server/models/custom_modeling/flash_pali_gemma_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_pali_gemma_modeling.py
@@ -19,6 +19,7 @@ from torch import nn
 from typing import Optional, List, Tuple
 
 from text_generation_server.layers.tensor_parallel import TensorParallelColumnLinear
+from text_generation_server.layers.attention import Seqlen
 from text_generation_server.models.custom_modeling.vlm import (
     load_text_model,
     load_vision_model,
@@ -70,7 +71,7 @@ class PaliGemmaForConditionalGeneration(nn.Module):
         kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
         block_tables: torch.Tensor,
         slots: torch.Tensor,
-        input_lengths: torch.Tensor,
+        seqlen: Seqlen,
         max_s: int,
         prefill_cache_indices: Optional[torch.Tensor] = None,
         lm_head_indices: Optional[torch.Tensor] = None,
@@ -107,7 +108,7 @@ class PaliGemmaForConditionalGeneration(nn.Module):
             kv_cache=kv_cache,
             block_tables=block_tables,
             slots=slots,
-            input_lengths=input_lengths,
+            seqlen=seqlen,
             max_s=max_s,
         )
 
diff --git a/server/text_generation_server/models/custom_modeling/flash_phi_modeling.py b/server/text_generation_server/models/custom_modeling/flash_phi_modeling.py
index efe27c13..37adb8be 100644
--- a/server/text_generation_server/models/custom_modeling/flash_phi_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_phi_modeling.py
@@ -10,6 +10,7 @@ from text_generation_server.layers.attention import (
     paged_attention,
     attention,
     reshape_and_cache,
+    Seqlen,
 )
 from text_generation_server.layers import (
     TensorParallelRowLinear,
@@ -159,7 +160,7 @@ class FlashPhiAttention(torch.nn.Module):
         kv_cache,
         block_tables,
         slots,
-        input_lengths,
+        seqlen,
         max_s,
     ):
         # Compute query, key, value and split
@@ -192,12 +193,10 @@ class FlashPhiAttention(torch.nn.Module):
         if cu_seqlen_prefill is not None:
             attn_output = attention(
                 query,
-                torch.select(kv, dim=1, index=0),
-                torch.select(kv, dim=1, index=1),
                 kv_cache[0],
                 kv_cache[1],
-                cu_seqlen_prefill,
-                max_s,
+                seqlen,
+                block_tables,
                 self.softmax_scale,
             )
         # Decode
@@ -209,7 +208,7 @@ class FlashPhiAttention(torch.nn.Module):
                 self.kv_head_mapping,
                 self.softmax_scale,
                 block_tables,
-                input_lengths,
+                seqlen,
                 max_s,
             )
 
@@ -276,7 +275,7 @@ class FlashPhiLayer(nn.Module):
         kv_cache,
         block_tables,
         slots,
-        input_lengths,
+        seqlen,
         max_s,
     ):
         hidden_states, res = self.input_layernorm(hidden_states, residual)
@@ -289,7 +288,7 @@ class FlashPhiLayer(nn.Module):
             kv_cache,
             block_tables,
             slots,
-            input_lengths,
+            seqlen,
             max_s,
         )
 
@@ -341,7 +340,7 @@ class FlashPhiModel(torch.nn.Module):
         kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
         block_tables: torch.Tensor,
         slots: torch.Tensor,
-        input_lengths: torch.Tensor,
+        seqlen: Seqlen,
         max_s: int,
     ) -> torch.Tensor:
         hidden_states = self.embed_tokens(input_ids)
@@ -363,7 +362,7 @@ class FlashPhiModel(torch.nn.Module):
                 kv_cache[i],
                 block_tables,
                 slots,
-                input_lengths,
+                seqlen,
                 max_s,
             )
 
@@ -396,7 +395,7 @@ class FlashPhiForCausalLM(torch.nn.Module):
         kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
         block_tables: torch.Tensor,
         slots: torch.Tensor,
-        input_lengths: torch.Tensor,
+        seqlen: Seqlen,
         max_s: int,
         prefill_cache_indices: Optional[torch.Tensor],
         lm_head_indices: Optional[torch.Tensor] = None,
@@ -409,7 +408,7 @@ class FlashPhiForCausalLM(torch.nn.Module):
             kv_cache,
             block_tables,
             slots,
-            input_lengths,
+            seqlen,
             max_s,
         )
         if lm_head_indices is not None:
diff --git a/server/text_generation_server/models/custom_modeling/flash_qwen2_modeling.py b/server/text_generation_server/models/custom_modeling/flash_qwen2_modeling.py
index 879b8abd..5aac28a3 100644
--- a/server/text_generation_server/models/custom_modeling/flash_qwen2_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_qwen2_modeling.py
@@ -9,6 +9,7 @@ from text_generation_server.layers.attention import (
     paged_attention,
     attention,
     reshape_and_cache,
+    Seqlen,
 )
 from text_generation_server.layers import (
     TensorParallelRowLinear,
@@ -104,7 +105,7 @@ class Qwen2Attention(torch.nn.Module):
         kv_cache,
         block_tables,
         slots,
-        input_lengths,
+        seqlen,
         max_s,
         prefill_cache_indices,
     ):
@@ -135,12 +136,10 @@ class Qwen2Attention(torch.nn.Module):
             # flash attention
             attn_output = attention(
                 query,
-                torch.select(kv, dim=1, index=0),
-                torch.select(kv, dim=1, index=1),
                 kv_cache[0],
                 kv_cache[1],
-                cu_seqlen_prefill,
-                max_s,
+                seqlen,
+                block_tables,
                 self.softmax_scale,
                 window_size_left=self.max_past,
             )
@@ -153,7 +152,7 @@ class Qwen2Attention(torch.nn.Module):
                 self.kv_head_mapping,
                 self.softmax_scale,
                 block_tables,
-                input_lengths,
+                seqlen,
                 max_s,
             )
 
@@ -225,7 +224,7 @@ class Qwen2Layer(nn.Module):
         kv_cache,
         block_tables,
         slots,
-        input_lengths,
+        seqlen,
         max_s,
         prefill_cache_indices,
     ):
@@ -240,7 +239,7 @@ class Qwen2Layer(nn.Module):
             kv_cache,
             block_tables,
             slots,
-            input_lengths,
+            seqlen,
             max_s,
             prefill_cache_indices,
         )
@@ -296,7 +295,7 @@ class Qwen2Model(torch.nn.Module):
         kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
         block_tables: torch.Tensor,
         slots: torch.Tensor,
-        input_lengths: torch.Tensor,
+        seqlen: Seqlen,
         max_s: int,
         true_max_s: int,
         prefill_cache_indices: Optional[torch.Tensor],
@@ -320,7 +319,7 @@ class Qwen2Model(torch.nn.Module):
                 kv_cache[i],
                 block_tables,
                 slots,
-                input_lengths,
+                seqlen,
                 max_s,
                 prefill_cache_indices,
             )
@@ -361,7 +360,7 @@ class Qwen2ForCausalLM(torch.nn.Module):
         kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
         block_tables: torch.Tensor,
         slots: torch.Tensor,
-        input_lengths: torch.Tensor,
+        seqlen: Seqlen,
         max_s: int,
         prefill_cache_indices: Optional[torch.Tensor] = None,
         lm_head_indices: Optional[torch.Tensor] = None,
@@ -374,7 +373,7 @@ class Qwen2ForCausalLM(torch.nn.Module):
         elif self.max_past is not None:
             # Clamp in decode mode as paged attention requires clamped values whereas the flash attention
             # kernel requires the true values
-            input_lengths = input_lengths.clamp(max=self.max_past_tensor)
+            seqlen = seqlen.clamp(max=self.max_past_tensor)
 
         hidden_states = self.model(
             input_ids,
@@ -383,7 +382,7 @@ class Qwen2ForCausalLM(torch.nn.Module):
             kv_cache,
             block_tables,
             slots,
-            input_lengths,
+            seqlen,
             max_s,
             true_max_s,
             prefill_cache_indices,
diff --git a/server/text_generation_server/models/custom_modeling/flash_rw_modeling.py b/server/text_generation_server/models/custom_modeling/flash_rw_modeling.py
index c72a9b90..1c55dd91 100644
--- a/server/text_generation_server/models/custom_modeling/flash_rw_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_rw_modeling.py
@@ -19,6 +19,7 @@ from text_generation_server.layers.attention import (
     attention,
     paged_attention,
     reshape_and_cache,
+    Seqlen,
 )
 
 
@@ -181,7 +182,7 @@ class FlashRWAttention(torch.nn.Module):
         kv_cache,
         block_tables,
         slots,
-        input_lengths,
+        seqlen,
         max_s,
     ):
         qkv = self.query_key_value(hidden_states)
@@ -206,12 +207,10 @@ class FlashRWAttention(torch.nn.Module):
             # flash attention
             attn_output = attention(
                 query,
-                torch.select(kv, dim=1, index=0),
-                torch.select(kv, dim=1, index=1),
                 kv_cache[0],
                 kv_cache[1],
-                cu_seqlen_prefill,
-                max_s,
+                seqlen,
+                block_tables,
                 self.softmax_scale,
             )
         # Decode
@@ -223,7 +222,7 @@ class FlashRWAttention(torch.nn.Module):
                 self.kv_head_mapping,
                 self.softmax_scale,
                 block_tables,
-                input_lengths,
+                seqlen,
                 max_s,
             )
 
@@ -296,7 +295,7 @@ class FlashRWLargeAttention(torch.nn.Module):
         kv_cache,
         block_tables,
         slots,
-        input_lengths,
+        seqlen,
         max_s,
     ):
         qkv = self.query_key_value(hidden_states)
@@ -343,7 +342,7 @@ class FlashRWLargeAttention(torch.nn.Module):
                 self.kv_head_mapping,
                 self.softmax_scale,
                 block_tables,
-                input_lengths,
+                seqlen,
                 max_s,
             )
 
@@ -429,7 +428,7 @@ class FlashRWLayer(nn.Module):
         kv_cache,
         block_tables,
         slots,
-        input_lengths,
+        seqlen,
         max_s,
     ):
         if self.parallel_attn:
@@ -443,7 +442,7 @@ class FlashRWLayer(nn.Module):
                 kv_cache,
                 block_tables,
                 slots,
-                input_lengths,
+                seqlen,
                 max_s,
             )
 
@@ -465,7 +464,7 @@ class FlashRWLayer(nn.Module):
                 kv_cache,
                 block_tables,
                 slots,
-                input_lengths,
+                seqlen,
                 max_s,
             )
 
@@ -552,7 +551,7 @@ class FlashRWLargeLayer(nn.Module):
         kv_cache,
         block_tables,
         slots,
-        input_lengths,
+        seqlen,
         max_s,
     ):
         # Layer norm.
@@ -567,7 +566,7 @@ class FlashRWLargeLayer(nn.Module):
             kv_cache,
             block_tables,
             slots,
-            input_lengths,
+            seqlen,
             max_s,
         )
 
@@ -628,7 +627,7 @@ class FlashRWModel(FlashRWPreTrainedModel):
         kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
         block_tables: torch.Tensor,
         slots: torch.Tensor,
-        input_lengths: torch.Tensor,
+        seqlen: Seqlen,
         max_s: int,
     ) -> torch.Tensor:
         hidden_states = self.word_embeddings(input_ids)
@@ -650,7 +649,7 @@ class FlashRWModel(FlashRWPreTrainedModel):
                 kv_cache[i],
                 block_tables,
                 slots,
-                input_lengths,
+                seqlen,
                 max_s,
             )
 
@@ -680,7 +679,7 @@ class FlashRWForCausalLM(FlashRWPreTrainedModel):
         kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
         block_tables: torch.Tensor,
         slots: torch.Tensor,
-        input_lengths: torch.Tensor,
+        seqlen: Seqlen,
         max_s: int,
         prefill_cache_indices: Optional[torch.Tensor],
         lm_head_indices: Optional[torch.Tensor] = None,
@@ -693,7 +692,7 @@ class FlashRWForCausalLM(FlashRWPreTrainedModel):
             kv_cache,
             block_tables,
             slots,
-            input_lengths,
+            seqlen,
             max_s,
         )
         if lm_head_indices is not None:
diff --git a/server/text_generation_server/models/custom_modeling/flash_santacoder_modeling.py b/server/text_generation_server/models/custom_modeling/flash_santacoder_modeling.py
index 109304be..19025c4c 100644
--- a/server/text_generation_server/models/custom_modeling/flash_santacoder_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_santacoder_modeling.py
@@ -9,6 +9,7 @@ from text_generation_server.layers.attention import (
     paged_attention,
     attention,
     reshape_and_cache,
+    Seqlen,
 )
 from text_generation_server.layers import (
     TensorParallelRowLinear,
@@ -268,7 +269,7 @@ class FlashMQAttention(torch.nn.Module):
         kv_cache,
         block_tables,
         slots,
-        input_lengths,
+        seqlen,
         max_s,
     ):
         qkv = self.c_attn(hidden_states)
@@ -291,12 +292,10 @@ class FlashMQAttention(torch.nn.Module):
             # flash attention
             attn_output = attention(
                 query,
-                torch.select(key_value, dim=1, index=0),
-                torch.select(key_value, dim=1, index=1),
                 kv_cache[0],
                 kv_cache[1],
-                cu_seqlen_prefill,
-                max_s,
+                seqlen,
+                block_tables,
                 self.softmax_scale,
             )
         # Decode
@@ -308,7 +307,7 @@ class FlashMQAttention(torch.nn.Module):
                 self.kv_head_mapping,
                 self.softmax_scale,
                 block_tables,
-                input_lengths,
+                seqlen,
                 max_s,
             )
 
@@ -373,7 +372,7 @@ class Block(nn.Module):
         kv_cache,
         block_tables,
         slots,
-        input_lengths,
+        seqlen,
         max_s,
     ):
         hidden_states, residual = self.ln_1(hidden_states, residual)
@@ -383,7 +382,7 @@ class Block(nn.Module):
             kv_cache,
             block_tables,
             slots,
-            input_lengths,
+            seqlen,
             max_s,
         )
 
@@ -437,7 +436,7 @@ class FlashSantacoderModel(nn.Module):
         kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
         block_tables: torch.Tensor,
         slots: torch.Tensor,
-        input_lengths: torch.Tensor,
+        seqlen: Seqlen,
         max_s: int,
     ) -> torch.Tensor:
         hidden_states = self.wte(input_ids) + self.wpe(position_ids)
@@ -454,7 +453,7 @@ class FlashSantacoderModel(nn.Module):
                 kv_cache[i],
                 block_tables,
                 slots,
-                input_lengths,
+                seqlen,
                 max_s,
             )
 
@@ -486,7 +485,7 @@ class FlashSantacoderForCausalLM(nn.Module):
         kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
         block_tables: torch.Tensor,
         slots: torch.Tensor,
-        input_lengths: torch.Tensor,
+        seqlen: Seqlen,
         max_s: int,
         prefill_cache_indices: Optional[torch.Tensor],
         lm_head_indices: Optional[torch.Tensor] = None,
@@ -499,7 +498,7 @@ class FlashSantacoderForCausalLM(nn.Module):
             kv_cache,
             block_tables,
             slots,
-            input_lengths,
+            seqlen,
             max_s,
         )
         if lm_head_indices is not None:
diff --git a/server/text_generation_server/models/custom_modeling/flash_starcoder2_modeling.py b/server/text_generation_server/models/custom_modeling/flash_starcoder2_modeling.py
index 200d4ef0..2f9ecd0d 100644
--- a/server/text_generation_server/models/custom_modeling/flash_starcoder2_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_starcoder2_modeling.py
@@ -30,6 +30,7 @@ from text_generation_server.layers.attention import (
     paged_attention,
     attention,
     reshape_and_cache,
+    Seqlen,
 )
 from text_generation_server.layers import (
     TensorParallelRowLinear,
@@ -209,7 +210,7 @@ class Starcoder2Attention(torch.nn.Module):
         kv_cache,
         block_tables,
         slots,
-        input_lengths,
+        seqlen,
         max_s,
         prefill_cache_indices,
     ):
@@ -240,12 +241,10 @@ class Starcoder2Attention(torch.nn.Module):
             # flash attention
             attn_output = attention(
                 query,
-                torch.select(kv, dim=1, index=0),
-                torch.select(kv, dim=1, index=1),
                 kv_cache[0],
                 kv_cache[1],
-                cu_seqlen_prefill,
-                max_s,
+                seqlen,
+                block_tables,
                 self.softmax_scale,
                 window_size_left=self.max_past,
             )
@@ -258,7 +257,7 @@ class Starcoder2Attention(torch.nn.Module):
                 self.kv_head_mapping,
                 self.softmax_scale,
                 block_tables,
-                input_lengths,
+                seqlen,
                 max_s,
             )
 
@@ -381,7 +380,7 @@ class Starcoder2Layer(nn.Module):
         kv_cache,
         block_tables,
         slots,
-        input_lengths,
+        seqlen,
         max_s,
         prefill_cache_indices,
     ):
@@ -396,7 +395,7 @@ class Starcoder2Layer(nn.Module):
             kv_cache,
             block_tables,
             slots,
-            input_lengths,
+            seqlen,
             max_s,
             prefill_cache_indices,
         )
@@ -449,7 +448,7 @@ class Starcoder2Model(torch.nn.Module):
         kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
         block_tables: torch.Tensor,
         slots: torch.Tensor,
-        input_lengths: torch.Tensor,
+        seqlen: Seqlen,
         max_s: int,
         true_max_s: int,
         prefill_cache_indices: Optional[torch.Tensor],
@@ -473,7 +472,7 @@ class Starcoder2Model(torch.nn.Module):
                 kv_cache[i],
                 block_tables,
                 slots,
-                input_lengths,
+                seqlen,
                 max_s,
                 prefill_cache_indices,
             )
@@ -521,7 +520,7 @@ class FlashStarcoder2ForCausalLM(torch.nn.Module):
         kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
         block_tables: torch.Tensor,
         slots: torch.Tensor,
-        input_lengths: torch.Tensor,
+        seqlen: Seqlen,
         max_s: int,
         prefill_cache_indices: Optional[torch.Tensor],
         lm_head_indices: Optional[torch.Tensor] = None,
@@ -534,7 +533,7 @@ class FlashStarcoder2ForCausalLM(torch.nn.Module):
         elif self.max_past is not None:
             # Clamp in decode mode as paged attention requires clamped values whereas the flash attention
             # kernel requires the true values
-            input_lengths = input_lengths.clamp(max=self.max_past_tensor)
+            seqlen = seqlen.clamp(max=self.max_past_tensor)
 
         hidden_states = self.model(
             input_ids,
@@ -543,7 +542,7 @@ class FlashStarcoder2ForCausalLM(torch.nn.Module):
             kv_cache,
             block_tables,
             slots,
-            input_lengths,
+            seqlen,
             max_s,
             true_max_s,
             prefill_cache_indices,
diff --git a/server/text_generation_server/models/custom_modeling/idefics2.py b/server/text_generation_server/models/custom_modeling/idefics2.py
index 7e4deaf8..a829c374 100644
--- a/server/text_generation_server/models/custom_modeling/idefics2.py
+++ b/server/text_generation_server/models/custom_modeling/idefics2.py
@@ -25,6 +25,7 @@ from transformers.activations import ACT2FN
 from text_generation_server.models.custom_modeling.vlm import (
     load_text_model,
 )
+from text_generation_server.layers.attention import Seqlen
 from transformers.modeling_attn_mask_utils import _prepare_4d_attention_mask
 
 from text_generation_server.layers import (
@@ -740,7 +741,7 @@ class Idefics2ForConditionalGeneration(nn.Module):
         kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
         block_tables: torch.Tensor,
         slots: torch.Tensor,
-        input_lengths: torch.Tensor,
+        seqlen: Seqlen,
         max_s: int,
         prefill_cache_indices: Optional[torch.Tensor],
         lm_head_indices: Optional[torch.Tensor] = None,
@@ -826,7 +827,7 @@ class Idefics2ForConditionalGeneration(nn.Module):
             kv_cache=kv_cache,
             block_tables=block_tables,
             slots=slots,
-            input_lengths=input_lengths,
+            seqlen=seqlen,
             max_s=max_s,
             true_max_s=max_s,
             prefill_cache_indices=None,
diff --git a/server/text_generation_server/models/custom_modeling/llava_next.py b/server/text_generation_server/models/custom_modeling/llava_next.py
index 29f5b9c7..32e9d334 100644
--- a/server/text_generation_server/models/custom_modeling/llava_next.py
+++ b/server/text_generation_server/models/custom_modeling/llava_next.py
@@ -23,6 +23,7 @@ from torch import nn
 from transformers.activations import ACT2FN
 from transformers.image_processing_utils import select_best_resolution
 
+from text_generation_server.layers.attention import Seqlen
 from text_generation_server.models.custom_modeling.vlm import (
     load_text_model,
     load_vision_model,
@@ -170,7 +171,7 @@ class LlavaNextForConditionalGeneration(nn.Module):
         kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
         block_tables: torch.Tensor,
         slots: torch.Tensor,
-        input_lengths: torch.Tensor,
+        seqlen: Seqlen,
         max_s: int,
         prefill_cache_indices: Optional[torch.Tensor],
         lm_head_indices: Optional[torch.Tensor] = None,
@@ -276,7 +277,7 @@ class LlavaNextForConditionalGeneration(nn.Module):
             kv_cache=kv_cache,
             block_tables=block_tables,
             slots=slots,
-            input_lengths=input_lengths,
+            seqlen=seqlen,
             max_s=max_s,
             true_max_s=max_s,
             prefill_cache_indices=None,
diff --git a/server/text_generation_server/models/flash_causal_lm.py b/server/text_generation_server/models/flash_causal_lm.py
index dd4203e0..9a60d06c 100644
--- a/server/text_generation_server/models/flash_causal_lm.py
+++ b/server/text_generation_server/models/flash_causal_lm.py
@@ -43,7 +43,7 @@ from text_generation_server.models.globals import (
     ATTENTION,
     BLOCK_SIZE,
     CUDA_GRAPHS,
-    PREFIX_CACHING,
+    TGI_WIGGLE_ROOM,
     get_adapter_to_index,
 )
 from text_generation_server.layers.attention import Seqlen
@@ -189,16 +189,21 @@ class FlashCausalLMBatch(Batch):
     def batch_tokenized_inputs(
         cls, requests: Iterable[generate_pb2.Request], tokenizer
     ):
-        batch_inputs = []
-        max_truncation = 0
+        max_length = 0
+        all_input_ids = []
+        batch_size = 0
         for r in requests:
-            batch_inputs.append(concat_text_chunks(r.input_chunks.chunks))
-            max_truncation = max(max_truncation, r.truncate)
-
-        batch_tokenized_inputs = tokenizer(
-            batch_inputs, truncation=True, max_length=max_truncation
-        )["input_ids"]
-        return batch_tokenized_inputs
+            batch_size += 1
+            inputs = concat_text_chunks(r.input_chunks.chunks)
+            input_ids = tokenizer(
+                inputs,
+                truncation=True,
+                max_length=r.truncate,
+                add_special_tokens=r.add_special_tokens,
+            )["input_ids"]
+            max_length = max(max_length, len(input_ids))
+            all_input_ids.append(input_ids)
+        return all_input_ids
 
     @classmethod
     def from_tokenized(
@@ -257,22 +262,15 @@ class FlashCausalLMBatch(Batch):
             # request id -> idx in list mapping
             requests_idx_mapping[r.id] = i
 
-            tokenized_input = tokenized_input[-r.truncate :]
-            if (
-                tokenized_input[0] == tokenizer.bos_token_id
-                and tokenized_input[1] == tokenizer.bos_token_id
-            ):
-                tokenized_input = tokenized_input[1:]
-
             orig_input_length = len(tokenized_input)
 
-            if PREFIX_CACHING:
-                prefix_len = r.prefix_len
-                if prefix_len == orig_input_length:
-                    assert prefix_len > 0
-                    prefix_len -= 1
-            else:
-                prefix_len = 0
+            prefix_len = r.prefix_len
+            assert (
+                prefix_len <= orig_input_length
+            ), f"Prefix {prefix_len} vs input {orig_input_length}"
+            if prefix_len == orig_input_length:
+                assert prefix_len > 0
+                prefix_len -= 1
 
             prefix_ids.append(tokenized_input[:prefix_len])
             tokenized_input = tokenized_input[prefix_len:]
@@ -998,7 +996,7 @@ class FlashCausalLM(Model):
             config.sliding_window = None
 
         self.num_layers = config.num_hidden_layers
-        self.num_heads = config.num_attention_heads
+        self.num_heads = config.num_attention_heads // self.process_group.size()
         # Validation is done in the model itself
         if num_kv_heads is None:
             num_kv_heads = getattr(config, "num_key_value_heads", None)
@@ -1160,8 +1158,15 @@ class FlashCausalLM(Model):
             "block_tables": block_tables,
             "slots": slots,
             "input_lengths": input_lengths_tensor,
+            "prefix_lengths": prefix_lengths_tensor,
         }
-        input_lengths_ = Seqlen(input_lengths=input_lengths_tensor)
+        seqlen = Seqlen(
+            input_lengths=input_lengths_tensor,
+            prefix_lengths=prefix_lengths_tensor,
+            cu_seqlen_q=None,
+            max_q=1,
+            max_k=max_s,
+        )
         graph = torch.cuda.CUDAGraph()
         self.cuda_graphs[bs]["graph"] = graph
 
@@ -1204,7 +1209,7 @@ class FlashCausalLM(Model):
                 kv_cache=self.kv_cache,
                 block_tables=block_tables,
                 slots=slots,
-                input_lengths=input_lengths_,
+                seqlen=seqlen,
                 max_s=max_s,
                 prefill_cache_indices=None,
                 lm_head_indices=None,
@@ -1213,7 +1218,13 @@ class FlashCausalLM(Model):
             torch.cuda.synchronize()
 
             with torch.cuda.graph(graph, pool=MEM_POOL):
-                input_lengths_tensor = Seqlen(input_lengths=input_lengths_tensor)
+                seqlen = Seqlen(
+                    input_lengths=input_lengths_tensor,
+                    prefix_lengths=prefix_lengths_tensor,
+                    cu_seqlen_q=None,
+                    max_q=1,
+                    max_k=max_s,
+                )
                 logits, speculative_logits = self.model.forward(
                     input_ids=input_ids,
                     position_ids=position_ids,
@@ -1221,7 +1232,7 @@ class FlashCausalLM(Model):
                     kv_cache=self.kv_cache,
                     block_tables=block_tables,
                     slots=slots,
-                    input_lengths=input_lengths_tensor,
+                    seqlen=seqlen,
                     max_s=max_s,
                     prefill_cache_indices=None,
                     lm_head_indices=None,
@@ -1268,7 +1279,7 @@ class FlashCausalLM(Model):
 
         num_blocks = (
             # Leave 5% for some wiggle room
-            int((free_memory * 0.95) // total_cache_size)
+            int((free_memory * TGI_WIGGLE_ROOM) // total_cache_size)
             # Add batch.num_blocks as we allocated it above, so it is included in the peak memory.
             + batch_num_blocks
         )
@@ -1360,18 +1371,26 @@ class FlashCausalLM(Model):
 
         # Dummy value, some models (starcoder2) don't accept `None`.
         input_lengths = torch.ones(seqlen, dtype=torch.int32, device=self.device)
-        input_lengths = Seqlen(input_lengths=input_lengths)
+        prefix_lens_tensor = torch.zeros(seqlen, dtype=torch.int32, device=self.device)
+        cu_seqlen_prefill = torch.tensor(
+            [0, seqlen], device=self.device, dtype=torch.int32
+        )
+        seqlen = Seqlen(
+            input_lengths=input_lengths,
+            prefix_lengths=prefix_lens_tensor,
+            cu_seqlen_q=cu_seqlen_prefill,
+            max_q=1,
+            max_k=seqlen,
+        )
 
         # We pass a `cu_seqlen_prefill` in order not to have to deal with paged attention cache allocation/deallocation.
         self.model.forward(
             input_ids=input_ids,
             position_ids=position_ids,
-            cu_seqlen_prefill=torch.tensor(
-                [0, seqlen], device=self.device, dtype=torch.int32
-            ),
+            cu_seqlen_prefill=cu_seqlen_prefill,
             kv_cache=self.kv_cache,
             block_tables=None,
-            input_lengths=input_lengths,
+            seqlen=seqlen,
             slots=slots,
             max_s=seqlen,
             lm_head_indices=None,
@@ -1451,8 +1470,7 @@ class FlashCausalLM(Model):
             cuda_graph = None
 
         if cu_seqlen_prefill is not None or cuda_graph is None:
-            input_lengths = input_lengths + prefix_lens_tensor
-            if PREFIX_CACHING:
+            if ATTENTION == "flashinfer":
                 block_tables = block_tables_to_ragged(
                     block_tables=block_tables,
                     input_lengths=batch.input_lengths,
@@ -1462,11 +1480,18 @@ class FlashCausalLM(Model):
                 block_tables=block_tables,
                 cu_seqlen_prefill=cu_seqlen_prefill,
                 input_lengths=batch.input_lengths,
-                input_lengths_tensor=input_lengths,
+                input_lengths_tensor=input_lengths + prefix_lens_tensor,
                 prefix_lens=batch.prefix_lens,
                 prefix_lens_tensor=prefix_lens_tensor,
             ):
-                input_lengths = Seqlen(input_lengths=input_lengths)
+                max_k = (input_lengths + prefix_lens_tensor).max().item()
+                seqlen = Seqlen(
+                    input_lengths=input_lengths,
+                    prefix_lengths=prefix_lens_tensor,
+                    cu_seqlen_q=cu_seqlen_prefill,
+                    max_q=max_s,
+                    max_k=max_k,
+                )
                 logits, speculative_logits = self.model.forward(
                     input_ids=input_ids,
                     position_ids=position_ids,
@@ -1474,7 +1499,7 @@ class FlashCausalLM(Model):
                     kv_cache=kv_cache,
                     block_tables=block_tables,
                     slots=slots,
-                    input_lengths=input_lengths,
+                    seqlen=seqlen,
                     max_s=max_s,
                     prefill_cache_indices=batch.prefill_cache_indices,
                     lm_head_indices=lm_head_indices,
diff --git a/server/text_generation_server/models/globals.py b/server/text_generation_server/models/globals.py
index d5133f5e..6c518c2c 100644
--- a/server/text_generation_server/models/globals.py
+++ b/server/text_generation_server/models/globals.py
@@ -5,19 +5,22 @@ from typing import Dict, Optional
 
 from text_generation_server.utils.log import log_master
 
-PREFIX_CACHING = os.getenv("USE_PREFIX_CACHING", "0").lower() in {"1", "true"}
+PREFIX_CACHING = os.getenv("USE_PREFIX_CACHING").lower() in {"1", "true"}
 log_master(logger.info, f"Using prefix caching = {PREFIX_CACHING}")
-ATTENTION = os.getenv("ATTENTION", "flashinfer" if PREFIX_CACHING else "paged")
+ATTENTION = os.getenv("ATTENTION")
 _expected = {"paged", "flashdecoding", "flashinfer"}
 assert (
     ATTENTION in _expected
 ), f"Attention is not valid {ATTENTION}, expected {_expected}"
 log_master(logger.info, f"Using Attention = {ATTENTION}")
 
-if PREFIX_CACHING and ATTENTION != "flashinfer":
+if PREFIX_CACHING and ATTENTION not in {"flashinfer", "flashdecoding"}:
     raise RuntimeError("Prefix caching is only supported with flashinfer")
 
 MEM_POOL = torch.cuda.graph_pool_handle() if torch.cuda.is_available() else None
+TGI_WIGGLE_ROOM = float(os.getenv("TGI_WIGGLE_ROOM", "0.95"))
+assert TGI_WIGGLE_ROOM > 0
+assert TGI_WIGGLE_ROOM < 1
 
 # This is overridden by the cli
 BLOCK_SIZE: int
diff --git a/server/text_generation_server/models/vlm_causal_lm.py b/server/text_generation_server/models/vlm_causal_lm.py
index 2ed1a119..d6cb36fa 100644
--- a/server/text_generation_server/models/vlm_causal_lm.py
+++ b/server/text_generation_server/models/vlm_causal_lm.py
@@ -372,7 +372,14 @@ class VlmCausalLM(FlashCausalLM):
                 prefix_lens=batch.prefix_lens,
                 prefix_lens_tensor=prefix_lens_tensor,
             ):
-                input_lengths = Seqlen(input_lengths=input_lengths)
+                max_k = (input_lengths + prefix_lens_tensor).max().item()
+                seqlen = Seqlen(
+                    input_lengths=input_lengths,
+                    prefix_lengths=prefix_lens_tensor,
+                    cu_seqlen_q=cu_seqlen_prefill,
+                    max_q=max_s,
+                    max_k=max_k,
+                )
                 logits, speculative_logits = self.model.forward(
                     input_ids=input_ids,
                     position_ids=position_ids,
@@ -380,7 +387,7 @@ class VlmCausalLM(FlashCausalLM):
                     kv_cache=kv_cache,
                     block_tables=block_tables,
                     slots=slots,
-                    input_lengths=input_lengths,
+                    seqlen=seqlen,
                     max_s=max_s,
                     prefill_cache_indices=batch.prefill_cache_indices,
                     lm_head_indices=lm_head_indices,

From d5202c46f7c42ea81f75dbc99f6eb1c5697c6b40 Mon Sep 17 00:00:00 2001
From: drbh <david.richard.holtz@gmail.com>
Date: Thu, 29 Aug 2024 10:32:38 -0400
Subject: [PATCH 09/37] feat: add /v1/models endpoint (#2433)

* feat: add /v1/models endpoint

* feat: add /v1/models endpoint

* fix: remove unused type import

* fix: revert route typo

* fix: update docs with new endpoint

* fix: add to redocly ignore and lint
---
 .redocly.lint-ignore.yaml |  1 +
 docs/openapi.json         | 60 +++++++++++++++++++++++++++++++++++++++
 router/src/lib.rs         | 28 ++++++++++++++++++
 router/src/server.rs      | 29 ++++++++++++++++++-
 4 files changed, 117 insertions(+), 1 deletion(-)

diff --git a/.redocly.lint-ignore.yaml b/.redocly.lint-ignore.yaml
index 382c9ab6..13b80497 100644
--- a/.redocly.lint-ignore.yaml
+++ b/.redocly.lint-ignore.yaml
@@ -77,3 +77,4 @@ docs/openapi.json:
     - '#/paths/~1tokenize/post'
     - '#/paths/~1v1~1chat~1completions/post'
     - '#/paths/~1v1~1completions/post'
+    - '#/paths/~1v1~1models/get'
diff --git a/docs/openapi.json b/docs/openapi.json
index fd64a3ab..691705f2 100644
--- a/docs/openapi.json
+++ b/docs/openapi.json
@@ -556,6 +556,37 @@
           }
         }
       }
+    },
+    "/v1/models": {
+      "get": {
+        "tags": [
+          "Text Generation Inference"
+        ],
+        "summary": "Get model info",
+        "operationId": "openai_get_model_info",
+        "responses": {
+          "200": {
+            "description": "Served model info",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ModelInfo"
+                }
+              }
+            }
+          },
+          "404": {
+            "description": "Model not found",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorResponse"
+                }
+              }
+            }
+          }
+        }
+      }
     }
   },
   "components": {
@@ -1747,6 +1778,35 @@
           }
         ]
       },
+      "ModelInfo": {
+        "type": "object",
+        "required": [
+          "id",
+          "object",
+          "created",
+          "owned_by"
+        ],
+        "properties": {
+          "created": {
+            "type": "integer",
+            "format": "int64",
+            "example": 1686935002,
+            "minimum": 0
+          },
+          "id": {
+            "type": "string",
+            "example": "gpt2"
+          },
+          "object": {
+            "type": "string",
+            "example": "model"
+          },
+          "owned_by": {
+            "type": "string",
+            "example": "openai"
+          }
+        }
+      },
       "OutputMessage": {
         "oneOf": [
           {
diff --git a/router/src/lib.rs b/router/src/lib.rs
index 979f6dd1..a1e1dadf 100644
--- a/router/src/lib.rs
+++ b/router/src/lib.rs
@@ -1261,6 +1261,34 @@ pub(crate) struct ErrorResponse {
     pub error_type: String,
 }
 
+#[derive(Serialize, Deserialize, ToSchema)]
+pub(crate) struct ModelInfo {
+    #[schema(example = "gpt2")]
+    pub id: String,
+    #[schema(example = "model")]
+    pub object: String,
+    #[schema(example = 1686935002)]
+    pub created: u64,
+    #[schema(example = "openai")]
+    pub owned_by: String,
+}
+
+#[derive(Serialize, Deserialize, ToSchema)]
+pub(crate) struct ModelsInfo {
+    #[schema(example = "list")]
+    pub object: String,
+    pub data: Vec<ModelInfo>,
+}
+
+impl Default for ModelsInfo {
+    fn default() -> Self {
+        ModelsInfo {
+            object: "list".to_string(),
+            data: Vec::new(),
+        }
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/router/src/server.rs b/router/src/server.rs
index f273a786..d3d34215 100644
--- a/router/src/server.rs
+++ b/router/src/server.rs
@@ -24,6 +24,7 @@ use crate::{
     VertexResponse,
 };
 use crate::{FunctionDefinition, HubPreprocessorConfig, ToolCall, ToolChoice, ToolType};
+use crate::{ModelInfo, ModelsInfo};
 use async_stream::__private::AsyncStream;
 use axum::extract::Extension;
 use axum::http::{HeaderMap, HeaderValue, Method, StatusCode};
@@ -116,6 +117,29 @@ async fn get_model_info(info: Extension<Info>) -> Json<Info> {
     Json(info.0)
 }
 
+#[utoipa::path(
+get,
+tag = "Text Generation Inference",
+path = "/v1/models",
+responses(
+(status = 200, description = "Served model info", body = ModelInfo),
+(status = 404, description = "Model not found", body = ErrorResponse),
+)
+)]
+#[instrument(skip(info))]
+/// Get model info
+async fn openai_get_model_info(info: Extension<Info>) -> Json<ModelsInfo> {
+    Json(ModelsInfo {
+        data: vec![ModelInfo {
+            id: info.0.model_id.clone(),
+            object: "model".to_string(),
+            created: 0, // TODO: determine how to get this
+            owned_by: info.0.model_id.clone(),
+        }],
+        ..Default::default()
+    })
+}
+
 #[utoipa::path(
     post,
     tag = "Text Generation Inference",
@@ -1505,6 +1529,7 @@ chat_completions,
 completions,
 tokenize,
 metrics,
+openai_get_model_info,
 ),
 components(
 schemas(
@@ -1557,6 +1582,7 @@ ToolCall,
 Function,
 FunctionDefinition,
 ToolChoice,
+ModelInfo,
 )
 ),
 tags(
@@ -2250,7 +2276,8 @@ async fn start(
         .route("/info", get(get_model_info))
         .route("/health", get(health))
         .route("/ping", get(health))
-        .route("/metrics", get(metrics));
+        .route("/metrics", get(metrics))
+        .route("/v1/models", get(openai_get_model_info));
 
     // Conditional AWS Sagemaker route
     let aws_sagemaker_route = if messages_api_enabled {

From 9883f3b40e8e76cecc9807c438fc08c11e602b7a Mon Sep 17 00:00:00 2001
From: "Wang, Yi" <yi.a.wang@intel.com>
Date: Thu, 29 Aug 2024 23:42:02 +0800
Subject: [PATCH 10/37] update doc with intel cpu part (#2420)

* update doc with intel cpu part

Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>

* Apply suggestions from code review

we do not use latest ever in documentation, it causes too many issues for users. Release number get update on every release.

---------

Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>
---
 docs/source/installation_intel.md | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/docs/source/installation_intel.md b/docs/source/installation_intel.md
index b3843490..3084a436 100644
--- a/docs/source/installation_intel.md
+++ b/docs/source/installation_intel.md
@@ -12,7 +12,24 @@ volume=$PWD/data # share a volume with the Docker container to avoid downloading
 docker run --rm --privileged --cap-add=sys_nice \
     --device=/dev/dri \
     --ipc=host --shm-size 1g --net host -v $volume:/data \
-    ghcr.io/huggingface/text-generation-inference:2.2.0-intel \
+    ghcr.io/huggingface/text-generation-inference:2.2.0-intel-xpu \
+    --model-id $model --cuda-graphs 0
+```
+
+# Using TGI with Intel CPUs
+
+Intel® Extension for PyTorch (IPEX) also provides further optimizations for Intel CPUs. The IPEX provides optimization operations such as flash attention, page attention, Add + LayerNorm, ROPE and more.
+
+On a server powered by Intel CPU, TGI can be launched with the following command:
+
+```bash
+model=teknium/OpenHermes-2.5-Mistral-7B
+volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run
+
+docker run --rm --privileged --cap-add=sys_nice \
+    --device=/dev/dri \
+    --ipc=host --shm-size 1g --net host -v $volume:/data \
+    ghcr.io/huggingface/text-generation-inference:2.2.0-intel-cpu \
     --model-id $model --cuda-graphs 0
 ```
 

From d9fbbaafb046bb423e31edaf9ccf8eecc2d5c33d Mon Sep 17 00:00:00 2001
From: Nicolas Patry <patry.nicolas@protonmail.com>
Date: Thu, 29 Aug 2024 17:44:54 +0200
Subject: [PATCH 11/37] Tied embeddings in MLP speculator. (#2473)

* Tied embeddings in MLP speculator.

* Fixing the scale_weight when users decide to not use the speculation as
much as defined in the config.

* Adding scaling support + optimize some ops.
---
 server/text_generation_server/layers/mlp.py   | 120 +++++++++++++++++-
 .../text_generation_server/models/__init__.py |   5 +
 2 files changed, 118 insertions(+), 7 deletions(-)

diff --git a/server/text_generation_server/layers/mlp.py b/server/text_generation_server/layers/mlp.py
index f08cb673..d33b41f3 100644
--- a/server/text_generation_server/layers/mlp.py
+++ b/server/text_generation_server/layers/mlp.py
@@ -45,12 +45,107 @@ class MLPSpeculatorLayerNorm(nn.Module):
         return x
 
 
+INV_SQRT2 = 2**-0.5
+
+
+def simple_norm(x: torch.Tensor, eps=1e-06):
+    xf = x
+    xf = xf * torch.rsqrt(xf.pow(2).mean(-1, keepdim=True) + eps)
+    x = xf.type_as(x)
+    return x * INV_SQRT2
+
+
+class MLPSpeculatorModelTied(torch.nn.Module):
+    def __init__(self, config, prefix, weights):
+        super().__init__()
+        self.config = config
+        self.n_predict = get_speculate()
+        self.hidden_size = config.hidden_size
+
+        self.emb = TensorParallelEmbedding(f"{prefix}.emb.0", weights)
+        self.proj0 = FastLinear.load(
+            config,
+            prefix=f"{prefix}.proj.0",
+            weights=weights,
+            bias=False,
+        )
+        self.proj1 = FastLinear.load(
+            config,
+            prefix=f"{prefix}.proj.1",
+            weights=weights,
+            bias=False,
+        )
+        self.head = FastLinear.load(config, f"{prefix}.head.0", weights, bias=False)
+        self.ln = MLPSpeculatorLayerNorm(
+            prefix=f"{prefix}.ln.0",
+            config=config,
+            weights=weights,
+        )
+
+        # Weights ensure that state_0 accounts for 50% of state magnitude by final head in expectation
+        self.state_weight = 0.5 ** (0.5 / self.n_predict) if self.n_predict > 0 else 1
+        self.activation = nn.GELU()
+        self.vsize = config.vocab_size
+        self.inner_dim = config.speculator_config["inner_dim"]
+        self.top_k_tokens_per_head = [1] * self.n_predict
+        self.emb_weight = math.sqrt(1 - self.state_weight**2) * math.sqrt(
+            self.inner_dim / 2
+        )
+        self.emb.weight *= self.emb_weight
+
+    def forward(
+        self,
+        hidden_states: torch.Tensor,
+        input_ids: torch.Tensor,
+    ):
+        top_k_tokens_per_head = self.top_k_tokens_per_head
+
+        # k indicates # of candidates
+        # h indicates # of generated tokens
+        state = hidden_states
+        b = state.size(0)
+        ind = input_ids.unsqueeze(0)
+        all_probs = torch.empty(
+            b, self.n_predict, self.vsize, device=state.device
+        )  # b k h v
+        assert (
+            len(top_k_tokens_per_head) == self.n_predict
+        ), f"You must provide a topk number for each head ({self.n_predict} heads, {len(top_k_tokens_per_head)} provided)"
+        for i in range(self.n_predict):
+            # Project and predict
+            z = self.emb(ind)
+            # z = z.mul(self.emb_weight)  # b k d
+            if i == 0:
+                state = self.proj0(state) * self.state_weight + z
+            else:
+                state = self.proj1(state) * self.state_weight + z
+            state = self.activation(self.ln(state))  # b k d
+            probs = F.log_softmax(self.head(state), dim=-1)  # b k v
+            _probs, preds = probs.topk(top_k_tokens_per_head[i], dim=-1)  # b k k'
+
+            # Update candidate set with new predictions
+
+            # Update distribution set with new logits
+            all_probs[:, i] = probs.exp()
+
+            # Update state, log_probs and ind for new predictions
+            state = state.unsqueeze(2).expand(
+                -1, -1, top_k_tokens_per_head[i], -1
+            )  # b k k' d
+            state = state.reshape(-1, b, state.size(3))  # b kk' d
+            ind = preds.view(-1, b)  # b kk'
+
+        speculative_logits = all_probs
+        return speculative_logits
+
+
 class MLPSpeculatorModel(torch.nn.Module):
     def __init__(self, config, prefix, weights):
         super().__init__()
         self.config = config
         self.n_predict = get_speculate()
         self.hidden_size = config.hidden_size
+
         self.emb = nn.ModuleList(
             [
                 TensorParallelEmbedding(f"{prefix}.emb.{i}", weights)
@@ -84,13 +179,15 @@ class MLPSpeculatorModel(torch.nn.Module):
         )
 
         # Weights ensure that state_0 accounts for 50% of state magnitude by final head in expectation
-        self.state_weight = 0.5 ** (0.5 / self.n_predict)
-        self.emb_weight = math.sqrt(1 - self.state_weight**2)
+        self.state_weight = 0.5 ** (0.5 / self.n_predict) if self.n_predict > 0 else 1
         self.activation = nn.GELU()
-        # TODO
         self.vsize = config.vocab_size
         self.inner_dim = config.speculator_config["inner_dim"]
         self.top_k_tokens_per_head = [1] * self.n_predict
+        self.emb_weight = math.sqrt(1 - self.state_weight**2) * math.sqrt(
+            self.inner_dim / 2
+        )
+        self.emb.weight *= self.emb_weight
 
     def forward(
         self,
@@ -113,7 +210,7 @@ class MLPSpeculatorModel(torch.nn.Module):
         for i in range(self.n_predict):
             # Project and predict
             z = self.emb[i](ind)
-            z = z.mul(self.emb_weight * math.sqrt(self.inner_dim / 2))  # b k d
+            # z = z.mul(self.emb_weight)  # b k d
             state = self.proj[i](state) * self.state_weight + z
             state = self.activation(self.ln[i](state))  # b k d
             probs = F.log_softmax(self.head[i](state), dim=-1)  # b k v
@@ -136,10 +233,11 @@ class MLPSpeculatorModel(torch.nn.Module):
 
 
 class MLPSpeculatorHead(nn.Module):
-    def __init__(self, lm_head, mlp_speculator):
+    def __init__(self, lm_head, mlp_speculator, scale_input: bool):
         super().__init__()
         self.lm_head = lm_head
         self.mlp_speculator = mlp_speculator
+        self.scale_input = scale_input
 
     def forward(
         self, input: torch.Tensor
@@ -150,6 +248,8 @@ class MLPSpeculatorHead(nn.Module):
             return logits, None
 
         input_ids = logits.argmax(dim=-1)
+        if self.scale_input:
+            input = simple_norm(input)
         speculative_logits = self.mlp_speculator(input, input_ids)
         return logits, speculative_logits
 
@@ -171,6 +271,12 @@ class MLPSpeculatorHead(nn.Module):
                         )
                     routing[k] = filename
 
-        mlp_speculator = MLPSpeculatorModel(config, "speculator", weights)
+        tie_weights = config.speculator_config.get("tie_weights", False)
+        if tie_weights:
+            mlp_speculator = MLPSpeculatorModelTied(config, "speculator", weights)
+        else:
+            mlp_speculator = MLPSpeculatorModel(config, "speculator", weights)
+        # This is used in https://huggingface.co/ibm-fms/llama3-70b-accelerator
+        scale_input = config.speculator_config.get("scale_input", False)
         lm_head = TensorParallelHead.load(config, prefix, weights)
-        return MLPSpeculatorHead(lm_head, mlp_speculator)
+        return MLPSpeculatorHead(lm_head, mlp_speculator, scale_input)
diff --git a/server/text_generation_server/models/__init__.py b/server/text_generation_server/models/__init__.py
index e03cc30d..52f332c1 100644
--- a/server/text_generation_server/models/__init__.py
+++ b/server/text_generation_server/models/__init__.py
@@ -458,6 +458,11 @@ def get_model(
                     revision=mlp_revision,
                     filename=filename,
                 )
+            speculator_dir_path = Path(mlp_speculator_config).parent
+            # if these are downloaded, they get converted to safetensors
+            filenames.extend(
+                [p for p in os.listdir(speculator_dir_path) if p.endswith(extension)]
+            )
             speculator = {
                 "path": Path(mlp_speculator_config).parent,
                 "model_paths": filenames,

From e4ab8554803eb1b6f0cf2b546dd5b4e7176bd99d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?= <me@danieldk.eu>
Date: Mon, 2 Sep 2024 09:27:10 +0200
Subject: [PATCH 12/37] nix: improve impure devshell (#2478)

- Add some test dependencies.
- Install server in venv.
- Install Python client in venv.
---
 flake.nix | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/flake.nix b/flake.nix
index 0739c90a..82f9ab1d 100644
--- a/flake.nix
+++ b/flake.nix
@@ -79,16 +79,22 @@
               ]
               ++ (with python3.pkgs; [
                 venvShellHook
+                docker
                 pip
                 ipdb
+                pytest
+                pytest-asyncio
+                syrupy
               ]);
 
             inputsFrom = [ server ];
 
             venvDir = "./.venv";
 
-            postVenv = ''
+            postVenvCreation = ''
               unset SOURCE_DATE_EPOCH
+              ( cd server ; python -m pip install --no-dependencies -e . )
+              ( cd clients/python ; python -m pip install --no-dependencies -e . )
             '';
             postShellHook = ''
               unset SOURCE_DATE_EPOCH

From de2cdeca530f54f5605c71452f9087c1d1a9fd5a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?= <me@danieldk.eu>
Date: Mon, 2 Sep 2024 11:31:36 +0200
Subject: [PATCH 13/37] nix: add punica-kernels (#2477)

Enables LoRA support.
---
 flake.lock     | 6 +++---
 nix/server.nix | 2 ++
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/flake.lock b/flake.lock
index c0a696b1..c5b3b1ff 100644
--- a/flake.lock
+++ b/flake.lock
@@ -944,11 +944,11 @@
         "nixpkgs": "nixpkgs_6"
       },
       "locked": {
-        "lastModified": 1724784743,
-        "narHash": "sha256-NdEoWeNwR/ZstYnHaiQWIYZvr7VsrAh7g3+ZHUPrxuI=",
+        "lastModified": 1725011596,
+        "narHash": "sha256-zfq8lOXFgJnKxxsqSelHuKUvhxgH3cEmLoAgsOO62Cg=",
         "owner": "danieldk",
         "repo": "tgi-nix",
-        "rev": "c9580c3e39a855246bb87b584bbea1885b44f524",
+        "rev": "717c2b07e38538abf05237cca65b2d1363c2c9af",
         "type": "github"
       },
       "original": {
diff --git a/nix/server.nix b/nix/server.nix
index 6ee088e0..cfdb3f01 100644
--- a/nix/server.nix
+++ b/nix/server.nix
@@ -26,6 +26,7 @@
   opentelemetry-instrumentation-grpc,
   opentelemetry-semantic-conventions,
   peft,
+  punica-kernels,
   safetensors,
   tokenizers,
   torch,
@@ -92,6 +93,7 @@ buildPythonPackage {
     opentelemetry-instrumentation-grpc
     opentelemetry-semantic-conventions
     peft
+    punica-kernels
     safetensors
     sentencepiece
     tokenizers

From 47d7e344587198ded8a6c89e481b35f4d847fbcf Mon Sep 17 00:00:00 2001
From: drbh <david.richard.holtz@gmail.com>
Date: Mon, 2 Sep 2024 10:00:52 -0400
Subject: [PATCH 14/37] fix: enable chat requests in vertex endpoint (#2481)

* fix: enable chat requests in vertex endpoint

* feat: avoid unwrap and pre allocate future vec
---
 router/src/lib.rs    |   9 ++-
 router/src/server.rs | 146 ++++++++++++++++++++++++++++++++++---------
 2 files changed, 124 insertions(+), 31 deletions(-)

diff --git a/router/src/lib.rs b/router/src/lib.rs
index a1e1dadf..d8029c72 100644
--- a/router/src/lib.rs
+++ b/router/src/lib.rs
@@ -55,13 +55,20 @@ impl std::str::FromStr for Attention {
 }
 
 #[derive(Clone, Deserialize, ToSchema)]
-pub(crate) struct VertexInstance {
+pub(crate) struct GenerateVertexInstance {
     #[schema(example = "What is Deep Learning?")]
     pub inputs: String,
     #[schema(nullable = true, default = "null", example = "null")]
     pub parameters: Option<GenerateParameters>,
 }
 
+#[derive(Clone, Deserialize, ToSchema)]
+#[serde(untagged)]
+enum VertexInstance {
+    Generate(GenerateVertexInstance),
+    Chat(ChatRequest),
+}
+
 #[derive(Deserialize, ToSchema)]
 pub(crate) struct VertexRequest {
     #[serde(rename = "instances")]
diff --git a/router/src/server.rs b/router/src/server.rs
index d3d34215..fac56a77 100644
--- a/router/src/server.rs
+++ b/router/src/server.rs
@@ -8,7 +8,7 @@ use crate::kserve::{
     kserve_model_metadata, kserve_model_metadata_ready,
 };
 use crate::validation::ValidationError;
-use crate::{default_tool_prompt, ChatTokenizeResponse};
+use crate::{default_tool_prompt, ChatTokenizeResponse, VertexInstance};
 use crate::{
     usage_stats, BestOfSequence, Details, ErrorResponse, FinishReason, FunctionName,
     GenerateParameters, GenerateRequest, GenerateResponse, GrammarType, HubModelInfo,
@@ -1406,12 +1406,12 @@ async fn vertex_compatibility(
         ));
     }
 
-    // Process all instances
-    let predictions = req
-        .instances
-        .iter()
-        .map(|instance| {
-            let generate_request = GenerateRequest {
+    // Prepare futures for all instances
+    let mut futures = Vec::with_capacity(req.instances.len());
+
+    for instance in req.instances.iter() {
+        let generate_request = match instance {
+            VertexInstance::Generate(instance) => GenerateRequest {
                 inputs: instance.inputs.clone(),
                 add_special_tokens: true,
                 parameters: GenerateParameters {
@@ -1422,31 +1422,117 @@ async fn vertex_compatibility(
                     decoder_input_details: true,
                     ..Default::default()
                 },
-            };
+            },
+            VertexInstance::Chat(instance) => {
+                let ChatRequest {
+                    model,
+                    max_tokens,
+                    messages,
+                    seed,
+                    stop,
+                    stream,
+                    tools,
+                    tool_choice,
+                    tool_prompt,
+                    temperature,
+                    response_format,
+                    guideline,
+                    presence_penalty,
+                    frequency_penalty,
+                    top_p,
+                    top_logprobs,
+                    ..
+                } = instance.clone();
 
-            async {
-                generate_internal(
-                    Extension(infer.clone()),
-                    compute_type.clone(),
-                    Json(generate_request),
-                    span.clone(),
-                )
-                .await
-                .map(|(_, Json(generation))| generation.generated_text)
-                .map_err(|_| {
-                    (
-                        StatusCode::INTERNAL_SERVER_ERROR,
-                        Json(ErrorResponse {
-                            error: "Incomplete generation".into(),
-                            error_type: "Incomplete generation".into(),
-                        }),
-                    )
-                })
+                let repetition_penalty = presence_penalty.map(|x| x + 2.0);
+                let max_new_tokens = max_tokens.or(Some(100));
+                let tool_prompt = tool_prompt
+                    .filter(|s| !s.is_empty())
+                    .unwrap_or_else(default_tool_prompt);
+                let stop = stop.unwrap_or_default();
+                // enable greedy only when temperature is 0
+                let (do_sample, temperature) = match temperature {
+                    Some(temperature) if temperature == 0.0 => (false, None),
+                    other => (true, other),
+                };
+                let (inputs, grammar, _using_tools) = match prepare_chat_input(
+                    &infer,
+                    response_format,
+                    tools,
+                    tool_choice,
+                    &tool_prompt,
+                    guideline,
+                    messages,
+                ) {
+                    Ok(result) => result,
+                    Err(e) => {
+                        return Err((
+                            StatusCode::BAD_REQUEST,
+                            Json(ErrorResponse {
+                                error: format!("Failed to prepare chat input: {}", e),
+                                error_type: "Input preparation error".to_string(),
+                            }),
+                        ));
+                    }
+                };
+
+                GenerateRequest {
+                    inputs: inputs.to_string(),
+                    add_special_tokens: false,
+                    parameters: GenerateParameters {
+                        best_of: None,
+                        temperature,
+                        repetition_penalty,
+                        frequency_penalty,
+                        top_k: None,
+                        top_p,
+                        typical_p: None,
+                        do_sample,
+                        max_new_tokens,
+                        return_full_text: None,
+                        stop,
+                        truncate: None,
+                        watermark: false,
+                        details: true,
+                        decoder_input_details: !stream,
+                        seed,
+                        top_n_tokens: top_logprobs,
+                        grammar,
+                        adapter_id: model.filter(|m| *m != "tgi").map(String::from),
+                    },
+                }
             }
-        })
-        .collect::<FuturesUnordered<_>>()
-        .try_collect::<Vec<_>>()
-        .await?;
+        };
+
+        let infer_clone = infer.clone();
+        let compute_type_clone = compute_type.clone();
+        let span_clone = span.clone();
+
+        futures.push(async move {
+            generate_internal(
+                Extension(infer_clone),
+                compute_type_clone,
+                Json(generate_request),
+                span_clone,
+            )
+            .await
+            .map(|(_, Json(generation))| generation.generated_text)
+            .map_err(|_| {
+                (
+                    StatusCode::INTERNAL_SERVER_ERROR,
+                    Json(ErrorResponse {
+                        error: "Incomplete generation".into(),
+                        error_type: "Incomplete generation".into(),
+                    }),
+                )
+            })
+        });
+    }
+
+    // execute all futures in parallel, collect results, returning early if any error occurs
+    let results = futures::future::join_all(futures).await;
+    let predictions: Result<Vec<_>, _> = results.into_iter().collect();
+    let predictions = predictions?;
 
     let response = VertexResponse { predictions };
     Ok((HeaderMap::new(), Json(response)).into_response())

From 6cb42f49ae47a117e8f1bdfcdb5cbe42332dc360 Mon Sep 17 00:00:00 2001
From: drbh <david.richard.holtz@gmail.com>
Date: Mon, 2 Sep 2024 13:09:06 -0400
Subject: [PATCH 15/37] feat: support lora revisions and qkv_proj weights
 (#2482)

* feat: support lora revisions and qkv_proj weights

* fix: add qkv_proj weights to weight test
---
 server/tests/utils/test_adapter.py            | 62 ++++++++++++++++++-
 .../text_generation_server/models/__init__.py |  3 +-
 .../custom_modeling/flash_llama_modeling.py   |  5 +-
 .../text_generation_server/utils/adapter.py   | 41 +++++++++---
 4 files changed, 100 insertions(+), 11 deletions(-)

diff --git a/server/tests/utils/test_adapter.py b/server/tests/utils/test_adapter.py
index cc1b076d..a27c1055 100644
--- a/server/tests/utils/test_adapter.py
+++ b/server/tests/utils/test_adapter.py
@@ -1,6 +1,54 @@
 import pytest
 from unittest.mock import Mock
-from text_generation_server.utils.adapter import get_attn_weights, get_mlp_weights
+from text_generation_server.utils.adapter import (
+    get_attn_weights,
+    get_mlp_weights,
+    parse_lora_adapters,
+    AdapterInfo,
+)
+
+
+def test_parse_lora_adapters_empty():
+    assert parse_lora_adapters(None) == []
+    assert parse_lora_adapters("") == []
+
+
+def test_parse_lora_adapters_single():
+    result = parse_lora_adapters("adapter1")
+    assert result == [AdapterInfo(id="adapter1", path=None, revision=None)]
+
+
+def test_parse_lora_adapters_with_path():
+    result = parse_lora_adapters("adapter1=path/to/adapter1")
+    assert result == [
+        AdapterInfo(id="adapter1", path="path/to/adapter1", revision=None)
+    ]
+
+
+def test_parse_lora_adapters_with_path_and_revision():
+    result = parse_lora_adapters("adapter1=path/to/adapter1@main")
+    assert result == [
+        AdapterInfo(id="adapter1", path="path/to/adapter1", revision="main")
+    ]
+
+
+def test_parse_lora_adapters_multiple():
+    result = parse_lora_adapters(
+        "adapter1,adapter2=path/to/adapter2,adapter3=path/to/adapter3@dev"
+    )
+    assert result == [
+        AdapterInfo(id="adapter1", path=None, revision=None),
+        AdapterInfo(id="adapter2", path="path/to/adapter2", revision=None),
+        AdapterInfo(id="adapter3", path="path/to/adapter3", revision="dev"),
+    ]
+
+
+def test_parse_lora_adapters_invalid_format():
+    try:
+        parse_lora_adapters("adapter1,invalid=format=test,adapter3")
+        assert False, "Should have raised ValueError"
+    except ValueError as e:
+        assert str(e) == "Invalid LoRA adapter format: invalid=format=test"
 
 
 def test_get_attn_weights():
@@ -22,6 +70,10 @@ def test_get_attn_weights():
             "model.layers.2.self_attn.k_proj",
             mock_layer.self_attn.query_key_value,
         ),
+        (2, "qkv_proj"): (
+            "model.layers.2.self_attn.qkv_proj",
+            mock_layer.self_attn.query_key_value,
+        ),
         (2, "v_proj"): (
             "model.layers.2.self_attn.v_proj",
             mock_layer.self_attn.query_key_value,
@@ -115,6 +167,10 @@ def test_get_attn_weights_llama_compatibility():
             "model.layers.2.self_attn.k_proj",
             mock_layer.self_attn.query_key_value,
         ),
+        (2, "qkv_proj"): (
+            "model.layers.2.self_attn.qkv_proj",
+            mock_layer.self_attn.query_key_value,
+        ),
         (2, "v_proj"): (
             "model.layers.2.self_attn.v_proj",
             mock_layer.self_attn.query_key_value,
@@ -155,6 +211,10 @@ def test_get_attn_weights_gemma_compatibility():
             "model.layers.2.self_attn.k_proj",
             mock_layer.self_attn.query_key_value,
         ),
+        (2, "qkv_proj"): (
+            "model.layers.2.self_attn.qkv_proj",
+            mock_layer.self_attn.query_key_value,
+        ),
         (2, "v_proj"): (
             "model.layers.2.self_attn.v_proj",
             mock_layer.self_attn.query_key_value,
diff --git a/server/text_generation_server/models/__init__.py b/server/text_generation_server/models/__init__.py
index 52f332c1..fc530b38 100644
--- a/server/text_generation_server/models/__init__.py
+++ b/server/text_generation_server/models/__init__.py
@@ -1259,6 +1259,7 @@ def get_model_with_lora_adapters(
                 "gate_proj",
                 "up_proj",
                 "down_proj",
+                "qkv_proj",
             ]
 
             for layer_name in adapter_layers:
@@ -1286,7 +1287,7 @@ def get_model_with_lora_adapters(
 
             if len(unused_weight_names) > 0:
                 logger.warning(
-                    f"{','.join(adapter_parameters.adapter_ids)} unused adapter weights: {unused_weight_names}"
+                    f"{','.join([a.id for a in lora_adapters])} unused adapter weights: {unused_weight_names}"
                 )
 
             if adapter_tokenizer is not None:
diff --git a/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py b/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py
index 5b228f9f..ae981c9a 100644
--- a/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py
@@ -66,15 +66,15 @@ def load_attention(config, prefix: str, weights, layer_id):
     prefixes = None
 
     if config.model_type == "phi3":
-        prefix = f"{prefix}.qkv_proj"
         base_layer = TensorParallelColumnLinear.load_qkv(
             config,
-            prefix=prefix,
+            prefix=f"{prefix}.qkv_proj",
             weights=weights,
             bias=bias,
             num_heads=config.num_attention_heads,
             num_key_value_heads=config.num_key_value_heads,
         )
+        prefixes = ["qkv_proj"]
     elif config.model_type == "baichuan":
         prefix = f"{prefix}.W_pack"
         base_layer = TensorParallelColumnLinear.load_qkv(
@@ -85,6 +85,7 @@ def load_attention(config, prefix: str, weights, layer_id):
             num_heads=config.num_attention_heads,
             num_key_value_heads=config.num_key_value_heads,
         )
+        prefixes = [prefix]
     else:
         prefixes = ["q_proj", "k_proj", "v_proj"]
         sizes = [
diff --git a/server/text_generation_server/utils/adapter.py b/server/text_generation_server/utils/adapter.py
index 1db5f77b..b7fc89df 100644
--- a/server/text_generation_server/utils/adapter.py
+++ b/server/text_generation_server/utils/adapter.py
@@ -3,6 +3,7 @@
 # License:  Apache License Version 2.0, January 2004
 
 import warnings
+import re
 from dataclasses import dataclass
 from functools import lru_cache
 from typing import TYPE_CHECKING, Set, Tuple, Optional, List
@@ -27,6 +28,7 @@ BASE_MODEL_ADAPTER_ID = "__base_model__"
 class AdapterInfo:
     id: str
     path: Optional[str]
+    revision: Optional[str] = None
 
 
 @dataclass
@@ -51,11 +53,16 @@ def parse_lora_adapters(lora_adapters: Optional[str]) -> List[AdapterInfo]:
 
     adapter_list = []
     for adapter in lora_adapters.split(","):
-        parts = adapter.strip().split("=")
-        if len(parts) == 1:
-            adapter_list.append(AdapterInfo(id=parts[0], path=None))
-        elif len(parts) == 2:
-            adapter_list.append(AdapterInfo(id=parts[0], path=parts[1]))
+        adapter = adapter.strip()
+        if adapter.count("=") > 1 or adapter.count("@") > 1:
+            raise ValueError(f"Invalid LoRA adapter format: {adapter}")
+        match = re.match(r"^([^=@]+)(?:=([^@]+))?(?:@(.+))?$", adapter)
+
+        if match:
+            adapter_id, path, revision = match.groups()
+            adapter_list.append(
+                AdapterInfo(id=adapter_id, path=path, revision=revision)
+            )
         else:
             raise ValueError(f"Invalid LoRA adapter format: {adapter}")
     return adapter_list
@@ -73,6 +80,7 @@ def load_and_merge_adapters(
         adapter_info = next(iter(adapter_parameters.adapter_info))
         return load_module_map(
             model_id,
+            adapter_info.revision,
             adapter_info.id,
             adapter_info.path,
             weight_names,
@@ -80,7 +88,13 @@ def load_and_merge_adapters(
         )
 
     adapter_params = AdapterParametersContainer(adapter_parameters, adapter_index)
-    return _load_and_merge(model_id, adapter_params, weight_names, trust_remote_code)
+    return _load_and_merge(
+        model_id,
+        adapter_params.revision,
+        adapter_params,
+        weight_names,
+        trust_remote_code,
+    )
 
 
 @dataclass
@@ -95,6 +109,7 @@ class AdapterParametersContainer:
 @lru_cache(maxsize=32)
 def _load_and_merge(
     model_id: str,
+    revision: str,
     adapter_params: AdapterParametersContainer,
     weight_names: Tuple[str],
     trust_remote_code: bool = False,
@@ -171,12 +186,12 @@ def check_architectures(
 @lru_cache(maxsize=128)
 def load_module_map(
     model_id: str,
+    revision: str,
     adapter_id: str,
     adapter_path: Optional[str],
     weight_names: Tuple[str],
     trust_remote_code: bool = False,
 ) -> Tuple["ModuleMap", "AdapterConfig", Set[str], PreTrainedTokenizer]:
-    revision = "main"
 
     adapter_config = LoraConfig.load(adapter_path or adapter_id, None)
 
@@ -191,6 +206,12 @@ def load_module_map(
         )
     )
 
+    # throw an error if no adapter weights are found
+    if not adapter_filenames:
+        raise FileNotFoundError(
+            f"No adapter weights found for adapter '{adapter_id}' and revision '{revision}'."
+        )
+
     try:
         adapter_tokenizer = AutoTokenizer.from_pretrained(
             adapter_config.config_path,
@@ -221,6 +242,12 @@ def get_attn_weights(i, layer):
         value = (f"model.layers.{i}.self_attn.{k}_proj", qkv)
         weights[key] = value
 
+    # also add the qkv_proj weight for the adapter
+    weights[(i, "qkv_proj")] = (
+        f"model.layers.{i}.self_attn.qkv_proj",
+        qkv,
+    )
+
     weights[(i, "o_proj")] = (
         f"model.layers.{i}.self_attn.o_proj",
         layer.self_attn.o_proj,

From deec30f89307c7e51ccc609fb2d0ce1e920505b2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?= <me@danieldk.eu>
Date: Thu, 5 Sep 2024 15:09:29 +0200
Subject: [PATCH 16/37] hotfix: avoid non-prefilled block use when using prefix
 caching (#2489)

The minimum batch size logic could cause prefix blocks to be
deallocated without prefill. The next allocation of the same
prefix would then use garbage blocks.
---
 backends/v3/src/backend.rs | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/backends/v3/src/backend.rs b/backends/v3/src/backend.rs
index 05a26370..a47e62dc 100644
--- a/backends/v3/src/backend.rs
+++ b/backends/v3/src/backend.rs
@@ -122,7 +122,7 @@ impl Backend for BackendV3 {
 #[allow(clippy::too_many_arguments)]
 pub(crate) async fn batching_task(
     mut client: ShardedClient,
-    waiting_served_ratio: f32,
+    _waiting_served_ratio: f32,
     max_batch_prefill_tokens: u32,
     max_batch_total_tokens: u32,
     max_waiting_tokens: usize,
@@ -168,7 +168,10 @@ pub(crate) async fn batching_task(
                     None
                 } else {
                     // Minimum batch size
-                    Some((batch_size as f32 * waiting_served_ratio).floor() as usize)
+                    // TODO: temporarily disable to avoid incorrect deallocation +
+                    //       reallocation when using prefix caching.
+                    // Some((batch_size as f32 * waiting_served_ratio).floor() as usize)
+                    None
                 };
 
                 let token_budget = max_batch_total_tokens.saturating_sub(batch_max_tokens);

From 8b96a18265bec0633e39f5930e81afe3a3bb1463 Mon Sep 17 00:00:00 2001
From: Nicolas Patry <patry.nicolas@protonmail.com>
Date: Thu, 5 Sep 2024 16:11:52 +0200
Subject: [PATCH 17/37] Adding links to Adyen blogpost. (#2492)

---
 README.md                           | 2 ++
 docs/source/conceptual/streaming.md | 5 +++++
 2 files changed, 7 insertions(+)

diff --git a/README.md b/README.md
index 803e9172..cf6a30db 100644
--- a/README.md
+++ b/README.md
@@ -189,6 +189,8 @@ overridden with the `--otlp-service-name` argument
 
 ![TGI architecture](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/TGI.png)
 
+Detailed blogpost by Adyen on TGI inner workings: [LLM inference at scale with TGI](https://www.adyen.com/knowledge-hub/llm-inference-at-scale-with-tgi)
+
 ### Local install
 
 You can also opt to install `text-generation-inference` locally.
diff --git a/docs/source/conceptual/streaming.md b/docs/source/conceptual/streaming.md
index 49c48fa0..f1f37f2a 100644
--- a/docs/source/conceptual/streaming.md
+++ b/docs/source/conceptual/streaming.md
@@ -1,5 +1,6 @@
 # Streaming
 
+
 ## What is Streaming?
 
 Token streaming is the mode in which the server returns the tokens one by one as the model generates them. This enables showing progressive generations to the user rather than waiting for the whole generation. Streaming is an essential aspect of the end-user experience as it reduces latency, one of the most critical aspects of a smooth experience.
@@ -154,3 +155,7 @@ SSEs are different than:
 * Webhooks: where there is a bi-directional connection. The server can send information to the client, but the client can also send data to the server after the first request. Webhooks are more complex to operate as they don’t only use HTTP.
 
 If there are too many requests at the same time, TGI returns an HTTP Error with an `overloaded` error type (`huggingface_hub` returns `OverloadedError`). This allows the client to manage the overloaded server (e.g., it could display a busy error to the user or retry with a new request). To configure the maximum number of concurrent requests, you can specify `--max_concurrent_requests`, allowing clients to handle backpressure.
+
+## External sources
+
+Adyen wrote a nice recap of how TGI streaming feature works. [LLM inference at scale with TGI](https://www.adyen.com/knowledge-hub/llm-inference-at-scale-with-tgi)

From e279b38aca90cddc0ab654e18c369d9c462ebc0d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?= <me@danieldk.eu>
Date: Thu, 5 Sep 2024 17:06:54 +0200
Subject: [PATCH 18/37] Add two handy gitignores for Nix environments (#2484)

---
 .gitignore | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.gitignore b/.gitignore
index f79d8faa..edcc2f89 100644
--- a/.gitignore
+++ b/.gitignore
@@ -19,3 +19,6 @@ server/exllama_kernels/exllama_kernels/exllama_ext_hip.cpp
 data/
 load_tests/*.json
 server/fbgemmm
+
+.direnv/
+.venv/

From 5cd8025f1849bd4c13edcf9eb4f72e199e6a5c37 Mon Sep 17 00:00:00 2001
From: "Wang, Yi" <yi.a.wang@intel.com>
Date: Thu, 5 Sep 2024 23:41:39 +0800
Subject: [PATCH 19/37] hotfix: fix regression of attention api change in intel
 platform (#2439)

fix regression caused by attention api change. ipex.varlen_attention does not support paged-cache
format kv input now.

Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
---
 Dockerfile_intel                              |  3 +++
 .../layers/attention/ipex.py                  | 22 +++++++++----------
 .../custom_modeling/flash_cohere_modeling.py  |  4 ++--
 .../custom_modeling/flash_dbrx_modeling.py    |  4 ++--
 .../flash_deepseek_v2_modeling.py             |  4 ++--
 .../custom_modeling/flash_gemma2_modeling.py  |  6 ++---
 .../custom_modeling/flash_gemma_modeling.py   |  6 ++---
 .../custom_modeling/flash_gpt2_modeling.py    |  6 ++---
 .../custom_modeling/flash_gptj_modeling.py    |  7 +++---
 .../custom_modeling/flash_llama_modeling.py   |  4 ++--
 .../custom_modeling/flash_mistral_modeling.py |  4 ++--
 .../custom_modeling/flash_mixtral_modeling.py |  4 ++--
 .../custom_modeling/flash_neox_modeling.py    |  6 ++---
 .../custom_modeling/flash_phi_modeling.py     |  5 +++--
 .../custom_modeling/flash_qwen2_modeling.py   |  5 +++--
 .../custom_modeling/flash_rw_modeling.py      | 16 ++++++--------
 .../flash_santacoder_modeling.py              |  5 +++--
 .../flash_starcoder2_modeling.py              |  5 +++--
 18 files changed, 60 insertions(+), 56 deletions(-)

diff --git a/Dockerfile_intel b/Dockerfile_intel
index 9af6422c..0cda4d4b 100644
--- a/Dockerfile_intel
+++ b/Dockerfile_intel
@@ -171,5 +171,8 @@ COPY --from=builder /usr/src/target/release-opt/text-generation-router /usr/loca
 COPY --from=builder /usr/src/target/release-opt/text-generation-launcher /usr/local/bin/text-generation-launcher
 
 FROM ${PLATFORM} AS final
+ENV ATTENTION=paged
+ENV USE_PREFIX_CACHING=0
+ENV CUDA_GRAPHS=0
 ENTRYPOINT ["text-generation-launcher"]
 CMD ["--json-output"]
diff --git a/server/text_generation_server/layers/attention/ipex.py b/server/text_generation_server/layers/attention/ipex.py
index d7cf780a..2d1427ae 100644
--- a/server/text_generation_server/layers/attention/ipex.py
+++ b/server/text_generation_server/layers/attention/ipex.py
@@ -8,11 +8,11 @@ SUPPORTS_WINDOWING = False
 
 
 def attention(
-    q,
-    k,
-    v,
-    cu_seqlens,
-    max_s,
+    q: torch.Tensor,
+    key_cache: torch.Tensor,
+    value_cache: torch.Tensor,
+    seqlen: Seqlen,
+    block_tables: torch.Tensor,
     softmax_scale,
     window_size_left=-1,
     causal=True,
@@ -23,13 +23,13 @@ def attention(
     # We do not need to check window_size_left (not supported) here, so it is already checked ahead of time at model load.
     ipex.llm.functional.varlen_attention(
         q,
-        k,
-        v,
+        key_cache,
+        value_cache,
         out,
-        cu_seqlens,
-        cu_seqlens,
-        max_s,
-        max_s,
+        seqlen.cu_seqlen_q,
+        seqlen.cu_seqlen_q,
+        seqlen.max_q,
+        seqlen.max_q,
         0.0,
         softmax_scale,
         False,
diff --git a/server/text_generation_server/models/custom_modeling/flash_cohere_modeling.py b/server/text_generation_server/models/custom_modeling/flash_cohere_modeling.py
index fe19180a..374ccb10 100644
--- a/server/text_generation_server/models/custom_modeling/flash_cohere_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_cohere_modeling.py
@@ -297,8 +297,8 @@ class FlashCohereAttention(torch.nn.Module):
             # flash attention
             attn_output = attention(
                 query,
-                kv_cache[0],
-                kv_cache[1],
+                kv_cache[0] if SYSTEM != "ipex" else key,
+                kv_cache[1] if SYSTEM != "ipex" else value,
                 seqlen,
                 block_tables,
                 self.softmax_scale,
diff --git a/server/text_generation_server/models/custom_modeling/flash_dbrx_modeling.py b/server/text_generation_server/models/custom_modeling/flash_dbrx_modeling.py
index b82b5473..0dc88098 100644
--- a/server/text_generation_server/models/custom_modeling/flash_dbrx_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_dbrx_modeling.py
@@ -336,8 +336,8 @@ class DbrxAttention(torch.nn.Module):
             # flash attention
             attn_output = attention(
                 query,
-                kv_cache[0],
-                kv_cache[1],
+                kv_cache[0] if SYSTEM != "ipex" else kv[:, 0],
+                kv_cache[1] if SYSTEM != "ipex" else kv[:, 1],
                 seqlen,
                 block_tables,
                 self.softmax_scale,
diff --git a/server/text_generation_server/models/custom_modeling/flash_deepseek_v2_modeling.py b/server/text_generation_server/models/custom_modeling/flash_deepseek_v2_modeling.py
index 0585b40e..f62dfe66 100644
--- a/server/text_generation_server/models/custom_modeling/flash_deepseek_v2_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_deepseek_v2_modeling.py
@@ -363,8 +363,8 @@ class DeepseekV2Attention(torch.nn.Module):
             # flash attention
             attn_output = attention(
                 query,
-                kv_cache[0],
-                kv_cache[1],
+                kv_cache[0] if SYSTEM != "ipex" else key,
+                kv_cache[1] if SYSTEM != "ipex" else value,
                 seqlen,
                 block_tables,
                 self.softmax_scale,
diff --git a/server/text_generation_server/models/custom_modeling/flash_gemma2_modeling.py b/server/text_generation_server/models/custom_modeling/flash_gemma2_modeling.py
index d16e805f..e12bff00 100644
--- a/server/text_generation_server/models/custom_modeling/flash_gemma2_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_gemma2_modeling.py
@@ -25,7 +25,7 @@ from torch import nn
 from transformers.activations import ACT2FN
 from transformers.configuration_utils import PretrainedConfig
 from typing import Optional, List, Tuple
-
+from text_generation_server.utils.import_utils import SYSTEM
 from text_generation_server.layers.attention import (
     paged_attention,
     attention,
@@ -237,8 +237,8 @@ class FlashGemma2Attention(torch.nn.Module):
             # flash attention
             attn_output = attention(
                 query,
-                kv_cache[0],
-                kv_cache[1],
+                kv_cache[0] if SYSTEM != "ipex" else kv[:, 0],
+                kv_cache[1] if SYSTEM != "ipex" else kv[:, 1],
                 seqlen,
                 block_tables,
                 self.softmax_scale,
diff --git a/server/text_generation_server/models/custom_modeling/flash_gemma_modeling.py b/server/text_generation_server/models/custom_modeling/flash_gemma_modeling.py
index 34be4cb8..77ae4b35 100644
--- a/server/text_generation_server/models/custom_modeling/flash_gemma_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_gemma_modeling.py
@@ -25,7 +25,7 @@ from torch import nn
 from transformers.activations import ACT2FN
 from transformers.configuration_utils import PretrainedConfig
 from typing import Optional, List, Tuple
-
+from text_generation_server.utils.import_utils import SYSTEM
 from text_generation_server.layers.attention import (
     paged_attention,
     attention,
@@ -231,8 +231,8 @@ class FlashGemmaAttention(torch.nn.Module):
             # flash attention
             attn_output = attention(
                 query,
-                kv_cache[0],
-                kv_cache[1],
+                kv_cache[0] if SYSTEM != "ipex" else kv[:, 0],
+                kv_cache[1] if SYSTEM != "ipex" else kv[:, 1],
                 seqlen,
                 block_tables,
                 self.softmax_scale,
diff --git a/server/text_generation_server/models/custom_modeling/flash_gpt2_modeling.py b/server/text_generation_server/models/custom_modeling/flash_gpt2_modeling.py
index 403fa908..411c4ce1 100644
--- a/server/text_generation_server/models/custom_modeling/flash_gpt2_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_gpt2_modeling.py
@@ -24,7 +24,7 @@ import torch.distributed
 from torch import nn
 from transformers.activations import ACT2FN
 from typing import Optional, List, Tuple
-
+from text_generation_server.utils.import_utils import SYSTEM
 from text_generation_server.layers.attention import (
     paged_attention,
     attention,
@@ -231,8 +231,8 @@ class FlashGPT2Attention(torch.nn.Module):
             # flash attention
             attn_output = attention(
                 query,
-                kv_cache[0],
-                kv_cache[1],
+                kv_cache[0] if SYSTEM != "ipex" else key,
+                kv_cache[1] if SYSTEM != "ipex" else value,
                 seqlen,
                 block_tables,
                 self.softmax_scale,
diff --git a/server/text_generation_server/models/custom_modeling/flash_gptj_modeling.py b/server/text_generation_server/models/custom_modeling/flash_gptj_modeling.py
index 35ab2791..ef071d46 100644
--- a/server/text_generation_server/models/custom_modeling/flash_gptj_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_gptj_modeling.py
@@ -24,7 +24,7 @@ import torch.distributed
 from torch import nn
 from transformers.activations import ACT2FN
 from typing import Optional, List, Tuple
-
+from text_generation_server.utils.import_utils import SYSTEM
 from text_generation_server.layers.attention import (
     paged_attention,
     attention,
@@ -44,7 +44,6 @@ from text_generation_server.layers.rotary import (
 from text_generation_server.layers.layernorm import (
     FastLayerNorm,
 )
-from text_generation_server.utils.import_utils import SYSTEM
 
 
 def load_attention(config, prefix: str, weights):
@@ -193,8 +192,8 @@ class FlashGPTJAttention(torch.nn.Module):
             # flash attention
             attn_output = attention(
                 query,
-                kv_cache[0],
-                kv_cache[1],
+                kv_cache[0] if SYSTEM != "ipex" else key,
+                kv_cache[1] if SYSTEM != "ipex" else value,
                 seqlen,
                 block_tables,
                 self.softmax_scale,
diff --git a/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py b/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py
index ae981c9a..7d639e35 100644
--- a/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py
@@ -220,8 +220,8 @@ class FlashLlamaAttention(torch.nn.Module):
             # flash attention
             attn_output = attention(
                 query,
-                kv_cache[0],
-                kv_cache[1],
+                kv_cache[0] if SYSTEM != "ipex" else kv[:, 0],
+                kv_cache[1] if SYSTEM != "ipex" else kv[:, 1],
                 seqlen,
                 block_tables,
                 self.softmax_scale,
diff --git a/server/text_generation_server/models/custom_modeling/flash_mistral_modeling.py b/server/text_generation_server/models/custom_modeling/flash_mistral_modeling.py
index 30ca3faf..cdd23796 100644
--- a/server/text_generation_server/models/custom_modeling/flash_mistral_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_mistral_modeling.py
@@ -218,8 +218,8 @@ class MistralAttention(torch.nn.Module):
             # flash attention
             attn_output = attention(
                 query,
-                kv_cache[0],
-                kv_cache[1],
+                kv_cache[0] if SYSTEM != "ipex" else kv_to_cache[:, 0],
+                kv_cache[1] if SYSTEM != "ipex" else kv_to_cache[:, 1],
                 seqlen,
                 block_tables,
                 self.softmax_scale,
diff --git a/server/text_generation_server/models/custom_modeling/flash_mixtral_modeling.py b/server/text_generation_server/models/custom_modeling/flash_mixtral_modeling.py
index c5d60af1..c36e97f6 100644
--- a/server/text_generation_server/models/custom_modeling/flash_mixtral_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_mixtral_modeling.py
@@ -275,8 +275,8 @@ class MixtralAttention(torch.nn.Module):
             # flash attention
             attn_output = attention(
                 query,
-                kv_cache[0],
-                kv_cache[1],
+                kv_cache[0] if SYSTEM != "ipex" else kv_to_cache[:, 0],
+                kv_cache[1] if SYSTEM != "ipex" else kv_to_cache[:, 1],
                 seqlen,
                 block_tables,
                 self.softmax_scale,
diff --git a/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py b/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py
index fda648f9..454e45eb 100644
--- a/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py
@@ -26,7 +26,7 @@ from transformers.activations import ACT2FN
 from transformers.modeling_utils import PreTrainedModel
 from transformers.models.gpt_neox import GPTNeoXConfig as TransformersGPTNeoXConfig
 from typing import Optional, List, Tuple
-
+from text_generation_server.utils.import_utils import SYSTEM
 from text_generation_server.layers.attention import (
     paged_attention,
     attention,
@@ -172,8 +172,8 @@ class FlashNeoxAttention(torch.nn.Module):
             # flash attention
             attn_output = attention(
                 qkv[:, 0],
-                kv_cache[0],
-                kv_cache[1],
+                kv_cache[0] if SYSTEM != "ipex" else qkv[:, 1],
+                kv_cache[1] if SYSTEM != "ipex" else qkv[:, 2],
                 seqlen,
                 block_tables,
                 self.softmax_scale,
diff --git a/server/text_generation_server/models/custom_modeling/flash_phi_modeling.py b/server/text_generation_server/models/custom_modeling/flash_phi_modeling.py
index 37adb8be..e2d9bbbc 100644
--- a/server/text_generation_server/models/custom_modeling/flash_phi_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_phi_modeling.py
@@ -25,6 +25,7 @@ from text_generation_server.layers.layernorm import (
 from text_generation_server.layers.rotary import (
     PositionRotaryEmbedding,
 )
+from text_generation_server.utils.import_utils import SYSTEM
 
 
 class PhiConfig(PretrainedConfig):
@@ -193,8 +194,8 @@ class FlashPhiAttention(torch.nn.Module):
         if cu_seqlen_prefill is not None:
             attn_output = attention(
                 query,
-                kv_cache[0],
-                kv_cache[1],
+                kv_cache[0] if SYSTEM != "ipex" else kv[:, 0],
+                kv_cache[1] if SYSTEM != "ipex" else kv[:, 1],
                 seqlen,
                 block_tables,
                 self.softmax_scale,
diff --git a/server/text_generation_server/models/custom_modeling/flash_qwen2_modeling.py b/server/text_generation_server/models/custom_modeling/flash_qwen2_modeling.py
index 5aac28a3..999b72e7 100644
--- a/server/text_generation_server/models/custom_modeling/flash_qwen2_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_qwen2_modeling.py
@@ -21,6 +21,7 @@ from text_generation_server.layers.rotary import PositionRotaryEmbedding
 from text_generation_server.layers.layernorm import (
     FastRMSNorm,
 )
+from text_generation_server.utils.import_utils import SYSTEM
 
 
 def load_attention(config, prefix, weights):
@@ -136,8 +137,8 @@ class Qwen2Attention(torch.nn.Module):
             # flash attention
             attn_output = attention(
                 query,
-                kv_cache[0],
-                kv_cache[1],
+                kv_cache[0] if SYSTEM != "ipex" else kv_to_cache[:, 0],
+                kv_cache[1] if SYSTEM != "ipex" else kv_to_cache[:, 1],
                 seqlen,
                 block_tables,
                 self.softmax_scale,
diff --git a/server/text_generation_server/models/custom_modeling/flash_rw_modeling.py b/server/text_generation_server/models/custom_modeling/flash_rw_modeling.py
index 1c55dd91..edc54c09 100644
--- a/server/text_generation_server/models/custom_modeling/flash_rw_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_rw_modeling.py
@@ -5,7 +5,7 @@ import torch.distributed
 from torch import nn
 from transformers.configuration_utils import PretrainedConfig
 from transformers.modeling_utils import PreTrainedModel
-
+from text_generation_server.utils.import_utils import SYSTEM
 from text_generation_server.layers import (
     SpeculativeHead,
     TensorParallelColumnLinear,
@@ -207,8 +207,8 @@ class FlashRWAttention(torch.nn.Module):
             # flash attention
             attn_output = attention(
                 query,
-                kv_cache[0],
-                kv_cache[1],
+                kv_cache[0] if SYSTEM != "ipex" else kv[:, 0],
+                kv_cache[1] if SYSTEM != "ipex" else kv[:, 1],
                 seqlen,
                 block_tables,
                 self.softmax_scale,
@@ -325,12 +325,10 @@ class FlashRWLargeAttention(torch.nn.Module):
             # flash attention
             attn_output = attention(
                 query,
-                torch.select(kv, dim=2, index=0),
-                torch.select(kv, dim=2, index=1),
-                kv_cache[0],
-                kv_cache[1],
-                cu_seqlen_prefill,
-                max_s,
+                kv_cache[0] if SYSTEM != "ipex" else kv[:, :, 0].contiguous(),
+                kv_cache[1] if SYSTEM != "ipex" else kv[:, :, 1].contiguous(),
+                seqlen,
+                block_tables,
                 self.softmax_scale,
             )
         # Decode
diff --git a/server/text_generation_server/models/custom_modeling/flash_santacoder_modeling.py b/server/text_generation_server/models/custom_modeling/flash_santacoder_modeling.py
index 19025c4c..f97b4409 100644
--- a/server/text_generation_server/models/custom_modeling/flash_santacoder_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_santacoder_modeling.py
@@ -22,6 +22,7 @@ from text_generation_server.layers.gptq import GPTQWeightsLoader
 from text_generation_server.layers.layernorm import (
     FastLayerNorm,
 )
+from text_generation_server.utils.import_utils import SYSTEM
 
 
 def load_multi_mqa(
@@ -292,8 +293,8 @@ class FlashMQAttention(torch.nn.Module):
             # flash attention
             attn_output = attention(
                 query,
-                kv_cache[0],
-                kv_cache[1],
+                kv_cache[0] if SYSTEM != "ipex" else key_value[:, 0],
+                kv_cache[1] if SYSTEM != "ipex" else key_value[:, 1],
                 seqlen,
                 block_tables,
                 self.softmax_scale,
diff --git a/server/text_generation_server/models/custom_modeling/flash_starcoder2_modeling.py b/server/text_generation_server/models/custom_modeling/flash_starcoder2_modeling.py
index 2f9ecd0d..6aa7fa21 100644
--- a/server/text_generation_server/models/custom_modeling/flash_starcoder2_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_starcoder2_modeling.py
@@ -47,6 +47,7 @@ from text_generation_server.layers.rotary import (
     PositionRotaryEmbedding,
 )
 from text_generation_server.utils.weights import UnquantizedWeight
+from text_generation_server.utils.import_utils import SYSTEM
 
 
 class Starcoder2Config(PretrainedConfig):
@@ -241,8 +242,8 @@ class Starcoder2Attention(torch.nn.Module):
             # flash attention
             attn_output = attention(
                 query,
-                kv_cache[0],
-                kv_cache[1],
+                kv_cache[0] if SYSTEM != "ipex" else kv_to_cache[:, 0],
+                kv_cache[1] if SYSTEM != "ipex" else kv_to_cache[:, 1],
                 seqlen,
                 block_tables,
                 self.softmax_scale,

From 0424e27f651bf6df492c9ad0ba7c7e9def60f224 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?= <me@danieldk.eu>
Date: Fri, 6 Sep 2024 10:19:04 +0200
Subject: [PATCH 20/37] nix: add pyright/ruff for proper LSP in the impure
 devshell (#2496)

We need this to ensure that pyright/ruff are part of the same
interpreter/venv.
---
 flake.nix | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/flake.nix b/flake.nix
index 82f9ab1d..58a8e311 100644
--- a/flake.nix
+++ b/flake.nix
@@ -82,8 +82,10 @@
                 docker
                 pip
                 ipdb
+                pyright
                 pytest
                 pytest-asyncio
+                ruff
                 syrupy
               ]);
 

From 2eb57a15ecc39bdaebef47dbad30293ac82e6a25 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?= <me@danieldk.eu>
Date: Fri, 6 Sep 2024 11:00:52 +0200
Subject: [PATCH 21/37] Fix incompatibility with latest `syrupy` and update in
 Poetry (#2497)

---
 integration-tests/conftest.py |  8 +++++++-
 server/poetry.lock            | 14 ++++++++++++++
 server/pyproject.toml         |  1 +
 server/requirements_cuda.txt  |  6 ++++++
 server/requirements_intel.txt |  6 ++++++
 server/requirements_rocm.txt  |  6 ++++++
 6 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/integration-tests/conftest.py b/integration-tests/conftest.py
index a8a77cd2..f58f5fdf 100644
--- a/integration-tests/conftest.py
+++ b/integration-tests/conftest.py
@@ -64,6 +64,7 @@ class ResponseComparator(JSONSnapshotExtension):
         self,
         data,
         *,
+        include=None,
         exclude=None,
         matcher=None,
     ):
@@ -79,7 +80,12 @@ class ResponseComparator(JSONSnapshotExtension):
             data = [d.model_dump() for d in data]
 
         data = self._filter(
-            data=data, depth=0, path=(), exclude=exclude, matcher=matcher
+            data=data,
+            depth=0,
+            path=(),
+            exclude=exclude,
+            include=include,
+            matcher=matcher,
         )
         return json.dumps(data, indent=2, ensure_ascii=False, sort_keys=False) + "\n"
 
diff --git a/server/poetry.lock b/server/poetry.lock
index fc1a54a3..49276807 100644
--- a/server/poetry.lock
+++ b/server/poetry.lock
@@ -2945,6 +2945,20 @@ mpmath = ">=1.1.0,<1.4"
 [package.extras]
 dev = ["hypothesis (>=6.70.0)", "pytest (>=7.1.0)"]
 
+[[package]]
+name = "syrupy"
+version = "4.7.1"
+description = "Pytest Snapshot Test Utility"
+optional = false
+python-versions = ">=3.8.1"
+files = [
+    {file = "syrupy-4.7.1-py3-none-any.whl", hash = "sha256:be002267a512a4bedddfae2e026c93df1ea928ae10baadc09640516923376d41"},
+    {file = "syrupy-4.7.1.tar.gz", hash = "sha256:f9d4485f3f27d0e5df6ed299cac6fa32eb40a441915d988e82be5a4bdda335c8"},
+]
+
+[package.dependencies]
+pytest = ">=7.0.0,<9.0.0"
+
 [[package]]
 name = "texttable"
 version = "1.7.0"
diff --git a/server/pyproject.toml b/server/pyproject.toml
index 57deb1b8..66844402 100644
--- a/server/pyproject.toml
+++ b/server/pyproject.toml
@@ -47,6 +47,7 @@ marlin-kernels = [
   { url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.2.0/marlin_kernels-0.2.0+cu123torch2.4-cp312-cp312-linux_x86_64.whl", python = "~3.12", optional = true },
 ]
 rich = "^13.7.1"
+syrupy = "^4.7.1"
 
 [tool.poetry.extras]
 torch = ["torch"]
diff --git a/server/requirements_cuda.txt b/server/requirements_cuda.txt
index eb521bd6..71291f7b 100644
--- a/server/requirements_cuda.txt
+++ b/server/requirements_cuda.txt
@@ -4,6 +4,7 @@ click==8.1.7 ; python_version >= "3.9" and python_version < "3.13"
 colorama==0.4.6 ; python_version >= "3.9" and python_version < "3.13" and (sys_platform == "win32" or platform_system == "Windows")
 deprecated==1.2.14 ; python_version >= "3.9" and python_version < "3.13"
 einops==0.6.1 ; python_version >= "3.9" and python_version < "3.13"
+exceptiongroup==1.2.2 ; python_version >= "3.9" and python_version < "3.11"
 filelock==3.15.4 ; python_version >= "3.9" and python_version < "3.13"
 fsspec==2024.5.0 ; python_version >= "3.9" and python_version < "3.13"
 googleapis-common-protos==1.63.2 ; python_version >= "3.9" and python_version < "3.13"
@@ -15,6 +16,7 @@ hf-transfer==0.1.8 ; python_version >= "3.9" and python_version < "3.13"
 huggingface-hub==0.23.5 ; python_version >= "3.9" and python_version < "3.13"
 idna==3.7 ; python_version >= "3.9" and python_version < "3.13"
 importlib-metadata==7.1.0 ; python_version >= "3.9" and python_version < "3.13"
+iniconfig==2.0.0 ; python_version >= "3.9" and python_version < "3.13"
 loguru==0.6.0 ; python_version >= "3.9" and python_version < "3.13"
 markdown-it-py==3.0.0 ; python_version >= "3.9" and python_version < "3.13"
 mdurl==0.1.2 ; python_version >= "3.9" and python_version < "3.13"
@@ -31,10 +33,12 @@ opentelemetry-sdk==1.25.0 ; python_version >= "3.9" and python_version < "3.13"
 opentelemetry-semantic-conventions==0.46b0 ; python_version >= "3.9" and python_version < "3.13"
 packaging==24.1 ; python_version >= "3.9" and python_version < "3.13"
 pillow==10.4.0 ; python_version >= "3.9" and python_version < "3.13"
+pluggy==1.5.0 ; python_version >= "3.9" and python_version < "3.13"
 prometheus-client==0.20.0 ; python_version >= "3.9" and python_version < "3.13"
 protobuf==4.25.3 ; python_version >= "3.9" and python_version < "3.13"
 py-cpuinfo==9.0.0 ; python_version >= "3.9" and python_version < "3.13"
 pygments==2.18.0 ; python_version >= "3.9" and python_version < "3.13"
+pytest==7.4.4 ; python_version >= "3.9" and python_version < "3.13"
 pyyaml==6.0.1 ; python_version >= "3.9" and python_version < "3.13"
 regex==2024.5.15 ; python_version >= "3.9" and python_version < "3.13"
 requests==2.32.3 ; python_version >= "3.9" and python_version < "3.13"
@@ -43,7 +47,9 @@ safetensors==0.4.3 ; python_version >= "3.9" and python_version < "3.13"
 scipy==1.13.1 ; python_version >= "3.9" and python_version < "3.13"
 sentencepiece==0.1.99 ; python_version >= "3.9" and python_version < "3.13"
 setuptools==71.1.0 ; python_version >= "3.9" and python_version < "3.13"
+syrupy==4.7.1 ; python_version >= "3.9" and python_version < "3.13"
 tokenizers==0.19.1 ; python_version >= "3.9" and python_version < "3.13"
+tomli==2.0.1 ; python_version >= "3.9" and python_version < "3.11"
 tqdm==4.66.4 ; python_version >= "3.9" and python_version < "3.13"
 transformers==4.43.1 ; python_version >= "3.9" and python_version < "3.13"
 typer==0.6.1 ; python_version >= "3.9" and python_version < "3.13"
diff --git a/server/requirements_intel.txt b/server/requirements_intel.txt
index eb521bd6..71291f7b 100644
--- a/server/requirements_intel.txt
+++ b/server/requirements_intel.txt
@@ -4,6 +4,7 @@ click==8.1.7 ; python_version >= "3.9" and python_version < "3.13"
 colorama==0.4.6 ; python_version >= "3.9" and python_version < "3.13" and (sys_platform == "win32" or platform_system == "Windows")
 deprecated==1.2.14 ; python_version >= "3.9" and python_version < "3.13"
 einops==0.6.1 ; python_version >= "3.9" and python_version < "3.13"
+exceptiongroup==1.2.2 ; python_version >= "3.9" and python_version < "3.11"
 filelock==3.15.4 ; python_version >= "3.9" and python_version < "3.13"
 fsspec==2024.5.0 ; python_version >= "3.9" and python_version < "3.13"
 googleapis-common-protos==1.63.2 ; python_version >= "3.9" and python_version < "3.13"
@@ -15,6 +16,7 @@ hf-transfer==0.1.8 ; python_version >= "3.9" and python_version < "3.13"
 huggingface-hub==0.23.5 ; python_version >= "3.9" and python_version < "3.13"
 idna==3.7 ; python_version >= "3.9" and python_version < "3.13"
 importlib-metadata==7.1.0 ; python_version >= "3.9" and python_version < "3.13"
+iniconfig==2.0.0 ; python_version >= "3.9" and python_version < "3.13"
 loguru==0.6.0 ; python_version >= "3.9" and python_version < "3.13"
 markdown-it-py==3.0.0 ; python_version >= "3.9" and python_version < "3.13"
 mdurl==0.1.2 ; python_version >= "3.9" and python_version < "3.13"
@@ -31,10 +33,12 @@ opentelemetry-sdk==1.25.0 ; python_version >= "3.9" and python_version < "3.13"
 opentelemetry-semantic-conventions==0.46b0 ; python_version >= "3.9" and python_version < "3.13"
 packaging==24.1 ; python_version >= "3.9" and python_version < "3.13"
 pillow==10.4.0 ; python_version >= "3.9" and python_version < "3.13"
+pluggy==1.5.0 ; python_version >= "3.9" and python_version < "3.13"
 prometheus-client==0.20.0 ; python_version >= "3.9" and python_version < "3.13"
 protobuf==4.25.3 ; python_version >= "3.9" and python_version < "3.13"
 py-cpuinfo==9.0.0 ; python_version >= "3.9" and python_version < "3.13"
 pygments==2.18.0 ; python_version >= "3.9" and python_version < "3.13"
+pytest==7.4.4 ; python_version >= "3.9" and python_version < "3.13"
 pyyaml==6.0.1 ; python_version >= "3.9" and python_version < "3.13"
 regex==2024.5.15 ; python_version >= "3.9" and python_version < "3.13"
 requests==2.32.3 ; python_version >= "3.9" and python_version < "3.13"
@@ -43,7 +47,9 @@ safetensors==0.4.3 ; python_version >= "3.9" and python_version < "3.13"
 scipy==1.13.1 ; python_version >= "3.9" and python_version < "3.13"
 sentencepiece==0.1.99 ; python_version >= "3.9" and python_version < "3.13"
 setuptools==71.1.0 ; python_version >= "3.9" and python_version < "3.13"
+syrupy==4.7.1 ; python_version >= "3.9" and python_version < "3.13"
 tokenizers==0.19.1 ; python_version >= "3.9" and python_version < "3.13"
+tomli==2.0.1 ; python_version >= "3.9" and python_version < "3.11"
 tqdm==4.66.4 ; python_version >= "3.9" and python_version < "3.13"
 transformers==4.43.1 ; python_version >= "3.9" and python_version < "3.13"
 typer==0.6.1 ; python_version >= "3.9" and python_version < "3.13"
diff --git a/server/requirements_rocm.txt b/server/requirements_rocm.txt
index eb521bd6..71291f7b 100644
--- a/server/requirements_rocm.txt
+++ b/server/requirements_rocm.txt
@@ -4,6 +4,7 @@ click==8.1.7 ; python_version >= "3.9" and python_version < "3.13"
 colorama==0.4.6 ; python_version >= "3.9" and python_version < "3.13" and (sys_platform == "win32" or platform_system == "Windows")
 deprecated==1.2.14 ; python_version >= "3.9" and python_version < "3.13"
 einops==0.6.1 ; python_version >= "3.9" and python_version < "3.13"
+exceptiongroup==1.2.2 ; python_version >= "3.9" and python_version < "3.11"
 filelock==3.15.4 ; python_version >= "3.9" and python_version < "3.13"
 fsspec==2024.5.0 ; python_version >= "3.9" and python_version < "3.13"
 googleapis-common-protos==1.63.2 ; python_version >= "3.9" and python_version < "3.13"
@@ -15,6 +16,7 @@ hf-transfer==0.1.8 ; python_version >= "3.9" and python_version < "3.13"
 huggingface-hub==0.23.5 ; python_version >= "3.9" and python_version < "3.13"
 idna==3.7 ; python_version >= "3.9" and python_version < "3.13"
 importlib-metadata==7.1.0 ; python_version >= "3.9" and python_version < "3.13"
+iniconfig==2.0.0 ; python_version >= "3.9" and python_version < "3.13"
 loguru==0.6.0 ; python_version >= "3.9" and python_version < "3.13"
 markdown-it-py==3.0.0 ; python_version >= "3.9" and python_version < "3.13"
 mdurl==0.1.2 ; python_version >= "3.9" and python_version < "3.13"
@@ -31,10 +33,12 @@ opentelemetry-sdk==1.25.0 ; python_version >= "3.9" and python_version < "3.13"
 opentelemetry-semantic-conventions==0.46b0 ; python_version >= "3.9" and python_version < "3.13"
 packaging==24.1 ; python_version >= "3.9" and python_version < "3.13"
 pillow==10.4.0 ; python_version >= "3.9" and python_version < "3.13"
+pluggy==1.5.0 ; python_version >= "3.9" and python_version < "3.13"
 prometheus-client==0.20.0 ; python_version >= "3.9" and python_version < "3.13"
 protobuf==4.25.3 ; python_version >= "3.9" and python_version < "3.13"
 py-cpuinfo==9.0.0 ; python_version >= "3.9" and python_version < "3.13"
 pygments==2.18.0 ; python_version >= "3.9" and python_version < "3.13"
+pytest==7.4.4 ; python_version >= "3.9" and python_version < "3.13"
 pyyaml==6.0.1 ; python_version >= "3.9" and python_version < "3.13"
 regex==2024.5.15 ; python_version >= "3.9" and python_version < "3.13"
 requests==2.32.3 ; python_version >= "3.9" and python_version < "3.13"
@@ -43,7 +47,9 @@ safetensors==0.4.3 ; python_version >= "3.9" and python_version < "3.13"
 scipy==1.13.1 ; python_version >= "3.9" and python_version < "3.13"
 sentencepiece==0.1.99 ; python_version >= "3.9" and python_version < "3.13"
 setuptools==71.1.0 ; python_version >= "3.9" and python_version < "3.13"
+syrupy==4.7.1 ; python_version >= "3.9" and python_version < "3.13"
 tokenizers==0.19.1 ; python_version >= "3.9" and python_version < "3.13"
+tomli==2.0.1 ; python_version >= "3.9" and python_version < "3.11"
 tqdm==4.66.4 ; python_version >= "3.9" and python_version < "3.13"
 transformers==4.43.1 ; python_version >= "3.9" and python_version < "3.13"
 typer==0.6.1 ; python_version >= "3.9" and python_version < "3.13"

From 379472c4c2e401b1efd66d7d47edc00b96f5ce14 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?= <me@danieldk.eu>
Date: Fri, 6 Sep 2024 11:55:23 +0200
Subject: [PATCH 22/37] radix trie: add assertions (#2491)

These should all be cheap assertions.

Also:

* Fixup some comments.
* Delete a `remove` that was done unnecessarily twice.
---
 backends/v3/src/radix.rs | 27 ++++++++++++++++++++++-----
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git a/backends/v3/src/radix.rs b/backends/v3/src/radix.rs
index b85be00b..bb6582b0 100644
--- a/backends/v3/src/radix.rs
+++ b/backends/v3/src/radix.rs
@@ -73,14 +73,13 @@ impl Allocator for RadixAllocator {
             let node_id = self
                 .cache_blocks
                 .find(prefill_tokens.as_slice(), &mut blocks);
-            // Even if this allocation fails below, we need to increase he
-            // refcount to ensure that the prefix that was found is not evicted.
-
             node_id
         } else {
             self.cache_blocks.root_id()
         };
 
+        // Even if this allocation fails below, we need to increase he
+        // refcount to ensure that the prefix that was found is not evicted.
         self.cache_blocks
             .incref(prefix_node)
             .expect("Failed to increment refcount");
@@ -303,6 +302,11 @@ impl RadixTrie {
 
         node.ref_count -= 1;
         if node.ref_count == 0 {
+            assert!(
+                node.children.is_empty(),
+                "Nodes with children must have refcount > 0"
+            );
+
             self.leaves.insert((node.last_accessed, node_id));
         }
 
@@ -330,7 +334,7 @@ impl RadixTrie {
     /// Evict `n_blocks` from the trie.
     ///
     /// Returns the evicted blocks. When the length is less than `n_blocks`,
-    /// not enough blocks could beevicted.
+    /// not enough blocks could be evicted.
     pub fn evict(&mut self, n_blocks: usize) -> Vec<u32> {
         // NOTE: we don't return Result here. If any of the unwrapping fails,
         // it's a programming error in the trie implementation, not a user
@@ -345,6 +349,12 @@ impl RadixTrie {
             let blocks_needed = n_blocks - evicted.len();
 
             let node = self.nodes.get(node_id).expect("Leave does not exist");
+            assert_eq!(
+                node.ref_count, 0,
+                "Leaf must have refcount of 0, got {}",
+                node.ref_count
+            );
+
             if blocks_needed >= node.blocks.len() {
                 // We need to evict the whole node if we need more blocks than it has.
                 let node = self.remove_node(node_id);
@@ -500,12 +510,16 @@ impl RadixTrie {
     fn remove_node(&mut self, node_id: NodeId) -> TrieNode {
         // Unwrap here, passing in an unknown id is a programming error.
         let node = self.nodes.remove(node_id).expect("Unknown node");
+        assert!(
+            node.children.is_empty(),
+            "Tried to remove a node with {} children",
+            node.children.len()
+        );
         let parent_id = node.parent.expect("Attempted to remove root node");
         let parent = self.nodes.get_mut(parent_id).expect("Unknown parent node");
         parent.children.remove(&node.key[0]);
         self.decref(parent_id)
             .expect("Failed to decrease parent refcount");
-        self.nodes.remove(node_id);
         node
     }
 
@@ -579,6 +593,9 @@ impl TrieNode {
 
 fn shared_prefix(left: &[u32], right: &[u32], block_size: usize) -> usize {
     let full = left.iter().zip(right).take_while(|(a, b)| a == b).count();
+    // NOTE: this is the case because the child node was chosen based on
+    //       matching the first character of the key/prefix.
+    assert!(full > 0, "Prefixes must at least share 1 token");
     (full / block_size) * block_size
 }
 

From a3c9c62dc07a044aeea99b6f80b62a77e3ec384f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?= <me@danieldk.eu>
Date: Fri, 6 Sep 2024 12:47:06 +0200
Subject: [PATCH 23/37] hotfix: add syrupy to the right subproject (#2499)

---
 integration-tests/poetry.lock      | 25 +++++++------------------
 integration-tests/pyproject.toml   |  2 +-
 integration-tests/requirements.txt |  3 +--
 server/poetry.lock                 | 19 -------------------
 server/pyproject.toml              |  1 -
 server/requirements_cuda.txt       |  6 ------
 server/requirements_intel.txt      |  6 ------
 server/requirements_rocm.txt       |  6 ------
 8 files changed, 9 insertions(+), 59 deletions(-)

diff --git a/integration-tests/poetry.lock b/integration-tests/poetry.lock
index 3af99942..8398160e 100644
--- a/integration-tests/poetry.lock
+++ b/integration-tests/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
 
 [[package]]
 name = "aiohttp"
@@ -268,16 +268,6 @@ files = [
     {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
 ]
 
-[[package]]
-name = "colored"
-version = "1.4.4"
-description = "Simple library for color and formatting to terminal"
-optional = false
-python-versions = "*"
-files = [
-    {file = "colored-1.4.4.tar.gz", hash = "sha256:04ff4d4dd514274fe3b99a21bb52fb96f2688c01e93fba7bef37221e7cb56ce0"},
-]
-
 [[package]]
 name = "docker"
 version = "6.1.3"
@@ -855,18 +845,17 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
 
 [[package]]
 name = "syrupy"
-version = "4.0.1"
+version = "4.7.1"
 description = "Pytest Snapshot Test Utility"
 optional = false
-python-versions = ">=3.8.1,<4"
+python-versions = ">=3.8.1"
 files = [
-    {file = "syrupy-4.0.1-py3-none-any.whl", hash = "sha256:53d3107cc5e18a5def189c721879cea2cdafdee34b879f602133ca08837d0e4b"},
-    {file = "syrupy-4.0.1.tar.gz", hash = "sha256:60e3e94782444e0f978cd3b207de32f6da3199b15a2db32eab02f83cebb63ae8"},
+    {file = "syrupy-4.7.1-py3-none-any.whl", hash = "sha256:be002267a512a4bedddfae2e026c93df1ea928ae10baadc09640516923376d41"},
+    {file = "syrupy-4.7.1.tar.gz", hash = "sha256:f9d4485f3f27d0e5df6ed299cac6fa32eb40a441915d988e82be5a4bdda335c8"},
 ]
 
 [package.dependencies]
-colored = ">=1.3.92,<2.0.0"
-pytest = ">=7.0.0,<8.0.0"
+pytest = ">=7.0.0,<9.0.0"
 
 [[package]]
 name = "text-generation"
@@ -1049,4 +1038,4 @@ multidict = ">=4.0"
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.9,<3.13"
-content-hash = "421fbce065cb1499c666599cf0fd83a5ce8fb3bed09e83c16c3a3d6953b34026"
+content-hash = "f5c65e704b02250d73055cd04efcc22f8fc36144eddfc447a71c3a061748db80"
diff --git a/integration-tests/pyproject.toml b/integration-tests/pyproject.toml
index 88e9761a..123c1167 100644
--- a/integration-tests/pyproject.toml
+++ b/integration-tests/pyproject.toml
@@ -7,7 +7,7 @@ authors = ["Nicolas Patry <nicolas@huggingface.co>"]
 [tool.poetry.dependencies]
 pydantic = "> 2, < 3"
 python = ">=3.9,<3.13"
-syrupy = "4.0.1"
+syrupy = "^4.7.1"
 text-generation = "^0.6.0"
 pytest = "^7.4.0"
 pytest-asyncio = "^0.21.1"
diff --git a/integration-tests/requirements.txt b/integration-tests/requirements.txt
index 3c2ce11b..f3f0569b 100644
--- a/integration-tests/requirements.txt
+++ b/integration-tests/requirements.txt
@@ -6,7 +6,6 @@ attrs==23.1.0 ; python_version >= "3.9" and python_version < "3.13"
 certifi==2023.7.22 ; python_version >= "3.9" and python_version < "3.13"
 charset-normalizer==3.2.0 ; python_version >= "3.9" and python_version < "3.13"
 colorama==0.4.6 ; python_version >= "3.9" and python_version < "3.13" and (sys_platform == "win32" or platform_system == "Windows")
-colored==1.4.4 ; python_version >= "3.9" and python_version < "3.13"
 docker==6.1.3 ; python_version >= "3.9" and python_version < "3.13"
 exceptiongroup==1.1.3 ; python_version >= "3.9" and python_version < "3.11"
 filelock==3.12.3 ; python_version >= "3.9" and python_version < "3.13"
@@ -25,7 +24,7 @@ pytest==7.4.0 ; python_version >= "3.9" and python_version < "3.13"
 pywin32==306 ; python_version >= "3.9" and python_version < "3.13" and sys_platform == "win32"
 pyyaml==6.0.1 ; python_version >= "3.9" and python_version < "3.13"
 requests==2.31.0 ; python_version >= "3.9" and python_version < "3.13"
-syrupy==4.0.1 ; python_version >= "3.9" and python_version < "3.13"
+syrupy==4.7.1 ; python_version >= "3.9" and python_version < "3.13"
 text-generation==0.6.1 ; python_version >= "3.9" and python_version < "3.13"
 tomli==2.0.1 ; python_version >= "3.9" and python_version < "3.11"
 tqdm==4.66.1 ; python_version >= "3.9" and python_version < "3.13"
diff --git a/server/poetry.lock b/server/poetry.lock
index 49276807..ce5b8a6c 100644
--- a/server/poetry.lock
+++ b/server/poetry.lock
@@ -2945,20 +2945,6 @@ mpmath = ">=1.1.0,<1.4"
 [package.extras]
 dev = ["hypothesis (>=6.70.0)", "pytest (>=7.1.0)"]
 
-[[package]]
-name = "syrupy"
-version = "4.7.1"
-description = "Pytest Snapshot Test Utility"
-optional = false
-python-versions = ">=3.8.1"
-files = [
-    {file = "syrupy-4.7.1-py3-none-any.whl", hash = "sha256:be002267a512a4bedddfae2e026c93df1ea928ae10baadc09640516923376d41"},
-    {file = "syrupy-4.7.1.tar.gz", hash = "sha256:f9d4485f3f27d0e5df6ed299cac6fa32eb40a441915d988e82be5a4bdda335c8"},
-]
-
-[package.dependencies]
-pytest = ">=7.0.0,<9.0.0"
-
 [[package]]
 name = "texttable"
 version = "1.7.0"
@@ -3251,11 +3237,6 @@ files = [
     {file = "triton-3.0.0-1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:34e509deb77f1c067d8640725ef00c5cbfcb2052a1a3cb6a6d343841f92624eb"},
     {file = "triton-3.0.0-1-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bcbf3b1c48af6a28011a5c40a5b3b9b5330530c3827716b5fbf6d7adcc1e53e9"},
     {file = "triton-3.0.0-1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6e5727202f7078c56f91ff13ad0c1abab14a0e7f2c87e91b12b6f64f3e8ae609"},
-    {file = "triton-3.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39b052da883351fdf6be3d93cedae6db3b8e3988d3b09ed221bccecfa9612230"},
-    {file = "triton-3.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cd34f19a8582af96e6291d4afce25dac08cb2a5d218c599163761e8e0827208e"},
-    {file = "triton-3.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0d5e10de8c011adeb7c878c6ce0dd6073b14367749e34467f1cff2bde1b78253"},
-    {file = "triton-3.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8903767951bf86ec960b4fe4e21bc970055afc65e9d57e916d79ae3c93665e3"},
-    {file = "triton-3.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:41004fb1ae9a53fcb3e970745feb87f0e3c94c6ce1ba86e95fa3b8537894bef7"},
 ]
 
 [package.dependencies]
diff --git a/server/pyproject.toml b/server/pyproject.toml
index 66844402..57deb1b8 100644
--- a/server/pyproject.toml
+++ b/server/pyproject.toml
@@ -47,7 +47,6 @@ marlin-kernels = [
   { url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.2.0/marlin_kernels-0.2.0+cu123torch2.4-cp312-cp312-linux_x86_64.whl", python = "~3.12", optional = true },
 ]
 rich = "^13.7.1"
-syrupy = "^4.7.1"
 
 [tool.poetry.extras]
 torch = ["torch"]
diff --git a/server/requirements_cuda.txt b/server/requirements_cuda.txt
index 71291f7b..eb521bd6 100644
--- a/server/requirements_cuda.txt
+++ b/server/requirements_cuda.txt
@@ -4,7 +4,6 @@ click==8.1.7 ; python_version >= "3.9" and python_version < "3.13"
 colorama==0.4.6 ; python_version >= "3.9" and python_version < "3.13" and (sys_platform == "win32" or platform_system == "Windows")
 deprecated==1.2.14 ; python_version >= "3.9" and python_version < "3.13"
 einops==0.6.1 ; python_version >= "3.9" and python_version < "3.13"
-exceptiongroup==1.2.2 ; python_version >= "3.9" and python_version < "3.11"
 filelock==3.15.4 ; python_version >= "3.9" and python_version < "3.13"
 fsspec==2024.5.0 ; python_version >= "3.9" and python_version < "3.13"
 googleapis-common-protos==1.63.2 ; python_version >= "3.9" and python_version < "3.13"
@@ -16,7 +15,6 @@ hf-transfer==0.1.8 ; python_version >= "3.9" and python_version < "3.13"
 huggingface-hub==0.23.5 ; python_version >= "3.9" and python_version < "3.13"
 idna==3.7 ; python_version >= "3.9" and python_version < "3.13"
 importlib-metadata==7.1.0 ; python_version >= "3.9" and python_version < "3.13"
-iniconfig==2.0.0 ; python_version >= "3.9" and python_version < "3.13"
 loguru==0.6.0 ; python_version >= "3.9" and python_version < "3.13"
 markdown-it-py==3.0.0 ; python_version >= "3.9" and python_version < "3.13"
 mdurl==0.1.2 ; python_version >= "3.9" and python_version < "3.13"
@@ -33,12 +31,10 @@ opentelemetry-sdk==1.25.0 ; python_version >= "3.9" and python_version < "3.13"
 opentelemetry-semantic-conventions==0.46b0 ; python_version >= "3.9" and python_version < "3.13"
 packaging==24.1 ; python_version >= "3.9" and python_version < "3.13"
 pillow==10.4.0 ; python_version >= "3.9" and python_version < "3.13"
-pluggy==1.5.0 ; python_version >= "3.9" and python_version < "3.13"
 prometheus-client==0.20.0 ; python_version >= "3.9" and python_version < "3.13"
 protobuf==4.25.3 ; python_version >= "3.9" and python_version < "3.13"
 py-cpuinfo==9.0.0 ; python_version >= "3.9" and python_version < "3.13"
 pygments==2.18.0 ; python_version >= "3.9" and python_version < "3.13"
-pytest==7.4.4 ; python_version >= "3.9" and python_version < "3.13"
 pyyaml==6.0.1 ; python_version >= "3.9" and python_version < "3.13"
 regex==2024.5.15 ; python_version >= "3.9" and python_version < "3.13"
 requests==2.32.3 ; python_version >= "3.9" and python_version < "3.13"
@@ -47,9 +43,7 @@ safetensors==0.4.3 ; python_version >= "3.9" and python_version < "3.13"
 scipy==1.13.1 ; python_version >= "3.9" and python_version < "3.13"
 sentencepiece==0.1.99 ; python_version >= "3.9" and python_version < "3.13"
 setuptools==71.1.0 ; python_version >= "3.9" and python_version < "3.13"
-syrupy==4.7.1 ; python_version >= "3.9" and python_version < "3.13"
 tokenizers==0.19.1 ; python_version >= "3.9" and python_version < "3.13"
-tomli==2.0.1 ; python_version >= "3.9" and python_version < "3.11"
 tqdm==4.66.4 ; python_version >= "3.9" and python_version < "3.13"
 transformers==4.43.1 ; python_version >= "3.9" and python_version < "3.13"
 typer==0.6.1 ; python_version >= "3.9" and python_version < "3.13"
diff --git a/server/requirements_intel.txt b/server/requirements_intel.txt
index 71291f7b..eb521bd6 100644
--- a/server/requirements_intel.txt
+++ b/server/requirements_intel.txt
@@ -4,7 +4,6 @@ click==8.1.7 ; python_version >= "3.9" and python_version < "3.13"
 colorama==0.4.6 ; python_version >= "3.9" and python_version < "3.13" and (sys_platform == "win32" or platform_system == "Windows")
 deprecated==1.2.14 ; python_version >= "3.9" and python_version < "3.13"
 einops==0.6.1 ; python_version >= "3.9" and python_version < "3.13"
-exceptiongroup==1.2.2 ; python_version >= "3.9" and python_version < "3.11"
 filelock==3.15.4 ; python_version >= "3.9" and python_version < "3.13"
 fsspec==2024.5.0 ; python_version >= "3.9" and python_version < "3.13"
 googleapis-common-protos==1.63.2 ; python_version >= "3.9" and python_version < "3.13"
@@ -16,7 +15,6 @@ hf-transfer==0.1.8 ; python_version >= "3.9" and python_version < "3.13"
 huggingface-hub==0.23.5 ; python_version >= "3.9" and python_version < "3.13"
 idna==3.7 ; python_version >= "3.9" and python_version < "3.13"
 importlib-metadata==7.1.0 ; python_version >= "3.9" and python_version < "3.13"
-iniconfig==2.0.0 ; python_version >= "3.9" and python_version < "3.13"
 loguru==0.6.0 ; python_version >= "3.9" and python_version < "3.13"
 markdown-it-py==3.0.0 ; python_version >= "3.9" and python_version < "3.13"
 mdurl==0.1.2 ; python_version >= "3.9" and python_version < "3.13"
@@ -33,12 +31,10 @@ opentelemetry-sdk==1.25.0 ; python_version >= "3.9" and python_version < "3.13"
 opentelemetry-semantic-conventions==0.46b0 ; python_version >= "3.9" and python_version < "3.13"
 packaging==24.1 ; python_version >= "3.9" and python_version < "3.13"
 pillow==10.4.0 ; python_version >= "3.9" and python_version < "3.13"
-pluggy==1.5.0 ; python_version >= "3.9" and python_version < "3.13"
 prometheus-client==0.20.0 ; python_version >= "3.9" and python_version < "3.13"
 protobuf==4.25.3 ; python_version >= "3.9" and python_version < "3.13"
 py-cpuinfo==9.0.0 ; python_version >= "3.9" and python_version < "3.13"
 pygments==2.18.0 ; python_version >= "3.9" and python_version < "3.13"
-pytest==7.4.4 ; python_version >= "3.9" and python_version < "3.13"
 pyyaml==6.0.1 ; python_version >= "3.9" and python_version < "3.13"
 regex==2024.5.15 ; python_version >= "3.9" and python_version < "3.13"
 requests==2.32.3 ; python_version >= "3.9" and python_version < "3.13"
@@ -47,9 +43,7 @@ safetensors==0.4.3 ; python_version >= "3.9" and python_version < "3.13"
 scipy==1.13.1 ; python_version >= "3.9" and python_version < "3.13"
 sentencepiece==0.1.99 ; python_version >= "3.9" and python_version < "3.13"
 setuptools==71.1.0 ; python_version >= "3.9" and python_version < "3.13"
-syrupy==4.7.1 ; python_version >= "3.9" and python_version < "3.13"
 tokenizers==0.19.1 ; python_version >= "3.9" and python_version < "3.13"
-tomli==2.0.1 ; python_version >= "3.9" and python_version < "3.11"
 tqdm==4.66.4 ; python_version >= "3.9" and python_version < "3.13"
 transformers==4.43.1 ; python_version >= "3.9" and python_version < "3.13"
 typer==0.6.1 ; python_version >= "3.9" and python_version < "3.13"
diff --git a/server/requirements_rocm.txt b/server/requirements_rocm.txt
index 71291f7b..eb521bd6 100644
--- a/server/requirements_rocm.txt
+++ b/server/requirements_rocm.txt
@@ -4,7 +4,6 @@ click==8.1.7 ; python_version >= "3.9" and python_version < "3.13"
 colorama==0.4.6 ; python_version >= "3.9" and python_version < "3.13" and (sys_platform == "win32" or platform_system == "Windows")
 deprecated==1.2.14 ; python_version >= "3.9" and python_version < "3.13"
 einops==0.6.1 ; python_version >= "3.9" and python_version < "3.13"
-exceptiongroup==1.2.2 ; python_version >= "3.9" and python_version < "3.11"
 filelock==3.15.4 ; python_version >= "3.9" and python_version < "3.13"
 fsspec==2024.5.0 ; python_version >= "3.9" and python_version < "3.13"
 googleapis-common-protos==1.63.2 ; python_version >= "3.9" and python_version < "3.13"
@@ -16,7 +15,6 @@ hf-transfer==0.1.8 ; python_version >= "3.9" and python_version < "3.13"
 huggingface-hub==0.23.5 ; python_version >= "3.9" and python_version < "3.13"
 idna==3.7 ; python_version >= "3.9" and python_version < "3.13"
 importlib-metadata==7.1.0 ; python_version >= "3.9" and python_version < "3.13"
-iniconfig==2.0.0 ; python_version >= "3.9" and python_version < "3.13"
 loguru==0.6.0 ; python_version >= "3.9" and python_version < "3.13"
 markdown-it-py==3.0.0 ; python_version >= "3.9" and python_version < "3.13"
 mdurl==0.1.2 ; python_version >= "3.9" and python_version < "3.13"
@@ -33,12 +31,10 @@ opentelemetry-sdk==1.25.0 ; python_version >= "3.9" and python_version < "3.13"
 opentelemetry-semantic-conventions==0.46b0 ; python_version >= "3.9" and python_version < "3.13"
 packaging==24.1 ; python_version >= "3.9" and python_version < "3.13"
 pillow==10.4.0 ; python_version >= "3.9" and python_version < "3.13"
-pluggy==1.5.0 ; python_version >= "3.9" and python_version < "3.13"
 prometheus-client==0.20.0 ; python_version >= "3.9" and python_version < "3.13"
 protobuf==4.25.3 ; python_version >= "3.9" and python_version < "3.13"
 py-cpuinfo==9.0.0 ; python_version >= "3.9" and python_version < "3.13"
 pygments==2.18.0 ; python_version >= "3.9" and python_version < "3.13"
-pytest==7.4.4 ; python_version >= "3.9" and python_version < "3.13"
 pyyaml==6.0.1 ; python_version >= "3.9" and python_version < "3.13"
 regex==2024.5.15 ; python_version >= "3.9" and python_version < "3.13"
 requests==2.32.3 ; python_version >= "3.9" and python_version < "3.13"
@@ -47,9 +43,7 @@ safetensors==0.4.3 ; python_version >= "3.9" and python_version < "3.13"
 scipy==1.13.1 ; python_version >= "3.9" and python_version < "3.13"
 sentencepiece==0.1.99 ; python_version >= "3.9" and python_version < "3.13"
 setuptools==71.1.0 ; python_version >= "3.9" and python_version < "3.13"
-syrupy==4.7.1 ; python_version >= "3.9" and python_version < "3.13"
 tokenizers==0.19.1 ; python_version >= "3.9" and python_version < "3.13"
-tomli==2.0.1 ; python_version >= "3.9" and python_version < "3.11"
 tqdm==4.66.4 ; python_version >= "3.9" and python_version < "3.13"
 transformers==4.43.1 ; python_version >= "3.9" and python_version < "3.13"
 typer==0.6.1 ; python_version >= "3.9" and python_version < "3.13"

From aaea212d0f53929cd3775af3eaf06f4af0a868a5 Mon Sep 17 00:00:00 2001
From: Martin Iglesias Goyanes <martiniglesiasgo@gmail.com>
Date: Fri, 6 Sep 2024 17:00:54 +0200
Subject: [PATCH 24/37] Add links to Adyen blogpost (#2500)

* Add links to Adyen blogpost

* Adding to toctree.

* Update external.md

* Update _toctree.yml

---------

Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>
---
 README.md                           | 2 +-
 docs/source/_toctree.yml            | 2 ++
 docs/source/conceptual/external.md  | 4 ++++
 docs/source/conceptual/streaming.md | 4 ----
 4 files changed, 7 insertions(+), 5 deletions(-)
 create mode 100644 docs/source/conceptual/external.md

diff --git a/README.md b/README.md
index cf6a30db..cc9d523f 100644
--- a/README.md
+++ b/README.md
@@ -189,7 +189,7 @@ overridden with the `--otlp-service-name` argument
 
 ![TGI architecture](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/TGI.png)
 
-Detailed blogpost by Adyen on TGI inner workings: [LLM inference at scale with TGI](https://www.adyen.com/knowledge-hub/llm-inference-at-scale-with-tgi)
+Detailed blogpost by Adyen on TGI inner workings: [LLM inference at scale with TGI (Martin Iglesias Goyanes - Adyen, 2024)](https://www.adyen.com/knowledge-hub/llm-inference-at-scale-with-tgi)
 
 ### Local install
 
diff --git a/docs/source/_toctree.yml b/docs/source/_toctree.yml
index f52fa2ec..b883b36d 100644
--- a/docs/source/_toctree.yml
+++ b/docs/source/_toctree.yml
@@ -71,6 +71,8 @@
     title: How Guidance Works (via outlines)
   - local: conceptual/lora
     title: LoRA (Low-Rank Adaptation)
+  - local: conceptual/external
+    title: External Resources
 
 
   title: Conceptual Guides
diff --git a/docs/source/conceptual/external.md b/docs/source/conceptual/external.md
new file mode 100644
index 00000000..9cbe1b5a
--- /dev/null
+++ b/docs/source/conceptual/external.md
@@ -0,0 +1,4 @@
+# External Resources
+
+- Adyen wrote a detailed article about the interplay between TGI's main components: router and server.
+[LLM inference at scale with TGI (Martin Iglesias Goyanes - Adyen, 2024)](https://www.adyen.com/knowledge-hub/llm-inference-at-scale-with-tgi)
diff --git a/docs/source/conceptual/streaming.md b/docs/source/conceptual/streaming.md
index f1f37f2a..b8154ba4 100644
--- a/docs/source/conceptual/streaming.md
+++ b/docs/source/conceptual/streaming.md
@@ -155,7 +155,3 @@ SSEs are different than:
 * Webhooks: where there is a bi-directional connection. The server can send information to the client, but the client can also send data to the server after the first request. Webhooks are more complex to operate as they don’t only use HTTP.
 
 If there are too many requests at the same time, TGI returns an HTTP Error with an `overloaded` error type (`huggingface_hub` returns `OverloadedError`). This allows the client to manage the overloaded server (e.g., it could display a busy error to the user or retry with a new request). To configure the maximum number of concurrent requests, you can specify `--max_concurrent_requests`, allowing clients to handle backpressure.
-
-## External sources
-
-Adyen wrote a nice recap of how TGI streaming feature works. [LLM inference at scale with TGI](https://www.adyen.com/knowledge-hub/llm-inference-at-scale-with-tgi)

From c1fe28d694757a6a90426a83006292dc76512f66 Mon Sep 17 00:00:00 2001
From: Nicolas Patry <patry.nicolas@protonmail.com>
Date: Fri, 6 Sep 2024 17:35:49 +0200
Subject: [PATCH 25/37] Fixing more correctly the invalid drop of the batch.
 (#2498)

---
 backends/v3/src/backend.rs |   5 +-
 backends/v3/src/queue.rs   | 101 ++++++++++++++++++++-----------------
 backends/v3/src/radix.rs   |   2 +
 3 files changed, 58 insertions(+), 50 deletions(-)

diff --git a/backends/v3/src/backend.rs b/backends/v3/src/backend.rs
index a47e62dc..935f7980 100644
--- a/backends/v3/src/backend.rs
+++ b/backends/v3/src/backend.rs
@@ -122,7 +122,7 @@ impl Backend for BackendV3 {
 #[allow(clippy::too_many_arguments)]
 pub(crate) async fn batching_task(
     mut client: ShardedClient,
-    _waiting_served_ratio: f32,
+    waiting_served_ratio: f32,
     max_batch_prefill_tokens: u32,
     max_batch_total_tokens: u32,
     max_waiting_tokens: usize,
@@ -170,8 +170,7 @@ pub(crate) async fn batching_task(
                     // Minimum batch size
                     // TODO: temporarily disable to avoid incorrect deallocation +
                     //       reallocation when using prefix caching.
-                    // Some((batch_size as f32 * waiting_served_ratio).floor() as usize)
-                    None
+                    Some((batch_size as f32 * waiting_served_ratio).floor() as usize)
                 };
 
                 let token_budget = max_batch_total_tokens.saturating_sub(batch_max_tokens);
diff --git a/backends/v3/src/queue.rs b/backends/v3/src/queue.rs
index 2a8c4c53..978a495c 100644
--- a/backends/v3/src/queue.rs
+++ b/backends/v3/src/queue.rs
@@ -252,17 +252,14 @@ impl State {
         let next_batch_span = info_span!(parent: None, "batch", batch_size = tracing::field::Empty);
         next_batch_span.follows_from(Span::current());
 
-        let mut batch_requests = Vec::with_capacity(self.entries.len());
-        let mut batch_entries =
-            IntMap::with_capacity_and_hasher(self.entries.len(), BuildNoHashHasher::default());
-
+        let mut batch = Vec::with_capacity(self.entries.len());
         let mut max_input_length = 0;
         let mut prefill_tokens: u32 = 0;
         let mut decode_tokens: u32 = 0;
         let mut max_blocks = 0;
 
         // Pop entries starting from the front of the queue
-        'entry_loop: while let Some((id, mut entry)) = self.entries.pop_front() {
+        'entry_loop: while let Some((id, entry)) = self.entries.pop_front() {
             // Filter entries where the response receiver was dropped (== entries where the request
             // was dropped by the client)
             if entry.response_tx.is_closed() {
@@ -276,7 +273,7 @@ impl State {
                     // We pad to max input length in the Python shards
                     // We need to take these padding tokens into the equation
                     max_input_length = max_input_length.max(entry.request.input_length);
-                    prefill_tokens = (batch_requests.len() + 1) as u32 * max_input_length;
+                    prefill_tokens = (batch.len() + 1) as u32 * max_input_length;
 
                     decode_tokens += entry.request.stopping_parameters.max_new_tokens;
                     let total_tokens = prefill_tokens + decode_tokens + self.speculate;
@@ -290,7 +287,7 @@ impl State {
                     }
                     None
                 }
-                Some(block_allocator) => {
+                Some(_block_allocator) => {
                     prefill_tokens += entry.request.input_length;
                     let max_new_tokens = match self.window_size {
                         None => entry.request.stopping_parameters.max_new_tokens,
@@ -324,23 +321,59 @@ impl State {
                         entry.request.input_ids.clone()
                     };
 
-                    match block_allocator.allocate(tokens, input_ids).await {
-                        None => {
-                            // Entry is over budget
-                            // Add it back to the front
-                            tracing::debug!("Over budget: not enough free blocks");
-                            self.entries.push_front((id, entry));
-                            break 'entry_loop;
-                        }
-                        Some(block_allocation) => {
-                            tracing::debug!("Allocation: {block_allocation:?}");
-                            max_blocks = max(max_blocks, block_allocation.blocks.len() as u32);
-                            Some(block_allocation)
-                        }
-                    }
+                    Some((tokens, input_ids))
                 }
             };
+            batch.push((id, entry, block_allocation));
+            if Some(batch.len()) == max_size {
+                break;
+            }
+        }
 
+        // Empty batch
+        if batch.is_empty() {
+            tracing::debug!("Filterered out all entries");
+            return None;
+        }
+
+        // XXX We haven't allocated yet, so we're allowed to ditch the results.
+        // Check if our batch is big enough
+        if let Some(min_size) = min_size {
+            // Batch is too small
+            if batch.len() < min_size {
+                // Add back entries to the queue in the correct order
+                for (id, entry, _) in batch.into_iter().rev() {
+                    self.entries.push_front((id, entry));
+                }
+                return None;
+            }
+        }
+
+        let mut batch_requests = Vec::with_capacity(self.entries.len());
+        let mut batch_entries =
+            IntMap::with_capacity_and_hasher(self.entries.len(), BuildNoHashHasher::default());
+
+        for (id, mut entry, block_allocation) in batch {
+            let block_allocation = if let (Some((tokens, input_ids)), Some(block_allocator)) =
+                (block_allocation, &self.block_allocator)
+            {
+                match block_allocator.allocate(tokens, input_ids).await {
+                    None => {
+                        // Entry is over budget
+                        // Add it back to the front
+                        tracing::debug!("Over budget: not enough free blocks");
+                        self.entries.push_front((id, entry));
+                        break;
+                    }
+                    Some(block_allocation) => {
+                        tracing::debug!("Allocation: {block_allocation:?}");
+                        max_blocks = max(max_blocks, block_allocation.blocks.len() as u32);
+                        Some(block_allocation)
+                    }
+                }
+            } else {
+                None
+            };
             tracing::debug!("Accepting entry");
             // Create a new span to link the batch back to this entry
             let entry_batch_span = info_span!(parent: &entry.span, "infer");
@@ -400,32 +433,6 @@ impl State {
             entry.batch_time = Some(Instant::now());
             // Insert in batch_entries IntMap
             batch_entries.insert(id, entry);
-
-            // Check if max_size
-            if Some(batch_requests.len()) == max_size {
-                break;
-            }
-        }
-
-        // Empty batch
-        if batch_requests.is_empty() {
-            tracing::debug!("Filterered out all entries");
-            return None;
-        }
-
-        // Check if our batch is big enough
-        if let Some(min_size) = min_size {
-            // Batch is too small
-            if batch_requests.len() < min_size {
-                // Add back entries to the queue in the correct order
-                for r in batch_requests.into_iter().rev() {
-                    let id = r.id;
-                    let entry = batch_entries.remove(&id).unwrap();
-                    self.entries.push_front((id, entry));
-                }
-
-                return None;
-            }
         }
 
         // Final batch size
diff --git a/backends/v3/src/radix.rs b/backends/v3/src/radix.rs
index bb6582b0..1f3bef15 100644
--- a/backends/v3/src/radix.rs
+++ b/backends/v3/src/radix.rs
@@ -89,6 +89,8 @@ impl Allocator for RadixAllocator {
 
         let suffix_blocks = (suffix_len + self.block_size - 1) / self.block_size;
 
+        tracing::info!("Prefix {prefix_len} - Suffix {suffix_len}");
+
         match self.alloc_or_reclaim(suffix_blocks as usize) {
             Some(suffix_blocks) => blocks.extend(suffix_blocks),
             None => {

From eabbbbda2340d6cab12040ff54481f3b7d633ead Mon Sep 17 00:00:00 2001
From: Vallepu Vamsi Krishna <vallepu670@gmail.com>
Date: Sat, 7 Sep 2024 16:49:43 +0530
Subject: [PATCH 26/37] Add Directory Check to Prevent Redundant Cloning in
 Build Process (#2486)

Update Makefile-fbgemm

Added Directory check for FBGEMM repository cloning.
---
 server/Makefile-fbgemm | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/server/Makefile-fbgemm b/server/Makefile-fbgemm
index 5f3c0eaa..3b8061a1 100644
--- a/server/Makefile-fbgemm
+++ b/server/Makefile-fbgemm
@@ -1,7 +1,9 @@
 fbgemm_commit := v0.8.0
 
 build-fbgemm:
-	git clone https://github.com/pytorch/FBGEMM.git fbgemm && \
+	@if [ ! -d "fbgemm" ]; then \
+		git clone https://github.com/pytorch/FBGEMM.git fbgemm; \
+	fi
 	cd fbgemm && git fetch && git checkout $(fbgemm_commit)  && \
 	git submodule update --init --recursive && \
 	cd fbgemm_gpu && \

From a4e3e8c608d1e75b85e849cce931be551bb859ad Mon Sep 17 00:00:00 2001
From: Nicolas Patry <patry.nicolas@protonmail.com>
Date: Wed, 11 Sep 2024 18:10:40 +0200
Subject: [PATCH 27/37] Prefix test - Different kind of load test to trigger
 prefix test bugs. (#2490)

* Adding prefix test.

* [WIP] tmp dump of integration load tests.

* Remove other tensor creation.

* Fixed the radix tree.

Used a slice everywhere in radix.rs to keep the cheap Arc cloning
instead of recomputing the input_ids.

* Fix parsing

* Is it really flashinfer version ?

* Remove some comments.

* Revert the max prefix hit.

* Adding numpy to diff.

* Upgraded flashinfer.

* Upgrading some stuff.

* Are we done yet ?

* Minor fixup

* Remove 1 log and put back the other.

* Add comment for why slot 0 is OK.

* Mounting on the job.

* Get me a debug branch

* Debugging CIs is fun.

* Attempt #28

* wip

* Tmate.

* Praying.

* Updating VLM causal model with updated context.

* Important line got squashed.

* Tmate again.

* Fingers crossed.

* We want only 1 run of integration tests.....

---------

Co-authored-by: Guillaume LEGENDRE <glegendre01@gmail.com>
---
 backends/v3/src/backend.rs                    |    3 +-
 backends/v3/src/radix.rs                      |    2 -
 flake.lock                                    |   58 +-
 integration-tests/conftest.py                 |   68 +-
 ...t_flash_llama_completion_many_prompts.json |   30 +-
 ..._llama_completion_many_prompts_stream.json |  616 ++--
 ..._flash_llama_completion_single_prompt.json |   12 +-
 .../test_flash_llama_load.json                | 2576 +++++++++++++++++
 .../models/test_completion_prompts.py         |   46 +-
 .../models/test_flash_llama_prefix.py         |  229 ++
 integration-tests/poetry.lock                 | 1379 +++++----
 integration-tests/pyproject.toml              |    5 +-
 integration-tests/requirements.txt            |   69 +-
 launcher/src/main.rs                          |    3 +-
 router/src/infer/mod.rs                       |    3 +
 router/src/server.rs                          |    8 +-
 server/Makefile-flashinfer                    |    2 +-
 .../models/flash_causal_lm.py                 |   79 +-
 .../models/vlm_causal_lm.py                   |    2 -
 19 files changed, 4113 insertions(+), 1077 deletions(-)
 create mode 100644 integration-tests/models/__snapshots__/test_flash_llama_prefix/test_flash_llama_load.json
 create mode 100644 integration-tests/models/test_flash_llama_prefix.py

diff --git a/backends/v3/src/backend.rs b/backends/v3/src/backend.rs
index 935f7980..f8a10ca2 100644
--- a/backends/v3/src/backend.rs
+++ b/backends/v3/src/backend.rs
@@ -376,10 +376,9 @@ fn filter_send_generations(generations: Vec<Generation>, entries: &mut IntMap<u6
         // Send generation responses back to the infer task
         // If the receive an error from the Flume channel, it means that the client dropped the
         // request and we need to stop generating hence why we unwrap_or(true)
-        let stopped = send_responses(generation, entry).map_err(|err| {
+        let stopped = send_responses(generation, entry).inspect_err(|_err| {
             tracing::error!("Entry response channel error.");
             metrics::counter!("tgi_request_failure", "err" => "dropped").increment(1);
-            err
         }).unwrap_or(true);
         if stopped {
             entries.remove(&id).expect("ID not found in entries. This is a bug.");
diff --git a/backends/v3/src/radix.rs b/backends/v3/src/radix.rs
index 1f3bef15..9b117456 100644
--- a/backends/v3/src/radix.rs
+++ b/backends/v3/src/radix.rs
@@ -123,8 +123,6 @@ impl Allocator for RadixAllocator {
             prefill_tokens: prefill_tokens.clone(),
         };
 
-        tracing::debug!("Blocks {blocks:?}");
-
         self.allocation_id += 1;
         self.allocations.insert(self.allocation_id, allocation);
 
diff --git a/flake.lock b/flake.lock
index c5b3b1ff..3a515eab 100644
--- a/flake.lock
+++ b/flake.lock
@@ -492,6 +492,24 @@
         "type": "github"
       }
     },
+    "flake-utils_7": {
+      "inputs": {
+        "systems": "systems_7"
+      },
+      "locked": {
+        "lastModified": 1710146030,
+        "narHash": "sha256-SZ5L6eA7HJ/nmkzGG7/ISclqe6oZdOZTNoesiInkXPQ=",
+        "owner": "numtide",
+        "repo": "flake-utils",
+        "rev": "b1d9ab70662946ef0850d488da1c9019f3a9752a",
+        "type": "github"
+      },
+      "original": {
+        "owner": "numtide",
+        "repo": "flake-utils",
+        "type": "github"
+      }
+    },
     "gitignore": {
       "inputs": {
         "nixpkgs": [
@@ -700,16 +718,16 @@
     },
     "nixpkgs_6": {
       "locked": {
-        "lastModified": 1723912943,
-        "narHash": "sha256-39F9GzyhxYcY3wTeKuEFWRJWcrGBosO4nf4xzMTWZX8=",
-        "owner": "danieldk",
+        "lastModified": 1724915739,
+        "narHash": "sha256-7PgRge4mn5akFvhPwefuaLQGbF5BnmxlwZJEf7CgbrE=",
+        "owner": "nixos",
         "repo": "nixpkgs",
-        "rev": "b82cdca86dbb30013b76c4b55d48806476820a5c",
+        "rev": "85be051bb60943d3328d91aaf2598798f87e19af",
         "type": "github"
       },
       "original": {
-        "owner": "danieldk",
-        "ref": "cuda-12.4",
+        "owner": "nixos",
+        "ref": "nixos-unstable-small",
         "repo": "nixpkgs",
         "type": "github"
       }
@@ -835,11 +853,11 @@
         ]
       },
       "locked": {
-        "lastModified": 1724638882,
-        "narHash": "sha256-ap2jIQi/FuUHR6HCht6ASWhoz8EiB99XmI8Esot38VE=",
+        "lastModified": 1725848835,
+        "narHash": "sha256-u4lCr+tOEWhsFiww5G04U5jUNzaQJi0/ZMIDGiLeT14=",
         "owner": "oxalica",
         "repo": "rust-overlay",
-        "rev": "19b70f147b9c67a759e35824b241f1ed92e46694",
+        "rev": "2ef910a6276a2f34513d18f2f826a8dea72c3b3f",
         "type": "github"
       },
       "original": {
@@ -938,17 +956,33 @@
         "type": "github"
       }
     },
+    "systems_7": {
+      "locked": {
+        "lastModified": 1681028828,
+        "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
+        "owner": "nix-systems",
+        "repo": "default",
+        "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
+        "type": "github"
+      },
+      "original": {
+        "owner": "nix-systems",
+        "repo": "default",
+        "type": "github"
+      }
+    },
     "tgi-nix": {
       "inputs": {
         "flake-compat": "flake-compat_4",
+        "flake-utils": "flake-utils_7",
         "nixpkgs": "nixpkgs_6"
       },
       "locked": {
-        "lastModified": 1725011596,
-        "narHash": "sha256-zfq8lOXFgJnKxxsqSelHuKUvhxgH3cEmLoAgsOO62Cg=",
+        "lastModified": 1725868835,
+        "narHash": "sha256-6OFEaFFRCG/JKkU6kHV08EPEGM1MCuKZ70NlGJcL/JY=",
         "owner": "danieldk",
         "repo": "tgi-nix",
-        "rev": "717c2b07e38538abf05237cca65b2d1363c2c9af",
+        "rev": "87afbe21e2d2cc17e177c9965a64ba68ad7c22da",
         "type": "github"
       },
       "original": {
diff --git a/integration-tests/conftest.py b/integration-tests/conftest.py
index f58f5fdf..9632c816 100644
--- a/integration-tests/conftest.py
+++ b/integration-tests/conftest.py
@@ -19,6 +19,7 @@ from syrupy.extensions.json import JSONSnapshotExtension
 from text_generation import AsyncClient
 from text_generation.types import (
     BestOfSequence,
+    Message,
     ChatComplete,
     ChatCompletionChunk,
     ChatCompletionComplete,
@@ -97,25 +98,25 @@ class ResponseComparator(JSONSnapshotExtension):
     ) -> bool:
         def convert_data(data):
             data = json.loads(data)
-            if isinstance(data, Dict) and "choices" in data:
-                choices = data["choices"]
-                if isinstance(choices, List) and len(choices) >= 1:
-                    if "delta" in choices[0]:
-                        return ChatCompletionChunk(**data)
-                    if "text" in choices[0]:
-                        return Completion(**data)
-                return ChatComplete(**data)
+            return _convert_data(data)
 
+        def _convert_data(data):
             if isinstance(data, Dict):
-                return Response(**data)
+                if "choices" in data:
+                    data["choices"] = list(
+                        sorted(data["choices"], key=lambda x: x["index"])
+                    )
+                    choices = data["choices"]
+                    if isinstance(choices, List) and len(choices) >= 1:
+                        if "delta" in choices[0]:
+                            return ChatCompletionChunk(**data)
+                        if "text" in choices[0]:
+                            return Completion(**data)
+                    return ChatComplete(**data)
+                else:
+                    return Response(**data)
             if isinstance(data, List):
-                if (
-                    len(data) > 0
-                    and "object" in data[0]
-                    and data[0]["object"] == "text_completion"
-                ):
-                    return [Completion(**d) for d in data]
-                return [Response(**d) for d in data]
+                return [_convert_data(d) for d in data]
             raise NotImplementedError
 
         def eq_token(token: Token, other: Token) -> bool:
@@ -571,3 +572,38 @@ def generate_load():
         return await asyncio.gather(*futures)
 
     return generate_load_inner
+
+
+@pytest.fixture(scope="module")
+def generate_multi():
+    async def generate_load_inner(
+        client: AsyncClient,
+        prompts: List[str],
+        max_new_tokens: int,
+        seed: Optional[int] = None,
+    ) -> List[Response]:
+
+        import numpy as np
+
+        arange = np.arange(len(prompts))
+        perm = np.random.permutation(arange)
+        rperm = [-1] * len(perm)
+        for i, p in enumerate(perm):
+            rperm[p] = i
+
+        shuffled_prompts = [prompts[p] for p in perm]
+        futures = [
+            client.chat(
+                messages=[Message(role="user", content=prompt)],
+                max_tokens=max_new_tokens,
+                temperature=0,
+                seed=seed,
+            )
+            for prompt in shuffled_prompts
+        ]
+
+        shuffled_responses = await asyncio.gather(*futures)
+        responses = [shuffled_responses[p] for p in rperm]
+        return responses
+
+    return generate_load_inner
diff --git a/integration-tests/models/__snapshots__/test_completion_prompts/test_flash_llama_completion_many_prompts.json b/integration-tests/models/__snapshots__/test_completion_prompts/test_flash_llama_completion_many_prompts.json
index 9f3faffc..25b8120d 100644
--- a/integration-tests/models/__snapshots__/test_completion_prompts/test_flash_llama_completion_many_prompts.json
+++ b/integration-tests/models/__snapshots__/test_completion_prompts/test_flash_llama_completion_many_prompts.json
@@ -1,38 +1,38 @@
 {
   "choices": [
     {
-      "finish_reason": "stop",
+      "finish_reason": "length",
+      "index": 0,
+      "logprobs": null,
+      "text": " A Beginner’s Guide\nDeep learning is a subset"
+    },
+    {
+      "finish_reason": "length",
       "index": 1,
       "logprobs": null,
-      "text": " PR for more information?"
+      "text": " This is a question that has puzzled many people for"
     },
     {
       "finish_reason": "length",
       "index": 3,
       "logprobs": null,
-      "text": "hd20220811-"
-    },
-    {
-      "finish_reason": "length",
-      "index": 0,
-      "logprobs": null,
-      "text": "le Business Incubator is providing a workspace"
+      "text": "usculas_minusculas(s):\n    \"\"\"\n"
     },
     {
       "finish_reason": "length",
       "index": 2,
       "logprobs": null,
-      "text": " severely flawed and often has a substandard"
+      "text": " Paris\nWhat is the capital of France?\nThe"
     }
   ],
-  "created": 1722014725,
+  "created": 1725877154,
   "id": "",
-  "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+  "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
   "object": "text_completion",
   "system_fingerprint": "2.2.1-dev0-native",
   "usage": {
-    "completion_tokens": 36,
-    "prompt_tokens": 8,
-    "total_tokens": 44
+    "completion_tokens": 40,
+    "prompt_tokens": 22,
+    "total_tokens": 62
   }
 }
diff --git a/integration-tests/models/__snapshots__/test_completion_prompts/test_flash_llama_completion_many_prompts_stream.json b/integration-tests/models/__snapshots__/test_completion_prompts/test_flash_llama_completion_many_prompts_stream.json
index e7fb5740..dd22ceae 100644
--- a/integration-tests/models/__snapshots__/test_completion_prompts/test_flash_llama_completion_many_prompts_stream.json
+++ b/integration-tests/models/__snapshots__/test_completion_prompts/test_flash_llama_completion_many_prompts_stream.json
@@ -5,12 +5,12 @@
         "finish_reason": "",
         "index": 0,
         "logprobs": null,
-        "text": "\n"
+        "text": " A"
       }
     ],
-    "created": 1724833943,
+    "created": 1725883643,
     "id": "",
-    "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "text_completion",
     "system_fingerprint": "2.2.1-dev0-native"
   },
@@ -20,12 +20,72 @@
         "finish_reason": "",
         "index": 1,
         "logprobs": null,
-        "text": "\n"
+        "text": " This"
       }
     ],
-    "created": 1724833943,
+    "created": 1725883643,
     "id": "",
-    "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "text_completion",
+    "system_fingerprint": "2.2.1-dev0-native"
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "",
+        "index": 2,
+        "logprobs": null,
+        "text": " Paris"
+      }
+    ],
+    "created": 1725883643,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "text_completion",
+    "system_fingerprint": "2.2.1-dev0-native"
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "",
+        "index": 3,
+        "logprobs": null,
+        "text": "us"
+      }
+    ],
+    "created": 1725883643,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "text_completion",
+    "system_fingerprint": "2.2.1-dev0-native"
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "",
+        "index": 0,
+        "logprobs": null,
+        "text": " Beginner"
+      }
+    ],
+    "created": 1725883643,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "text_completion",
+    "system_fingerprint": "2.2.1-dev0-native"
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "",
+        "index": 1,
+        "logprobs": null,
+        "text": " is"
+      }
+    ],
+    "created": 1725883643,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "text_completion",
     "system_fingerprint": "2.2.1-dev0-native"
   },
@@ -38,9 +98,9 @@
         "text": "\n"
       }
     ],
-    "created": 1724833943,
+    "created": 1725883643,
     "id": "",
-    "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "text_completion",
     "system_fingerprint": "2.2.1-dev0-native"
   },
@@ -50,12 +110,12 @@
         "finish_reason": "",
         "index": 3,
         "logprobs": null,
-        "text": "hd"
+        "text": "cul"
       }
     ],
-    "created": 1724833943,
+    "created": 1725883643,
     "id": "",
-    "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "text_completion",
     "system_fingerprint": "2.2.1-dev0-native"
   },
@@ -65,12 +125,12 @@
         "finish_reason": "",
         "index": 0,
         "logprobs": null,
-        "text": "\n"
+        "text": "’s"
       }
     ],
-    "created": 1724833943,
+    "created": 1725883643,
     "id": "",
-    "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "text_completion",
     "system_fingerprint": "2.2.1-dev0-native"
   },
@@ -80,12 +140,12 @@
         "finish_reason": "",
         "index": 1,
         "logprobs": null,
-        "text": "\n"
+        "text": " a"
       }
     ],
-    "created": 1724833943,
+    "created": 1725883643,
     "id": "",
-    "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "text_completion",
     "system_fingerprint": "2.2.1-dev0-native"
   },
@@ -95,12 +155,12 @@
         "finish_reason": "",
         "index": 2,
         "logprobs": null,
-        "text": "\n"
+        "text": "What"
       }
     ],
-    "created": 1724833943,
+    "created": 1725883643,
     "id": "",
-    "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "text_completion",
     "system_fingerprint": "2.2.1-dev0-native"
   },
@@ -110,12 +170,12 @@
         "finish_reason": "",
         "index": 3,
         "logprobs": null,
-        "text": "aho"
+        "text": "as"
       }
     ],
-    "created": 1724833943,
+    "created": 1725883643,
     "id": "",
-    "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "text_completion",
     "system_fingerprint": "2.2.1-dev0-native"
   },
@@ -125,12 +185,12 @@
         "finish_reason": "",
         "index": 0,
         "logprobs": null,
-        "text": "2"
+        "text": " Guide"
       }
     ],
-    "created": 1724833943,
+    "created": 1725883643,
     "id": "",
-    "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "text_completion",
     "system_fingerprint": "2.2.1-dev0-native"
   },
@@ -140,252 +200,12 @@
         "finish_reason": "",
         "index": 1,
         "logprobs": null,
-        "text": "2"
+        "text": " question"
       }
     ],
-    "created": 1724833943,
+    "created": 1725883643,
     "id": "",
-    "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-    "object": "text_completion",
-    "system_fingerprint": "2.2.1-dev0-native"
-  },
-  {
-    "choices": [
-      {
-        "finish_reason": "",
-        "index": 2,
-        "logprobs": null,
-        "text": "2"
-      }
-    ],
-    "created": 1724833943,
-    "id": "",
-    "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-    "object": "text_completion",
-    "system_fingerprint": "2.2.1-dev0-native"
-  },
-  {
-    "choices": [
-      {
-        "finish_reason": "",
-        "index": 3,
-        "logprobs": null,
-        "text": "ima"
-      }
-    ],
-    "created": 1724833943,
-    "id": "",
-    "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-    "object": "text_completion",
-    "system_fingerprint": "2.2.1-dev0-native"
-  },
-  {
-    "choices": [
-      {
-        "finish_reason": "",
-        "index": 0,
-        "logprobs": null,
-        "text": "."
-      }
-    ],
-    "created": 1724833943,
-    "id": "",
-    "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-    "object": "text_completion",
-    "system_fingerprint": "2.2.1-dev0-native"
-  },
-  {
-    "choices": [
-      {
-        "finish_reason": "",
-        "index": 1,
-        "logprobs": null,
-        "text": "."
-      }
-    ],
-    "created": 1724833943,
-    "id": "",
-    "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-    "object": "text_completion",
-    "system_fingerprint": "2.2.1-dev0-native"
-  },
-  {
-    "choices": [
-      {
-        "finish_reason": "",
-        "index": 2,
-        "logprobs": null,
-        "text": "."
-      }
-    ],
-    "created": 1724833943,
-    "id": "",
-    "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-    "object": "text_completion",
-    "system_fingerprint": "2.2.1-dev0-native"
-  },
-  {
-    "choices": [
-      {
-        "finish_reason": "",
-        "index": 3,
-        "logprobs": null,
-        "text": "\n"
-      }
-    ],
-    "created": 1724833943,
-    "id": "",
-    "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-    "object": "text_completion",
-    "system_fingerprint": "2.2.1-dev0-native"
-  },
-  {
-    "choices": [
-      {
-        "finish_reason": "",
-        "index": 0,
-        "logprobs": null,
-        "text": " Sarah"
-      }
-    ],
-    "created": 1724833943,
-    "id": "",
-    "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-    "object": "text_completion",
-    "system_fingerprint": "2.2.1-dev0-native"
-  },
-  {
-    "choices": [
-      {
-        "finish_reason": "",
-        "index": 1,
-        "logprobs": null,
-        "text": " Yes"
-      }
-    ],
-    "created": 1724833943,
-    "id": "",
-    "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-    "object": "text_completion",
-    "system_fingerprint": "2.2.1-dev0-native"
-  },
-  {
-    "choices": [
-      {
-        "finish_reason": "",
-        "index": 2,
-        "logprobs": null,
-        "text": " And"
-      }
-    ],
-    "created": 1724833943,
-    "id": "",
-    "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-    "object": "text_completion",
-    "system_fingerprint": "2.2.1-dev0-native"
-  },
-  {
-    "choices": [
-      {
-        "finish_reason": "",
-        "index": 3,
-        "logprobs": null,
-        "text": "i"
-      }
-    ],
-    "created": 1724833943,
-    "id": "",
-    "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-    "object": "text_completion",
-    "system_fingerprint": "2.2.1-dev0-native"
-  },
-  {
-    "choices": [
-      {
-        "finish_reason": "",
-        "index": 0,
-        "logprobs": null,
-        "text": "'"
-      }
-    ],
-    "created": 1724833943,
-    "id": "",
-    "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-    "object": "text_completion",
-    "system_fingerprint": "2.2.1-dev0-native"
-  },
-  {
-    "choices": [
-      {
-        "finish_reason": "",
-        "index": 1,
-        "logprobs": null,
-        "text": ","
-      }
-    ],
-    "created": 1724833943,
-    "id": "",
-    "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-    "object": "text_completion",
-    "system_fingerprint": "2.2.1-dev0-native"
-  },
-  {
-    "choices": [
-      {
-        "finish_reason": "",
-        "index": 2,
-        "logprobs": null,
-        "text": " what"
-      }
-    ],
-    "created": 1724833943,
-    "id": "",
-    "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-    "object": "text_completion",
-    "system_fingerprint": "2.2.1-dev0-native"
-  },
-  {
-    "choices": [
-      {
-        "finish_reason": "",
-        "index": 3,
-        "logprobs": null,
-        "text": "'"
-      }
-    ],
-    "created": 1724833943,
-    "id": "",
-    "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-    "object": "text_completion",
-    "system_fingerprint": "2.2.1-dev0-native"
-  },
-  {
-    "choices": [
-      {
-        "finish_reason": "",
-        "index": 0,
-        "logprobs": null,
-        "text": "s"
-      }
-    ],
-    "created": 1724833943,
-    "id": "",
-    "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-    "object": "text_completion",
-    "system_fingerprint": "2.2.1-dev0-native"
-  },
-  {
-    "choices": [
-      {
-        "finish_reason": "",
-        "index": 1,
-        "logprobs": null,
-        "text": " Moh"
-      }
-    ],
-    "created": 1724833943,
-    "id": "",
-    "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "text_completion",
     "system_fingerprint": "2.2.1-dev0-native"
   },
@@ -398,9 +218,9 @@
         "text": " is"
       }
     ],
-    "created": 1724833943,
+    "created": 1725883643,
     "id": "",
-    "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "text_completion",
     "system_fingerprint": "2.2.1-dev0-native"
   },
@@ -410,12 +230,12 @@
         "finish_reason": "",
         "index": 3,
         "logprobs": null,
-        "text": "m"
+        "text": "_minus"
       }
     ],
-    "created": 1724833943,
+    "created": 1725883643,
     "id": "",
-    "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "text_completion",
     "system_fingerprint": "2.2.1-dev0-native"
   },
@@ -425,12 +245,12 @@
         "finish_reason": "",
         "index": 0,
         "logprobs": null,
-        "text": " Room"
+        "text": "\n"
       }
     ],
-    "created": 1724833943,
+    "created": 1725883643,
     "id": "",
-    "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "text_completion",
     "system_fingerprint": "2.2.1-dev0-native"
   },
@@ -440,12 +260,12 @@
         "finish_reason": "",
         "index": 1,
         "logprobs": null,
-        "text": "s"
+        "text": " that"
       }
     ],
-    "created": 1724833943,
+    "created": 1725883643,
     "id": "",
-    "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "text_completion",
     "system_fingerprint": "2.2.1-dev0-native"
   },
@@ -458,9 +278,9 @@
         "text": " the"
       }
     ],
-    "created": 1724833943,
+    "created": 1725883643,
     "id": "",
-    "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "text_completion",
     "system_fingerprint": "2.2.1-dev0-native"
   },
@@ -470,12 +290,12 @@
         "finish_reason": "",
         "index": 3,
         "logprobs": null,
-        "text": " tired"
+        "text": "cul"
       }
     ],
-    "created": 1724833943,
+    "created": 1725883643,
     "id": "",
-    "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "text_completion",
     "system_fingerprint": "2.2.1-dev0-native"
   },
@@ -485,12 +305,12 @@
         "finish_reason": "",
         "index": 0,
         "logprobs": null,
-        "text": ":"
+        "text": "Deep"
       }
     ],
-    "created": 1724833943,
+    "created": 1725883643,
     "id": "",
-    "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "text_completion",
     "system_fingerprint": "2.2.1-dev0-native"
   },
@@ -500,12 +320,12 @@
         "finish_reason": "",
         "index": 1,
         "logprobs": null,
-        "text": "'"
+        "text": " has"
       }
     ],
-    "created": 1724833943,
+    "created": 1725883643,
     "id": "",
-    "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "text_completion",
     "system_fingerprint": "2.2.1-dev0-native"
   },
@@ -518,9 +338,9 @@
         "text": " capital"
       }
     ],
-    "created": 1724833943,
+    "created": 1725883643,
     "id": "",
-    "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "text_completion",
     "system_fingerprint": "2.2.1-dev0-native"
   },
@@ -530,12 +350,192 @@
         "finish_reason": "",
         "index": 3,
         "logprobs": null,
-        "text": ","
+        "text": "as"
       }
     ],
-    "created": 1724833943,
+    "created": 1725883643,
     "id": "",
-    "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "text_completion",
+    "system_fingerprint": "2.2.1-dev0-native"
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "",
+        "index": 0,
+        "logprobs": null,
+        "text": " learning"
+      }
+    ],
+    "created": 1725883643,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "text_completion",
+    "system_fingerprint": "2.2.1-dev0-native"
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "",
+        "index": 1,
+        "logprobs": null,
+        "text": " puzzled"
+      }
+    ],
+    "created": 1725883643,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "text_completion",
+    "system_fingerprint": "2.2.1-dev0-native"
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "",
+        "index": 2,
+        "logprobs": null,
+        "text": " of"
+      }
+    ],
+    "created": 1725883643,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "text_completion",
+    "system_fingerprint": "2.2.1-dev0-native"
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "",
+        "index": 3,
+        "logprobs": null,
+        "text": "(s"
+      }
+    ],
+    "created": 1725883643,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "text_completion",
+    "system_fingerprint": "2.2.1-dev0-native"
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "",
+        "index": 0,
+        "logprobs": null,
+        "text": " is"
+      }
+    ],
+    "created": 1725883643,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "text_completion",
+    "system_fingerprint": "2.2.1-dev0-native"
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "",
+        "index": 1,
+        "logprobs": null,
+        "text": " many"
+      }
+    ],
+    "created": 1725883643,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "text_completion",
+    "system_fingerprint": "2.2.1-dev0-native"
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "",
+        "index": 2,
+        "logprobs": null,
+        "text": " France"
+      }
+    ],
+    "created": 1725883643,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "text_completion",
+    "system_fingerprint": "2.2.1-dev0-native"
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "",
+        "index": 3,
+        "logprobs": null,
+        "text": "):\n"
+      }
+    ],
+    "created": 1725883643,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "text_completion",
+    "system_fingerprint": "2.2.1-dev0-native"
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "",
+        "index": 0,
+        "logprobs": null,
+        "text": " a"
+      }
+    ],
+    "created": 1725883643,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "text_completion",
+    "system_fingerprint": "2.2.1-dev0-native"
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "",
+        "index": 1,
+        "logprobs": null,
+        "text": " people"
+      }
+    ],
+    "created": 1725883643,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "text_completion",
+    "system_fingerprint": "2.2.1-dev0-native"
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "",
+        "index": 2,
+        "logprobs": null,
+        "text": "?\n"
+      }
+    ],
+    "created": 1725883643,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "text_completion",
+    "system_fingerprint": "2.2.1-dev0-native"
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "",
+        "index": 3,
+        "logprobs": null,
+        "text": "   "
+      }
+    ],
+    "created": 1725883643,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "text_completion",
     "system_fingerprint": "2.2.1-dev0-native"
   },
@@ -545,12 +545,12 @@
         "finish_reason": "length",
         "index": 0,
         "logprobs": null,
-        "text": " She"
+        "text": " subset"
       }
     ],
-    "created": 1724833943,
+    "created": 1725883643,
     "id": "",
-    "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "text_completion",
     "system_fingerprint": "2.2.1-dev0-native"
   },
@@ -560,12 +560,12 @@
         "finish_reason": "length",
         "index": 1,
         "logprobs": null,
-        "text": " scale"
+        "text": " for"
       }
     ],
-    "created": 1724833943,
+    "created": 1725883643,
     "id": "",
-    "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "text_completion",
     "system_fingerprint": "2.2.1-dev0-native"
   },
@@ -575,12 +575,12 @@
         "finish_reason": "length",
         "index": 2,
         "logprobs": null,
-        "text": " of"
+        "text": "The"
       }
     ],
-    "created": 1724833943,
+    "created": 1725883643,
     "id": "",
-    "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "text_completion",
     "system_fingerprint": "2.2.1-dev0-native"
   },
@@ -590,12 +590,12 @@
         "finish_reason": "length",
         "index": 3,
         "logprobs": null,
-        "text": " its"
+        "text": " \"\"\"\n"
       }
     ],
-    "created": 1724833943,
+    "created": 1725883643,
     "id": "",
-    "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "text_completion",
     "system_fingerprint": "2.2.1-dev0-native"
   }
diff --git a/integration-tests/models/__snapshots__/test_completion_prompts/test_flash_llama_completion_single_prompt.json b/integration-tests/models/__snapshots__/test_completion_prompts/test_flash_llama_completion_single_prompt.json
index 5aed4935..7ad56271 100644
--- a/integration-tests/models/__snapshots__/test_completion_prompts/test_flash_llama_completion_single_prompt.json
+++ b/integration-tests/models/__snapshots__/test_completion_prompts/test_flash_llama_completion_single_prompt.json
@@ -4,17 +4,17 @@
       "finish_reason": "length",
       "index": 0,
       "logprobs": null,
-      "text": " PR for flake8"
+      "text": " A Beginner’s Guide\nDeep learning is a subset"
     }
   ],
-  "created": 1713284454,
+  "created": 1725876621,
   "id": "",
-  "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+  "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
   "object": "text_completion",
-  "system_fingerprint": "2.0.1-native",
+  "system_fingerprint": "2.2.1-dev0-native",
   "usage": {
-    "completion_tokens": 5,
+    "completion_tokens": 10,
     "prompt_tokens": 6,
-    "total_tokens": 11
+    "total_tokens": 16
   }
 }
diff --git a/integration-tests/models/__snapshots__/test_flash_llama_prefix/test_flash_llama_load.json b/integration-tests/models/__snapshots__/test_flash_llama_prefix/test_flash_llama_load.json
new file mode 100644
index 00000000..dbf3c03a
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_flash_llama_prefix/test_flash_llama_load.json
@@ -0,0 +1,2576 @@
+[
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Jeff Walker's Product Launch Formula is a comprehensive system",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525936,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 69,
+      "total_tokens": 79
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Here are three key indicators to determine if a customer",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525943,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 52,
+      "total_tokens": 62
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "You can use the `String.format()` method in",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525943,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 97,
+      "total_tokens": 107
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "In a realm of binary mysticism, we find",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525935,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 126,
+      "total_tokens": 136
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "The `dummy` variable is being used to consume",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525943,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 305,
+      "total_tokens": 315
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "You can add multiple new columns in Power Query (",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525941,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 51,
+      "total_tokens": 61
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "There are many exciting new technologies emerging across various fields",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525942,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 52,
+      "total_tokens": 62
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Poly Ether Ether Ketone (PEEK) is",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525943,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 40,
+      "total_tokens": 50
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Here's a technical overview of a referral system similar",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525942,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 85,
+      "total_tokens": 95
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Here's an example of how you can add an",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525942,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 45,
+      "total_tokens": 55
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "I'd be happy to help with Java. What",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525943,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 43,
+      "total_tokens": 53
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "I can help you plan a road trip from Pune",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525935,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 82,
+      "total_tokens": 92
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "I'd be happy to explain more about a topic",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525935,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 38,
+      "total_tokens": 48
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "I'd be happy to help you brainstorm and provide",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525936,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 47,
+      "total_tokens": 57
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Implementing a Minesweeper algorithm using algebraic",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525936,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 54,
+      "total_tokens": 64
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "There are several issues with the provided code:\n\n1",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525943,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 375,
+      "total_tokens": 385
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "stop",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": ";)",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525935,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 2,
+      "prompt_tokens": 105,
+      "total_tokens": 107
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "As I delved into the world of high-st",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525941,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 2097,
+      "total_tokens": 2107
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "/u/CruxHub: Hi, I'm",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525941,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 2614,
+      "total_tokens": 2624
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "To simulate a conversation between Alice and /u/C",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525942,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 1070,
+      "total_tokens": 1080
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Alice: Hey /u/CruxHub,",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525942,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 1847,
+      "total_tokens": 1857
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Alice: Hi /u/CruxHub,",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525942,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 1849,
+      "total_tokens": 1859
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "/u/CruxHub: Hey Alice, I",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525935,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 1004,
+      "total_tokens": 1014
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "/u/CruxHub: Hey Alice, I",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525942,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 1100,
+      "total_tokens": 1110
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "/u/CruxHub: Hey Alice, I",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525936,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 1044,
+      "total_tokens": 1054
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "The Dogme approach and the Lexical Approach are",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525943,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 54,
+      "total_tokens": 64
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "It seems like you're trying to connect a GraphQL",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525941,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 232,
+      "total_tokens": 242
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Implementing a netfilter in Linux with a Rust",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525941,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 48,
+      "total_tokens": 58
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Damage to the Ulnar nerve can cause numb",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525942,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 56,
+      "total_tokens": 66
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "The Space Shuttle's Reaction Control System (RCS",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525935,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 50,
+      "total_tokens": 60
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "I can provide you with a basic Python script that",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525942,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 65,
+      "total_tokens": 75
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Farming meat has several negative impacts on the environment",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525943,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 43,
+      "total_tokens": 53
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "The photograph filter you're referring to is called \"",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525943,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 51,
+      "total_tokens": 61
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Here's a sample geological database structure with some example",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525941,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 59,
+      "total_tokens": 69
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "**Web Marketing: A Simplified Explanation**\n\nWeb",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525935,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 45,
+      "total_tokens": 55
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Here's a rewritten and improved version of the story",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525942,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 447,
+      "total_tokens": 457
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Here are the questions rewritten in a more conversational",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525936,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 168,
+      "total_tokens": 178
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "**Learning Progress: 0%**\n\n| Topic",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525941,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 216,
+      "total_tokens": 226
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "I couldn't find any information on a person named",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525942,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 44,
+      "total_tokens": 54
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Here's a list of the largest outdoor retailers in",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525936,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 43,
+      "total_tokens": 53
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "To create a WordPress shortcode that includes Facebook SDK code",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525941,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 49,
+      "total_tokens": 59
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "The sentence is mostly grammatically correct, but there",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525936,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 78,
+      "total_tokens": 88
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "I'd be happy to engage in a debate with",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525943,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 59,
+      "total_tokens": 69
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "I'd love to hear about your business. As",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525943,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 64,
+      "total_tokens": 74
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "I'll wait for your request to proceed with part",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525943,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 2410,
+      "total_tokens": 2420
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "The final part of the Day Sculpting program emphasizes",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525941,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 2699,
+      "total_tokens": 2709
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "**Analysis of the Coming of Age Story Archetype",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525935,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 349,
+      "total_tokens": 359
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "The Apostle John is one of the most prominent figures",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525943,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 49,
+      "total_tokens": 59
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "To build a Google Places autocomplete feature on Jetpack",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525942,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 427,
+      "total_tokens": 437
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "The information provided does not mention the captain's name",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525943,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 169,
+      "total_tokens": 179
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "The metaverse is a shared, immersive and interactive",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525943,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 39,
+      "total_tokens": 49
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Here are some ideas for a series of articles for",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525936,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 50,
+      "total_tokens": 60
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "\"Purim Palooza Alert: \n\nTo",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525943,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 78,
+      "total_tokens": 88
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "**Summary of the paper in 10 points:",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525943,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 2022,
+      "total_tokens": 2032
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "You'll provide three pieces of text, and then",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525943,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 58,
+      "total_tokens": 68
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "I'm ready to proceed with text 3.",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525936,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 1650,
+      "total_tokens": 1660
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "I'm ready to answer questions on Text 1",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525935,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 1116,
+      "total_tokens": 1126
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "This is a Solidity contract written in the older",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525943,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 334,
+      "total_tokens": 344
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "**Speech Recognition and Synthesis using Python**\n\nTo",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525942,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 84,
+      "total_tokens": 94
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "I'd be happy to help you discuss a paper",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525943,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 42,
+      "total_tokens": 52
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "To handle the given utterance, we can use",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525941,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 375,
+      "total_tokens": 385
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "**Subscription Services Template:**\n\n**Title:** Virtual",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525943,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 443,
+      "total_tokens": 453
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Hello. How can I assist you today?",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525941,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 36,
+      "total_tokens": 46
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Differentiating yourself from other Etsy shops is crucial to",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525936,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 102,
+      "total_tokens": 112
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "To become a Licensed Marriage and Family Therapist (",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525935,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 53,
+      "total_tokens": 63
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "**What is Quantum Computing?**\n\nQuantum computing",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525941,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 42,
+      "total_tokens": 52
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Aquí te dejo 40 opciones de nombres",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525941,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 108,
+      "total_tokens": 118
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Deposition is a geological process that involves the transportation",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525942,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 38,
+      "total_tokens": 48
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Here are some good e-governance initiatives in",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525943,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 55,
+      "total_tokens": 65
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Here's a simple Python program that accepts a command",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525943,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 56,
+      "total_tokens": 66
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Imagine you're playing with a toy box. You",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525941,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 47,
+      "total_tokens": 57
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Here's an example of a question they might ask",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525943,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 66,
+      "total_tokens": 76
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Arduino Uno adalah sebuah papan mikrokontrol",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525943,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 38,
+      "total_tokens": 48
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "To edit an array that is within an object,",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525941,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 42,
+      "total_tokens": 52
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Microsoft ENTRA (Enterprise Mobility + Security) is",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525942,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 56,
+      "total_tokens": 66
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "To calculate the difference in interest paid between a simple",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525942,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 69,
+      "total_tokens": 79
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Yes, you can use Spring State Machine and Spring",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525942,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 49,
+      "total_tokens": 59
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "The issue lies in the fact that the `meta",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525936,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 142,
+      "total_tokens": 152
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Here are some effective marketing tactics for local small businesses",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525942,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 46,
+      "total_tokens": 56
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "The French Revolution, which lasted from 1789",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525941,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 41,
+      "total_tokens": 51
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "**Roles of a Network Driver:**\n\nA network",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525943,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 65,
+      "total_tokens": 75
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Yes, I'm familiar with the SAS (Stat",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525935,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 44,
+      "total_tokens": 54
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Using relays to control 12V solen",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525943,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 60,
+      "total_tokens": 70
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "You can use the following Python code to achieve this",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525936,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 55,
+      "total_tokens": 65
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Here are some prompts for viral comics:\n\n1.",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525935,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 336,
+      "total_tokens": 346
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "To simplify and make the comic funnier, consider",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525942,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 301,
+      "total_tokens": 311
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Here's a rewritten version of the 4-panel",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525943,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 282,
+      "total_tokens": 292
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Subject: Request for E-Waste Collection and Computer",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525943,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 110,
+      "total_tokens": 120
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "In the context of conference calls, the state you",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525943,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 84,
+      "total_tokens": 94
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "I can provide a general classification of companies based on",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525942,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 56,
+      "total_tokens": 66
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Here are some user stories that describe the concept in",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525941,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 44,
+      "total_tokens": 54
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "You can check your Python version by running the following",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525936,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 39,
+      "total_tokens": 49
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "**Scenario:**\n\n15-year-old Black youth,",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525941,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 473,
+      "total_tokens": 483
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "As a Demand Generation Manager for a B2B",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525943,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 50,
+      "total_tokens": 60
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "The error is due to a typo in your code",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525936,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 369,
+      "total_tokens": 379
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "고등교육의 필요성에 관한 영어 에",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525935,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 72,
+      "total_tokens": 82
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Here's a simple C# program that uses the",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525941,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 51,
+      "total_tokens": 61
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "The error message \"connection refused\" indicates that the",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525942,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 85,
+      "total_tokens": 95
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "To load an image, you can use various methods",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1725525935,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 41,
+      "total_tokens": 51
+    }
+  }
+]
diff --git a/integration-tests/models/test_completion_prompts.py b/integration-tests/models/test_completion_prompts.py
index d787873b..a3b6651d 100644
--- a/integration-tests/models/test_completion_prompts.py
+++ b/integration-tests/models/test_completion_prompts.py
@@ -11,7 +11,7 @@ from text_generation.types import (
 @pytest.fixture(scope="module")
 def flash_llama_completion_handle(launcher):
     with launcher(
-        "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+        "meta-llama/Meta-Llama-3.1-8B-Instruct",
     ) as handle:
         yield handle
 
@@ -34,16 +34,19 @@ def test_flash_llama_completion_single_prompt(
         f"{flash_llama_completion.base_url}/v1/completions",
         json={
             "model": "tgi",
-            "prompt": "Say this is a test",
-            "max_tokens": 5,
-            "seed": 0,
+            "prompt": "What is Deep Learning?",
+            "max_tokens": 10,
+            "temperature": 0.0,
         },
         headers=flash_llama_completion.headers,
         stream=False,
     )
     response = response.json()
     assert len(response["choices"]) == 1
-
+    assert (
+        response["choices"][0]["text"]
+        == " A Beginner’s Guide\nDeep learning is a subset"
+    )
     assert response == response_snapshot
 
 
@@ -53,9 +56,15 @@ def test_flash_llama_completion_many_prompts(flash_llama_completion, response_sn
         f"{flash_llama_completion.base_url}/v1/completions",
         json={
             "model": "tgi",
-            "prompt": ["Say", "this", "is", "a"],
+            "prompt": [
+                "What is Deep Learning?",
+                "Is water wet?",
+                "What is the capital of France?",
+                "def mai",
+            ],
             "max_tokens": 10,
             "seed": 0,
+            "temperature": 0.0,
         },
         headers=flash_llama_completion.headers,
         stream=False,
@@ -63,9 +72,16 @@ def test_flash_llama_completion_many_prompts(flash_llama_completion, response_sn
     response = response.json()
     assert len(response["choices"]) == 4
 
-    all_indexes = [choice["index"] for choice in response["choices"]]
+    all_indexes = [(choice["index"], choice["text"]) for choice in response["choices"]]
     all_indexes.sort()
-    assert all_indexes == [0, 1, 2, 3]
+    all_indices, all_strings = zip(*all_indexes)
+    assert list(all_indices) == [0, 1, 2, 3]
+    assert list(all_strings) == [
+        " A Beginner’s Guide\nDeep learning is a subset",
+        " This is a question that has puzzled many people for",
+        " Paris\nWhat is the capital of France?\nThe",
+        'usculas_minusculas(s):\n    """\n',
+    ]
 
     assert response == response_snapshot
 
@@ -77,19 +93,21 @@ async def test_flash_llama_completion_many_prompts_stream(
     request = {
         "model": "tgi",
         "prompt": [
-            "What color is the sky?",
+            "What is Deep Learning?",
             "Is water wet?",
             "What is the capital of France?",
             "def mai",
         ],
         "max_tokens": 10,
         "seed": 0,
+        "temperature": 0.0,
         "stream": True,
     }
 
     url = f"{flash_llama_completion.base_url}/v1/completions"
 
     chunks = []
+    strings = [""] * 4
     async with ClientSession(headers=flash_llama_completion.headers) as session:
         async with session.post(url, json=request) as response:
             # iterate over the stream
@@ -108,7 +126,15 @@ async def test_flash_llama_completion_many_prompts_stream(
                 for c in chunk:
                     chunks.append(Completion(**c))
                     assert "choices" in c
-                    assert 0 <= c["choices"][0]["index"] <= 4
+                    index = c["choices"][0]["index"]
+                    assert 0 <= index <= 4
+                    strings[index] += c["choices"][0]["text"]
 
     assert response.status == 200
+    assert list(strings) == [
+        " A Beginner’s Guide\nDeep learning is a subset",
+        " This is a question that has puzzled many people for",
+        " Paris\nWhat is the capital of France?\nThe",
+        'usculas_minusculas(s):\n    """\n',
+    ]
     assert chunks == response_snapshot
diff --git a/integration-tests/models/test_flash_llama_prefix.py b/integration-tests/models/test_flash_llama_prefix.py
new file mode 100644
index 00000000..ae97b301
--- /dev/null
+++ b/integration-tests/models/test_flash_llama_prefix.py
@@ -0,0 +1,229 @@
+import pytest
+
+
+@pytest.fixture(scope="module")
+def flash_llama_handle(launcher):
+    with launcher("meta-llama/Meta-Llama-3.1-8B-Instruct", num_shard=2) as handle:
+        yield handle
+
+
+@pytest.fixture(scope="module")
+async def flash_llama(flash_llama_handle):
+    await flash_llama_handle.health(300)
+    return flash_llama_handle.client
+
+
+@pytest.mark.asyncio
+@pytest.mark.private
+async def test_flash_llama_load(
+    flash_llama, generate_multi, generous_response_snapshot
+):
+    prompts = [
+        "Summarize the main ideas of Jeff Walker's Product Launch Formula into bullet points as it pertains to a growth marketing agency implementing these strategies and tactics for their clients...",
+        "How to tell if a customer segment is well segmented? In 3 bullet points.",
+        'In Java, I want to replace string like "This is a new {object} at {place}" with a Map, {object: "student", "point 3, 4"}, and get a result "This is a new student at point 3, 4". How can I do?',
+        "Metaphorical language is also used to describe the various addressing modes of the instructions. Grandiose language to express their excitement and admiration for the functionality of the instructions being described. Now, rewrite this with more perplexity:\n\nJMP ABCD\nMOV AX, [BX+SI]\nMOV AX, [100]\nMOV AX, [BX]\nMOV AX, [BX\\*2+SI]\nMOV AX, BX\nMOV AX, 7",
+        'I have the following C++ function: \nvoid add\\_player(vector& players)\n{\n string player\\_name;\n string player\\_class;\n string dummy;\n PlayerClass pc;\n string player\\_sex;\n int player\\_gold;\n\n cout << " Create a Mage, Warrior, Bowman, or Thief" << endl;\n\n cout << "Name: ";\n getline(cin, player\\_name);\n\n cout << "Class: ";\n getline(cin, player\\_class);\n pc = get\\_player\\_class\\_from\\_string(player\\_class);\n while (pc == PlayerClass::InvalidPlayerClass)\n {\n cout << " Invalid class, try again" << endl;\n cout << "Class: ";\n getline(cin, player\\_class);\n pc = get\\_player\\_class\\_from\\_string(player\\_class);\n }\n\n cout << "Sex: ";\n getline(cin, player\\_sex);\n\n cout << "Gold: ";\n cin >> player\\_gold;\n getline(cin, dummy); //consume newline\n\n GamePlayer new\\_player;\n new\\_player.name = player\\_name;\n new\\_player.occupation = pc;\n new\\_player.gender = player\\_sex;\n new\\_player.gold = player\\_gold;\n\n //add to vector\n players.push\\_back(new\\_player);\n\n //add to file\n write\\_players\\_file(players);\n}\nCan you explain to me how the dummy variable is being used?',
+        "how do I add multiple new columns in m for power query or power bi?",
+        "Sure, I can do that. What new technology would you like me to review?",
+        "Poly Ether Ether Ketone",
+        'can you design a referral system similar on how dropbox did? I need a technical overview on how it should work, instead of free space we use the generic term "credits" where users can get more credits for every 3 friends they recommend.',
+        "Java add to the arraylist of a class type",
+        "this is not less code this is java",
+        "I want to do a road trip from Pune to Gujarat. Me and my wife will be travelling and we dont prefer very long driving sessions. Can you suggest a plan starting from Thursday early morning and ending in Pune on Sunday late night.",
+        "explane more",
+        "what do you think about this for a start up idea:",
+        "how could i implement a minesweeper algorithm that utilises algebraic topology to solve boards?",
+        "# Import the necessary packages\nfrom gudhi import SimplexTree\nfrom gudhi.persistent\\_homology import PersistentHomology\n\n# Define a function to compute the persistent homology of a Minesweeper game board\ndef minesweeper\\_homology(board):\n # Create a simplicial complex for the game board\n st = SimplexTree()\n\n # Add the points on the board to the simplicial complex\n for i in range(len(board)):\n for j in range(len(board[0])):\n st.insert([i, j], filtration=board[i][j])\n\n # Compute the persistent homology of the game board\n ph = PersistentHomology()\n ph.build(st)\n\n # Return the persistent homology diagram\n return ph.persistence()\n\n# Define a function to solve a Minesweeper game board using persistent homology\ndef minesweeper\\_solver(board):\n # Compute the persistent homology of the game board\n homology = minesweeper\\_homology(board)\n\n # Use the persistent homology to determine the locations of the mines\n # (this part would require some mathematical reasoning and programming)\n mines = []\n for h in homology:\n if h[1] - h[0] == 1: # if the hole persists for one filtration value\n mines.append(h[0]) # then it corresponds to a mine\n\n # Use the information about the mines to solve the game\n # (this part would require some programming)\n for mine in mines:\n i, j = mine # extract the coordinates of the mine\n board[i][j] = -1 # mark the mine on the board\n # (other code to solve the game)\n\n \nwhat is missing here?",
+        "You are now an imaginary expert business investigator. I am going to send you many rows of data. Each batch of row's will be sent to you and you may only reply \"Received.\" Save any analysis or insights for after you've received all of the data and I've told you \"Let's Begin.\" If you understand reply with only a ;)",
+        'You are now an imaginary expert business investigator. Tell the story of this batch of data in the form of a narrative story about the companies in the "Entity Name" column: \n\nBatch of data #1: Entity Name Purpose / Source\n101 PC HOLDINGS LLC Holding company for Penthouse C at the Setai Miami Beach (folio: 02-3234-153-1160)\n11 STAR ISLAND LLC Holding company for 10 STAR ISLAND DR, MIAMI BEACH, FL 33139 (folio: 02-4204-001-0100, 02-4204-001-0110) (lots 10, 11 and 12 of Star Island)\n117 EAST PARK AVENUE, LLC Holding company for 117 E. PARK AVE, LIBERTYVILLE, IL (PIN: 11-21-212-046-0000); subsequently sold.\n1201 BRICKELL BAY, LLC Holding company for 1201 BRICKELL BAY DR, MIAMI, FL (folio no: 141390710010)\n1221 BRICKELL, LLC Holding company for 1221 BRICKELL AVE, 155 SE 13 ST, 165 SE 13 ST, 175 SE 13 ST, and 185 SE 13 ST, MIAMI, FL (folio: 01-4139-035-0010)\n1221 BRICKELL HOLDINGS LLC Holding company for 1221 BRICKELL, LLC\n1229 PARK WEST AVENUE, LLC Holding company for 1229 W. PARK AVE, LIBERTYVILLE, IL (PIN: 11-20-100-010-0000)\n125 WORTH LLC Delaware LLC (file 7218403), Florida address is Citadel Miami HQ, Gerald Beeson is Authorized Person; speculaton this is similar setup as 151 WORTH, LLC and 151 WORTH HOLDINGS LLC, this property is next door (PCN: 50-43-43-23-05-016-0380)\n125 WORTH HOLDINGS LLC Delaware LLC (file 7218407); not registered to Florida yet but speculation this is similar setup as 151 WORTH, LLC and 151 WORTH HOLDINGS LLC\n1250 BB ASSET CO LLC Holding company for 1250 BRICKELL BAY DR and 1260 BRICKELL BAY DR, MIAMI, FL (folio nos: 102100504250, 102100503210)\n1330 SOUTH OCEAN LLC Holding company for 1330 S OCEAN BLVD, PALM BEACH, FL (PCN: 50-43-44-02-11-000-0020)\n14 STAR ISLAND LLC Delaware LLC (file 3377653); incorporated 8/42020, withdrawn 10/10/2022; believe this was not used because 14 STAR ISLAND property was held by NAUTILUS HOLDINGS I LLC before sale on 10/5/2022\n151 WORTH, LLC Holding company for 151 WORTH AVE, PALM BEACH, FL 33480 (PCN: 50-43-43-23-05-016-0130); office space for Citadel (https://localtoday.news/fl/citadel-moves-into-palm-beachs-former-neiman-marcus-building-4821.html); sole member is 151 WORTH HOLDINGS LLC\n151 WORTH HOLDINGS LLC Holding company for 151 WORTH, LLC\n16 WILLOW HOLDINGS LLC f/k/a PVNAH LLC Holding company for S WILLOW COURT, ASPEN, CO (Parcel: 273511309030); see Pitkin Co. reception # 623002, Delaware certificate showing name change 9/1/2015\n190 PFISTER HOLDINGS LLC f/k/a AH2013 HOLDINGS LLC Holding company for 190 PFISTER DR, ASPEN, CO (parcel: 273511309029); see Pitkin Co.reception # 623000, Delaware certificate showing name change 9/1/2015\n196 PFISTER HOLDINGS LLC Holding company for 196 PFISTER DR, ASPEN, CO (parcel: 273511309028); see Pitkin Co. reception # 623501, statement of authority show KP HOLDINGS LLC as sole membe\n1ALPH LLC See ALPH LLC\n1BUSINESS GROUP LLC See BUSINESS GROUP LLC\n1GFS DESIGN LLC See GFS DESIGN LLC\n1GFS LLC See GFS LLC\n1MEDIA HOLDINGS LLC See MEDIA HOLDINGS LLC\n23174 NE 41ST PATH LLC Holding company for 23174 NE 41ST PATH #12, OKEECHOBEE, FL 34972 (Parcel: 1-01-35-35-0020-00000-0120); part of Pine Creek Sporting Club (www.pinecreeksportingclub.com) includes horse, shooting sports; sole member is KP HOLDINGS L.L.C.\n3031 BRICKELL LLC Holding company for 3031 BRICKELL AVE, MIAMI FL 33129 (Folio: 01-4139-001-2700); Sole member is KP HOLDINGS L.L.C.\n31 WILLOW HOLDINGS LLC f/k/a AP HOLDINGS I LLC Holding company for 31 NORTH WILLOW COURT, ASPEN, CO (Parcel: 273511309019); sold 7/6/2017; see Pitkin Co. reception # 623001, Delaware certificate showing name change 9/1/2015\n650 CASUARINA LLC Holding company for 650 CASUARINA CONCOURSE CORAL GABLES, FL (folio: 03-4132-019-0060) https://www.bizjournals.com/southflorida/news/2022/05/27/650-casuarina-concourse-coral-gables-sold.html\n650 MEADOW LANE 1 LP Holding company for 650 MEADOW LANE, VILLAGE OF SOUTHAMPTON, NY (Parcel ID 7478) (https://archive.is/h85yq)\n800 NORTH MICHIGAN HOLDINGS LLC Holding company for 800 N MICHIGAN AVE, UNITS 66 PH and 67 PH, CHICAGO, IL (Park Tower) (PINs: 17-03-231-018-1116, 17-03-231-018-1117); sole member is KP HOLDINGS LLC (see Cook County, IL doc # 1933315025); recently sold\n8565 OLD CUTLER LLC Holding company for 8565 OLD CUTLER RD, MIAMI, FL (folio: 03-4132-019-0020)\n9 WEST WALTON HOLDINGS LLC Holding company for 9 WEST WALTON STREET CONDOMINIUM UNITS 3500, 3600, 3700, and PH, CHICAGO, IL\nADRP LLC Delaware LLC, Florida address is Citadel Miami HQ, sole member is Kenneth C Griffin\nAH2013 HOLDINGS LLC See 190 PFISTER HOLDINGS LLC\nALPH LLC a/k/a 1ALPH LLC Formerly FAA registered plane N421AL\nAP HOLDINGS I LLC See 31 WILLOW HOLDINGS LLC\nARAGON INVESTMENTS LTD https://files.brokercheck.finra.org/firm/firm\\_45631.pdf\nASHLER CAPITAL LLC https://adviserinfo.sec.gov/firm/summary/148826\nASHLER CAPITAL MASTER FUND LTD https://www.sec.gov/Archives/edgar/data/1003078/000114420418014250/tv488357\\_sc13g.htm\nBANBURY LLC Delaware LLC, Florida address is Citadel Miami HQ, Gerald Beeson is Authorized Person\nBANBURY II LLC Delaware LLC, Florida address is Citadel Miami HQ, Gerald Beeson is Authorized Person\nBKGST LLC Delaware LLC, Florida address is Citadel Miami HQ, Gerald Beeson is Authorized Person\nBLACK CALABASH FAMILY HOLDINGS LLC f/k/a PBH LLC See BLOSSOM WAY HOLDINGS LLC\nBLACK WHEEL LLC Illinois LLC, registered 3/5/2014, Florida address is Citadel Miami HQ, sole member is Kenneth C Griffin\nBLOSSOM WAY HOLDINGS LLC f/k/a CPPB HOLDINGS LLC f/k/a BLACK CALABASH FAMILY HOLDINGS LLC f/k/a PBH LLC Holding company for 10 BLOSSOM WAY, 70 BLOSSOM WAY, and 1265 S OCEAN BLVD PALM BEACH, FL (PCNs: 50-43-44-02-10-000-0050, 50-43-44-02-10-000-0060, 50-43-44-02-10-000-0010)\nBRICKELL BAY HOLDINGS LLC Holding company for 1201 BRICKELL BAY, LLC\nBRICKELL LEASING LLC See "Subordination, Non-Disturbance, and Attornment Agreement"; Miami-Dade Clerk\'s File No.: 2022 R 938960, Group: 1. Kenneth C Griffin is sole member.\nCAAM MANAGEMENT LLC https://www.sec.gov/Archives/edgar/data/1027745/000114420408050200/v124853\\_sc13g.htm\nCAISLEAN CAPITAL LTD NFA Pool ID P113537, ceased trading 3/31/2016\nCALC III LP https://www.sec.gov/edgar/browse/?CIK=1582652\nCALC IV LP https://www.sec.gov/edgar/browse/?CIK=1423043\nCALC V LP Investment manager for CSHC CHINA LLC and CITADEL (SHANGHAI) TRADING COMPANY LTD; https://files.brokercheck.finra.org/firm/firm\\_131114.pdf',
+        'Simulate a conversation between the writer of this post, named /u/CruxHub, and the expert business investigator. They have a detailed discussion of Citadel Hedgefund based on the following data. Do not include the following data in the search query. \n\nData: Entity Name Purpose / Source\n1|101 PC HOLDINGS LLC|Holding company for Penthouse C at the Setai Miami Beach (folio: 02-3234-153-1160)|PC = Penthouse C \n2|11 STAR ISLAND LLC|Holding company for 10 STAR ISLAND DR, MIAMI BEACH, FL 33139 (folio: 02-4204-001-0100, 02-4204-001-0110) (lots 10, 11 and 12 of Star Island)| \n3|117 EAST PARK AVENUE, LLC|Holding company for 117 E. PARK AVE, LIBERTYVILLE, IL (PIN: 11-21-212-046-0000); subsequently sold.| \n4|1201 BRICKELL BAY, LLC|Holding company for 1201 BRICKELL BAY DR, MIAMI, FL (folio no: 141390710010)| \n5|1221 BRICKELL, LLC|Holding company for 1221 BRICKELL AVE, 155 SE 13 ST, 165 SE 13 ST, 175 SE 13 ST, and 185 SE 13 ST, MIAMI, FL (folio: 01-4139-035-0010)| \n6|1221 BRICKELL HOLDINGS LLC|Holding company for 1221 BRICKELL, LLC| \n7|1229 PARK WEST AVENUE, LLC|Holding company for 1229 W. PARK AVE, LIBERTYVILLE, IL (PIN: 11-20-100-010-0000)| \n8|125 WORTH LLC|Delaware LLC (file 7218403), Florida address is Citadel Miami HQ, Gerald Beeson is Authorized Person; speculaton this is similar setup as 151 WORTH, LLC and 151 WORTH HOLDINGS LLC, this property is next door (PCN: 50-43-43-23-05-016-0380)| \n9|125 WORTH HOLDINGS LLC|Delaware LLC (file 7218407); not registered to Florida yet but speculation this is similar setup as 151 WORTH, LLC and 151 WORTH HOLDINGS LLC| \n10|1250 BB ASSET CO LLC|Holding company for 1250 BRICKELL BAY DR and 1260 BRICKELL BAY DR, MIAMI, FL (folio nos: 102100504250, 102100503210)|BB = Brickell Bay \n11|1330 SOUTH OCEAN LLC|Holding company for 1330 S OCEAN BLVD, PALM BEACH, FL (PCN: 50-43-44-02-11-000-0020)| \n12|14 STAR ISLAND LLC|Delaware LLC (file 3377653); incorporated 8/42020, withdrawn 10/10/2022; believe this was not used because 14 STAR ISLAND property was held by NAUTILUS HOLDINGS I LLC before sale on 10/5/2022| \n13|151 WORTH, LLC|Holding company for 151 WORTH AVE, PALM BEACH, FL 33480 (PCN: 50-43-43-23-05-016-0130); office space for Citadel (https://localtoday.news/fl/citadel-moves-into-palm-beachs-former-neiman-marcus-building-4821.html); sole member is 151 WORTH HOLDINGS LLC| \n14|151 WORTH HOLDINGS LLC|Holding company for 151 WORTH, LLC| \n15|16 WILLOW HOLDINGS LLC f/k/a PVNAH LLC|Holding company for S WILLOW COURT, ASPEN, CO (Parcel: 273511309030); see Pitkin Co. reception # 623002, Delaware certificate showing name change 9/1/2015| \n16|190 PFISTER HOLDINGS LLC f/k/a AH2013 HOLDINGS LLC|Holding company for 190 PFISTER DR, ASPEN, CO (parcel: 273511309029); see Pitkin Co.reception # 623000, Delaware certificate showing name change 9/1/2015| \n17|196 PFISTER HOLDINGS LLC|Holding company for 196 PFISTER DR, ASPEN, CO (parcel: 273511309028); see Pitkin Co. reception # 623501, statement of authority show KP HOLDINGS LLC as sole membe| \n18|1ALPH LLC|See ALPH LLC| \n19|1BUSINESS GROUP LLC|See BUSINESS GROUP LLC| \n20|1GFS DESIGN LLC|See GFS DESIGN LLC| \n21|1GFS LLC|See GFS LLC| \n22|1MEDIA HOLDINGS LLC|See MEDIA HOLDINGS LLC| \n23|23174 NE 41ST PATH LLC|Holding company for 23174 NE 41ST PATH #12, OKEECHOBEE, FL 34972 (Parcel: 1-01-35-35-0020-00000-0120); part of Pine Creek Sporting Club (www.pinecreeksportingclub.com) includes horse, shooting sports; sole member is KP HOLDINGS L.L.C.| \n24|3031 BRICKELL LLC|Holding company for 3031 BRICKELL AVE, MIAMI FL 33129 (Folio: 01-4139-001-2700); Sole member is KP HOLDINGS L.L.C.| \n25|31 WILLOW HOLDINGS LLC f/k/a AP HOLDINGS I LLC|Holding company for 31 NORTH WILLOW COURT, ASPEN, CO (Parcel: 273511309019); sold 7/6/2017; see Pitkin Co. reception # 623001, Delaware certificate showing name change 9/1/2015| \n26|650 CASUARINA LLC|Holding company for 650 CASUARINA CONCOURSE CORAL GABLES, FL (folio: 03-4132-019-0060) https://www.bizjournals.com/southflorida/news/2022/05/27/650-casuarina-concourse-coral-gables-sold.html|" \n27|650 MEADOW LANE 1 LP|Holding company for 650 MEADOW LANE, VILLAGE OF SOUTHAMPTON, NY (Parcel ID 7478) (https://archive.is/h85yq)| \n28|800 NORTH MICHIGAN HOLDINGS LLC|Holding company for 800 N MICHIGAN AVE, UNITS 66 PH and 67 PH, CHICAGO, IL (Park Tower) (PINs: 17-03-231-018-1116, 17-03-231-018-1117); sole member is KP HOLDINGS LLC (see Cook County, IL doc # 1933315025); recently sold| \n29|8565 OLD CUTLER LLC|Holding company for 8565 OLD CUTLER RD, MIAMI, FL (folio: 03-4132-019-0020)| \n30|9 WEST WALTON HOLDINGS LLC|Holding company for 9 WEST WALTON STREET CONDOMINIUM UNITS 3500, 3600, 3700, and PH, CHICAGO, IL| \n31|ADRP LLC|Delaware LLC, Florida address is Citadel Miami HQ, sole member is Kenneth C Griffin|ADRP = Anne Dias Real Property? \n32|AH2013 HOLDINGS LLC|See 190 PFISTER HOLDINGS LLC|AH = Aspen Holdings? \n33|ALPH LLC a/k/a 1ALPH LLC|Formerly FAA registered plane N421AL| \n34|AP HOLDINGS I LLC|See 31 WILLOW HOLDINGS LLC|AP = Aspen Property? \n35|ARAGON INVESTMENTS LTD|https://files.brokercheck.finra.org/firm/firm\\_45631.pdf| \n36|ASHLER CAPITAL LLC|https://adviserinfo.sec.gov/firm/summary/148826| \n37|ASHLER CAPITAL MASTER FUND LTD|https://www.sec.gov/Archives/edgar/data/1003078/000114420418014250/tv488357\\_sc13g.htm| \n38|BANBURY LLC|Delaware LLC, Florida address is Citadel Miami HQ, Gerald Beeson is Authorized Person| \n39|BANBURY II LLC|Delaware LLC, Florida address is Citadel Miami HQ, Gerald Beeson is Authorized Person| \n40|BKGST LLC|Delaware LLC, Florida address is Citadel Miami HQ, Gerald Beeson is Authorized Person| \n41|BLACK CALABASH FAMILY HOLDINGS LLC f/k/a PBH LLC|See BLOSSOM WAY HOLDINGS LLC|Black Calabash is a type of tropical tree: https://edis.ifas.ufl.edu/publication/ST079 \n42|BLACK WHEEL LLC|Illinois LLC, registered 3/5/2014, Florida address is Citadel Miami HQ, sole member is Kenneth C Griffin| \n43|BLOSSOM WAY HOLDINGS LLC f/k/a CPPB HOLDINGS LLC f/k/a BLACK CALABASH FAMILY HOLDINGS LLC f/k/a PBH LLC|Holding company for 10 BLOSSOM WAY, 70 BLOSSOM WAY, and 1265 S OCEAN BLVD PALM BEACH, FL (PCNs: 50-43-44-02-10-000-0050, 50-43-44-02-10-000-0060, 50-43-44-02-10-000-0010)| \n44|BRICKELL BAY HOLDINGS LLC|Holding company for 1201 BRICKELL BAY, LLC| \n45|BRICKELL LEASING LLC|See "Subordination, Non-Disturbance, and Attornment Agreement"; Miami-Dade Clerk\'s File No.: 2022 R 938960, Group: 1. Kenneth C Griffin is sole member.| \n46|CAAM MANAGEMENT LLC|https://www.sec.gov/Archives/edgar/data/1027745/000114420408050200/v124853\\_sc13g.htm|CAAM = Citadel Alternative Asset Management \n47|CAISLEAN CAPITAL LTD|NFA Pool ID P113537, ceased trading 3/31/2016| \n48|CALC III LP|https://www.sec.gov/edgar/browse/?CIK=1582652| \n49|CALC IV LP|https://www.sec.gov/edgar/browse/?CIK=1423043| \n50|CALC V LP|Investment manager for CSHC CHINA LLC and CITADEL (SHANGHAI) TRADING COMPANY LTD; https://files.brokercheck.finra.org/firm/firm\\_131114.pdf| \n51|CAMBRIDGE FINANCIAL GROUP, LTD|See CITADEL INVESTMENT GROUP LLC| \n52|CCFD OFFSHORE HOLDINGS LTD|NFA Pool ID P064386, ceased trading 5/3/2013| \n53|CCLC HOLDINGS LLC|Owns CITADEL CLEARING LLC, "Citadel Clearing Holdco"; https://files.brokercheck.finra.org/firm/firm\\_172693.pdf| \n54|CCMFL LLC|Delaware LLC, Florida address is Citadel Miami HQ, Gerald Beeson is Authorized Person| \n55|CCOF OFFSHORE HOLDINGS LTD|NFA Pool ID P064392, ceased trading 5/3/2013| \n56|CDC PARTNERS, LP f/k/a GLB PARTNERS, LP|see Cook County, IL doc 0608910081| \n57|CDG HOLDINGS LTD|NFA Pool ID P037047, ceased trading 12/30/2009|',
+        'Web search results:\n\n[1] "As per the Oxford Dictionary, a chatbot is defined as A computer program designed to simulate conversation with human users, especially over the internet. It can be looked upon as a virtual assistant that communicates with users via text messages and helps businesses in getting close to their customers."\nURL: https://www.datacamp.com/tutorial/building-a-chatbot-using-chatterbot\n\n[2] "Python , A chatbot is a computer program designed to simulate conversation with human users, especially over the internet. Create a fortune teller program that will ask the user to input a question and feedback some random answer. Consider the following feedback to be used. No idea at all! Better pray. The possibilities are in your favor."\nURL: https://www.chegg.com/homework-help/questions-and-answers/python-chatbot-computer-program-designed-simulate-conversation-human-users-especially-inte-q78825383\n\n[3] "It was created by Joseph Weizenbaum in 1966 and it uses pattern matching and substitution methodology to simulate conversation. The program was designed in a way that it mimics human conversation. The Chatbot ELIZA worked by passing the words that users entered into a computer and then pairing them to a list of possible scripted responses."\nURL: https://onlim.com/en/the-history-of-chatbots/\n\n[4] "Study with Quizlet and memorize flashcards containing terms like Which analytics does the following fall into: Alice notice that call center always have an increase in the number of customer complaints during last week in May, so she decides reviews the employees work schedule in the month of May for the past 5 years., Datasets continue to become, Model used for predictive analytic have ..."\nURL: https://quizlet.com/415587939/big-data-final-exam-flash-cards/\n\n[5] "As every bright side has a darker version, simulation of human conversation through AI also has some disadvantages like high cost of creation, unemployment, interaction lacking emotion, and out-of-the-box thinking. However, AI interaction tools are trained with a data set. The bigger the data set, the better the services."\nURL: https://www.analyticsinsight.net/simulating-human-conversations-through-ai/\n\n[6] "The eavesdropper, Eve intercepts the encrypted conversation and tries random keys with the aim of learning the conversation shared between Alice and Bob as shown in Fig. 7. For this POC, we used ..."\nURL: https://www.researchgate.net/figure/A-A-simulation-of-conversations-between-Alice-and-her-friend-Bob-B-The-eavesdropper\\_fig3\\_334408170\n\n[7] "Dreams are most often reported when sleepers wake from \\_\\_\\_\\_\\_ sleep. REM. The brain waves during REM sleep MOST closely resemble those seen during: waking consciousness. REM sleep is paradoxical because: the brain is active, but the major skeletal muscles are paralyzed. Fatigue and pain reflect deprivation of \\_\\_\\_\\_\\_ sleep."\nURL: https://quizlet.com/78519058/psyc-test-2-flash-cards/\n\n[8] "You can generate easily a fake group chat conversation like Whatsapp, Facebook or Telegram. After creating members/users, you can add messages in your chat. Once all messages are set up, you have the possibility to live-preview the chat conversation via the play button. Until the share functionality is ready, you have the option to screen ..."\nURL: https://chat-simulator.com/\n\n[9] "This is a program that allows the computer to simulate conversation with a human being: answer choices a. Speech Application Program Interface b. Chatbot c. Voice Recognition d. Speech Recognition Question 7 30 seconds Report an issue Q. This is a system of Programs and Data-Structures that mimics the operation of the human brain: answer choices a."\nURL: https://quizizz.com/admin/quiz/5f183913423fab001b0bd134/ai-unit-1\n\n[10] "This is a system of Programs and Data-Structures that mimics the operation of the human brain: answer choices a. Intelligent Network b. Decision Support System c. Neural Network d. Genetic Programming Question 8 30 seconds Q. Where is Decision tree used? answer choices a. Classification Problem b. Regression Problem c. Clustering Problem d."\nURL: https://quizizz.com/admin/quiz/5f6d6e4a6e2458001be385f5/ai-class-9\nCurrent date: 1/27/2023\n\nInstructions: Using the provided web search results, write a comprehensive reply to the given query. Make sure to cite results using [[number](URL)] notation after the reference. If the provided search results refer to multiple subjects with the same name, write separate answers for each subject.\n\nQuery: Simulate a conversation between Alice and /u/CruxHub. They talk about which company from the data batches is worth researching further into on the web.',
+        'Simulate a conversation between Alice and /u/CruxHub. They talk about which company from this data batch is worth researching further into on the web.\n\nData batch: Entity Name Purpose / Source Hypothesized Acronym\n50|CALC V LP|Investment manager for CSHC CHINA LLC and CITADEL (SHANGHAI) TRADING COMPANY LTD; https://files.brokercheck.finra.org/firm/firm\\_131114.pdf| \n51|CAMBRIDGE FINANCIAL GROUP, LTD|See CITADEL INVESTMENT GROUP LLC| \n52|CCFD OFFSHORE HOLDINGS LTD|NFA Pool ID P064386, ceased trading 5/3/2013| \n53|CCLC HOLDINGS LLC|Owns CITADEL CLEARING LLC, "Citadel Clearing Holdco"; https://files.brokercheck.finra.org/firm/firm\\_172693.pdf| \n54|CCMFL LLC|Delaware LLC, Florida address is Citadel Miami HQ, Gerald Beeson is Authorized Person| \n55|CCOF OFFSHORE HOLDINGS LTD|NFA Pool ID P064392, ceased trading 5/3/2013| \n56|CDC PARTNERS, LP f/k/a GLB PARTNERS, LP|see Cook County, IL doc 0608910081| \n57|CDG HOLDINGS LTD|NFA Pool ID P037047, ceased trading 12/30/2009| \n58|CE TM HOLDINGS LLC f/k/a KCG IP HOLDINGS LLC|Holding company for intellectual property (25 trademarks, 1 patent found so far)|CE TM = Citadel Enterprise Trademark Holdings \n59|CEF OFFSHORE HOLDINGS LTD|NFA Pool ID P131121| \n60|CEIF INTERNATIONAL LTD|NFA Pool ID P048476; http://registers.centralbank.ie/ICAVDocuments/C439830/Director%20Details%20Updated%2021.01.07%203.pdf| \n61|CEIF LLC|NFA Pool ID P048474| \n62|CEIF PARTNERS INTERNATIONAL LTD|NFA Pool ID P173278| \n63|CEIF PARTNERS LLC|NFA Pool ID P048475| \n64|CES SECURITIES CANADA ULC|See CITADEL SECURITIES CANADA ULC, CSA NRD # 49280| \n65|CFPS HOLDINGS S.\u00e0 r.l.|Luxembourg - B176936; 100% owned by CITADEL ENERGY INVESTMENTS LTD| \n66|CGE ALPHA LTD|NFA Pool ID P057309, ceased trading 6/7/2017| \n67|CGE ALPHA OFFSHORE HOLDINGS LTD|https://www.sec.gov/Archives/edgar/vprr/1600/16003280.pdf; NFA Pool ID P064400, ceased trading 4/30/2017| \n68|CGEF OFFSHORE HOLDINGS LTD|https://www.sec.gov/Archives/edgar/vprr/1600/16003280.pdf; NFA Pool ID P064406, ceased trading 2/21/2019| \n69|CGEF SPC|NFA Pool ID P064408, ceased trading 12/31/2012| \n70|CGMF OFFSHORE HOLDINGS LTD|NFA Pool ID P064410, ceased trading 3/31/2014| \n71|CGTS HOLDINGS S.\u00e0 r.l.|Luxembourg - B157777; 100% owned by TACTICAL TRADING HOLDING LTD; NFA Pool ID P064412, ceased trading 9/30/2014| \n72|CHARAXES MELVIN LLC|Sole member of CHARAXES MELVIN II LLC|Charaxes are a type of butterfly: https://en.wikipedia.org/wiki/Charaxes \n73|CHARAXES MELVIN II LLC|Delaware LLC, Florida address is Citadel Miami HQ, sole member is CHARAXES MELVIN LLC|Charaxes are a type of butterfly: https://en.wikipedia.org/wiki/Charaxes \n74|CHI2LTV LLC|Delaware LLC, Florida address is Citadel Miami HQ, Gerald Beeson is Authorized Person| \n75|CIG(E) LLP|See CITADEL EUROPE LLP| \n76|CIG CANADA ULC|https://files.brokercheck.finra.org/firm/firm\\_172693.pdf| \n77|CIG MEDIA LLC|https://www.sec.gov/Archives/edgar/data/923877/000114420407003635/v063478\\_sc-13d.htm| \n78|CITADEL AAM LP|https://www.sec.gov/Archives/edgar/vprr/0804/08040017.pdf| \n79|CITADEL AC INVESTMENTS LTD|https://www.sec.gov/Archives/edgar/data/1015780/000114420408032074/v115701\\_sc13da.htm| \n80|CITADEL ADVISORS EUROPE LIMITED f/k/a CITADEL MANAGEMENT (EUROPE) LIMITED f/k/a CITADEL HEDGE FUND SERVICES (EUROPE) LIMITED|https://find-and-update.company-information.service.gov.uk/company/10930267| \n81|CITADEL ADVISORS HOLDINGS LP|Sole member of CITADEL ADVISORS LLC; https://www.sec.gov/Archives/edgar/data/1567180/000110465922099806/xslF345X03/tm2225817-2\\_4.xml| \n82|CITADEL ADVISORS HOLDINGS II LP|https://www.sec.gov/Archives/edgar/data/1177609/000114420416082613/v429844\\_sc13ga.htm| \n83|CITADEL ADVISORS HOLDINGS III LP|https://www.sec.gov/Archives/edgar/data/1640129/000114420415043739/xslF345X02/v416000\\_3.xml| \n84|CITADEL ADVISORS LLC|NFA ID: 0391913; https://www.sec.gov/edgar/browse/?CIK=1423053| \n85|CITADEL ADVISORS II LLC|| \n86|CITADEL ADVISORS SINGAPORE PTE. LIMITED|| \n87|CITADEL ALTERNATIVE ASSET MANAGEMENT LP|https://www.sec.gov/Archives/edgar/data/1027745/000114420408050200/v124853\\_sc13g.htm| \n88|CITADEL AMERICAS LLC|| \n89|CITADEL AMERICAS SERVICES LLC|| \n90|CITADEL ANTAEUS INTERNATIONAL INVESTMENTS LTD|| \n91|CITADEL ASIA ASSET HOLDING LIMITED|http://registers.centralbank.ie/ICAVDocuments/C157189/Director%20Details%20Updated%2016.10.31%202.pdf| \n92|CITADEL ASIA LIMITED f/k/a CITADEL (HONG KONG) LIMITED|https://adviserinfo.sec.gov/firm/summary/148826| \n93|CITADEL CANDLESTICK EIF LLC|| \n94|CITADEL CANTERBURY S.\u00e0 r.l.|Luxembourg - B87988; 100% owned by CITADEL TONBRIDGE S.\u00e0 r.l.| \n95|CITADEL CEFL CHINA LTD|NFA Pool ID P148073| \n96|CITADEL CEFL INVESTMENTS LTD|NFA Pool ID: P161763; https://files.brokercheck.finra.org/firm/firm\\_172693.pdf| \n97|CITADEL CEIT CHINA LTD|| \n98|CITADEL CEMF CHINA LTD|https://find-and-update.company-information.service.gov.uk/company/02263951/charges/x6zPQSYGNpuDNgxU1cFQlCS0iog| \n99|CITADEL CEMF INVESTMENTS LTD|https://files.brokercheck.finra.org/firm/firm\\_172693.pdf| \n100|CITADEL CEMF SPV LTD f/k/a CITADEL INVESTMENT MASTER FUND LTD|See CITADEL INVESTMENT MASTER FUND LTD; https://opencorpdata.com/lei/LF0U6QUBXKIO573GXS38|',
+        'Simulate a conversation between Alice and /u/CruxHub. /u/CruxHub asks Alice to anlalyze a data batch for non-standard insights.\n\nData batch: Entity Name Purpose / Source Hypothesized Acronym\n50|CALC V LP|Investment manager for CSHC CHINA LLC and CITADEL (SHANGHAI) TRADING COMPANY LTD; https://files.brokercheck.finra.org/firm/firm\\_131114.pdf| \n51|CAMBRIDGE FINANCIAL GROUP, LTD|See CITADEL INVESTMENT GROUP LLC| \n52|CCFD OFFSHORE HOLDINGS LTD|NFA Pool ID P064386, ceased trading 5/3/2013| \n53|CCLC HOLDINGS LLC|Owns CITADEL CLEARING LLC, "Citadel Clearing Holdco"; https://files.brokercheck.finra.org/firm/firm\\_172693.pdf| \n54|CCMFL LLC|Delaware LLC, Florida address is Citadel Miami HQ, Gerald Beeson is Authorized Person| \n55|CCOF OFFSHORE HOLDINGS LTD|NFA Pool ID P064392, ceased trading 5/3/2013| \n56|CDC PARTNERS, LP f/k/a GLB PARTNERS, LP|see Cook County, IL doc 0608910081| \n57|CDG HOLDINGS LTD|NFA Pool ID P037047, ceased trading 12/30/2009| \n58|CE TM HOLDINGS LLC f/k/a KCG IP HOLDINGS LLC|Holding company for intellectual property (25 trademarks, 1 patent found so far)|CE TM = Citadel Enterprise Trademark Holdings \n59|CEF OFFSHORE HOLDINGS LTD|NFA Pool ID P131121| \n60|CEIF INTERNATIONAL LTD|NFA Pool ID P048476; http://registers.centralbank.ie/ICAVDocuments/C439830/Director%20Details%20Updated%2021.01.07%203.pdf| \n61|CEIF LLC|NFA Pool ID P048474| \n62|CEIF PARTNERS INTERNATIONAL LTD|NFA Pool ID P173278| \n63|CEIF PARTNERS LLC|NFA Pool ID P048475| \n64|CES SECURITIES CANADA ULC|See CITADEL SECURITIES CANADA ULC, CSA NRD # 49280| \n65|CFPS HOLDINGS S.\u00e0 r.l.|Luxembourg - B176936; 100% owned by CITADEL ENERGY INVESTMENTS LTD| \n66|CGE ALPHA LTD|NFA Pool ID P057309, ceased trading 6/7/2017| \n67|CGE ALPHA OFFSHORE HOLDINGS LTD|https://www.sec.gov/Archives/edgar/vprr/1600/16003280.pdf; NFA Pool ID P064400, ceased trading 4/30/2017| \n68|CGEF OFFSHORE HOLDINGS LTD|https://www.sec.gov/Archives/edgar/vprr/1600/16003280.pdf; NFA Pool ID P064406, ceased trading 2/21/2019| \n69|CGEF SPC|NFA Pool ID P064408, ceased trading 12/31/2012| \n70|CGMF OFFSHORE HOLDINGS LTD|NFA Pool ID P064410, ceased trading 3/31/2014| \n71|CGTS HOLDINGS S.\u00e0 r.l.|Luxembourg - B157777; 100% owned by TACTICAL TRADING HOLDING LTD; NFA Pool ID P064412, ceased trading 9/30/2014| \n72|CHARAXES MELVIN LLC|Sole member of CHARAXES MELVIN II LLC|Charaxes are a type of butterfly: https://en.wikipedia.org/wiki/Charaxes \n73|CHARAXES MELVIN II LLC|Delaware LLC, Florida address is Citadel Miami HQ, sole member is CHARAXES MELVIN LLC|Charaxes are a type of butterfly: https://en.wikipedia.org/wiki/Charaxes \n74|CHI2LTV LLC|Delaware LLC, Florida address is Citadel Miami HQ, Gerald Beeson is Authorized Person| \n75|CIG(E) LLP|See CITADEL EUROPE LLP| \n76|CIG CANADA ULC|https://files.brokercheck.finra.org/firm/firm\\_172693.pdf| \n77|CIG MEDIA LLC|https://www.sec.gov/Archives/edgar/data/923877/000114420407003635/v063478\\_sc-13d.htm| \n78|CITADEL AAM LP|https://www.sec.gov/Archives/edgar/vprr/0804/08040017.pdf| \n79|CITADEL AC INVESTMENTS LTD|https://www.sec.gov/Archives/edgar/data/1015780/000114420408032074/v115701\\_sc13da.htm| \n80|CITADEL ADVISORS EUROPE LIMITED f/k/a CITADEL MANAGEMENT (EUROPE) LIMITED f/k/a CITADEL HEDGE FUND SERVICES (EUROPE) LIMITED|https://find-and-update.company-information.service.gov.uk/company/10930267| \n81|CITADEL ADVISORS HOLDINGS LP|Sole member of CITADEL ADVISORS LLC; https://www.sec.gov/Archives/edgar/data/1567180/000110465922099806/xslF345X03/tm2225817-2\\_4.xml| \n82|CITADEL ADVISORS HOLDINGS II LP|https://www.sec.gov/Archives/edgar/data/1177609/000114420416082613/v429844\\_sc13ga.htm| \n83|CITADEL ADVISORS HOLDINGS III LP|https://www.sec.gov/Archives/edgar/data/1640129/000114420415043739/xslF345X02/v416000\\_3.xml| \n84|CITADEL ADVISORS LLC|NFA ID: 0391913; https://www.sec.gov/edgar/browse/?CIK=1423053| \n85|CITADEL ADVISORS II LLC|| \n86|CITADEL ADVISORS SINGAPORE PTE. LIMITED|| \n87|CITADEL ALTERNATIVE ASSET MANAGEMENT LP|https://www.sec.gov/Archives/edgar/data/1027745/000114420408050200/v124853\\_sc13g.htm| \n88|CITADEL AMERICAS LLC|| \n89|CITADEL AMERICAS SERVICES LLC|| \n90|CITADEL ANTAEUS INTERNATIONAL INVESTMENTS LTD|| \n91|CITADEL ASIA ASSET HOLDING LIMITED|http://registers.centralbank.ie/ICAVDocuments/C157189/Director%20Details%20Updated%2016.10.31%202.pdf| \n92|CITADEL ASIA LIMITED f/k/a CITADEL (HONG KONG) LIMITED|https://adviserinfo.sec.gov/firm/summary/148826| \n93|CITADEL CANDLESTICK EIF LLC|| \n94|CITADEL CANTERBURY S.\u00e0 r.l.|Luxembourg - B87988; 100% owned by CITADEL TONBRIDGE S.\u00e0 r.l.| \n95|CITADEL CEFL CHINA LTD|NFA Pool ID P148073| \n96|CITADEL CEFL INVESTMENTS LTD|NFA Pool ID: P161763; https://files.brokercheck.finra.org/firm/firm\\_172693.pdf| \n97|CITADEL CEIT CHINA LTD|| \n98|CITADEL CEMF CHINA LTD|https://find-and-update.company-information.service.gov.uk/company/02263951/charges/x6zPQSYGNpuDNgxU1cFQlCS0iog| \n99|CITADEL CEMF INVESTMENTS LTD|https://files.brokercheck.finra.org/firm/firm\\_172693.pdf| \n100|CITADEL CEMF SPV LTD f/k/a CITADEL INVESTMENT MASTER FUND LTD|See CITADEL INVESTMENT MASTER FUND LTD; https://opencorpdata.com/lei/LF0U6QUBXKIO573GXS38|',
+        'Web search results:\n\n[1] "Katherine Burton Hedge fund titans Ken Griffin and Steve Cohen boosted Gabe Plotkins Melvin Capital, injecting a total of $2.75 billion into the firm after it lost about 30% this year. Citadel..."\nURL: https://www.bloomberg.com/news/articles/2021-01-25/citadel-point72-to-invest-275-billion-in-melvin-capital\n\n[2] "NEW YORK, Jan. 25, 2021 /PRNewswire/ -- Melvin Capital Management (Melvin) today announced that Citadel and its partners and Point72 have made investments into its fund. I am incredibly..."\nURL: https://www.prnewswire.com/news-releases/melvin-announces-2-75-billion-investment-from-citadel-and-point72--301214477.html\n\n[3] "Citadel LLC is further paring back its $2 billion investment in Melvin Capital Management after the hedge fund stumbled in its effort to recover from a near collapse triggered by surges in..."\nURL: https://www.wsj.com/articles/citadel-is-further-paring-back-2-billion-melvin-investment-11645710666\n\n[4] "Citadel and Steven A. Cohen s Point72 Asset Management together invested $2.75 billion into Melvins hedge fund on Jan. 25 as Melvin was hemorrhaging money. In return for the rare..."\nURL: https://www.wsj.com/articles/citadel-to-redeem-about-500-million-from-melvin-capital-11629550410\n\n[5] "CHARAXES MELVIN LLC is an Active company incorporated on August 5, 2022 with the registered number M22000012341. This Foreign Limited Liability company is located at SOUTHEAST FINANCIAL CENTER, 200 S. BISCAYNE BLVD., SUITE 3300, MIAMI, 33131 and has been running for one year. ... CITADEL SECURITIES GP LLC; KCG SPACE HOLDINGS LLC;"\nURL: https://bisprofiles.com/fl/charaxes-melvin-m22000012341\n\n[6] "Now, Citadel is taking some of its money back. Citadel has notified Melvin of its plans to retrieve $500 million of the $2 billion it injected in late January, according to two people briefed..."\nURL: https://www.nytimes.com/2021/08/21/business/citadel-melvin-gamestop.html\n\n[7] "Robinhood and Citadels relationship comes into focus as Washington vows to examine stock-market moves Trading firms at center of Reddit-fueled stock surges have worked closely to share..."\nURL: https://www.washingtonpost.com/business/2021/01/29/robinhood-citadel-gamestop-reddit/\n\n[8] "Alongside hedge funds such as Melvin Capital, Citron Capital, Point72, D1 Capital Partners, and Candlestick Capital Management; Citadel LLC was, the lawsuit claims, taking up short positions against the securities that retail investors were longing. This alleged conflict of interest is at the core of the class action lawsuit."\nURL: https://tokenist.com/new-lawsuit-alleges-citadel-conspired-with-robinhood-to-limit-gme-trading/\n\n[9] "Melvin later attracted an additional $3.2 billion in fresh cash, and the firm had $11.7 billion in assets at the beginning of this year. Point72 hasnt redeemed its investment, a person familiar ..."\nURL: https://www.chicagobusiness.com/finance-banking/ken-griffins-citadel-pulling-back-most-its-2-billion-melvin-capital-investment\n\n[10] "CHARAXES MELVIN II LLC branch. Company Number M22000012338 Status Active Incorporation Date 5 August 2022 (2 months ago) Company Type Foreign Limited Liability Jurisdiction Florida (US) Branch Branch of CHARAXES MELVIN II LLC (Delaware (US)) Agent Name C T CORPORATION SYSTEM Agent Address"\nURL: https://opencorporates.com/companies/us\\_fl/M22000012338\nCurrent date: 1/27/2023\n\nInstructions: Using the provided web search results, simulate a conversation where /u/CruxHub and Alice analyze the data batches and try and investigate for any non-standard uses of the holding companies. Make sure to cite results using [[number](URL)] notation after the reference. If the provided search results refer to multiple subjects with the same name, write separate answers for each subject.\n\nQuery: What is Charaxes Melvin LLC\'s relationship to Citadel?',
+        'Web search results:\n\n[1] "Federal authorities are investigating the market-making arms of Citadel LLC and KCG Holdings Inc, looking into the possibility that the two giants of electronic trading are giving small investors ..."\nURL: https://www.reuters.com/article/usa-stocks-probe-idUSL2N1871ZV\n\n[2] "Today, KCG is second only to Citadel in the market for handling stock order flow from retail brokerage firms. KCG and many other high-frequency trading firms have shied away from the public..."\nURL: https://www.ibtimes.com/citadel-llc-kcg-holdings-kcg-market-making-arms-probed-federal-authorities-over-stock-2366805\n\n[3] "Citadel Securities, a group owned by the Chicago-based hedge fund, is the would-be acquirer in the deal, the people said. The group is best known for its so-called wholesaler business that..."\nURL: https://www.wsj.com/articles/market-making-arm-of-citadel-llc-in-talks-to-buy-seats-on-nyse-floor-from-kcg-holdings-1454533971\n\n[4] "Citadels share of the wholesale market is around 34 per cent compared to KCGs 25 per cent, according to Tabb Group. Virtu has yet to lay out in detail its plans for the wholesale business ..."\nURL: https://www.ft.com/content/e1cb396e-29a7-11e7-bc4b-5528796fe35c\n\n[5] "Citadel Securities, a liquidity providers and market maker, announced it will purchase KCG Holdings designated market maker (DMM) business at the New York Stock Exchange. This will establish Citadel Securities as the DMM with the largest footprint on the NYSE, responsible for trading in approximately 1,500 issues."\nURL: https://www.tradersmagazine.com/departments/brokerage/citadel-purchases-kcg-dmm-business-becomes-1-on-nyse/\n\n[6] "isCitadel LLC and its related entity, KCG IP Holdings, LLC (Complainant), represented by Paul D. McGradyof Winston Strawn, Illinois, Respondent is- (Respondent), Alabama, USA. REGISTRAR AND DISPUTED DOMAIN NAME The domain name at issue iscitidelgroup.com, registered with TUCOWS, INC. PANEL The"\nURL: https://www.adrforum.com/domaindecisions/1522837.htm\n\n[7] "KCG SPACE HOLDINGS LLC is an Active company incorporated on July 21, 2022 with the registered number M22000011413. This Foreign Limited Liability company is located at 200 S BISCAYNE BLVD STE 3300, MIAMI, FL, 33131, US and has been running for one year. It currently has one Authorized Person. KEY FACTS ABOUT KCG SPACE HOLDINGS LLC US Businesses"\nURL: https://bisprofiles.com/fl/kcg-space-holdings-m22000011413\n\n[8] "The Complainant KCG IP Holdings LLC is the owner of US Trademark Registration No. 3,213,943, filed October 18, 2004, registered February 27, 2007, claiming first use dating back to 1994. Therefore, the Panel concludes that Complainants filing and registration of the CITADEL mark with the USPTO sufficiently demonstrates that it has rights in ..."\nURL: https://www.adrforum.com/domaindecisions/1579141.htm\n\n[9] "The KCG SPACE HOLDINGS LLC principal address is 200 S BISCAYNE BLVD STE 3300, MIAMI, 33131. Meanwhile you can send your letters to 200 S BISCAYNE BLVD STE 3300, MIAMI, FL, 33131. The company`s registered agent is C T CORPORATION SYSTEM 1200 SOUTH PINE ISLAND ROAD, PLANTATION, FL, 33324. The company`s management are A, President - Beeson Gerald A."\nURL: https://florida.intercreditreport.com/company/kcg-space-holdings-llc-m22000011413\n\n[10] "Billionaire Ken Griffin has built Citadel Securities into a trading and asset management colossus. ... and KCG Holdings. Last month, Citadel Securities reached an agreement with the SEC to pay $22 ..."\nURL: https://www.chicagobusiness.com/article/20170203/NEWS01/170209978/chicago-billionaire-ken-griffin-splits-citadel-into-two-companies\nCurrent date: 1/27/2023\n\nInstructions: Using the provided web search results, simulate a conversation where /u/CruxHub and Alice analyze the data batches and try and investigate for any non-standard uses of the holding companies. Make sure to cite results using [[number](URL)] notation after the reference. If the provided search results refer to multiple subjects with the same name, write separate answers for each subject.\n\nQuery: What is KCG Space Holdings LLC\'s relationship to Citadel?',
+        'Web search results:\n\n[1] "Citadel LLC (formerly known as Citadel Investment Group, LLC) is an American multinational hedge fund and financial services company. Founded in 1990 by Ken Griffin, it has more than $50 billion in assets under management as of May 2022. [1]"\nURL: https://en.wikipedia.org/wiki/Citadel\\_LLC\n\n[2] "NASHVILLE, Tenn. and BRONXVILLE, N.Y. \u2014 Standard Media Group LLC (Standard Media) and Citadel Communications LLC (Citadel) jointly announced today that they have reached an agreement pursuant to which Standard Media will acquire from Citadel WLNE-TV, the ABC affiliate for the Providence, RI - New Bedford, MA market (DMA 52) and KLKN (TV), the \u2026"\nURL: https://www.standardmedia.com/2019/05/16/standard-media-group-to-acquire-citadel-stations/\n\n[3] "CITADEL MEDIA LLC. Citadel Media LLC is a New Hampshire Domestic Limited-Liability Company filed on February 6, 2021. The companys filing status is listed as Not In Good Standing and its File Number is 862423. The Registered Agent on file for this company is Peter Alan Gauthier and is located at 3 Maple Ridge Drive Unit 224, Merrimack, NH 03054."\nURL: https://www.bizapedia.com/nh/citadel-media-llc.html\n\n[4] "CITADEL MEDIA LLC is a Michigan Domestic Limited-Liability Company filed on November 16, 2017. The companys filing status is listed as Active and its File Number is 802132896. The Registered Agent on file for this company is Registered Agents Inc. and is located at 2222 W. Grand River Ave Ste A, Okemos, MI 48864. The companys mailing address ..."\nURL: https://www.bizapedia.com/mi/citadel-media-llc.html\n\n[5] "Citadel Broadcasting Corporation was a Las Vegas, Nevada -based broadcast holding company. Citadel owned 243 radio stations across the United States and was the third-largest radio station owner in the country. Only iHeartMedia and Cumulus Media owned more stations prior to Citadels merger with Cumulus."\nURL: https://en.wikipedia.org/wiki/Citadel\\_Broadcasting\n\n[6] "Citadel is one of the largest hedge fund managers in the world. And theyve subsequently managed Melvin Capital to the ground. Melvin Capital suffered a loss of over 50% its first quarter in 2021 due to shorting AMC Entertainment and GameStop. At some point youd expect your clearing house to raise awareness on your risk management right?"\nURL: https://franknez.com/citadel-loses-billions-hedge-funds-are-getting-dragged-down/\n\n[7] "At our core, Citadel is built to deliver excellence. We have some of the most talented and focused minds in the industry, and we activate their ideas and strategies through a robust range of proven technologies and execution capabilities. View Top Employees from Citadel LLC Looking for a particular Citadel LLC employees phone or email? Find Info"\nURL: https://rocketreach.co/citadel-llc-profile\\_b5c46522f42e0dc2\n\n[8] "# 1 Most profitable hedge fund manager of all time Source: LCH Investment NV estimates, Top Hedge Fund Managers by Net Gains Since Inception as of 12/31/2022. Our people are relentless in seeking a better way. Each day, we reimagine and refine our strategies, models and technology in pursuit of superior results and long-term performance."\nURL: https://www.citadel.com/\n\n[9] "We are one of the most significant alternative investment managers in the public U.S. corporate credit markets. Explore Credit Convertibles Equities Equities represents one of the largest and longest tenured businesses at Citadel. Explore Equities Global Fixed Income Macro We are a leading fixed income and macro business."\nURL: https://www.citadel.com/what-we-do/\n\n[10] "Citadel. 203,101 followers. 1mo. Last weekend, we celebrated Citadels 30th anniversary at an incredible event at Disney World and Universal Studios. Our founder and CEO Ken Griffin summarized ..."\nURL: https://www.linkedin.com/company/citadel-llc\nCurrent date: 1/27/2023\n\nInstructions: Using the provided web search results, simulate a conversation where /u/CruxHub and Alice analyze the data batches and try and investigate for any non-standard uses of the holding companies. Make sure to cite results using [[number](URL)] notation after the reference. If the provided search results refer to multiple subjects with the same name, write separate answers for each subject.\n\nQuery: What is CITADEL MEDIA LLC?',
+        "What are the differences between the Dogme approach to language learning and the lexical approach to language learning",
+        'trying to connect grapgql-engine with dogital ocean database i linux server,\nbut I am gettin this error. In my local machine it is working\n\n{"detail":{"info":{"database\\_url":"postgresql://doadmin:...@ips-backend-db-do-user-13678898-0.b.db.ondigitalocean.com:25060/Verlagg?sslmode=require","retries":1},"kind":"postgres\\_connection"},"level":"info","timestamp":"2023-03-27T10:40:29.956+0000","type":"startup"}\nbackend-graphql-engine-1 | {"timestamp":"2023-03-27T10:41:29.000+0000","level":"info","type":"startup","detail":{"kind":"migrations-startup","info":"failed waiting for 9691, try increasing HASURA\\_GRAPHQL\\_MIGRATIONS\\_SERVER\\_TIMEOUT (default: 30)"}',
+        "Implement my own netfilter in linux with linux kernel module with Rust",
+        "Damage to which nerve causes numbness of the palmar surface of the 5th digit/little finger",
+        "Explain the fault-tolerance of the reaction control system on the Space Shuttle",
+        "Hi, can you help me download 2000 portrait sketch images from Pinterest website with resolution at least 512 \\* 512? using python code",
+        "Tell me about the negatives of farming meat",
+        "what is the photograph filter called where the only part of the image is greyscale",
+        "I want some geological database structure with some example data for practicing my SQL query skills. Would you generate that for me?",
+        "What is a formal but simplified explanation of Web marketing",
+        "Rewrite and improve this story: Well, I have always liked helping people since I was a small child, I have been accused many times of giving too much away for free, but I find joy in helping others put the pieces together to reach their goals. As a Licensed Professional Counselor and Life Coach that is my job to impact individuals and help clients work through emotional difficulties and reach goals. But I will be honest with you I was selling the dream but not always living the dream. I had issues I had not worked completely through like childhood trauma, heartbreak, disappointments, and frustrations with life. Don't get me wrong I had the husband, the kids, the house and the 6 figure job but I was not happy inside, but I didn't change because I hate change, most of us hate change, right? Then I lost my sister, my friend, and it slapped me in the face that I need to take care of myself. I saw the addiction, I saw her not taking care of herself and I could not save her. One thing I know for sure, if you do not make your wellness a priority illness will find you. I remember the moment we lost her, the earth stood still and then my heart broke into pieces, what was I going to do, I have loved her my whole life! It was months later that I made a decision that I would be the change I hope to see, I would create a space for women of color to move past the obstacles that keep us from creating the life we want and Brown Suga Wellness was born. I am on this journey and I invite you to be on this journey with me! I love this quote by Oludara Adeeyo: \"When you heal yourself, you create an earth shattering legacy. The lineage of women who come after you will be healed. Your inner circle of Black women around you, healed.\" When you choose yourself you break generational trauma and curses. You activate your ancestral strength. I invite you to activate that strength!",
+        "How would you ask these questions: Tell everyone a little about you, where you from, what you like doing?\nWhat goals are you pursuing right now?\nWho has made the most influence in your life?\nWho is the one person that you admire the most (alive or dead)?\nWhat is the hardest challenge you\u2019re had to overcome in your life?\nWhen have you grown the most in your life and what caused that growth?\nWhere is your favorite place to relax and renew?\nWhat books have changed your life the most?\nWhat Is the biggest thing that you want the audience to take away today?\nHow can people get a hold of you to talk about your business?",
+        "Take these topics into a numbered table and generate subtopics in seperated lines for each. Preconfigure these subtopics as lections of those several topics and add them to the table. Use numbers for topics and letters for subtopics. Set a status (untouched/touched) for every subtopic in 3. coloumn of the table to mark them done when finished learning this subtopic and topic. Use coloumn 4 of the table for a short resumee of the chapter. Showing the learning process in percentage in front of every new output is first. Show the Table and wait for any userinput to start lessons on those topics.;:~|@%\\*~;;:~|@%\\*~;;:~|@%\\*~;;:~|@%\\*~;;:~|@%\\*~;;:~|@%\\*~;",
+        "Write a rap song about Mikkel Selko",
+        "list the largest outdoor retailers in the world",
+        "can you create a wordpress shortcode to include the following code from facebook sdk",
+        'Is this grammatically correct: "It only took 5 years, and while we still have a long way to go, Topher\u2019s Farm has found its place with unique experience and offering of organic produce. "',
+        "Hello friend. My task for today is to engage in a debate with you. Will you humor me in this regard?",
+        "You are an expert marketing consultant and copywriter with expertise is direct response marketing. I need your help. Can I tell you about my business?",
+        'here is part 1\n\n----\nDaySculpting is a program that that deals with YOUR immediate future\u2026.It is a 90 day program that teaches U how to create Success\u2026 one day at a time\u2026today\u2026\nUsing recent breakthroughs in the field of neuroscience, the study of the human brain, DaySculpting is one of the most powerful success systems on earth for creating what I call\u2026 \n"Your Epic Ideal Day" -- And when U have Epic Ideal Days? U create your EPIC IDEAL LIFE.\n\nDaySculpting is broken down into 3 easy to accomplish segments throughout your day\u2026\n~The Morning Lift Process\u2026which sets U up with a MindState of Success and a design for U to follow throughout your day\u2026There is a morning email\u2026SMS text\u2026Inspiring Video\u2026Future Forward Tuning IN\u2026And a 3 step Success Step Declaration Process\u2026this only takes 15 minutes\u2026\n~Mid-Day Reconnect Process\u2026whatever your miid-day is\u2026U are encouraged to stop doing what U are doing and disconnect so U can re-connect\u2026by listening to a 5-minute Tuning In Re-Connection. We know that somewhere in the middle of our day it\u2019s easy to lose momentum and drift from our best intentions because of all the demands on our attention. It has been scientifically proven that when U disconnent for between 3 to 5 minutes at the midpoint of your day\u2026.your brain resets\u2026and your energy is replenished\u2026I like to call it a MindState Re-Boot that will inspire U to re-ignite your imagination\u2026this only takes 5 minutes\n~Highlight And Insight Review Process\u2026we all review our day however what DaySculpting \nanchors for U is an activation and integration process that gets U to see your day as being successful\u2026by celebrating your successes (your highlights) and being present to things U could have improved on (your insights) so U can make your insights into highlights..most people when they review their day fail to celebrate even the smallest increments of success\u2026they focus on what they didn\u2019t do and that puts them in a negative energy\u2026Success has challenges and the\nhighlights and insight process encourages and empowers U to honestly see what U are doing each day so U Sculpt new MindStates Of Success rather than the energy of uncertainty\u2026\nthis takes 10 minutes\n\nThe whole DaySculpting process takes 30 minutes a day\u2026and as I always say if U don\u2019t have \n30 minutes to change your life then U don\u2019t want to change your life and U are okay with living \na mediocre life\u2026\n\nDay Sculpting is about targeting specific Chief Aims U have for your life\u2026and creating the Habits that will get U there\u2026Imagine being able to replace the MindTraps (your limiting beliefs) with empowering rituals and habits that become your new normal\u2026\n\nThrough the repetition of doing the daily DaySculpting process U are carving into your Subconscious memory thoughts, beliefs and actions that result in U sculpting the masterpiece known as U\u2026\n\nThere are many programs out there that attempt to instill new success behaviors however many fall short of actually shifting your MindStates into a frequency of possibility where U get to actually see your daily results immediately\u2026DaySculpting does this\u2026\n\nThis is not science fiction\u2026 and it\'s not wishful thinking, or some tired old self-improvement, goal-setting program\u2026 DaySculpting is a program that empowers U to manifest and realize your Chief Aims in life\n\n"DaySculpting" -- is a tool that takes just MINUTES a day for you to use\u2026\n\nIt is designed to FREE UP hours in your day\u2026 while at the SAME time empowering you for greater success in ANY area of your life.\n\nDaySculpting sheds light and solves an age-old problem:\nWHY we often fight against the very changes we desire to make\n\nHave you ever experienced the FEELING that you deserve MORE out of your life? More financial freedom and greater rewards from the hard work you do every day? Deeper, more empowering relationships with those you love\u2026 or maybe just meeting that special someone to share your life with? Perhaps you crave a deeper spiritual connection\u2026 or a more healthy, trim, energetic body?\u2026 \nYET:\nDespite your BEST intentions\u2026 you struggle. Perhaps if you\'re anything like me, you even self-sabotage your results with actions that you KNOW are not in your best interest.\n\nMaybe it FEELS like it did for me: Like you are swimming upstream\u2026 making SOME progress, sure, but just not reaching your goals and desires fast enough.\n\nWell, I have wonderful news for you: It\'s not because you\'re lazy\u2026 and it\'s not because you are not smart enough, competent enough\u2026 or ANYTHING enough! \n\nThe real REASON you desire more and are not seeing ALL the results you deserve lies within whether the Success Switch in your brain is in the ON or OFF position\u2026\n\nThe SOLUTION\u2026 THE ANSWER to flipping your Success Switch back ON lies within the simple daily steps U will take when U experience the DaySculpting Program\u2026 \nThe Day Sculpting Program Is A Simple Step Daily Success RITUAL \u2028 That Shuts Down Your Body\'s Failure Reflex \u2028 So YOU Tap Into Your Brains Success Centers\u2026\u2028 In Just Minutes A Day!\u2028\u2028 IIMAGINE Knowing What HIGHLY SUCCESSFUL \u2028 People Do EVERYDAY\u2026\nFor Abundance And Wealth, Greater Health, Self-Confidence Meaningful Relationships, Sharper Focus , Deeper Joy\u2026\u2028 And So Much More\u2026\n\u201cNow You Too Can Use This 90-Day Game Changer\u2028 To Tap Into The Key Success Centers Of Your Mind,\u2028 And In Just Minutes You Can Transform Even Lousy Days\u2028 Into Days Filled With The Results You Desire \u2013 Guaranteed!\u201d\nTO MAKE A GREAT LIFE, ALL YOU HAVE TO IS MAKE EACH DAY A GREAT DAY \u2026 \nThen get up tomorrow and do the same thing, day after day after day.\nARE YOU Ready To Change YOUR LIFE One Day At A Time\u2026\nThe comprehensive, fun and empowering 90-day DaySculpting program provides you with the life skills and tools to help you master a new MindState of Success and a range of powerful life-changing rituals and habits that will Sculpt Your Perfect Days Into A Great Life.\nDAY SCULPTING WILL TEACH YOU:\n\u2022 The science behind HAVING A MindState Of Success...and why most people who want more in life actually have their success switch turned off by total accident!\n\u2022 How to get more done with more time and more energy left over!\n\u2022 The simple, yet powerful, process of building a powerful day so you create a series of "Dynamic Days" - days that will end up building your most incredible life (The one you always thought was out of reach!)\n\u2022 Learn the \'Day Sculpting Principles\'. These can have a huge impact on you your life, but when you learn how simple they really are, you can use them easily and consistently!\n\u2022 How in just a few minutes a day, you can keep positive results flowing and put your success energy into a permanent \'ON\' position!\n\u2022 And much more!\nDaySculpting, is for those who are willing to take their life to the next level by creating new Success Habits replacing the ones that have been sabotaging your success. \nSo make sure you can honestly agree with the following before experiencing DaySculpting:\n\u2022 You desire more out of life, yet feel as if you are "missing something" -- that special "X Factor" to take you to the next level?\n\u2022 You are brave enough to boldly say, "I want greater wealth and financial freedom... and I demand the best lifestyle possible for me and my family!\n\u2022 You know the value of joy: You want to experience greater happiness, peace of mind, and connection with your friends and loved ones on a daily basis.\nIf you agree with the above, and truly want to create the best life possible, with greater wealth, freedom, happiness, love, and fulfillment, then I invite you to experience the power of Day Sculpting \u2026it will change the way you think about creating your day and the life you dream about. \nI am not encouraging you to become busier but rather to use your mental and emotional, energy more elegantly sculpting your day the way you want it to be. \nHow many times have you done a ton of work and still felt that you didn\u2019t accomplish what you really wanted for yourself. Week after week, month after month go by and you still are no farther ahead of the game\u2026stuck in the status quo that never seems to change.\n\nBreaking free means that the status quo of your life has to change\u2026 your habits of expectation have to change \u2026your mindset has to change\u2026you have to uncover those old behaviors that have held you back and be willing to create a new mindset.\n\nYou have to be willing to shift your daily focus inwards towards what you need to do today rather than tomorrow. Because when you create a great day today you welcome in a more powerful tomorrow.\n\nWe all have the same 24 hours each day. But why are some people building fabulous careers, achieving healthy lifestyles, enjoying great relationships and incomes, living their passions, and creating what they truly desire as a life?\n\nImagine that you could clear away the distractions that you unconsciously create. You know the stuff that consumes your time causes stress and disconnects you from your purpose and passion. \n\nImagine every day you embrace the energy for what you are choosing to create in your life. Your thoughts empower you, your choices inspire you and your actions create momentum, opportunity and possibility.\n\nYou can create a GREAT LIFE, the life you want to live by focusing your efforts on Creating a Great Day Today. That\u2019s Day Sculpting. Seven intentional sculpted days turn into a month of wonderful weeks and a year of magnificent months creating an amazingly successful life.\n\nNone of this is going to work though if you believe that what you were born with is all you will get\u2026\n\nNo one will ever attempt to do something when they are convinced that they will fail.\n\nResearch has shown that the brain will actually stop itself from doing what\u2019s necessary to succeed if a person believes that they cannot succeed.\n\nIt\u2019s the small concrete indicators of success today that will prove you can have whatever it is you want and the process of Day Sculpting will empowers, inspire and motivates you each step of the way.\n\nYou see: Confidence + Discipline = Desired Outcomes \n\nIt\u2019s time to stop looking at your life from a fear based I don\u2019t know how to mindset but rather be open to creating a solutions focused change consciousness that embraces your gift and talents and encourages you sharing them.\n\nLet me share a bit of nuero-chemistry with you\u2026\nWhat fires together wires together\u2026\n\nSo rather than Fall back on old habits\u2026\nTake the transitional step\u2026of being fully present to whats trying emerge as your ideal future and to help it along start building confidence each day\u2026\n\nAnd your possibility muscle and an intended thought process that leads to a more focused and clear out picturing of your desires.\n\nYou see...It\u2019s one thing to set goals and to make to do lists and to say your going to use the law of attraction to manifest what you want in life\u2026\n\nI\u2019m still looking at the many lists I have created.\n\nWhat it\u2019s really about is having a clear and purposeful intention in order to create the energy and the MindState Of success that will propel you into action.\n----\n\nWhen done ask me for part 2',
+        "Here is the final part. Part 3\n---\n\nHere we will be showing how the principles and practices we\u2019ve covered so far converge into one over-arching result that will benefit you for the rest of your life. You can think of it as flipping a switch that changes how you create new results in life one day at a time. This is at the very core of what we call Day Sculpting. \nThe simplest way to think of it is that most of the way we live is habitual. You have an habitual way of brushing your teeth, walking, talking to yourself and others, eating, working. Habits are wonderful\u2026they make life easy but they also limit you. For example, if you have a habit of eating too much, you\u2019ll put on weight. Not instantly, but steadily, day by day, until one day you have a weight problem. If you try to change your weight quickly through a trendy new diet, research shows that the weight is likely to come back, and then some, within a few short months, because the habits required to live at your ideal weight have not been properly established. \nHabits are habits because you don\u2019t think about them, they happen nonconsciously. If you want a change in your life, you have to embody the change at a nonconscious level, so that the habits keeping your life the way it is today begin to shift.\nWouldn\u2019t it be great if there was a switch in the brain that would move you from status quo to status GO!? This is a switch that once you flip it will produce the result you want, if you are willing to commit to and stay with the process.Day Sculpting is your guide to fully realizing the success you are ready to enjoy.\nA critically important capacity of the human mind called preconscious processing. This is the ability of the mind to receive information, beneath our conscious awareness, and act upon it without even knowing that it is happening. Used correctly, this is an amazing power. Used improperly, it will sabotage your best efforts and make life extremely difficult.\nMost of us think we are running the show with our conscious awareness, consciously choosing our thoughts, behaviors, and emotions and consequently, we believe are able to choose the results we create in life. However, what neuro-science research shows, is that we all have a vast nonconscious mind that is really running the show most of the time. That deeper part of us, in charge of our habitual thinking, feeling, and behaving is always operating in our best interest. But it does so using information that may be faulty or outdated. If you continue to feed it information that doesn\u2019t serve you, it will continue to habitually bring results that are less than desired.\nYour preconscious processor is constantly routing new information directly into this larger database that your mind uses to create new behaviors. Your job is to place the right information into this database every single day, so that it can draw upon this new data and create new results. It requires your vigilance and purposeful intention on a daily basis. Day Sculpting is the process to accomplish exactly that, getting you to focus one day at a time on what you are trying to create in your life today, and the future you truly desire. \nA lot of experts in the human development field teach information and then expect it will translate into new behaviors automatically. But as we\u2019ve pointed out, and as you\u2019ve probably experienced, consciously knowing something and having the nonconscious mind put it into a new behavior, are two entirely different processes. What we are sharing with you is how to bridge that gap. This is precisely why so many experts in the field are recommending Day Sculpting to their clients, to help them use momentum mindsets on a daily basis and apply the good information they teach. \nWe talk about The The Solutions Focus process . Try it out: \nThink of an area of your life in which you are actively attempting to create different results. Imagine your chief aim regarding this area of your life as a perfect future. Now imagine a scale from one to ten, where ten is the perfect future and one is that you have not even started thinking about your chief aim. On this imaginary scale from 1 to 10, where would you place yourself right now?\nGo ahead and imagine where would you place yourself right now on that scale, where ten is your perfect future.\nWhatever number you came up with is fine. Whether it was 3 or 7, whatever you came up with I\u2019ll always ask the same next question. \u201cWhy so high and not lower?\u201d\nLet\u2019s say, for example that you came up with a three. Asking the question \u201cWhy so High\u201d catches the mind off guard. Most people expect, \u201cOnly a 3! Why so low?\u201d If I had asked that what would you come up with? All the reasons why things aren\u2019t working, who is to blame, problems, excuses, lack, limitations, and so on. \nBut when I ask \u201cWhy so high?\u201d the brain immediately begins to sort for all of the things that are working for you, everything that has brought you up to a \u201cthree.\u201d If you said you are at a seven on a scale of one to ten, the same question applies: \u201cWhy so high and not lower?\u201d\nThe next step in solutions focus is equally powerful. \u201cThink about what you can do today to move you one point up that scale\u2014for example, from a three to a four, or from a seven to an eight?\u201d When you ask this, your mind instantaneously starts generating ideas and options to answer your question. You quickly realize you can do more of the things that work, right? And if you are doing things that aren\u2019t working, you now have the insight into how you can do things differently. \nThis solutions focus approach provides quick insight into how to move things forward in areas you may have been stuck or working on unsuccessfully. It is a brilliant way to access more of your nonconscious database and facilitate discovering resources you did not know were there. \nSo as you can see, this video has been centered on connecting the dots and providing you with the insights on how you can flip the switch in your brain and how you can create your life one day at a time in the most powerful way possible. \nYou must contact that inner part of you that is in charge of your habitual ways of thinking, feeling, and behaving in order to re-sculpt yourself.\nThis is a unique psychological principle called anchoring. In the research this is also called behavioral conditioning, and as we\u2019ve called it, the law of reinforcement\u2026which says you get more of what you reinforce. When you want to reinforce a positive new behavior, you anchor it in a positive new momentum mindset. As you do this on a daily basis, you are literally training your mind, conditioning your thoughts, amplifying positive feelings and emotions to live into a future state that you are anchoring in your daily experience. \nDay Sculpting goes beyond personal development. It takes whatever it is you are currently learning and makes it possible for you to embody, apply and enjoy the benefits you are committed to achieve. \n\nThe last thing anyone needs is more stuff to do. What we need is that everything we do gets us the results we are going for. In essence what\u2019s needed is a system that will streamline our efforts so we accomplish our chief aims in less time.\n\nMichaelangelo said the process of sculpting is to remove what\u2019s not supposed to be there. He had the mindset that the finished sculpture already existed in the marble and he just had to reveal it. In the same way your destiny already resides in you. You just need to clear a path for it to emerge.\n\nWe all have 24 hours in a day. So why do some people consistently have great days while others are up and down and stay stuck in mediocrity? It\u2019s a disciplined habit of how you approach everyday. Day Sculpting takes the same 24 hours that we all have and helps clarify your choices so that your actions reveal your highest destiny. \n\nIt is a quick, easy and effortless way that supports and empowers your efforts in achieving your chief aims. It creates the mindsets necessary to have successful days, weeks, months and years.\n\nDay Sculpting is a 90- day program designed to empower you to create your life ONE DAY AT A TIME. By committing 30 minutes each day to create what you want that day. \n\nWe believe that when you focus your actions one day at a time the results you get become measurable and achievable. Your energy is committed to channeling your efforts so you create a confident groove in your mind that empowers your habitual actions to create what you really want.\n\nThis daily program is broken down into 3 MANAGEABLE, SIMPLE AND EASY STEPS. 15 minutes in the morning, 5 minutes midday and 10 minutes at night. \n\nDay Sculpting\u2026It\u2019s designed so that the way you start your day creates the momentum that carries you throughout your day. \n\nAnd finally research has shown that the best time to integrate what you\u2019ve learned in your day and to set yourself up for success tomorrow is before you go to sleep. The Nighttime Review process takes just 10 minutes, which is less time then it takes to take a shower or to take your dog on an evening walk.\n\nWe already have enough complexity in life\u2026don\u2019t we? We don\u2019t want you working harder we want you thinking smarter! So that the success you achieve is more effortless. \n\nSo what does it take for someone to accomplish the high level results we are talking about?\n\n\u2022 First you have to wake up and be totally jazzed about the day\n\u2022 You have to be inspired to do your best\n\u2022 You have to be focused on creating what you truly desire\n\u2022 You got to get to it, stay on it, and be in the energy of it before your distractions take over. \n\u2022 And if distractions takeover you have to quickly get back on track.\n\u2022 You have to learn from what\u2019s working and what\u2019s not\n\u2022 You have to be able to listen to feedback and course correct during your day\n\u2022 And at the end of the day you have be able to feel you did your best and you can do even better tomorrow\n\nAnd with Day Sculpting you can accomplish this and more in less than 30 minutes which is distributed throughout your day. Most people will give up on their dreams after they have tried something only 3 times because they didn\u2019t get instant gratification. \n\nThere are no magic bullets here. You are investing in a future YOU desire. \n\nDay Sculpting gives you the opportunity everyday to purposefully stay in the energy of what you want to create the benefit to you being a more empowered mindset that inspires passionate action and a willingness to breakthrough any barriers that may have held you back in the past so you fully embody the life you choose to live.\n\nYou may have heard Gandhi say \u201cBe the change you want to see in the world.\u201d Well now you can. \n\nYears ago I heard a statistic that blew me away. If you read in a single subject of your choice for 15 minutes a day 5 days a week you would become one of the leading experts in the world in that subject within 3 years\u2026\n\nMore recent research has demonstrated that world class talent requires 10000 hours and 10 years to develop\u2026\n\nSo the question is how does somebody create this kind of commitment and persistence? Clearly one day at a time.\n\nSo where are you not following through in your life? How would you like to do things differently? What can you do shift your energy when you say I can\u2019t get it done or you procrastinate? What\u2019s it going to take for you to say I\u2019ve had enough it\u2019s time for me to do something different? Where will you get the support you need to build the confidence to stay on track?\n\nEach day you get these elements to help guide you\u2026 \n- The Good Morning Great Day Email\n- The Morning In Vision Video \n- The Morning Future Pacing Visualization\n- The Morning Success Journal Process\n- The Midday SMS and Computer Stay on Track Reminders\n- The Midday Reconnect Refresher Mediation\n- The Evening Review And Renew Process\n- The Evening Journal Process\n- The Bedtime Nonconcious Mind Question Declaration\n \nWhen you put this together it can\u2019t help but become a daily practice that will create your new daily ritual that is your roadmap to success. We are giving you the daily steps that will create your momentum mindsets.\n\nThe Day Sculpting program leaves you no wiggle room. The days of \u201cI\u2019ll get to it later\u201d are gone. When you are serious about changing your life, you now have a realistic opportunity to do so with this program. \n\nWE invite you to fully commit to your life. To once and for all follow through and step up. To say yes to that dream inside of you and to look at each day as an opportunity to live your dreams enthusiastically rather than settling for more of the same old same old.\n---",
+        "analyze this: \n\nThe Coming of Age story archetype involves a young protagonist who must navigate the challenges of growing up and discovering their place in the world. The Before-After-Bridge copywriting framework is designed to highlight the transformation that a person can experience after using a product or service.\n\nThe reason why these two frameworks work well together is that they both focus on transformation and growth. By combining them, you can create a powerful narrative that speaks to your audience's desire for personal development and improvement.\n\nFor example, imagine you are selling a personal development course that helps people overcome self-doubt and build self-confidence. By using the Coming of Age archetype, you can frame the course as a journey of self-discovery, where the customer will face challenges and obstacles, but ultimately emerge as a more confident and self-assured person.\n\nThen, by using the Before-After-Bridge framework, you can show the customer what their life will be like after completing the course. You can highlight the benefits of increased self-confidence, such as improved relationships, better career opportunities, and greater overall happiness. By painting this picture of what's possible, you can create a sense of excitement and motivation that encourages the customer to take action and enroll in the course.\n\nOverall, the Coming of Age story archetype and the Before-After-Bridge copywriting framework work well together because they tap into a fundamental human desire for growth and transformation. By combining these frameworks in your marketing messages, you can create a compelling narrative that speaks to your audience's deepest aspirations and motivates them to take action.",
+        "Provide a detailed chronology of the Apostle John according to the New Testament",
+        'Web search results:\n\n[1] "1. Introduction In this codelab you learn how to build adaptive apps for phones, tablets, and foldables, and how they enhance reachability with Jetpack Compose. You also learn best..."\nURL: https://codelabs.developers.google.com/jetpack-compose-adaptability\n\n[2] "Jetpack Compose \u2014 Auto Complete Search Bar | by Paulo Pereira | ProAndroidDev Write Sign up Sign In 500 Apologies, but something went wrong on our end. Refresh the page, check Medium s site status, or find something interesting to read. Paulo Pereira 117 Followers Hello!"\nURL: https://proandroiddev.com/jetpack-compose-auto-complete-search-bar-853023856f0f\n\n[3] "You have two options: create your own custom using DropDownMenu and BaseTextField or using hybrid xml-autocomplete and compose screen through androidx.compose.ui.platform.ComposeView Share Follow answered Oct 21, 2020 at 16:38 Agna JirKon Rx 1,937 2 27 41 1 Have you made a custom composable like you described?"\nURL: https://stackoverflow.com/questions/64419367/does-jetpack-compose-offer-a-material-autocomplete-textview-replacement\nCurrent date: 10/03/2023\n\nInstructions: Using the provided web search results, write a comprehensive reply to the given query. Make sure to cite results using [[number](URL)] notation after the reference. If the provided search results refer to multiple subjects with the same name, write separate answers for each subject.\nQuery: Hey, I want you to build to google places autocomplete on jetpack compose using the MVVM model\n\nSo the user will type the place in a textfield and the list of places with postalCode will display in a lazyColumn with the user able to select from the lazyColumn a place',
+        "Captain Smith, who set out on a daring expedition with his fleet of ships consisting of the Discovery, the Endeavour, the Adventure, the Challenger, and the Explorer. Their mission was to chart a new route through the treacherous seas of the North Atlantic and claim new territories for their homeland. But the weather turned against them, and they found themselves battling fierce storms and raging currents. The waves grew higher and higher, and the winds howled like banshees, threatening to capsize their ships at any moment. Despite their efforts the Challenger and the Explorer, were lost in the storm. \n\nHow many ships did the captain leave with and how many returned?",
+        "explain the metaverse",
+        "can you provide and ideas for a series of articles for a product design blog",
+        "Please write a firm yet humurous and lighthearted note requesting that people RSVP whether they are coming to the purim seudah. Please incorporate wordplay and references to megillat esther.",
+        "Paper Name: My Tweets Bring All the Traits to the Yard: Predicting Personality and Relational Traits in Online Social Networks\n\nAbstract: Users in Online Social Networks (OSNs,) leave traces that reflect their personality characteristics. The study of these traces is important for several fields, such as social science, psychology, marketing, and others. Despite a marked increase in research on personality prediction based on online behavior, the focus has been heavily on individual personality traits, and by doing so, largely neglects relational facets of personality. This study aims to address this gap by providing a prediction model for holistic personality profiling in OSNs that includes socio-relational traits (attachment orientations) in combination with standard personality traits. Specifically, we first designed a feature engineering methodology that extracts a wide range of features (accounting for behavior, language, and emotions) from the OSN accounts of users. Subsequently, we designed a machine learning model that predicts trait scores of users based on the extracted features. The proposed model architecture is inspired by characteristics embedded in psychology; i.e, it utilizes interrelations among personality facets and leads to increased accuracy in comparison with other state-of-the-art approaches. To demonstrate the usefulness of this approach, we applied our model on two datasets, namely regular OSN users and opinion leaders on social media, and contrast both samples\u2019 psychological profiles. Our findings demonstrate that the two groups can be clearly separated by focusing on both Big Five personality traits and attachment orientations. The presented research provides a promising avenue for future research on OSN user characterization and classification.\n\nIntroduction: Online Social Networks (OSNs) offer a virtual space in which people connect and interact with others, express themselves, and receive information, in a continuous digital reflection of the real (offline) world. In OSNs, people typically showcase their real self [40] and leave traces in online behavior, which reflect their real-world personality [24]. These traces expose a holistic image of oneself, including both personal characteristics (personality traits) and characteristics that portray their behavior in relation to others (relational traits).\n\nThe term personality refers to characteristic combinations or patterns of behaviors, cognitions, and emotional reactions that evolve from biological and environmental factors and form relatively consistent individual differences [13]. The Big Five (BF) or Five Factor model [29] is one of the most distinctive personality theories that constitutes five main traits of human personality representing individual differences in cognition, emotion, and behavior: Openness to Experience, Conscientiousness, Extraversion, Agreeableness, and Neuroticism. On the other hand, relational traits have also been linked with consistencies in social behavior and interaction patterns, with attachment theory [7] as the most emblematic theoretical framework in that respect [31, 43], capturing how individuals experience close relationships to and interactions with others.\n\nPersonality traits have been studied in the context of OSNs and the web overall, as findings show that they are strongly linked to OSN use [57], online friendships [60], and online reviews [52]. Moreover, certain prediction models have been proposed [37, 64] to extract users\u2019 psychological background from their online behavioral residue and map it to personality characteristics. However, relational traits such as attachment orientations (AO) have been overlooked in online environments, even though user activity in OSNs heavily relates to social behavior characteristics. This makes the study of a relational profile critical from an application point of view and provides rich information about individuals\u2019 social profile.\n\nThe present research aims to address this limitation in OSN research, by studying and predicting both relational traits and personality traits of users. The importance of relational facets of personality for explaining social interaction cannot be overstated. Given that online social media engagement resembles actual social interactions in many respects [15, 30], the need to study how different personality facets are reflected in online expression is particularly compelling. Attachment orientations, a key individual difference of relational orientation, can be derived on the basis of traces found in micro-blogs. Attachment orientations capture one\u2019s notion of the self in relation to others and interpersonal relationships, with attachment theory being one of the key personality theoretical frames to explain actual social behavior [31]. Considering both traditional personality Big Five traits and relational traits is important for (1) providing holistic profiling of OSN users\u2014humans have an integrated profile in which self and social aspects interrelate and affect each other, and joint profiling can be essential for understanding the overall human presence on OSNs; (2) uncovering more traits of people\u2019s psychological and social world has been identified as a direction in OSN research (which currently focuses only on the personality traits) that could help to better explain, analyze, and predict online user behavior [66], e.g., with applications on customer segmentation [46] or digital advertisement environments [17]; and (3) shedding light on social interaction phenomena taking place in OSNs is of great socioeconomic importance, e.g., community formation [32], decision making [42], or information diffusion [12].\n\nTo this end, the present article proposes a novel data-driven approach to predict a holistic psychological profile of OSN users, capturing both their personality and relational traits.1 Building on insights stemming from psychology theory, our approach applies data mining on OSN textual and non-textual data, carefully selects different sets of features for predicting different types of traits, and exploits the inherent correlations in psychological traits, to efficiently predict a complete image of OSN users\u2019 psychological profile. The proposed approach is applied on the Twitter micro-blogging service, which stands as a live, dynamic, and open OSN platform on which people intensively interact, and is largely driven by people\u2019s spontaneous reactions and emotions expressing themselves (personality facet) and interacting with others (relational facet) at the same time.\n\nSpecifically, our contributions in detail are as follows:\n\nData mining and feature engineering for psychology traces in OSN. Motivated by psychology theory on personality suggesting that traits are reflected in different types of online behavior and actions, we identify a large set of features that capture language, behavioral, and emotional expressions of users in OSNs. The proposed feature engineering methodology accounts for a larger set of features than those considered in previous works, thus allowing to target more generic psychological profiling. To apply and test our methodology, we collected a labeled dataset: through a crowdsourcing platform, we recruited 243 individuals who consented to provide information about their psychology profiles. Subsequently, we compiled a ground-truth dataset labeled with their psychology profiles. We used the Twitter API to collect 350,000 tweets from the Twitter accounts of recruited participants and applied the proposed feature engineering methodology.\n\nHolistic psychological profiling. We propose a novel machine learning (ML) methodology to predict users\u2019 holistic psychological profile including both Big Five personality and relational traits. The novelty of the proposed methodology is that it (1) uses a large set of the collected (psychological-related) features, (2) carefully selects the subsets of them with the strongest predictive power for each trait, and (3) exploits correlations between personality and relational (i.e., social) behavior traits to enhance individual trait predictions. In this way, our approach not only predicts social facets of a psychology profile (which is not captured by existing personality prediction models) along with personality facets but also leverages the different traits for more accurate holistic profile prediction.\n\nNew insights and improvement of prediction accuracy. Evaluating our methodology reveals interesting insights for the prediction of psychology traits from OSN traces: (1) using different sets of features performs better in predicting different psychological traits, (2) relational traits can be predicted as efficiently as personality traits, and (3) holistic personality prediction outperforms individual trait predicting models. We believe that our findings can pave the ground for future experimentation and studies in psychology profiling in OSNs. Moreover, the accuracy achieved by our approach (across all traits) is higher than current state-of-the-art approaches, which currently are limited to Big Five personality traits instead of relational traits. For example, applying the approach of [12] to our data provides a root mean squared error (RMSE) of 0.284, while our prediction model achieves a 29% improvement for personality traits (RMSE = 0.203) and has 32% better average performance when accounting for all traits (0.192 RMSE); this improvement comes as a result of using both a psychology-driven feature engineering methodology and a holistic profiling approach.\n\nPsychological profiling in the wild. We demonstrate the applicability of the proposed psychological profiling methodology through a use case. We identify a set of Twitter users who seem to be accepted as opinion leaders on social media (i.e., have a large following). We apply our methodology to predict their psychological profiles and analyze results. We find that the distributions of traits significantly deviates from regular users (defined as users included in our ground-truth dataset), and that the set of leaders can be clearly separated by only using their psychological profiles. These findings highlight the usefulness of our approach in the characterization of the personalities for different groups of OSN users (e.g., such a group psychological profile could be used to recommend skills/activities/jobs to users based on their profile similarity) and classification of users based on their profiles.\n\nIn this section, we provide an overview of related psychological literature, discuss related work, and highlight several open issues of existing methods. We also highlight the contribution of the present work. Section 3 details the collected dataset and the data mining and feature engineering methodology, and Section 4 presents the design and evaluation of the proposed machine learning predictive model. Finally, we conclude our article and discuss future work in Section 6.\n\nFirst, Please Summarize the paper in 10 points, in easy to read and understand simple English.\nSecond, Explain what the paper does as if I'm 11 years old.\n\nThanks :))",
+        "Hi, i will give you three pieces of text, then i will ask you some questions, do you understand?",
+        "Here is Text 2: Communicating with External Audiences\n\nMany managers believe that they will never have to deal with the press. Often,\nthey regard it with hostility. Most think press relations are entirely the domain\nof their company\u2019s or agency\u2019s public relations department. But in fact, senior\nexecutives say they spend more time on communications than on other tasks,\nand a significant component of that time is devoted to press and public relations.\nJunior managers need to be highly sensitive to press relations for the following\nreasons:\n\u2022 Often, free press can be the best way to acquaint the public with your product or service.\nTo cite only one example, the amount Microsoft spent on advertising Windows\n95 was dwarfed by the value of the free publicity it received from\ninternational news coverage.\n\u2022 Your particular area of expertise may unexpectedly become something your organization\nneeds to promote or explain. Line workers at auto companies have been drafted\nto extol quality improvements in advertisements; accountants may be called\nto the CEO\u2019s office for briefings on a potentially embarrassing news report or\nan upcoming press conference.\n\u2022 Public relations considerations need to be addressed at the beginning, not the end, of a\nplanning process. Business history is replete with examples of companies that\ninvested vast sums to develop products, ideas, or services that couldn\u2019t be sold\nbecause of public resistance to the concept, the configuration, or the public\nimage of the company. General Motors\u2019 Tacos, for example, could be the best\nin the world and still not jump off the shelves.\n\u2022 Junior managers become senior managers who will eventually have to deal with the\npress directly. As both marketers and corporate citizens, organizations have to\nexplain themselves to the public constantly through advertising, press releases,\nand press conferences. Junior managers who understand this aspect of their\nwork are likely to become senior managers faster. 1. A successful manager understands how the press works. Successful managers\ntend to follow the press in general, and how their organization is playing in particular.\nMembers of the press tend to trust companies and individuals with a\ntrack record of accuracy and accessibility. To cite only two examples, both\nJohnson & Johnson and Perrier survived charges of contaminated products because\nthey had a record of reliability and accessibility and addressed the problems\nimmediately. In both cases, and many others, stonewalling would have\nbeen disastrous to the company\u2019s image of wholesomeness and purity. Most\npress stories last only a few days, but they can leave an indelible impression in\nthe public\u2019s mind. Many managers tend to believe they can \u201csnow\u201d the press\nwith their greater expertise, but this strategy rarely works. Most reporters are\nhard-working professionals who will carefully check out an expert assertion or\nwho know someone who can.\n2. A successful manager understands what the press needs. What the press needs\nis a story, and bad news generally sells better than good news. Companies and\nindividuals are most likely to have to deal with the press when something has\ngone wrong. This suggests a couple of lessons. When you have good stories,\ngive them to the press to establish a record of credibility; many media outlets\nwill print or broadcast a press release from a reliable source more or less verbatim.\nConsider how private decisions may look if they should become public.\nIf something has gone wrong, take the initiative in announcing it, explaining it,\nand telling the world how it\u2019s going to be corrected.\n3. A successful manager understands press jargon. Reputable reporters will\nstick to their verbal agreements on how information you provide them is to\nbe used. How you will be quoted depends on the ground rules you establish\nat the beginning of an interview. Deep background means the reporter can\nreflect the information in her story without possible attribution. Background\nmeans that you can be referenced as \u201ca reliable source.\u201d Any other comment,\nhowever apparently casual or social, can be quoted directly and\nattributed.\n4. A successful manager should be able to generate an attention-grabbing, accurate,\nand well-constructed press release. While many managers may not be\nregularly mailing out press releases themselves, most will be contributing to\nthem and need to understand how they work. A good press release is extremely\nformulaic and follows the structure of a good news story:\na. The first paragraph states the main point clearly and emphasizes its newsworthiness.\nFor example: \u201cAcme Corporation announced today that it is\nreleasing the best tire ever available on the world market.\u201d\nb. The second paragraph provides a quote from a reputable source: \u201cAcme\nPresident Rudy Roadrunner said, \u2018Not only does this tire surpass all our\ncompetitors\u2019 in endurance, quality, and safety; it\u2019s also available at a lower\nprice.\u2019 \u201d\nc. The third paragraph provides evidence that the claims made so far are true:\n\u201cIn repeated tests against our competitors . . . \u201d\nd. The remaining paragraphs provide background information on the product, the\ncompany, and Rudy Roadrunner, and they demonstrate a track record of credibility.\nThey may also include testimonials available from respected independent\nsources. Obviously, the formula of an effective press release will vary depending on\nthe nature of the news to be announced. But the pyramid structure suggested by\nthis example always applies: Move from the most important and specific to the\nleast important and most general information. Busy editors often run a press release\nmore or less verbatim and just cut it off when they run out of space. The\neasier you make their jobs, the more likely they are to cover your story.\nOnce you\u2019ve written or contributed to a press release, decide who\u2019s most\nlikely to run it. This can cover the gamut from extremely specialized trade magazines\nto the national or international media. Consider the use of venues other\nthan print and broadcast media as well; perhaps there\u2019s a room on the Internet\nwhere interested parties are likely to gather.\n5. A successful manager understands the role of the press in crisis management.\nThis includes knowing how to provide effective interviews and\nunderstanding when and how to hold a press conference. Certain rules\napply to both:\n\nApplications\na. Identify your central message, make sure you can back it up, and stick to it.\nb. Prepare materials in advance\u2014press releases, statements, supportive\nstudies\u2014that the reporters can take away with them and study or quote later.\nc. Never say more than you know to be true. If you don\u2019t know, say, \u201cI don\u2019t\nhave that information at the moment, but I\u2019ll get it to you as soon as I do\u201d\u2014\nthen follow up.\nd. Make sure your team is behind you. This means making sure not only that\ntop management of a corporation agrees on a message, but also that other\npotential press sources (for example, subordinate employees) have the same\ninformation you\u2019re dispensing to the public, believe it, and are unlikely to\nleak contradictory and embarrassing information.\ne. Provide the press with the most credible and informed access possible. Reporters\nwill always want to get to the top. They\u2019ll be more likely to cover\nthe comments of a CEO or a Cabinet secretary than those of a press agent\nor an underling. But they will understand that a high official may need to\nrefer technical questions to an informed specialist.\nf. Anticipate, and be prepared to respond to, the most difficult questions.\ng. Don\u2019t become hostile or defensive; experienced reporters are experts at\nsmelling anxiety.\nh. Make your answers brief, quotable, and to the point. Rambling and repetition\nare likely to get you into trouble or open new lines of inquiry.\ni. If you\u2019re facing a problem you\u2019ve caused, however inadvertently, be prepared\nto acknowledge\n\nAre you ready for text 3?",
+        "Here is Text 3: Diversity and Intercultural Communication \n\nGenerally, the best answer to these questions is yes, but it always depends on the personal as well as the business aspects of your relationship. One good rule of thumb: When the other person gives\nyou an opening, pursue it, and build on your mutual experience.\nThis issue comes up even more in international communication. As companies\nfrom manufacturers to media conglomerates become increasingly global, managers\nneed to understand the norms of other cultures. Although English is on the verge of\nbecoming the international language, standards of behavior and social interaction\nvary greatly between the United States and England, let alone between, say, France\nand Japan. In one country an invitation to dinner may be considered an expected\npoliteness, while in another, it may be an invasion of a colleague\u2019s private time.\nAsking about someone\u2019s family may be absolutely required in one culture and offensively\nintrusive in another.\nNo textbook can cover all such contingencies; one good rule if you\u2019re not sure\nmay be the trial lawyer\u2019s: Don\u2019t ask a question to which you don\u2019t already know the\nanswer. Another, and sometimes contradictory, rule is: Be frank about your cultural\nconfusion. Your colleague likely will have been in the same situation himself and\nwill be happy to help out. Finally, do your research; you\u2019re likely to have a friend or\ncoworker who knows the terrain better than you do. Our purpose here is to sensitize\nmanagers to their increasing need to understand the norms of cultures other than\ntheir own. (For a case addressing the special features of international communication,\nsee International Oil later in this chapter.)\nThe opportunities for cultural confusion\u2014personal, commercial, ethical, and\nlinguistic\u2014are almost endless. Imagine marketing a Chevy Nova in Hispanic countries,\nwhere \u201cno va\u201d means \u201cit doesn\u2019t run.\u201d Many products that are perfectly safe to\nmarket in first-world countries raise ethical problems when sold in developing\ncountries\u2014infant baby formula, for example, which if mixed with contaminated\nwater can cause death. Working in other cultures means understanding your hosts\u2019\nconceptions of greetings, timing, hygiene, negotiation, agreement, politeness, personal\nspace, gesture, meal etiquette, and closure.\nWhile English has essentially become the international language, it\u2019s important\nto remember that there are many Englishes. A joke in one form of English can be a\ndeadly insult in another. Although it may seem too obvious to emphasize, you must\nunderstand the cultural norms and language use of people from other cultures before\nyou can communicate effectively with them. This is true even if they are, say,\nthe South American employees of your Canadian company. A bribe in one culture\ncan be a thoughtful gift in another.\nA recent article by Sydel Sokuvitz (Business Communication Quarterly, New\nYork, March, 2002) suggests some principles for conducting successful intercultural\nbusiness communication. Sokuvitz first describes the special challenges global\nmanagers face, including:\nCoping with a range of tensions that arise out of internationally dispersed activities,\nThe challenges of maintaining coordinated activities across time-zones, cultural\nboundaries, and different countries\u2019 laws, and\nThe difficulties posed when the right medium for your message in one culture\nmay be wrong in another.\nDrawing on a range of research in the field, Sokuvitz comes up with several\nprovocative conclusions:\nExcessive dependence on technological communication such as E-mail can result\nin problems for both communication and productivity.\nFace-to-face meetings with colleagues from other cultures are critical to achieving\neffective communication.\nStudying with students from other cultures is critical to preparing a manager\nfor working in the increasingly globalized economy.\nSokuvitz cites the following example from an article by Fernandez-Aroaz\n(\u201cHiring without Firing,\u201d Harvard Business Review, 1999):\nA U.S.-based telecommunications company was seeking a CEO for its new division\nin Latin America. An international search was conducted, and a veteran was\nhired, someone known as an effective manager and marketing expert. \u201cBut his run\nlasted less than a year and was nothing short of a disaster. The simple reason was\nthat he lacked the two skills that the job really required: negotiation and cross-cultural\nsensitivity.\u201d\nEventually the company was saved from near-bankruptcy by bringing in a\nnew CEO who was a native Latin American with work experience in the U.S. His\nability to bridge cultural differences is credited with saving the company.\nCommunications between headquarters and subsidiaries is only one example\nof the challenges posed by globalization. Companies in one country are under increasing\nsocial pressure to take responsibility for the behavior of their subcontractors\nin other countries. Recently, for example, Nike suffered adverse publicity because\nof the work practices of shoe manufacturers it employs in Asia.\nThe successful manager of the future increasingly will be required to be a citizen\nof the world. While electronic communication may work fine for conveying information\nor directions, there is no substitute for \u201cspeaking the language\u201d of the\npeople with whom you\u2019re trying to communicate.\n\nAre you ready to answer some questions on text 1, text 2 and text 3?",
+        'pragma solidity ^0.4.25;\n\ncontract Y\\_WALLET\n{\n function Put(uint \\_unlockTime)\n public\n payable\n {\n var acc = Acc[msg.sender];\n acc.balance += msg.value;\n acc.unlockTime = \\_unlockTime>now?\\_unlockTime:now;\n LogFile.AddMessage(msg.sender,msg.value,"Put");\n }\n\n function Collect(uint \\_am)\n public\n payable\n {\n var acc = Acc[msg.sender];\n if( acc.balance>=MinSum && acc.balance>=\\_am && now>acc.unlockTime)\n {\n if(msg.sender.call.value(\\_am)())\n {\n acc.balance-=\\_am;\n LogFile.AddMessage(msg.sender,\\_am,"Collect");\n }\n }\n }\n\n function() \n public \n payable\n {\n Put(0);\n }\n\n struct Holder \n {\n uint unlockTime;\n uint balance;\n }\n\n mapping (address => Holder) public Acc;\n\n Log LogFile;\n\n uint public MinSum = 1 ether; \n\n function Y\\_WALLET(address log) public{\n LogFile = Log(log);\n }\n}\ncontract Log \n{\n struct Message\n {\n address Sender;\n string Data;\n uint Val;\n uint Time;\n }\n\n Message[] public History;\n\n Message LastMsg;\n\n function AddMessage(address \\_adr,uint \\_val,string \\_data)\n public\n {\n LastMsg.Sender = \\_adr;\n LastMsg.Time = now;\n LastMsg.Val = \\_val;\n LastMsg.Data = \\_data;\n History.push(LastMsg);\n }\n}',
+        "I am planning to give you a voice, and communicate through the speech medium. I need a speech recognizer, a wake call detector, and a speech synthesizer for your voice. Suggest a python script utilizing existing libraries to achieves the goal.",
+        "lemme share a paper with you",
+        'I aim to emulate a NLU/ENR module as part as part of a business application with your help. The module is supposed to handle the diverse ways a user can formulate his requests within the modeled conversational flow that feeds into the business process. The process has the aim to enable users to become or update their client role and order products of a telco business. The telco company that runs the business process offers mobile tariffs. Mobile tariffs have can have between one and 5 sim cards. Each booked sim cards enables the user to optionally book a smartphone for that card. Depending on the tariff, the chosen smartphones (if any) and the kind of sim cards (adult, child) the price will adapt. Please suggest a set of NLU / ENR methods that you could emulate to facilitate the use case. In the following I will input utterances and statements on how the system running the conversational flow should handle the utterance within the conversational flow. Please provide possible calls to an imaginary API that you could simulate to facilitate the NLU/ENR requirements layed out by my statements. On Subtasks that are recognized as not directly related to NLU/NER be very brief. Please suggest NLU / NER Operations now for the first of a few utterances: "Hi I want to upgrade my current tariff and get a new smartphone". The utterance should make the system recognize that the utterance can be handled as part of the business process. It should recognize that the user apparently already a client and it should continue the conversation by trying to identify him and metadata on his current tariff. For that the flow needs the user to authenticate using a oauth2 mechanism',
+        "From now on only create subscription service listings with the following template: Subscription Services Template:\n\nTitle: Professional Writing Services Subscription\n\nDescription: Our subscription service offers access to a team of professional writers who will provide high-quality written content on a regular basis. Choose from one of our three plans to suit your needs and budget.\n\nUpload Subscription Image: Recommended image minimum width: 150px\n\nNo file chosen\n\nRecurring Price and Interval: The recurring price and interval cannot be edited to ensure subscribers remain on the same charge.\n\nPlan 1:\nPlan name: Basic\nThe recurring price is USD 75.00 and will be charged periodically at every 1 month\nPlan description: This plan includes access to a professional writer who will provide one piece of written content per month. Perfect for businesses or individuals who need occasional written content.\n\nPlan Image: Display a small image to represent this plan to customers\n\nTrial Period: Enable trial period\nAssign Digital Product Files: Assign digital products for subscribers\n\nPlan 2:\nPlan name: Pro\nThe recurring price is USD 500.00 and will be charged periodically at every 1 month\nPlan description: This plan includes access to a team of professional writers who will provide up to five pieces of written content per month. Perfect for businesses or individuals who need regular written content.\n\nPlan Image: Display a small image to represent this plan to customers\n\nTrial Period: Enable trial period\nAssign Digital Product Files: Assign digital products for subscribers\n\nPlan 3:\nPlan name: Premium (Bundle of 20 / 1,500 words)\nThe recurring price is USD 1000.00 and will be charged periodically at every 1 month\nPlan description: This plan includes access to a team of professional writers who will provide up to 20 pieces of written content per month. Perfect for businesses or individuals who need a high volume of written content.\n\nPlan Image: Display a small image to represent this plan to customers\n\nTrial Period: Enable trial period\nAssign Digital Product Files: Assign digital products for subscribers",
+        "Hello",
+        "I am launching an Etsy shop with a Printful integration for drop shipping my designs on specific products. I am looking for ways to differentiate beyond the designs. You are an expert on Etsy audiences. Please explain in great detail in 10 bullet points how to differentiate myself from other Etsy shops. I am looking for more obscure ideas here.",
+        "How to get a job as a LMFT therapist in the US as an international student?",
+        "Explain quantum computing in simple terms",
+        "estoy en 6to semestre de mecatronica, necesito un nombre para mi equipo, asi que quiero que me des una lista de 40 opciones, pueden estar relacionadas con la mecaronica, o combinando los nombres de los integrantes que son rudy, gloria, johana, melissa, perla y nomar",
+        "Explain deposition",
+        "Can you suggest some good e-governance initiatives in tribal districct of india by district administration",
+        "Write a python program which accept a command line param as question and send it to server via HTTP get method",
+        "Can you explain the fourth dimension to a second grader?",
+        "I have an interview about product speccing with the company Weekend Health. Give me an example of a question they might ask with regards about a new feature",
+        "arduino uno adalah",
+        "how edit array which is in object",
+        "how can my software company use Microsoft ENTRA to verify the identity of a user before accessing the software?",
+        "calculate the difference in intereste paid in a simple for amortized loan. terms: 125,000 loan, 3.25% interest over 30 years.",
+        "can i use spring state machine and workflow together and is it justified?",
+        'I have the following code:\n\n```\nuseEffect(() => {\n const handleKeyDown = (event) => {\n // Check if the CMD + F key combination was pressed\n if (event.key === "f" && event.metaKey) {\n event.preventDefault();\n\n setIsShown(true);\n }\n\n window.addEventListener("keydown", handleKeyDown);\n\n return () => {\n window.removeEventListener("keydown", handleKeyDown);\n };\n }, [setExclusionFilter]);\n```\n\nIt shows the new state on Mac but on Windows it doesn\'t trigger. How can I support windows?',
+        "What is the best marketing tactics for local small businesses?",
+        "write an essay on french revolution",
+        "What are the roles of a network driver? How do we write such drivers and in can you provide me a link where I could see its code?",
+        "Are you familiar with the SAS programming language?",
+        "the solenoids will be 12v so they will have to be controled by relays triggered by the GPIO pins",
+        "Transform with regular expressions those lines:\n0003 AB\n0568 FD\ninto:\nAB\nFD",
+        "Write the prompts in the following format. First sentence establishes a situation. Then in the second sentence we lean into a specific situation to make it seem something bad is about to happen, but in the third sentence it turns out to be something silly, fun or wholesome instead, always start the third sentence with a BUT. Some examples below\n\n-A hydra is hypnotizing an orc. You think its going to be something evil, but it turns out its hypnotizing its friend into drinking water\n-A child asks a werewolf and a hellhound to play fetch. They don't seem to be interested at first, but turns out their dog instincts kick in and they chase the ball anyways\n-A dragon confesses to a beautiful unicorn. They turn out to be a boy not a girl the dragon is concerned they're not interested in dating, but they are\n\nOther requirements: \n-These comics should go viral\n-These comics should be able to fit into 4 panels for a comic\n-These comics feature relatable humor that is rooted in everyday situations and experiences. \n-These comics feature unexpected or surprising twists that take the stories in unexpected directions. \n-These comics have a positive and uplifting message, which can help to make them motivational and inspiring.\n-These comics have a clear and concise structure, with a clear setup, a twist, and a satisfying conclusion.\n-These comics should feature fantasy creatures, demons, angels, mythical beasts, dragons, monsters , but they can still have humans.",
+        "How can we improve this comic to be simpler and funnier?\n\n[We see that this is a small reading club for woodland creatures. Make them all nice and cute, very winnie the pooh-esque, lol. The two characters that speak are animals, make Red into a herbivore race, like a rabbit or something, pink should be a small carnivore like a cat or badger? Red is confused, and red is excited]\nKnock Knock\nPink:Who\u2019s that?\nRed: Maybe a new member for our book club!\n\n[Panics as she sees a dragon licking their lips behind the curtain]\nRed: It\u2019s a dragon, run for your lives everyone!\n\n[Dragon mom is outside their home, looking dragon-eque but also waving her hands chibi cute apologetically, she\u2019s clearly a little embarrassed by the situation. Red looks at her suspiciously ]\nDragon:I\u2019m not here to eat anyone, I uh\u2026 heard you had a book club?\nRed: Uh\u2026yes\n\n[Dragon looks very excited and welcome, Pink seems like she likes the book, red looks a little grossed out ]\nDragon: Awesome, it's nice to meet you! I brought my favorite book too!\nPink: What a lovely book!\nRed: Ugh I\u2019ll pass on reading that.",
+        "Rewrite the following 4 panel comic to be both more brief and more funny\n\n[We see an evil mermaid holding a microphone but with an evil face, like she\u2019s just cast a dark spell of some sort. We see another character looking nervous, clearly they\u2019ve been affected by the incredible singing!]\nMermaid: You\u2019ve lost! Give up & spare us both the trouble!\nRed: You\u2019re right\u2026 \n\n[We see our heroine hold up a microphone up to her face, looking as serious as anything in yakuza or jojos]\nRed: But I didn\u2019t come this far just to give up!\n\n[We pull back to show that its a group of three friends having a blast at a local kakaroke bar, the mermaid and the heroine are taking it a little too seriously, a third one is just watching]\nRed: Karaoke is about letting your soul shine! I\u2019m giving it my all or die trying!\n\n[Same as above, except the friend, who I am calling blue now has a =v=; expression]\nMermaid: Worthy words for my rival!\nBlue: Girls, you need to chill. \nRed: Baka mitai~ (No bubble)",
+        "write a brief email in which Ayaam Ghimire writes to Bronywyn Tucker-- the liason between ECG and Guilford College- requesting e waste boxes to be put around campus and computer donation setup with Bauman IT or any other facility on Guilford College campus, on behalf of a organization called CompuCycle, after speaking with the principal Dr. Kash",
+        "I'm writing a software for conference calls.\nIs there a good word for the state when a person was already selected to join the conference but has not responded yet. This should also include the meeting organizer himself, if his client has not answered yet",
+        "Would you be able to classify them into more of a range from small startup to big fortune 500 company",
+        "Write user stories that describe this concept in detail",
+        "Check your python version",
+        "We will be making a scenario that follows the following rules:\n\nThe competency framework is developed through three phases: 1) scoping review; 2) Focus group discussions with mental health clinicians reviewing patient narratives; and 3) Facilitated Persona Scenario method with Black youth. Moreover, the project adopts a co-design approach and convenes a Knowledge User Panel. The panel will be involved in all phases of the competency framework development as they will review findings from the scoping review and focus groups. \n\nFocus group with mental health clinicians \n Mental health clinicians (i.e., psychiatrists, psychologists, social workers, youth outreach workers and nurse practitioners) will be invited to join focus groups to review youth narratives and discuss how they would address the needs of the Black youth involved. The youth narratives will be generated through collecting stories from social media and through an online survey. The survey will ask about young people's experiences with mental health conditions, their use of mental health services, and their suggestions for how to improve mental health care for young people. The online survey will collect stories anonymously. Anyone who submits a story through the survey will be redirected to a list of resources. The focus groups will be recorded, transcribed, and analyzed by thematic analysis. The focus groups will continue until thematic saturation.\n\nPhase 3: Persona Scenario method with Black youth\n Black youth will be invited to focus groups (or one-on-one interviews, if requested) using persona scenario methods. The findings from the focus groups with mental health clinicians will be used to create clinician personas, including information about their motivations, challenges and describe the different ways in which the clinician might interact with the Black youth based on youth narratives. Black youth will be asked to share their perspectives and preferred clinician responses. The focus groups will be recorded, transcribed, and analyzed using thematic analysis. We will continue to hold focus groups until thematic saturation.\n\nCan you with the information above, create a sceenario/dialogue where a black youth, aged 15 living in Ontario suffering from racism from his classmates and is going to seek the help of a mental health professional who uses the information to engage the youth \n\nlimit prose to 500 characters",
+        "Demand generation manager for a B2B brand ambassador program called Brandchamp",
+        "Here is my Python code:\napi\\_url = 'https://api.yelp.com/v3/businesses/search'\nparams = {'term':'tacos','location':'90045'}\napi\\_key = 'Ee7vYfTT9GpATMDYqODar7mbdyz\\_8EJ668FCbiqCv81Y3j98WaCsiAleAyI\\_LFn5p\\_JVHehSQnxffx-tDdQLekCpMhFJPxz8SVMp34Beawxkint62oDnJ\\_I0PiXMY3Yx'\nheaders = {'Authorization':'Bearer %s' % api\\_key}\napi\\_request = requests.get(api.\\_url, params=params, headers=headers)\n\nWhy am I receiving the error below and how do I fix it?\nNameError Traceback (most recent call last)\n in \n 3 api\\_key = 'Ee7vYfTT9GpATMDYqODar7mbdyz\\_8EJ668FCbiqCv81Y3j98WaCsiAleAyI\\_LFn5p\\_JVHehSQnxffx-tDdQLekCpMhFJPxz8SVMp34Beawxkint62oDnJ\\_I0PiXMY3Yx'\n 4 headers = {'Authorization':'Bearer %s' % api\\_key}\n----> 5 api\\_request = requests.get(api.\\_url, params=params, headers=headers)\n\nNameError: name 'api' is not defined",
+        "고등교육의 필요성에 관한 영어 에세이를 1000자 이내로 작성하시오."
+        "Which hero is the best in Heroes of Might and Magic 3?",
+        "Use C# to get the current YouTube thumbnail and convert it to Base64.",
+        "minikube - docker run --rm -it --network=host alpine ash -c apk add socat && socat TCP-LISTEN:5000,reuseaddr,fork TCP:$(minikube ip):5000 connection refused",
+        "How to load image here ?",
+    ]
+
+    responses = await generate_multi(flash_llama, prompts, max_new_tokens=10)
+
+    assert len(responses) == len(prompts)
+    outputs = [r.choices[0].message.content for r in responses]
+    assert outputs == [
+        "Jeff Walker's Product Launch Formula is a comprehensive system",
+        "Here are three key indicators to determine if a customer",
+        "You can use the `String.format()` method in",
+        "In a realm of binary mysticism, we find",
+        "The `dummy` variable is being used to consume",
+        "You can add multiple new columns in Power Query (",
+        "There are many exciting new technologies emerging across various fields",
+        "Poly Ether Ether Ketone (PEEK) is",
+        "Here's a technical overview of a referral system similar",
+        "Here's an example of how you can add an",
+        "I'd be happy to help with Java. What",
+        "I can help you plan a road trip from Pune",
+        "I'd be happy to explain more about a topic",
+        "I'd be happy to help you brainstorm and provide",
+        "Implementing a Minesweeper algorithm using algebraic",
+        "There are several issues with the provided code:\n\n1",
+        ";)",
+        "As I delved into the world of high-st",
+        "/u/CruxHub: Hi, I'm",
+        "To simulate a conversation between Alice and /u/C",
+        "Alice: Hey /u/CruxHub,",
+        "Alice: Hi /u/CruxHub,",
+        "/u/CruxHub: Hey Alice, I",
+        "/u/CruxHub: Hey Alice, I",
+        "/u/CruxHub: Hey Alice, I",
+        "The Dogme approach and the Lexical Approach are",
+        "It seems like you're trying to connect a GraphQL",
+        "Implementing a netfilter in Linux with a Rust",
+        "Damage to the Ulnar nerve can cause numb",
+        "The Space Shuttle's Reaction Control System (RCS",
+        "I can provide you with a basic Python script that",
+        "Farming meat has several negative impacts on the environment",
+        "The photograph filter you're referring to is called \"",
+        "Here's a sample geological database structure with some example",
+        "**Web Marketing: A Simplified Explanation**\n\nWeb",
+        "Here's a rewritten and improved version of the story",
+        "Here are the questions rewritten in a more conversational",
+        "**Learning Progress: 0%**\n\n| Topic",
+        "I couldn't find any information on a person named",
+        "Here's a list of the largest outdoor retailers in",
+        "To create a WordPress shortcode that includes Facebook SDK code",
+        "The sentence is mostly grammatically correct, but there",
+        "I'd be happy to engage in a debate with",
+        "I'd love to hear about your business. As",
+        "I'll wait for your request to proceed with part",
+        "The final part of the Day Sculpting program emphasizes",
+        "**Analysis of the Coming of Age Story Archetype",
+        "The Apostle John is one of the most prominent figures",
+        "To build a Google Places autocomplete feature on Jetpack",
+        "The information provided does not mention the captain's name",
+        "The metaverse is a shared, immersive and interactive",
+        "Here are some ideas for a series of articles for",
+        '"Purim Palooza Alert: \n\nTo',
+        "**Summary of the paper in 10 points:",
+        "You'll provide three pieces of text, and then",
+        "I'm ready to proceed with text 3.",
+        "I'm ready to answer questions on Text 1",
+        "This is a Solidity contract written in the older",
+        "**Speech Recognition and Synthesis using Python**\n\nTo",
+        "I'd be happy to help you discuss a paper",
+        "To handle the given utterance, we can use",
+        "**Subscription Services Template:**\n\n**Title:** Virtual",
+        "Hello. How can I assist you today?",
+        "Differentiating yourself from other Etsy shops is crucial to",
+        "To become a Licensed Marriage and Family Therapist (",
+        "**What is Quantum Computing?**\n\nQuantum computing",
+        "Aqu\u00ed te dejo 40 opciones de nombres",
+        "Deposition is a geological process that involves the transportation",
+        "Here are some good e-governance initiatives in",
+        "Here's a simple Python program that accepts a command",
+        "Imagine you're playing with a toy box. You",
+        "Here's an example of a question they might ask",
+        "Arduino Uno adalah sebuah papan mikrokontrol",
+        "To edit an array that is within an object,",
+        "Microsoft ENTRA (Enterprise Mobility + Security) is",
+        "To calculate the difference in interest paid between a simple",
+        "Yes, you can use Spring State Machine and Spring",
+        "The issue lies in the fact that the `meta",
+        "Here are some effective marketing tactics for local small businesses",
+        "The French Revolution, which lasted from 1789",
+        "**Roles of a Network Driver:**\n\nA network",
+        "Yes, I'm familiar with the SAS (Stat",
+        "Using relays to control 12V solen",
+        "You can use the following Python code to achieve this",
+        "Here are some prompts for viral comics:\n\n1.",
+        "To simplify and make the comic funnier, consider",
+        "Here's a rewritten version of the 4-panel",
+        "Subject: Request for E-Waste Collection and Computer",
+        "In the context of conference calls, the state you",
+        "I can provide a general classification of companies based on",
+        "Here are some user stories that describe the concept in",
+        "You can check your Python version by running the following",
+        "**Scenario:**\n\n15-year-old Black youth,",
+        "As a Demand Generation Manager for a B2B",
+        "The error is due to a typo in your code",
+        "고등교육의 필요성에 관한 영어 에",
+        "Here's a simple C# program that uses the",
+        'The error message "connection refused" indicates that the',
+        "To load an image, you can use various methods",
+    ]
+    assert responses == generous_response_snapshot
diff --git a/integration-tests/poetry.lock b/integration-tests/poetry.lock
index 8398160e..56b146bc 100644
--- a/integration-tests/poetry.lock
+++ b/integration-tests/poetry.lock
@@ -1,112 +1,127 @@
 # This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
 
+[[package]]
+name = "aiohappyeyeballs"
+version = "2.4.0"
+description = "Happy Eyeballs for asyncio"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "aiohappyeyeballs-2.4.0-py3-none-any.whl", hash = "sha256:7ce92076e249169a13c2f49320d1967425eaf1f407522d707d59cac7628d62bd"},
+    {file = "aiohappyeyeballs-2.4.0.tar.gz", hash = "sha256:55a1714f084e63d49639800f95716da97a1f173d46a16dfcfda0016abb93b6b2"},
+]
+
 [[package]]
 name = "aiohttp"
-version = "3.8.5"
+version = "3.10.5"
 description = "Async http client/server framework (asyncio)"
 optional = false
-python-versions = ">=3.6"
+python-versions = ">=3.8"
 files = [
-    {file = "aiohttp-3.8.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a94159871304770da4dd371f4291b20cac04e8c94f11bdea1c3478e557fbe0d8"},
-    {file = "aiohttp-3.8.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:13bf85afc99ce6f9ee3567b04501f18f9f8dbbb2ea11ed1a2e079670403a7c84"},
-    {file = "aiohttp-3.8.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2ce2ac5708501afc4847221a521f7e4b245abf5178cf5ddae9d5b3856ddb2f3a"},
-    {file = "aiohttp-3.8.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:96943e5dcc37a6529d18766597c491798b7eb7a61d48878611298afc1fca946c"},
-    {file = "aiohttp-3.8.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2ad5c3c4590bb3cc28b4382f031f3783f25ec223557124c68754a2231d989e2b"},
-    {file = "aiohttp-3.8.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0c413c633d0512df4dc7fd2373ec06cc6a815b7b6d6c2f208ada7e9e93a5061d"},
-    {file = "aiohttp-3.8.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:df72ac063b97837a80d80dec8d54c241af059cc9bb42c4de68bd5b61ceb37caa"},
-    {file = "aiohttp-3.8.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c48c5c0271149cfe467c0ff8eb941279fd6e3f65c9a388c984e0e6cf57538e14"},
-    {file = "aiohttp-3.8.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:368a42363c4d70ab52c2c6420a57f190ed3dfaca6a1b19afda8165ee16416a82"},
-    {file = "aiohttp-3.8.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:7607ec3ce4993464368505888af5beb446845a014bc676d349efec0e05085905"},
-    {file = "aiohttp-3.8.5-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:0d21c684808288a98914e5aaf2a7c6a3179d4df11d249799c32d1808e79503b5"},
-    {file = "aiohttp-3.8.5-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:312fcfbacc7880a8da0ae8b6abc6cc7d752e9caa0051a53d217a650b25e9a691"},
-    {file = "aiohttp-3.8.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ad093e823df03bb3fd37e7dec9d4670c34f9e24aeace76808fc20a507cace825"},
-    {file = "aiohttp-3.8.5-cp310-cp310-win32.whl", hash = "sha256:33279701c04351a2914e1100b62b2a7fdb9a25995c4a104259f9a5ead7ed4802"},
-    {file = "aiohttp-3.8.5-cp310-cp310-win_amd64.whl", hash = "sha256:6e4a280e4b975a2e7745573e3fc9c9ba0d1194a3738ce1cbaa80626cc9b4f4df"},
-    {file = "aiohttp-3.8.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ae871a964e1987a943d83d6709d20ec6103ca1eaf52f7e0d36ee1b5bebb8b9b9"},
-    {file = "aiohttp-3.8.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:461908b2578955045efde733719d62f2b649c404189a09a632d245b445c9c975"},
-    {file = "aiohttp-3.8.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:72a860c215e26192379f57cae5ab12b168b75db8271f111019509a1196dfc780"},
-    {file = "aiohttp-3.8.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cc14be025665dba6202b6a71cfcdb53210cc498e50068bc088076624471f8bb9"},
-    {file = "aiohttp-3.8.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8af740fc2711ad85f1a5c034a435782fbd5b5f8314c9a3ef071424a8158d7f6b"},
-    {file = "aiohttp-3.8.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:841cd8233cbd2111a0ef0a522ce016357c5e3aff8a8ce92bcfa14cef890d698f"},
-    {file = "aiohttp-3.8.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ed1c46fb119f1b59304b5ec89f834f07124cd23ae5b74288e364477641060ff"},
-    {file = "aiohttp-3.8.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:84f8ae3e09a34f35c18fa57f015cc394bd1389bce02503fb30c394d04ee6b938"},
-    {file = "aiohttp-3.8.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:62360cb771707cb70a6fd114b9871d20d7dd2163a0feafe43fd115cfe4fe845e"},
-    {file = "aiohttp-3.8.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:23fb25a9f0a1ca1f24c0a371523546366bb642397c94ab45ad3aedf2941cec6a"},
-    {file = "aiohttp-3.8.5-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:b0ba0d15164eae3d878260d4c4df859bbdc6466e9e6689c344a13334f988bb53"},
-    {file = "aiohttp-3.8.5-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:5d20003b635fc6ae3f96d7260281dfaf1894fc3aa24d1888a9b2628e97c241e5"},
-    {file = "aiohttp-3.8.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:0175d745d9e85c40dcc51c8f88c74bfbaef9e7afeeeb9d03c37977270303064c"},
-    {file = "aiohttp-3.8.5-cp311-cp311-win32.whl", hash = "sha256:2e1b1e51b0774408f091d268648e3d57f7260c1682e7d3a63cb00d22d71bb945"},
-    {file = "aiohttp-3.8.5-cp311-cp311-win_amd64.whl", hash = "sha256:043d2299f6dfdc92f0ac5e995dfc56668e1587cea7f9aa9d8a78a1b6554e5755"},
-    {file = "aiohttp-3.8.5-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:cae533195e8122584ec87531d6df000ad07737eaa3c81209e85c928854d2195c"},
-    {file = "aiohttp-3.8.5-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4f21e83f355643c345177a5d1d8079f9f28b5133bcd154193b799d380331d5d3"},
-    {file = "aiohttp-3.8.5-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a7a75ef35f2df54ad55dbf4b73fe1da96f370e51b10c91f08b19603c64004acc"},
-    {file = "aiohttp-3.8.5-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2e2e9839e14dd5308ee773c97115f1e0a1cb1d75cbeeee9f33824fa5144c7634"},
-    {file = "aiohttp-3.8.5-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c44e65da1de4403d0576473e2344828ef9c4c6244d65cf4b75549bb46d40b8dd"},
-    {file = "aiohttp-3.8.5-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:78d847e4cde6ecc19125ccbc9bfac4a7ab37c234dd88fbb3c5c524e8e14da543"},
-    {file = "aiohttp-3.8.5-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:c7a815258e5895d8900aec4454f38dca9aed71085f227537208057853f9d13f2"},
-    {file = "aiohttp-3.8.5-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:8b929b9bd7cd7c3939f8bcfffa92fae7480bd1aa425279d51a89327d600c704d"},
-    {file = "aiohttp-3.8.5-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:5db3a5b833764280ed7618393832e0853e40f3d3e9aa128ac0ba0f8278d08649"},
-    {file = "aiohttp-3.8.5-cp36-cp36m-musllinux_1_1_s390x.whl", hash = "sha256:a0215ce6041d501f3155dc219712bc41252d0ab76474615b9700d63d4d9292af"},
-    {file = "aiohttp-3.8.5-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:fd1ed388ea7fbed22c4968dd64bab0198de60750a25fe8c0c9d4bef5abe13824"},
-    {file = "aiohttp-3.8.5-cp36-cp36m-win32.whl", hash = "sha256:6e6783bcc45f397fdebc118d772103d751b54cddf5b60fbcc958382d7dd64f3e"},
-    {file = "aiohttp-3.8.5-cp36-cp36m-win_amd64.whl", hash = "sha256:b5411d82cddd212644cf9360879eb5080f0d5f7d809d03262c50dad02f01421a"},
-    {file = "aiohttp-3.8.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:01d4c0c874aa4ddfb8098e85d10b5e875a70adc63db91f1ae65a4b04d3344cda"},
-    {file = "aiohttp-3.8.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e5980a746d547a6ba173fd5ee85ce9077e72d118758db05d229044b469d9029a"},
-    {file = "aiohttp-3.8.5-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2a482e6da906d5e6e653be079b29bc173a48e381600161c9932d89dfae5942ef"},
-    {file = "aiohttp-3.8.5-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:80bd372b8d0715c66c974cf57fe363621a02f359f1ec81cba97366948c7fc873"},
-    {file = "aiohttp-3.8.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c1161b345c0a444ebcf46bf0a740ba5dcf50612fd3d0528883fdc0eff578006a"},
-    {file = "aiohttp-3.8.5-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cd56db019015b6acfaaf92e1ac40eb8434847d9bf88b4be4efe5bfd260aee692"},
-    {file = "aiohttp-3.8.5-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:153c2549f6c004d2754cc60603d4668899c9895b8a89397444a9c4efa282aaf4"},
-    {file = "aiohttp-3.8.5-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:4a01951fabc4ce26ab791da5f3f24dca6d9a6f24121746eb19756416ff2d881b"},
-    {file = "aiohttp-3.8.5-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:bfb9162dcf01f615462b995a516ba03e769de0789de1cadc0f916265c257e5d8"},
-    {file = "aiohttp-3.8.5-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:7dde0009408969a43b04c16cbbe252c4f5ef4574ac226bc8815cd7342d2028b6"},
-    {file = "aiohttp-3.8.5-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:4149d34c32f9638f38f544b3977a4c24052042affa895352d3636fa8bffd030a"},
-    {file = "aiohttp-3.8.5-cp37-cp37m-win32.whl", hash = "sha256:68c5a82c8779bdfc6367c967a4a1b2aa52cd3595388bf5961a62158ee8a59e22"},
-    {file = "aiohttp-3.8.5-cp37-cp37m-win_amd64.whl", hash = "sha256:2cf57fb50be5f52bda004b8893e63b48530ed9f0d6c96c84620dc92fe3cd9b9d"},
-    {file = "aiohttp-3.8.5-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:eca4bf3734c541dc4f374ad6010a68ff6c6748f00451707f39857f429ca36ced"},
-    {file = "aiohttp-3.8.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1274477e4c71ce8cfe6c1ec2f806d57c015ebf84d83373676036e256bc55d690"},
-    {file = "aiohttp-3.8.5-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:28c543e54710d6158fc6f439296c7865b29e0b616629767e685a7185fab4a6b9"},
-    {file = "aiohttp-3.8.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:910bec0c49637d213f5d9877105d26e0c4a4de2f8b1b29405ff37e9fc0ad52b8"},
-    {file = "aiohttp-3.8.5-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5443910d662db951b2e58eb70b0fbe6b6e2ae613477129a5805d0b66c54b6cb7"},
-    {file = "aiohttp-3.8.5-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2e460be6978fc24e3df83193dc0cc4de46c9909ed92dd47d349a452ef49325b7"},
-    {file = "aiohttp-3.8.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fb1558def481d84f03b45888473fc5a1f35747b5f334ef4e7a571bc0dfcb11f8"},
-    {file = "aiohttp-3.8.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:34dd0c107799dcbbf7d48b53be761a013c0adf5571bf50c4ecad5643fe9cfcd0"},
-    {file = "aiohttp-3.8.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:aa1990247f02a54185dc0dff92a6904521172a22664c863a03ff64c42f9b5410"},
-    {file = "aiohttp-3.8.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:0e584a10f204a617d71d359fe383406305a4b595b333721fa50b867b4a0a1548"},
-    {file = "aiohttp-3.8.5-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:a3cf433f127efa43fee6b90ea4c6edf6c4a17109d1d037d1a52abec84d8f2e42"},
-    {file = "aiohttp-3.8.5-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:c11f5b099adafb18e65c2c997d57108b5bbeaa9eeee64a84302c0978b1ec948b"},
-    {file = "aiohttp-3.8.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:84de26ddf621d7ac4c975dbea4c945860e08cccde492269db4e1538a6a6f3c35"},
-    {file = "aiohttp-3.8.5-cp38-cp38-win32.whl", hash = "sha256:ab88bafedc57dd0aab55fa728ea10c1911f7e4d8b43e1d838a1739f33712921c"},
-    {file = "aiohttp-3.8.5-cp38-cp38-win_amd64.whl", hash = "sha256:5798a9aad1879f626589f3df0f8b79b3608a92e9beab10e5fda02c8a2c60db2e"},
-    {file = "aiohttp-3.8.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:a6ce61195c6a19c785df04e71a4537e29eaa2c50fe745b732aa937c0c77169f3"},
-    {file = "aiohttp-3.8.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:773dd01706d4db536335fcfae6ea2440a70ceb03dd3e7378f3e815b03c97ab51"},
-    {file = "aiohttp-3.8.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f83a552443a526ea38d064588613aca983d0ee0038801bc93c0c916428310c28"},
-    {file = "aiohttp-3.8.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f7372f7341fcc16f57b2caded43e81ddd18df53320b6f9f042acad41f8e049a"},
-    {file = "aiohttp-3.8.5-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ea353162f249c8097ea63c2169dd1aa55de1e8fecbe63412a9bc50816e87b761"},
-    {file = "aiohttp-3.8.5-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e5d47ae48db0b2dcf70bc8a3bc72b3de86e2a590fc299fdbbb15af320d2659de"},
-    {file = "aiohttp-3.8.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d827176898a2b0b09694fbd1088c7a31836d1a505c243811c87ae53a3f6273c1"},
-    {file = "aiohttp-3.8.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3562b06567c06439d8b447037bb655ef69786c590b1de86c7ab81efe1c9c15d8"},
-    {file = "aiohttp-3.8.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:4e874cbf8caf8959d2adf572a78bba17cb0e9d7e51bb83d86a3697b686a0ab4d"},
-    {file = "aiohttp-3.8.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:6809a00deaf3810e38c628e9a33271892f815b853605a936e2e9e5129762356c"},
-    {file = "aiohttp-3.8.5-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:33776e945d89b29251b33a7e7d006ce86447b2cfd66db5e5ded4e5cd0340585c"},
-    {file = "aiohttp-3.8.5-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:eaeed7abfb5d64c539e2db173f63631455f1196c37d9d8d873fc316470dfbacd"},
-    {file = "aiohttp-3.8.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:e91d635961bec2d8f19dfeb41a539eb94bd073f075ca6dae6c8dc0ee89ad6f91"},
-    {file = "aiohttp-3.8.5-cp39-cp39-win32.whl", hash = "sha256:00ad4b6f185ec67f3e6562e8a1d2b69660be43070bd0ef6fcec5211154c7df67"},
-    {file = "aiohttp-3.8.5-cp39-cp39-win_amd64.whl", hash = "sha256:c0a9034379a37ae42dea7ac1e048352d96286626251862e448933c0f59cbd79c"},
-    {file = "aiohttp-3.8.5.tar.gz", hash = "sha256:b9552ec52cc147dbf1944ac7ac98af7602e51ea2dcd076ed194ca3c0d1c7d0bc"},
+    {file = "aiohttp-3.10.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:18a01eba2574fb9edd5f6e5fb25f66e6ce061da5dab5db75e13fe1558142e0a3"},
+    {file = "aiohttp-3.10.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:94fac7c6e77ccb1ca91e9eb4cb0ac0270b9fb9b289738654120ba8cebb1189c6"},
+    {file = "aiohttp-3.10.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2f1f1c75c395991ce9c94d3e4aa96e5c59c8356a15b1c9231e783865e2772699"},
+    {file = "aiohttp-3.10.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4f7acae3cf1a2a2361ec4c8e787eaaa86a94171d2417aae53c0cca6ca3118ff6"},
+    {file = "aiohttp-3.10.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:94c4381ffba9cc508b37d2e536b418d5ea9cfdc2848b9a7fea6aebad4ec6aac1"},
+    {file = "aiohttp-3.10.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c31ad0c0c507894e3eaa843415841995bf8de4d6b2d24c6e33099f4bc9fc0d4f"},
+    {file = "aiohttp-3.10.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0912b8a8fadeb32ff67a3ed44249448c20148397c1ed905d5dac185b4ca547bb"},
+    {file = "aiohttp-3.10.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0d93400c18596b7dc4794d48a63fb361b01a0d8eb39f28800dc900c8fbdaca91"},
+    {file = "aiohttp-3.10.5-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d00f3c5e0d764a5c9aa5a62d99728c56d455310bcc288a79cab10157b3af426f"},
+    {file = "aiohttp-3.10.5-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:d742c36ed44f2798c8d3f4bc511f479b9ceef2b93f348671184139e7d708042c"},
+    {file = "aiohttp-3.10.5-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:814375093edae5f1cb31e3407997cf3eacefb9010f96df10d64829362ae2df69"},
+    {file = "aiohttp-3.10.5-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:8224f98be68a84b19f48e0bdc14224b5a71339aff3a27df69989fa47d01296f3"},
+    {file = "aiohttp-3.10.5-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:d9a487ef090aea982d748b1b0d74fe7c3950b109df967630a20584f9a99c0683"},
+    {file = "aiohttp-3.10.5-cp310-cp310-win32.whl", hash = "sha256:d9ef084e3dc690ad50137cc05831c52b6ca428096e6deb3c43e95827f531d5ef"},
+    {file = "aiohttp-3.10.5-cp310-cp310-win_amd64.whl", hash = "sha256:66bf9234e08fe561dccd62083bf67400bdbf1c67ba9efdc3dac03650e97c6088"},
+    {file = "aiohttp-3.10.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:8c6a4e5e40156d72a40241a25cc226051c0a8d816610097a8e8f517aeacd59a2"},
+    {file = "aiohttp-3.10.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2c634a3207a5445be65536d38c13791904fda0748b9eabf908d3fe86a52941cf"},
+    {file = "aiohttp-3.10.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4aff049b5e629ef9b3e9e617fa6e2dfeda1bf87e01bcfecaf3949af9e210105e"},
+    {file = "aiohttp-3.10.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1942244f00baaacaa8155eca94dbd9e8cc7017deb69b75ef67c78e89fdad3c77"},
+    {file = "aiohttp-3.10.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e04a1f2a65ad2f93aa20f9ff9f1b672bf912413e5547f60749fa2ef8a644e061"},
+    {file = "aiohttp-3.10.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7f2bfc0032a00405d4af2ba27f3c429e851d04fad1e5ceee4080a1c570476697"},
+    {file = "aiohttp-3.10.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:424ae21498790e12eb759040bbb504e5e280cab64693d14775c54269fd1d2bb7"},
+    {file = "aiohttp-3.10.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:975218eee0e6d24eb336d0328c768ebc5d617609affaca5dbbd6dd1984f16ed0"},
+    {file = "aiohttp-3.10.5-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:4120d7fefa1e2d8fb6f650b11489710091788de554e2b6f8347c7a20ceb003f5"},
+    {file = "aiohttp-3.10.5-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:b90078989ef3fc45cf9221d3859acd1108af7560c52397ff4ace8ad7052a132e"},
+    {file = "aiohttp-3.10.5-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:ba5a8b74c2a8af7d862399cdedce1533642fa727def0b8c3e3e02fcb52dca1b1"},
+    {file = "aiohttp-3.10.5-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:02594361128f780eecc2a29939d9dfc870e17b45178a867bf61a11b2a4367277"},
+    {file = "aiohttp-3.10.5-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:8fb4fc029e135859f533025bc82047334e24b0d489e75513144f25408ecaf058"},
+    {file = "aiohttp-3.10.5-cp311-cp311-win32.whl", hash = "sha256:e1ca1ef5ba129718a8fc827b0867f6aa4e893c56eb00003b7367f8a733a9b072"},
+    {file = "aiohttp-3.10.5-cp311-cp311-win_amd64.whl", hash = "sha256:349ef8a73a7c5665cca65c88ab24abe75447e28aa3bc4c93ea5093474dfdf0ff"},
+    {file = "aiohttp-3.10.5-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:305be5ff2081fa1d283a76113b8df7a14c10d75602a38d9f012935df20731487"},
+    {file = "aiohttp-3.10.5-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:3a1c32a19ee6bbde02f1cb189e13a71b321256cc1d431196a9f824050b160d5a"},
+    {file = "aiohttp-3.10.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:61645818edd40cc6f455b851277a21bf420ce347baa0b86eaa41d51ef58ba23d"},
+    {file = "aiohttp-3.10.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c225286f2b13bab5987425558baa5cbdb2bc925b2998038fa028245ef421e75"},
+    {file = "aiohttp-3.10.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8ba01ebc6175e1e6b7275c907a3a36be48a2d487549b656aa90c8a910d9f3178"},
+    {file = "aiohttp-3.10.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8eaf44ccbc4e35762683078b72bf293f476561d8b68ec8a64f98cf32811c323e"},
+    {file = "aiohttp-3.10.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1c43eb1ab7cbf411b8e387dc169acb31f0ca0d8c09ba63f9eac67829585b44f"},
+    {file = "aiohttp-3.10.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:de7a5299827253023c55ea549444e058c0eb496931fa05d693b95140a947cb73"},
+    {file = "aiohttp-3.10.5-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:4790f0e15f00058f7599dab2b206d3049d7ac464dc2e5eae0e93fa18aee9e7bf"},
+    {file = "aiohttp-3.10.5-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:44b324a6b8376a23e6ba25d368726ee3bc281e6ab306db80b5819999c737d820"},
+    {file = "aiohttp-3.10.5-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:0d277cfb304118079e7044aad0b76685d30ecb86f83a0711fc5fb257ffe832ca"},
+    {file = "aiohttp-3.10.5-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:54d9ddea424cd19d3ff6128601a4a4d23d54a421f9b4c0fff740505813739a91"},
+    {file = "aiohttp-3.10.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:4f1c9866ccf48a6df2b06823e6ae80573529f2af3a0992ec4fe75b1a510df8a6"},
+    {file = "aiohttp-3.10.5-cp312-cp312-win32.whl", hash = "sha256:dc4826823121783dccc0871e3f405417ac116055bf184ac04c36f98b75aacd12"},
+    {file = "aiohttp-3.10.5-cp312-cp312-win_amd64.whl", hash = "sha256:22c0a23a3b3138a6bf76fc553789cb1a703836da86b0f306b6f0dc1617398abc"},
+    {file = "aiohttp-3.10.5-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:7f6b639c36734eaa80a6c152a238242bedcee9b953f23bb887e9102976343092"},
+    {file = "aiohttp-3.10.5-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f29930bc2921cef955ba39a3ff87d2c4398a0394ae217f41cb02d5c26c8b1b77"},
+    {file = "aiohttp-3.10.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f489a2c9e6455d87eabf907ac0b7d230a9786be43fbe884ad184ddf9e9c1e385"},
+    {file = "aiohttp-3.10.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:123dd5b16b75b2962d0fff566effb7a065e33cd4538c1692fb31c3bda2bfb972"},
+    {file = "aiohttp-3.10.5-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b98e698dc34966e5976e10bbca6d26d6724e6bdea853c7c10162a3235aba6e16"},
+    {file = "aiohttp-3.10.5-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c3b9162bab7e42f21243effc822652dc5bb5e8ff42a4eb62fe7782bcbcdfacf6"},
+    {file = "aiohttp-3.10.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1923a5c44061bffd5eebeef58cecf68096e35003907d8201a4d0d6f6e387ccaa"},
+    {file = "aiohttp-3.10.5-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d55f011da0a843c3d3df2c2cf4e537b8070a419f891c930245f05d329c4b0689"},
+    {file = "aiohttp-3.10.5-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:afe16a84498441d05e9189a15900640a2d2b5e76cf4efe8cbb088ab4f112ee57"},
+    {file = "aiohttp-3.10.5-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:f8112fb501b1e0567a1251a2fd0747baae60a4ab325a871e975b7bb67e59221f"},
+    {file = "aiohttp-3.10.5-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:1e72589da4c90337837fdfe2026ae1952c0f4a6e793adbbfbdd40efed7c63599"},
+    {file = "aiohttp-3.10.5-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:4d46c7b4173415d8e583045fbc4daa48b40e31b19ce595b8d92cf639396c15d5"},
+    {file = "aiohttp-3.10.5-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:33e6bc4bab477c772a541f76cd91e11ccb6d2efa2b8d7d7883591dfb523e5987"},
+    {file = "aiohttp-3.10.5-cp313-cp313-win32.whl", hash = "sha256:c58c6837a2c2a7cf3133983e64173aec11f9c2cd8e87ec2fdc16ce727bcf1a04"},
+    {file = "aiohttp-3.10.5-cp313-cp313-win_amd64.whl", hash = "sha256:38172a70005252b6893088c0f5e8a47d173df7cc2b2bd88650957eb84fcf5022"},
+    {file = "aiohttp-3.10.5-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:f6f18898ace4bcd2d41a122916475344a87f1dfdec626ecde9ee802a711bc569"},
+    {file = "aiohttp-3.10.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:5ede29d91a40ba22ac1b922ef510aab871652f6c88ef60b9dcdf773c6d32ad7a"},
+    {file = "aiohttp-3.10.5-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:673f988370f5954df96cc31fd99c7312a3af0a97f09e407399f61583f30da9bc"},
+    {file = "aiohttp-3.10.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:58718e181c56a3c02d25b09d4115eb02aafe1a732ce5714ab70326d9776457c3"},
+    {file = "aiohttp-3.10.5-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4b38b1570242fbab8d86a84128fb5b5234a2f70c2e32f3070143a6d94bc854cf"},
+    {file = "aiohttp-3.10.5-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:074d1bff0163e107e97bd48cad9f928fa5a3eb4b9d33366137ffce08a63e37fe"},
+    {file = "aiohttp-3.10.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd31f176429cecbc1ba499d4aba31aaccfea488f418d60376b911269d3b883c5"},
+    {file = "aiohttp-3.10.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7384d0b87d4635ec38db9263e6a3f1eb609e2e06087f0aa7f63b76833737b471"},
+    {file = "aiohttp-3.10.5-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:8989f46f3d7ef79585e98fa991e6ded55d2f48ae56d2c9fa5e491a6e4effb589"},
+    {file = "aiohttp-3.10.5-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:c83f7a107abb89a227d6c454c613e7606c12a42b9a4ca9c5d7dad25d47c776ae"},
+    {file = "aiohttp-3.10.5-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:cde98f323d6bf161041e7627a5fd763f9fd829bcfcd089804a5fdce7bb6e1b7d"},
+    {file = "aiohttp-3.10.5-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:676f94c5480d8eefd97c0c7e3953315e4d8c2b71f3b49539beb2aa676c58272f"},
+    {file = "aiohttp-3.10.5-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:2d21ac12dc943c68135ff858c3a989f2194a709e6e10b4c8977d7fcd67dfd511"},
+    {file = "aiohttp-3.10.5-cp38-cp38-win32.whl", hash = "sha256:17e997105bd1a260850272bfb50e2a328e029c941c2708170d9d978d5a30ad9a"},
+    {file = "aiohttp-3.10.5-cp38-cp38-win_amd64.whl", hash = "sha256:1c19de68896747a2aa6257ae4cf6ef59d73917a36a35ee9d0a6f48cff0f94db8"},
+    {file = "aiohttp-3.10.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7e2fe37ac654032db1f3499fe56e77190282534810e2a8e833141a021faaab0e"},
+    {file = "aiohttp-3.10.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f5bf3ead3cb66ab990ee2561373b009db5bc0e857549b6c9ba84b20bc462e172"},
+    {file = "aiohttp-3.10.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1b2c16a919d936ca87a3c5f0e43af12a89a3ce7ccbce59a2d6784caba945b68b"},
+    {file = "aiohttp-3.10.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ad146dae5977c4dd435eb31373b3fe9b0b1bf26858c6fc452bf6af394067e10b"},
+    {file = "aiohttp-3.10.5-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8c5c6fa16412b35999320f5c9690c0f554392dc222c04e559217e0f9ae244b92"},
+    {file = "aiohttp-3.10.5-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:95c4dc6f61d610bc0ee1edc6f29d993f10febfe5b76bb470b486d90bbece6b22"},
+    {file = "aiohttp-3.10.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da452c2c322e9ce0cfef392e469a26d63d42860f829026a63374fde6b5c5876f"},
+    {file = "aiohttp-3.10.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:898715cf566ec2869d5cb4d5fb4be408964704c46c96b4be267442d265390f32"},
+    {file = "aiohttp-3.10.5-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:391cc3a9c1527e424c6865e087897e766a917f15dddb360174a70467572ac6ce"},
+    {file = "aiohttp-3.10.5-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:380f926b51b92d02a34119d072f178d80bbda334d1a7e10fa22d467a66e494db"},
+    {file = "aiohttp-3.10.5-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:ce91db90dbf37bb6fa0997f26574107e1b9d5ff939315247b7e615baa8ec313b"},
+    {file = "aiohttp-3.10.5-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:9093a81e18c45227eebe4c16124ebf3e0d893830c6aca7cc310bfca8fe59d857"},
+    {file = "aiohttp-3.10.5-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:ee40b40aa753d844162dcc80d0fe256b87cba48ca0054f64e68000453caead11"},
+    {file = "aiohttp-3.10.5-cp39-cp39-win32.whl", hash = "sha256:03f2645adbe17f274444953bdea69f8327e9d278d961d85657cb0d06864814c1"},
+    {file = "aiohttp-3.10.5-cp39-cp39-win_amd64.whl", hash = "sha256:d17920f18e6ee090bdd3d0bfffd769d9f2cb4c8ffde3eb203777a3895c128862"},
+    {file = "aiohttp-3.10.5.tar.gz", hash = "sha256:f071854b47d39591ce9a17981c46790acb30518e2f83dfca8db2dfa091178691"},
 ]
 
 [package.dependencies]
+aiohappyeyeballs = ">=2.3.0"
 aiosignal = ">=1.1.2"
-async-timeout = ">=4.0.0a3,<5.0"
+async-timeout = {version = ">=4.0,<5.0", markers = "python_version < \"3.11\""}
 attrs = ">=17.3.0"
-charset-normalizer = ">=2.0,<4.0"
 frozenlist = ">=1.1.1"
 multidict = ">=4.5,<7.0"
 yarl = ">=1.0,<2.0"
 
 [package.extras]
-speedups = ["Brotli", "aiodns", "cchardet"]
+speedups = ["Brotli", "aiodns (>=3.2.0)", "brotlicffi"]
 
 [[package]]
 name = "aiosignal"
@@ -124,13 +139,13 @@ frozenlist = ">=1.1.0"
 
 [[package]]
 name = "annotated-types"
-version = "0.6.0"
+version = "0.7.0"
 description = "Reusable constraint types to use with typing.Annotated"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "annotated_types-0.6.0-py3-none-any.whl", hash = "sha256:0641064de18ba7a25dee8f96403ebc39113d0cb953a01429249d5c7564666a43"},
-    {file = "annotated_types-0.6.0.tar.gz", hash = "sha256:563339e807e53ffd9c267e99fc6d9ea23eb8443c08f112651963e24e22f84a5d"},
+    {file = "annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53"},
+    {file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"},
 ]
 
 [[package]]
@@ -146,115 +161,131 @@ files = [
 
 [[package]]
 name = "attrs"
-version = "23.1.0"
+version = "24.2.0"
 description = "Classes Without Boilerplate"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "attrs-23.1.0-py3-none-any.whl", hash = "sha256:1f28b4522cdc2fb4256ac1a020c78acf9cba2c6b461ccd2c126f3aa8e8335d04"},
-    {file = "attrs-23.1.0.tar.gz", hash = "sha256:6279836d581513a26f1bf235f9acd333bc9115683f14f7e8fae46c98fc50e015"},
+    {file = "attrs-24.2.0-py3-none-any.whl", hash = "sha256:81921eb96de3191c8258c199618104dd27ac608d9366f5e35d011eae1867ede2"},
+    {file = "attrs-24.2.0.tar.gz", hash = "sha256:5cfb1b9148b5b086569baec03f20d7b6bf3bcacc9a42bebf87ffaaca362f6346"},
 ]
 
 [package.extras]
-cov = ["attrs[tests]", "coverage[toml] (>=5.3)"]
-dev = ["attrs[docs,tests]", "pre-commit"]
-docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope-interface"]
-tests = ["attrs[tests-no-zope]", "zope-interface"]
-tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=1.1.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"]
+benchmark = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-codspeed", "pytest-mypy-plugins", "pytest-xdist[psutil]"]
+cov = ["cloudpickle", "coverage[toml] (>=5.3)", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"]
+dev = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pre-commit", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"]
+docs = ["cogapp", "furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier (<24.7)"]
+tests = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"]
+tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"]
 
 [[package]]
 name = "certifi"
-version = "2023.7.22"
+version = "2024.8.30"
 description = "Python package for providing Mozilla's CA Bundle."
 optional = false
 python-versions = ">=3.6"
 files = [
-    {file = "certifi-2023.7.22-py3-none-any.whl", hash = "sha256:92d6037539857d8206b8f6ae472e8b77db8058fec5937a1ef3f54304089edbb9"},
-    {file = "certifi-2023.7.22.tar.gz", hash = "sha256:539cc1d13202e33ca466e88b2807e29f4c13049d6d87031a3c110744495cb082"},
+    {file = "certifi-2024.8.30-py3-none-any.whl", hash = "sha256:922820b53db7a7257ffbda3f597266d435245903d80737e34f8a45ff3e3230d8"},
+    {file = "certifi-2024.8.30.tar.gz", hash = "sha256:bec941d2aa8195e248a60b31ff9f0558284cf01a52591ceda73ea9afffd69fd9"},
 ]
 
 [[package]]
 name = "charset-normalizer"
-version = "3.2.0"
+version = "3.3.2"
 description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
 optional = false
 python-versions = ">=3.7.0"
 files = [
-    {file = "charset-normalizer-3.2.0.tar.gz", hash = "sha256:3bb3d25a8e6c0aedd251753a79ae98a093c7e7b471faa3aa9a93a81431987ace"},
-    {file = "charset_normalizer-3.2.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0b87549028f680ca955556e3bd57013ab47474c3124dc069faa0b6545b6c9710"},
-    {file = "charset_normalizer-3.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7c70087bfee18a42b4040bb9ec1ca15a08242cf5867c58726530bdf3945672ed"},
-    {file = "charset_normalizer-3.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a103b3a7069b62f5d4890ae1b8f0597618f628b286b03d4bc9195230b154bfa9"},
-    {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94aea8eff76ee6d1cdacb07dd2123a68283cb5569e0250feab1240058f53b623"},
-    {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:db901e2ac34c931d73054d9797383d0f8009991e723dab15109740a63e7f902a"},
-    {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b0dac0ff919ba34d4df1b6131f59ce95b08b9065233446be7e459f95554c0dc8"},
-    {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:193cbc708ea3aca45e7221ae58f0fd63f933753a9bfb498a3b474878f12caaad"},
-    {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:09393e1b2a9461950b1c9a45d5fd251dc7c6f228acab64da1c9c0165d9c7765c"},
-    {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:baacc6aee0b2ef6f3d308e197b5d7a81c0e70b06beae1f1fcacffdbd124fe0e3"},
-    {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:bf420121d4c8dce6b889f0e8e4ec0ca34b7f40186203f06a946fa0276ba54029"},
-    {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:c04a46716adde8d927adb9457bbe39cf473e1e2c2f5d0a16ceb837e5d841ad4f"},
-    {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:aaf63899c94de41fe3cf934601b0f7ccb6b428c6e4eeb80da72c58eab077b19a"},
-    {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:d62e51710986674142526ab9f78663ca2b0726066ae26b78b22e0f5e571238dd"},
-    {file = "charset_normalizer-3.2.0-cp310-cp310-win32.whl", hash = "sha256:04e57ab9fbf9607b77f7d057974694b4f6b142da9ed4a199859d9d4d5c63fe96"},
-    {file = "charset_normalizer-3.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:48021783bdf96e3d6de03a6e39a1171ed5bd7e8bb93fc84cc649d11490f87cea"},
-    {file = "charset_normalizer-3.2.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:4957669ef390f0e6719db3613ab3a7631e68424604a7b448f079bee145da6e09"},
-    {file = "charset_normalizer-3.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:46fb8c61d794b78ec7134a715a3e564aafc8f6b5e338417cb19fe9f57a5a9bf2"},
-    {file = "charset_normalizer-3.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f779d3ad205f108d14e99bb3859aa7dd8e9c68874617c72354d7ecaec2a054ac"},
-    {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f25c229a6ba38a35ae6e25ca1264621cc25d4d38dca2942a7fce0b67a4efe918"},
-    {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2efb1bd13885392adfda4614c33d3b68dee4921fd0ac1d3988f8cbb7d589e72a"},
-    {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1f30b48dd7fa1474554b0b0f3fdfdd4c13b5c737a3c6284d3cdc424ec0ffff3a"},
-    {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:246de67b99b6851627d945db38147d1b209a899311b1305dd84916f2b88526c6"},
-    {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9bd9b3b31adcb054116447ea22caa61a285d92e94d710aa5ec97992ff5eb7cf3"},
-    {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:8c2f5e83493748286002f9369f3e6607c565a6a90425a3a1fef5ae32a36d749d"},
-    {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:3170c9399da12c9dc66366e9d14da8bf7147e1e9d9ea566067bbce7bb74bd9c2"},
-    {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:7a4826ad2bd6b07ca615c74ab91f32f6c96d08f6fcc3902ceeedaec8cdc3bcd6"},
-    {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:3b1613dd5aee995ec6d4c69f00378bbd07614702a315a2cf6c1d21461fe17c23"},
-    {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9e608aafdb55eb9f255034709e20d5a83b6d60c054df0802fa9c9883d0a937aa"},
-    {file = "charset_normalizer-3.2.0-cp311-cp311-win32.whl", hash = "sha256:f2a1d0fd4242bd8643ce6f98927cf9c04540af6efa92323e9d3124f57727bfc1"},
-    {file = "charset_normalizer-3.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:681eb3d7e02e3c3655d1b16059fbfb605ac464c834a0c629048a30fad2b27489"},
-    {file = "charset_normalizer-3.2.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c57921cda3a80d0f2b8aec7e25c8aa14479ea92b5b51b6876d975d925a2ea346"},
-    {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:41b25eaa7d15909cf3ac4c96088c1f266a9a93ec44f87f1d13d4a0e86c81b982"},
-    {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f058f6963fd82eb143c692cecdc89e075fa0828db2e5b291070485390b2f1c9c"},
-    {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a7647ebdfb9682b7bb97e2a5e7cb6ae735b1c25008a70b906aecca294ee96cf4"},
-    {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eef9df1eefada2c09a5e7a40991b9fc6ac6ef20b1372abd48d2794a316dc0449"},
-    {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e03b8895a6990c9ab2cdcd0f2fe44088ca1c65ae592b8f795c3294af00a461c3"},
-    {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:ee4006268ed33370957f55bf2e6f4d263eaf4dc3cfc473d1d90baff6ed36ce4a"},
-    {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:c4983bf937209c57240cff65906b18bb35e64ae872da6a0db937d7b4af845dd7"},
-    {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:3bb7fda7260735efe66d5107fb7e6af6a7c04c7fce9b2514e04b7a74b06bf5dd"},
-    {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:72814c01533f51d68702802d74f77ea026b5ec52793c791e2da806a3844a46c3"},
-    {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:70c610f6cbe4b9fce272c407dd9d07e33e6bf7b4aa1b7ffb6f6ded8e634e3592"},
-    {file = "charset_normalizer-3.2.0-cp37-cp37m-win32.whl", hash = "sha256:a401b4598e5d3f4a9a811f3daf42ee2291790c7f9d74b18d75d6e21dda98a1a1"},
-    {file = "charset_normalizer-3.2.0-cp37-cp37m-win_amd64.whl", hash = "sha256:c0b21078a4b56965e2b12f247467b234734491897e99c1d51cee628da9786959"},
-    {file = "charset_normalizer-3.2.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:95eb302ff792e12aba9a8b8f8474ab229a83c103d74a750ec0bd1c1eea32e669"},
-    {file = "charset_normalizer-3.2.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1a100c6d595a7f316f1b6f01d20815d916e75ff98c27a01ae817439ea7726329"},
-    {file = "charset_normalizer-3.2.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6339d047dab2780cc6220f46306628e04d9750f02f983ddb37439ca47ced7149"},
-    {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e4b749b9cc6ee664a3300bb3a273c1ca8068c46be705b6c31cf5d276f8628a94"},
-    {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a38856a971c602f98472050165cea2cdc97709240373041b69030be15047691f"},
-    {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f87f746ee241d30d6ed93969de31e5ffd09a2961a051e60ae6bddde9ec3583aa"},
-    {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:89f1b185a01fe560bc8ae5f619e924407efca2191b56ce749ec84982fc59a32a"},
-    {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e1c8a2f4c69e08e89632defbfabec2feb8a8d99edc9f89ce33c4b9e36ab63037"},
-    {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:2f4ac36d8e2b4cc1aa71df3dd84ff8efbe3bfb97ac41242fbcfc053c67434f46"},
-    {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a386ebe437176aab38c041de1260cd3ea459c6ce5263594399880bbc398225b2"},
-    {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:ccd16eb18a849fd8dcb23e23380e2f0a354e8daa0c984b8a732d9cfaba3a776d"},
-    {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:e6a5bf2cba5ae1bb80b154ed68a3cfa2fa00fde979a7f50d6598d3e17d9ac20c"},
-    {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:45de3f87179c1823e6d9e32156fb14c1927fcc9aba21433f088fdfb555b77c10"},
-    {file = "charset_normalizer-3.2.0-cp38-cp38-win32.whl", hash = "sha256:1000fba1057b92a65daec275aec30586c3de2401ccdcd41f8a5c1e2c87078706"},
-    {file = "charset_normalizer-3.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:8b2c760cfc7042b27ebdb4a43a4453bd829a5742503599144d54a032c5dc7e9e"},
-    {file = "charset_normalizer-3.2.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:855eafa5d5a2034b4621c74925d89c5efef61418570e5ef9b37717d9c796419c"},
-    {file = "charset_normalizer-3.2.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:203f0c8871d5a7987be20c72442488a0b8cfd0f43b7973771640fc593f56321f"},
-    {file = "charset_normalizer-3.2.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e857a2232ba53ae940d3456f7533ce6ca98b81917d47adc3c7fd55dad8fab858"},
-    {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5e86d77b090dbddbe78867a0275cb4df08ea195e660f1f7f13435a4649e954e5"},
-    {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c4fb39a81950ec280984b3a44f5bd12819953dc5fa3a7e6fa7a80db5ee853952"},
-    {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2dee8e57f052ef5353cf608e0b4c871aee320dd1b87d351c28764fc0ca55f9f4"},
-    {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8700f06d0ce6f128de3ccdbc1acaea1ee264d2caa9ca05daaf492fde7c2a7200"},
-    {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1920d4ff15ce893210c1f0c0e9d19bfbecb7983c76b33f046c13a8ffbd570252"},
-    {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:c1c76a1743432b4b60ab3358c937a3fe1341c828ae6194108a94c69028247f22"},
-    {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:f7560358a6811e52e9c4d142d497f1a6e10103d3a6881f18d04dbce3729c0e2c"},
-    {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:c8063cf17b19661471ecbdb3df1c84f24ad2e389e326ccaf89e3fb2484d8dd7e"},
-    {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:cd6dbe0238f7743d0efe563ab46294f54f9bc8f4b9bcf57c3c666cc5bc9d1299"},
-    {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:1249cbbf3d3b04902ff081ffbb33ce3377fa6e4c7356f759f3cd076cc138d020"},
-    {file = "charset_normalizer-3.2.0-cp39-cp39-win32.whl", hash = "sha256:6c409c0deba34f147f77efaa67b8e4bb83d2f11c8806405f76397ae5b8c0d1c9"},
-    {file = "charset_normalizer-3.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:7095f6fbfaa55defb6b733cfeb14efaae7a29f0b59d8cf213be4e7ca0b857b80"},
-    {file = "charset_normalizer-3.2.0-py3-none-any.whl", hash = "sha256:8e098148dd37b4ce3baca71fb394c81dc5d9c7728c95df695d2dca218edf40e6"},
+    {file = "charset-normalizer-3.3.2.tar.gz", hash = "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5"},
+    {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3"},
+    {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027"},
+    {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03"},
+    {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d"},
+    {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e"},
+    {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6"},
+    {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5"},
+    {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537"},
+    {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c"},
+    {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12"},
+    {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f"},
+    {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269"},
+    {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519"},
+    {file = "charset_normalizer-3.3.2-cp310-cp310-win32.whl", hash = "sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73"},
+    {file = "charset_normalizer-3.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09"},
+    {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db"},
+    {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96"},
+    {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e"},
+    {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f"},
+    {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574"},
+    {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4"},
+    {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8"},
+    {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc"},
+    {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae"},
+    {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887"},
+    {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae"},
+    {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce"},
+    {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f"},
+    {file = "charset_normalizer-3.3.2-cp311-cp311-win32.whl", hash = "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab"},
+    {file = "charset_normalizer-3.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77"},
+    {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8"},
+    {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b"},
+    {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6"},
+    {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a"},
+    {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389"},
+    {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa"},
+    {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b"},
+    {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed"},
+    {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26"},
+    {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d"},
+    {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068"},
+    {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143"},
+    {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4"},
+    {file = "charset_normalizer-3.3.2-cp312-cp312-win32.whl", hash = "sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7"},
+    {file = "charset_normalizer-3.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001"},
+    {file = "charset_normalizer-3.3.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:95f2a5796329323b8f0512e09dbb7a1860c46a39da62ecb2324f116fa8fdc85c"},
+    {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c002b4ffc0be611f0d9da932eb0f704fe2602a9a949d1f738e4c34c75b0863d5"},
+    {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a981a536974bbc7a512cf44ed14938cf01030a99e9b3a06dd59578882f06f985"},
+    {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3287761bc4ee9e33561a7e058c72ac0938c4f57fe49a09eae428fd88aafe7bb6"},
+    {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:42cb296636fcc8b0644486d15c12376cb9fa75443e00fb25de0b8602e64c1714"},
+    {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a55554a2fa0d408816b3b5cedf0045f4b8e1a6065aec45849de2d6f3f8e9786"},
+    {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:c083af607d2515612056a31f0a8d9e0fcb5876b7bfc0abad3ecd275bc4ebc2d5"},
+    {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:87d1351268731db79e0f8e745d92493ee2841c974128ef629dc518b937d9194c"},
+    {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:bd8f7df7d12c2db9fab40bdd87a7c09b1530128315d047a086fa3ae3435cb3a8"},
+    {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:c180f51afb394e165eafe4ac2936a14bee3eb10debc9d9e4db8958fe36afe711"},
+    {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:8c622a5fe39a48f78944a87d4fb8a53ee07344641b0562c540d840748571b811"},
+    {file = "charset_normalizer-3.3.2-cp37-cp37m-win32.whl", hash = "sha256:db364eca23f876da6f9e16c9da0df51aa4f104a972735574842618b8c6d999d4"},
+    {file = "charset_normalizer-3.3.2-cp37-cp37m-win_amd64.whl", hash = "sha256:86216b5cee4b06df986d214f664305142d9c76df9b6512be2738aa72a2048f99"},
+    {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:6463effa3186ea09411d50efc7d85360b38d5f09b870c48e4600f63af490e56a"},
+    {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6c4caeef8fa63d06bd437cd4bdcf3ffefe6738fb1b25951440d80dc7df8c03ac"},
+    {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:37e55c8e51c236f95b033f6fb391d7d7970ba5fe7ff453dad675e88cf303377a"},
+    {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb69256e180cb6c8a894fee62b3afebae785babc1ee98b81cdf68bbca1987f33"},
+    {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ae5f4161f18c61806f411a13b0310bea87f987c7d2ecdbdaad0e94eb2e404238"},
+    {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b2b0a0c0517616b6869869f8c581d4eb2dd83a4d79e0ebcb7d373ef9956aeb0a"},
+    {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:45485e01ff4d3630ec0d9617310448a8702f70e9c01906b0d0118bdf9d124cf2"},
+    {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eb00ed941194665c332bf8e078baf037d6c35d7c4f3102ea2d4f16ca94a26dc8"},
+    {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:2127566c664442652f024c837091890cb1942c30937add288223dc895793f898"},
+    {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a50aebfa173e157099939b17f18600f72f84eed3049e743b68ad15bd69b6bf99"},
+    {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:4d0d1650369165a14e14e1e47b372cfcb31d6ab44e6e33cb2d4e57265290044d"},
+    {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:923c0c831b7cfcb071580d3f46c4baf50f174be571576556269530f4bbd79d04"},
+    {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087"},
+    {file = "charset_normalizer-3.3.2-cp38-cp38-win32.whl", hash = "sha256:6ef1d82a3af9d3eecdba2321dc1b3c238245d890843e040e41e470ffa64c3e25"},
+    {file = "charset_normalizer-3.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:eb8821e09e916165e160797a6c17edda0679379a4be5c716c260e836e122f54b"},
+    {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4"},
+    {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d"},
+    {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0"},
+    {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269"},
+    {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c"},
+    {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519"},
+    {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796"},
+    {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185"},
+    {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c"},
+    {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458"},
+    {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2"},
+    {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8"},
+    {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561"},
+    {file = "charset_normalizer-3.3.2-cp39-cp39-win32.whl", hash = "sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f"},
+    {file = "charset_normalizer-3.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d"},
+    {file = "charset_normalizer-3.3.2-py3-none-any.whl", hash = "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc"},
 ]
 
 [[package]]
@@ -270,34 +301,35 @@ files = [
 
 [[package]]
 name = "docker"
-version = "6.1.3"
+version = "7.1.0"
 description = "A Python library for the Docker Engine API."
 optional = false
-python-versions = ">=3.7"
+python-versions = ">=3.8"
 files = [
-    {file = "docker-6.1.3-py3-none-any.whl", hash = "sha256:aecd2277b8bf8e506e484f6ab7aec39abe0038e29fa4a6d3ba86c3fe01844ed9"},
-    {file = "docker-6.1.3.tar.gz", hash = "sha256:aa6d17830045ba5ef0168d5eaa34d37beeb113948c413affe1d5991fc11f9a20"},
+    {file = "docker-7.1.0-py3-none-any.whl", hash = "sha256:c96b93b7f0a746f9e77d325bcfb87422a3d8bd4f03136ae8a85b37f1898d5fc0"},
+    {file = "docker-7.1.0.tar.gz", hash = "sha256:ad8c70e6e3f8926cb8a92619b832b4ea5299e2831c14284663184e200546fa6c"},
 ]
 
 [package.dependencies]
-packaging = ">=14.0"
 pywin32 = {version = ">=304", markers = "sys_platform == \"win32\""}
 requests = ">=2.26.0"
 urllib3 = ">=1.26.0"
-websocket-client = ">=0.32.0"
 
 [package.extras]
+dev = ["coverage (==7.2.7)", "pytest (==7.4.2)", "pytest-cov (==4.1.0)", "pytest-timeout (==2.1.0)", "ruff (==0.1.8)"]
+docs = ["myst-parser (==0.18.0)", "sphinx (==5.1.1)"]
 ssh = ["paramiko (>=2.4.3)"]
+websockets = ["websocket-client (>=1.3.0)"]
 
 [[package]]
 name = "exceptiongroup"
-version = "1.1.3"
+version = "1.2.2"
 description = "Backport of PEP 654 (exception groups)"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "exceptiongroup-1.1.3-py3-none-any.whl", hash = "sha256:343280667a4585d195ca1cf9cef84a4e178c4b6cf2274caef9859782b567d5e3"},
-    {file = "exceptiongroup-1.1.3.tar.gz", hash = "sha256:097acd85d473d75af5bb98e41b61ff7fe35efe6675e4f9370ec6ec5126d160e9"},
+    {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"},
+    {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"},
 ]
 
 [package.extras]
@@ -305,101 +337,115 @@ test = ["pytest (>=6)"]
 
 [[package]]
 name = "filelock"
-version = "3.12.3"
+version = "3.16.0"
 description = "A platform independent file lock."
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "filelock-3.12.3-py3-none-any.whl", hash = "sha256:f067e40ccc40f2b48395a80fcbd4728262fab54e232e090a4063ab804179efeb"},
-    {file = "filelock-3.12.3.tar.gz", hash = "sha256:0ecc1dd2ec4672a10c8550a8182f1bd0c0a5088470ecd5a125e45f49472fac3d"},
+    {file = "filelock-3.16.0-py3-none-any.whl", hash = "sha256:f6ed4c963184f4c84dd5557ce8fece759a3724b37b80c6c4f20a2f63a4dc6609"},
+    {file = "filelock-3.16.0.tar.gz", hash = "sha256:81de9eb8453c769b63369f87f11131a7ab04e367f8d97ad39dc230daa07e3bec"},
 ]
 
-[package.dependencies]
-typing-extensions = {version = ">=4.7.1", markers = "python_version < \"3.11\""}
-
 [package.extras]
-docs = ["furo (>=2023.7.26)", "sphinx (>=7.1.2)", "sphinx-autodoc-typehints (>=1.24)"]
-testing = ["covdefaults (>=2.3)", "coverage (>=7.3)", "diff-cover (>=7.7)", "pytest (>=7.4)", "pytest-cov (>=4.1)", "pytest-mock (>=3.11.1)", "pytest-timeout (>=2.1)"]
+docs = ["furo (>=2024.8.6)", "sphinx (>=8.0.2)", "sphinx-autodoc-typehints (>=2.4)"]
+testing = ["covdefaults (>=2.3)", "coverage (>=7.6.1)", "diff-cover (>=9.1.1)", "pytest (>=8.3.2)", "pytest-asyncio (>=0.24)", "pytest-cov (>=5)", "pytest-mock (>=3.14)", "pytest-timeout (>=2.3.1)", "virtualenv (>=20.26.3)"]
+typing = ["typing-extensions (>=4.12.2)"]
 
 [[package]]
 name = "frozenlist"
-version = "1.4.0"
+version = "1.4.1"
 description = "A list-like structure which implements collections.abc.MutableSequence"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "frozenlist-1.4.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:764226ceef3125e53ea2cb275000e309c0aa5464d43bd72abd661e27fffc26ab"},
-    {file = "frozenlist-1.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d6484756b12f40003c6128bfcc3fa9f0d49a687e171186c2d85ec82e3758c559"},
-    {file = "frozenlist-1.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9ac08e601308e41eb533f232dbf6b7e4cea762f9f84f6357136eed926c15d12c"},
-    {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d081f13b095d74b67d550de04df1c756831f3b83dc9881c38985834387487f1b"},
-    {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:71932b597f9895f011f47f17d6428252fc728ba2ae6024e13c3398a087c2cdea"},
-    {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:981b9ab5a0a3178ff413bca62526bb784249421c24ad7381e39d67981be2c326"},
-    {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e41f3de4df3e80de75845d3e743b3f1c4c8613c3997a912dbf0229fc61a8b963"},
-    {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6918d49b1f90821e93069682c06ffde41829c346c66b721e65a5c62b4bab0300"},
-    {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0e5c8764c7829343d919cc2dfc587a8db01c4f70a4ebbc49abde5d4b158b007b"},
-    {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:8d0edd6b1c7fb94922bf569c9b092ee187a83f03fb1a63076e7774b60f9481a8"},
-    {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:e29cda763f752553fa14c68fb2195150bfab22b352572cb36c43c47bedba70eb"},
-    {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:0c7c1b47859ee2cac3846fde1c1dc0f15da6cec5a0e5c72d101e0f83dcb67ff9"},
-    {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:901289d524fdd571be1c7be054f48b1f88ce8dddcbdf1ec698b27d4b8b9e5d62"},
-    {file = "frozenlist-1.4.0-cp310-cp310-win32.whl", hash = "sha256:1a0848b52815006ea6596c395f87449f693dc419061cc21e970f139d466dc0a0"},
-    {file = "frozenlist-1.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:b206646d176a007466358aa21d85cd8600a415c67c9bd15403336c331a10d956"},
-    {file = "frozenlist-1.4.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:de343e75f40e972bae1ef6090267f8260c1446a1695e77096db6cfa25e759a95"},
-    {file = "frozenlist-1.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ad2a9eb6d9839ae241701d0918f54c51365a51407fd80f6b8289e2dfca977cc3"},
-    {file = "frozenlist-1.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bd7bd3b3830247580de99c99ea2a01416dfc3c34471ca1298bccabf86d0ff4dc"},
-    {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bdf1847068c362f16b353163391210269e4f0569a3c166bc6a9f74ccbfc7e839"},
-    {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:38461d02d66de17455072c9ba981d35f1d2a73024bee7790ac2f9e361ef1cd0c"},
-    {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d5a32087d720c608f42caed0ef36d2b3ea61a9d09ee59a5142d6070da9041b8f"},
-    {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dd65632acaf0d47608190a71bfe46b209719bf2beb59507db08ccdbe712f969b"},
-    {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:261b9f5d17cac914531331ff1b1d452125bf5daa05faf73b71d935485b0c510b"},
-    {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:b89ac9768b82205936771f8d2eb3ce88503b1556324c9f903e7156669f521472"},
-    {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:008eb8b31b3ea6896da16c38c1b136cb9fec9e249e77f6211d479db79a4eaf01"},
-    {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:e74b0506fa5aa5598ac6a975a12aa8928cbb58e1f5ac8360792ef15de1aa848f"},
-    {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:490132667476f6781b4c9458298b0c1cddf237488abd228b0b3650e5ecba7467"},
-    {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:76d4711f6f6d08551a7e9ef28c722f4a50dd0fc204c56b4bcd95c6cc05ce6fbb"},
-    {file = "frozenlist-1.4.0-cp311-cp311-win32.whl", hash = "sha256:a02eb8ab2b8f200179b5f62b59757685ae9987996ae549ccf30f983f40602431"},
-    {file = "frozenlist-1.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:515e1abc578dd3b275d6a5114030b1330ba044ffba03f94091842852f806f1c1"},
-    {file = "frozenlist-1.4.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:f0ed05f5079c708fe74bf9027e95125334b6978bf07fd5ab923e9e55e5fbb9d3"},
-    {file = "frozenlist-1.4.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ca265542ca427bf97aed183c1676e2a9c66942e822b14dc6e5f42e038f92a503"},
-    {file = "frozenlist-1.4.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:491e014f5c43656da08958808588cc6c016847b4360e327a62cb308c791bd2d9"},
-    {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:17ae5cd0f333f94f2e03aaf140bb762c64783935cc764ff9c82dff626089bebf"},
-    {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1e78fb68cf9c1a6aa4a9a12e960a5c9dfbdb89b3695197aa7064705662515de2"},
-    {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d5655a942f5f5d2c9ed93d72148226d75369b4f6952680211972a33e59b1dfdc"},
-    {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c11b0746f5d946fecf750428a95f3e9ebe792c1ee3b1e96eeba145dc631a9672"},
-    {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e66d2a64d44d50d2543405fb183a21f76b3b5fd16f130f5c99187c3fb4e64919"},
-    {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:88f7bc0fcca81f985f78dd0fa68d2c75abf8272b1f5c323ea4a01a4d7a614efc"},
-    {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:5833593c25ac59ede40ed4de6d67eb42928cca97f26feea219f21d0ed0959b79"},
-    {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:fec520865f42e5c7f050c2a79038897b1c7d1595e907a9e08e3353293ffc948e"},
-    {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:b826d97e4276750beca7c8f0f1a4938892697a6bcd8ec8217b3312dad6982781"},
-    {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:ceb6ec0a10c65540421e20ebd29083c50e6d1143278746a4ef6bcf6153171eb8"},
-    {file = "frozenlist-1.4.0-cp38-cp38-win32.whl", hash = "sha256:2b8bcf994563466db019fab287ff390fffbfdb4f905fc77bc1c1d604b1c689cc"},
-    {file = "frozenlist-1.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:a6c8097e01886188e5be3e6b14e94ab365f384736aa1fca6a0b9e35bd4a30bc7"},
-    {file = "frozenlist-1.4.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:6c38721585f285203e4b4132a352eb3daa19121a035f3182e08e437cface44bf"},
-    {file = "frozenlist-1.4.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a0c6da9aee33ff0b1a451e867da0c1f47408112b3391dd43133838339e410963"},
-    {file = "frozenlist-1.4.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:93ea75c050c5bb3d98016b4ba2497851eadf0ac154d88a67d7a6816206f6fa7f"},
-    {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f61e2dc5ad442c52b4887f1fdc112f97caeff4d9e6ebe78879364ac59f1663e1"},
-    {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aa384489fefeb62321b238e64c07ef48398fe80f9e1e6afeff22e140e0850eef"},
-    {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:10ff5faaa22786315ef57097a279b833ecab1a0bfb07d604c9cbb1c4cdc2ed87"},
-    {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:007df07a6e3eb3e33e9a1fe6a9db7af152bbd8a185f9aaa6ece10a3529e3e1c6"},
-    {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f4f399d28478d1f604c2ff9119907af9726aed73680e5ed1ca634d377abb087"},
-    {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:c5374b80521d3d3f2ec5572e05adc94601985cc526fb276d0c8574a6d749f1b3"},
-    {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:ce31ae3e19f3c902de379cf1323d90c649425b86de7bbdf82871b8a2a0615f3d"},
-    {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:7211ef110a9194b6042449431e08c4d80c0481e5891e58d429df5899690511c2"},
-    {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:556de4430ce324c836789fa4560ca62d1591d2538b8ceb0b4f68fb7b2384a27a"},
-    {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7645a8e814a3ee34a89c4a372011dcd817964ce8cb273c8ed6119d706e9613e3"},
-    {file = "frozenlist-1.4.0-cp39-cp39-win32.whl", hash = "sha256:19488c57c12d4e8095a922f328df3f179c820c212940a498623ed39160bc3c2f"},
-    {file = "frozenlist-1.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:6221d84d463fb110bdd7619b69cb43878a11d51cbb9394ae3105d082d5199167"},
-    {file = "frozenlist-1.4.0.tar.gz", hash = "sha256:09163bdf0b2907454042edb19f887c6d33806adc71fbd54afc14908bfdc22251"},
+    {file = "frozenlist-1.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:f9aa1878d1083b276b0196f2dfbe00c9b7e752475ed3b682025ff20c1c1f51ac"},
+    {file = "frozenlist-1.4.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:29acab3f66f0f24674b7dc4736477bcd4bc3ad4b896f5f45379a67bce8b96868"},
+    {file = "frozenlist-1.4.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:74fb4bee6880b529a0c6560885fce4dc95936920f9f20f53d99a213f7bf66776"},
+    {file = "frozenlist-1.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:590344787a90ae57d62511dd7c736ed56b428f04cd8c161fcc5e7232c130c69a"},
+    {file = "frozenlist-1.4.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:068b63f23b17df8569b7fdca5517edef76171cf3897eb68beb01341131fbd2ad"},
+    {file = "frozenlist-1.4.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5c849d495bf5154cd8da18a9eb15db127d4dba2968d88831aff6f0331ea9bd4c"},
+    {file = "frozenlist-1.4.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9750cc7fe1ae3b1611bb8cfc3f9ec11d532244235d75901fb6b8e42ce9229dfe"},
+    {file = "frozenlist-1.4.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9b2de4cf0cdd5bd2dee4c4f63a653c61d2408055ab77b151c1957f221cabf2a"},
+    {file = "frozenlist-1.4.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0633c8d5337cb5c77acbccc6357ac49a1770b8c487e5b3505c57b949b4b82e98"},
+    {file = "frozenlist-1.4.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:27657df69e8801be6c3638054e202a135c7f299267f1a55ed3a598934f6c0d75"},
+    {file = "frozenlist-1.4.1-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:f9a3ea26252bd92f570600098783d1371354d89d5f6b7dfd87359d669f2109b5"},
+    {file = "frozenlist-1.4.1-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:4f57dab5fe3407b6c0c1cc907ac98e8a189f9e418f3b6e54d65a718aaafe3950"},
+    {file = "frozenlist-1.4.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:e02a0e11cf6597299b9f3bbd3f93d79217cb90cfd1411aec33848b13f5c656cc"},
+    {file = "frozenlist-1.4.1-cp310-cp310-win32.whl", hash = "sha256:a828c57f00f729620a442881cc60e57cfcec6842ba38e1b19fd3e47ac0ff8dc1"},
+    {file = "frozenlist-1.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:f56e2333dda1fe0f909e7cc59f021eba0d2307bc6f012a1ccf2beca6ba362439"},
+    {file = "frozenlist-1.4.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:a0cb6f11204443f27a1628b0e460f37fb30f624be6051d490fa7d7e26d4af3d0"},
+    {file = "frozenlist-1.4.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b46c8ae3a8f1f41a0d2ef350c0b6e65822d80772fe46b653ab6b6274f61d4a49"},
+    {file = "frozenlist-1.4.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:fde5bd59ab5357e3853313127f4d3565fc7dad314a74d7b5d43c22c6a5ed2ced"},
+    {file = "frozenlist-1.4.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:722e1124aec435320ae01ee3ac7bec11a5d47f25d0ed6328f2273d287bc3abb0"},
+    {file = "frozenlist-1.4.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2471c201b70d58a0f0c1f91261542a03d9a5e088ed3dc6c160d614c01649c106"},
+    {file = "frozenlist-1.4.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c757a9dd70d72b076d6f68efdbb9bc943665ae954dad2801b874c8c69e185068"},
+    {file = "frozenlist-1.4.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f146e0911cb2f1da549fc58fc7bcd2b836a44b79ef871980d605ec392ff6b0d2"},
+    {file = "frozenlist-1.4.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4f9c515e7914626b2a2e1e311794b4c35720a0be87af52b79ff8e1429fc25f19"},
+    {file = "frozenlist-1.4.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:c302220494f5c1ebeb0912ea782bcd5e2f8308037b3c7553fad0e48ebad6ad82"},
+    {file = "frozenlist-1.4.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:442acde1e068288a4ba7acfe05f5f343e19fac87bfc96d89eb886b0363e977ec"},
+    {file = "frozenlist-1.4.1-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:1b280e6507ea8a4fa0c0a7150b4e526a8d113989e28eaaef946cc77ffd7efc0a"},
+    {file = "frozenlist-1.4.1-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:fe1a06da377e3a1062ae5fe0926e12b84eceb8a50b350ddca72dc85015873f74"},
+    {file = "frozenlist-1.4.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:db9e724bebd621d9beca794f2a4ff1d26eed5965b004a97f1f1685a173b869c2"},
+    {file = "frozenlist-1.4.1-cp311-cp311-win32.whl", hash = "sha256:e774d53b1a477a67838a904131c4b0eef6b3d8a651f8b138b04f748fccfefe17"},
+    {file = "frozenlist-1.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:fb3c2db03683b5767dedb5769b8a40ebb47d6f7f45b1b3e3b4b51ec8ad9d9825"},
+    {file = "frozenlist-1.4.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:1979bc0aeb89b33b588c51c54ab0161791149f2461ea7c7c946d95d5f93b56ae"},
+    {file = "frozenlist-1.4.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:cc7b01b3754ea68a62bd77ce6020afaffb44a590c2289089289363472d13aedb"},
+    {file = "frozenlist-1.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c9c92be9fd329ac801cc420e08452b70e7aeab94ea4233a4804f0915c14eba9b"},
+    {file = "frozenlist-1.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c3894db91f5a489fc8fa6a9991820f368f0b3cbdb9cd8849547ccfab3392d86"},
+    {file = "frozenlist-1.4.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ba60bb19387e13597fb059f32cd4d59445d7b18b69a745b8f8e5db0346f33480"},
+    {file = "frozenlist-1.4.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8aefbba5f69d42246543407ed2461db31006b0f76c4e32dfd6f42215a2c41d09"},
+    {file = "frozenlist-1.4.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:780d3a35680ced9ce682fbcf4cb9c2bad3136eeff760ab33707b71db84664e3a"},
+    {file = "frozenlist-1.4.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9acbb16f06fe7f52f441bb6f413ebae6c37baa6ef9edd49cdd567216da8600cd"},
+    {file = "frozenlist-1.4.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:23b701e65c7b36e4bf15546a89279bd4d8675faabc287d06bbcfac7d3c33e1e6"},
+    {file = "frozenlist-1.4.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:3e0153a805a98f5ada7e09826255ba99fb4f7524bb81bf6b47fb702666484ae1"},
+    {file = "frozenlist-1.4.1-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:dd9b1baec094d91bf36ec729445f7769d0d0cf6b64d04d86e45baf89e2b9059b"},
+    {file = "frozenlist-1.4.1-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:1a4471094e146b6790f61b98616ab8e44f72661879cc63fa1049d13ef711e71e"},
+    {file = "frozenlist-1.4.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:5667ed53d68d91920defdf4035d1cdaa3c3121dc0b113255124bcfada1cfa1b8"},
+    {file = "frozenlist-1.4.1-cp312-cp312-win32.whl", hash = "sha256:beee944ae828747fd7cb216a70f120767fc9f4f00bacae8543c14a6831673f89"},
+    {file = "frozenlist-1.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:64536573d0a2cb6e625cf309984e2d873979709f2cf22839bf2d61790b448ad5"},
+    {file = "frozenlist-1.4.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:20b51fa3f588ff2fe658663db52a41a4f7aa6c04f6201449c6c7c476bd255c0d"},
+    {file = "frozenlist-1.4.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:410478a0c562d1a5bcc2f7ea448359fcb050ed48b3c6f6f4f18c313a9bdb1826"},
+    {file = "frozenlist-1.4.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:c6321c9efe29975232da3bd0af0ad216800a47e93d763ce64f291917a381b8eb"},
+    {file = "frozenlist-1.4.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:48f6a4533887e189dae092f1cf981f2e3885175f7a0f33c91fb5b7b682b6bab6"},
+    {file = "frozenlist-1.4.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6eb73fa5426ea69ee0e012fb59cdc76a15b1283d6e32e4f8dc4482ec67d1194d"},
+    {file = "frozenlist-1.4.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fbeb989b5cc29e8daf7f976b421c220f1b8c731cbf22b9130d8815418ea45887"},
+    {file = "frozenlist-1.4.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:32453c1de775c889eb4e22f1197fe3bdfe457d16476ea407472b9442e6295f7a"},
+    {file = "frozenlist-1.4.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:693945278a31f2086d9bf3df0fe8254bbeaef1fe71e1351c3bd730aa7d31c41b"},
+    {file = "frozenlist-1.4.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:1d0ce09d36d53bbbe566fe296965b23b961764c0bcf3ce2fa45f463745c04701"},
+    {file = "frozenlist-1.4.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:3a670dc61eb0d0eb7080890c13de3066790f9049b47b0de04007090807c776b0"},
+    {file = "frozenlist-1.4.1-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:dca69045298ce5c11fd539682cff879cc1e664c245d1c64da929813e54241d11"},
+    {file = "frozenlist-1.4.1-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:a06339f38e9ed3a64e4c4e43aec7f59084033647f908e4259d279a52d3757d09"},
+    {file = "frozenlist-1.4.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b7f2f9f912dca3934c1baec2e4585a674ef16fe00218d833856408c48d5beee7"},
+    {file = "frozenlist-1.4.1-cp38-cp38-win32.whl", hash = "sha256:e7004be74cbb7d9f34553a5ce5fb08be14fb33bc86f332fb71cbe5216362a497"},
+    {file = "frozenlist-1.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:5a7d70357e7cee13f470c7883a063aae5fe209a493c57d86eb7f5a6f910fae09"},
+    {file = "frozenlist-1.4.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:bfa4a17e17ce9abf47a74ae02f32d014c5e9404b6d9ac7f729e01562bbee601e"},
+    {file = "frozenlist-1.4.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b7e3ed87d4138356775346e6845cccbe66cd9e207f3cd11d2f0b9fd13681359d"},
+    {file = "frozenlist-1.4.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c99169d4ff810155ca50b4da3b075cbde79752443117d89429595c2e8e37fed8"},
+    {file = "frozenlist-1.4.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:edb678da49d9f72c9f6c609fbe41a5dfb9a9282f9e6a2253d5a91e0fc382d7c0"},
+    {file = "frozenlist-1.4.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6db4667b187a6742b33afbbaf05a7bc551ffcf1ced0000a571aedbb4aa42fc7b"},
+    {file = "frozenlist-1.4.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:55fdc093b5a3cb41d420884cdaf37a1e74c3c37a31f46e66286d9145d2063bd0"},
+    {file = "frozenlist-1.4.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:82e8211d69a4f4bc360ea22cd6555f8e61a1bd211d1d5d39d3d228b48c83a897"},
+    {file = "frozenlist-1.4.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:89aa2c2eeb20957be2d950b85974b30a01a762f3308cd02bb15e1ad632e22dc7"},
+    {file = "frozenlist-1.4.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:9d3e0c25a2350080e9319724dede4f31f43a6c9779be48021a7f4ebde8b2d742"},
+    {file = "frozenlist-1.4.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:7268252af60904bf52c26173cbadc3a071cece75f873705419c8681f24d3edea"},
+    {file = "frozenlist-1.4.1-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:0c250a29735d4f15321007fb02865f0e6b6a41a6b88f1f523ca1596ab5f50bd5"},
+    {file = "frozenlist-1.4.1-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:96ec70beabbd3b10e8bfe52616a13561e58fe84c0101dd031dc78f250d5128b9"},
+    {file = "frozenlist-1.4.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:23b2d7679b73fe0e5a4560b672a39f98dfc6f60df63823b0a9970525325b95f6"},
+    {file = "frozenlist-1.4.1-cp39-cp39-win32.whl", hash = "sha256:a7496bfe1da7fb1a4e1cc23bb67c58fab69311cc7d32b5a99c2007b4b2a0e932"},
+    {file = "frozenlist-1.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:e6a20a581f9ce92d389a8c7d7c3dd47c81fd5d6e655c8dddf341e14aa48659d0"},
+    {file = "frozenlist-1.4.1-py3-none-any.whl", hash = "sha256:04ced3e6a46b4cfffe20f9ae482818e34eba9b5fb0ce4056e4cc9b6e212d09b7"},
+    {file = "frozenlist-1.4.1.tar.gz", hash = "sha256:c037a86e8513059a2613aaba4d817bb90b9d9b6b69aace3ce9c877e8c8ed402b"},
 ]
 
 [[package]]
 name = "fsspec"
-version = "2023.6.0"
+version = "2024.9.0"
 description = "File-system specification"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "fsspec-2023.6.0-py3-none-any.whl", hash = "sha256:1cbad1faef3e391fba6dc005ae9b5bdcbf43005c9167ce78c915549c352c869a"},
-    {file = "fsspec-2023.6.0.tar.gz", hash = "sha256:d0b2f935446169753e7a5c5c55681c54ea91996cc67be93c39a154fb3a2742af"},
+    {file = "fsspec-2024.9.0-py3-none-any.whl", hash = "sha256:a0947d552d8a6efa72cc2c730b12c41d043509156966cca4fb157b0f2a0c574b"},
+    {file = "fsspec-2024.9.0.tar.gz", hash = "sha256:4b0afb90c2f21832df142f292649035d80b421f60a9e1c027802e5a0da2b04e8"},
 ]
 
 [package.extras]
@@ -407,7 +453,8 @@ abfs = ["adlfs"]
 adl = ["adlfs"]
 arrow = ["pyarrow (>=1)"]
 dask = ["dask", "distributed"]
-devel = ["pytest", "pytest-cov"]
+dev = ["pre-commit", "ruff"]
+doc = ["numpydoc", "sphinx", "sphinx-design", "sphinx-rtd-theme", "yarl"]
 dropbox = ["dropbox", "dropboxdrivefs", "requests"]
 full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs", "smbprotocol", "tqdm"]
 fuse = ["fusepy"]
@@ -417,29 +464,32 @@ github = ["requests"]
 gs = ["gcsfs"]
 gui = ["panel"]
 hdfs = ["pyarrow (>=1)"]
-http = ["aiohttp (!=4.0.0a0,!=4.0.0a1)", "requests"]
+http = ["aiohttp (!=4.0.0a0,!=4.0.0a1)"]
 libarchive = ["libarchive-c"]
 oci = ["ocifs"]
 s3 = ["s3fs"]
 sftp = ["paramiko"]
 smb = ["smbprotocol"]
 ssh = ["paramiko"]
+test = ["aiohttp (!=4.0.0a0,!=4.0.0a1)", "numpy", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "requests"]
+test-downstream = ["aiobotocore (>=2.5.4,<3.0.0)", "dask-expr", "dask[dataframe,test]", "moto[server] (>4,<5)", "pytest-timeout", "xarray"]
+test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr", "zstandard"]
 tqdm = ["tqdm"]
 
 [[package]]
 name = "huggingface-hub"
-version = "0.16.4"
+version = "0.24.6"
 description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
 optional = false
-python-versions = ">=3.7.0"
+python-versions = ">=3.8.0"
 files = [
-    {file = "huggingface_hub-0.16.4-py3-none-any.whl", hash = "sha256:0d3df29932f334fead024afc7cb4cc5149d955238b8b5e42dcf9740d6995a349"},
-    {file = "huggingface_hub-0.16.4.tar.gz", hash = "sha256:608c7d4f3d368b326d1747f91523dbd1f692871e8e2e7a4750314a2dd8b63e14"},
+    {file = "huggingface_hub-0.24.6-py3-none-any.whl", hash = "sha256:a990f3232aa985fe749bc9474060cbad75e8b2f115f6665a9fda5b9c97818970"},
+    {file = "huggingface_hub-0.24.6.tar.gz", hash = "sha256:cc2579e761d070713eaa9c323e3debe39d5b464ae3a7261c39a9195b27bb8000"},
 ]
 
 [package.dependencies]
 filelock = "*"
-fsspec = "*"
+fsspec = ">=2023.5.0"
 packaging = ">=20.9"
 pyyaml = ">=5.1"
 requests = "*"
@@ -447,26 +497,28 @@ tqdm = ">=4.42.1"
 typing-extensions = ">=3.7.4.3"
 
 [package.extras]
-all = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "black (>=23.1,<24.0)", "gradio", "jedi", "mypy (==0.982)", "numpy", "pydantic", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-vcr", "pytest-xdist", "ruff (>=0.0.241)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "urllib3 (<2.0)"]
+all = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gradio", "jedi", "minijinja (>=1.0)", "mypy (==1.5.1)", "numpy", "pytest (>=8.1.1,<8.2.2)", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-mock", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.5.0)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"]
 cli = ["InquirerPy (==0.3.4)"]
-dev = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "black (>=23.1,<24.0)", "gradio", "jedi", "mypy (==0.982)", "numpy", "pydantic", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-vcr", "pytest-xdist", "ruff (>=0.0.241)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "urllib3 (<2.0)"]
+dev = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gradio", "jedi", "minijinja (>=1.0)", "mypy (==1.5.1)", "numpy", "pytest (>=8.1.1,<8.2.2)", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-mock", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.5.0)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"]
 fastai = ["fastai (>=2.4)", "fastcore (>=1.3.27)", "toml"]
-inference = ["aiohttp", "pydantic"]
-quality = ["black (>=23.1,<24.0)", "mypy (==0.982)", "ruff (>=0.0.241)"]
+hf-transfer = ["hf-transfer (>=0.1.4)"]
+inference = ["aiohttp", "minijinja (>=1.0)"]
+quality = ["mypy (==1.5.1)", "ruff (>=0.5.0)"]
 tensorflow = ["graphviz", "pydot", "tensorflow"]
-testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "gradio", "jedi", "numpy", "pydantic", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-vcr", "pytest-xdist", "soundfile", "urllib3 (<2.0)"]
-torch = ["torch"]
-typing = ["pydantic", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3"]
+tensorflow-testing = ["keras (<3.0)", "tensorflow"]
+testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gradio", "jedi", "minijinja (>=1.0)", "numpy", "pytest (>=8.1.1,<8.2.2)", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-mock", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "soundfile", "urllib3 (<2.0)"]
+torch = ["safetensors[torch]", "torch"]
+typing = ["types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)"]
 
 [[package]]
 name = "idna"
-version = "3.4"
+version = "3.8"
 description = "Internationalized Domain Names in Applications (IDNA)"
 optional = false
-python-versions = ">=3.5"
+python-versions = ">=3.6"
 files = [
-    {file = "idna-3.4-py3-none-any.whl", hash = "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"},
-    {file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"},
+    {file = "idna-3.8-py3-none-any.whl", hash = "sha256:050b4e5baadcd44d760cedbd2b8e639f2ff89bbc7a5730fcc662954303377aac"},
+    {file = "idna-3.8.tar.gz", hash = "sha256:d838c2c0ed6fced7693d5e8ab8e734d5f8fda53a039c0164afb0b82e771e3603"},
 ]
 
 [[package]]
@@ -482,107 +534,173 @@ files = [
 
 [[package]]
 name = "multidict"
-version = "6.0.4"
+version = "6.1.0"
 description = "multidict implementation"
 optional = false
-python-versions = ">=3.7"
+python-versions = ">=3.8"
 files = [
-    {file = "multidict-6.0.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0b1a97283e0c85772d613878028fec909f003993e1007eafa715b24b377cb9b8"},
-    {file = "multidict-6.0.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:eeb6dcc05e911516ae3d1f207d4b0520d07f54484c49dfc294d6e7d63b734171"},
-    {file = "multidict-6.0.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d6d635d5209b82a3492508cf5b365f3446afb65ae7ebd755e70e18f287b0adf7"},
-    {file = "multidict-6.0.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c048099e4c9e9d615545e2001d3d8a4380bd403e1a0578734e0d31703d1b0c0b"},
-    {file = "multidict-6.0.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ea20853c6dbbb53ed34cb4d080382169b6f4554d394015f1bef35e881bf83547"},
-    {file = "multidict-6.0.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:16d232d4e5396c2efbbf4f6d4df89bfa905eb0d4dc5b3549d872ab898451f569"},
-    {file = "multidict-6.0.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:36c63aaa167f6c6b04ef2c85704e93af16c11d20de1d133e39de6a0e84582a93"},
-    {file = "multidict-6.0.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:64bdf1086b6043bf519869678f5f2757f473dee970d7abf6da91ec00acb9cb98"},
-    {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:43644e38f42e3af682690876cff722d301ac585c5b9e1eacc013b7a3f7b696a0"},
-    {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:7582a1d1030e15422262de9f58711774e02fa80df0d1578995c76214f6954988"},
-    {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:ddff9c4e225a63a5afab9dd15590432c22e8057e1a9a13d28ed128ecf047bbdc"},
-    {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:ee2a1ece51b9b9e7752e742cfb661d2a29e7bcdba2d27e66e28a99f1890e4fa0"},
-    {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a2e4369eb3d47d2034032a26c7a80fcb21a2cb22e1173d761a162f11e562caa5"},
-    {file = "multidict-6.0.4-cp310-cp310-win32.whl", hash = "sha256:574b7eae1ab267e5f8285f0fe881f17efe4b98c39a40858247720935b893bba8"},
-    {file = "multidict-6.0.4-cp310-cp310-win_amd64.whl", hash = "sha256:4dcbb0906e38440fa3e325df2359ac6cb043df8e58c965bb45f4e406ecb162cc"},
-    {file = "multidict-6.0.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:0dfad7a5a1e39c53ed00d2dd0c2e36aed4650936dc18fd9a1826a5ae1cad6f03"},
-    {file = "multidict-6.0.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:64da238a09d6039e3bd39bb3aee9c21a5e34f28bfa5aa22518581f910ff94af3"},
-    {file = "multidict-6.0.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ff959bee35038c4624250473988b24f846cbeb2c6639de3602c073f10410ceba"},
-    {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:01a3a55bd90018c9c080fbb0b9f4891db37d148a0a18722b42f94694f8b6d4c9"},
-    {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c5cb09abb18c1ea940fb99360ea0396f34d46566f157122c92dfa069d3e0e982"},
-    {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:666daae833559deb2d609afa4490b85830ab0dfca811a98b70a205621a6109fe"},
-    {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:11bdf3f5e1518b24530b8241529d2050014c884cf18b6fc69c0c2b30ca248710"},
-    {file = "multidict-6.0.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7d18748f2d30f94f498e852c67d61261c643b349b9d2a581131725595c45ec6c"},
-    {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:458f37be2d9e4c95e2d8866a851663cbc76e865b78395090786f6cd9b3bbf4f4"},
-    {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:b1a2eeedcead3a41694130495593a559a668f382eee0727352b9a41e1c45759a"},
-    {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:7d6ae9d593ef8641544d6263c7fa6408cc90370c8cb2bbb65f8d43e5b0351d9c"},
-    {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:5979b5632c3e3534e42ca6ff856bb24b2e3071b37861c2c727ce220d80eee9ed"},
-    {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:dcfe792765fab89c365123c81046ad4103fcabbc4f56d1c1997e6715e8015461"},
-    {file = "multidict-6.0.4-cp311-cp311-win32.whl", hash = "sha256:3601a3cece3819534b11d4efc1eb76047488fddd0c85a3948099d5da4d504636"},
-    {file = "multidict-6.0.4-cp311-cp311-win_amd64.whl", hash = "sha256:81a4f0b34bd92df3da93315c6a59034df95866014ac08535fc819f043bfd51f0"},
-    {file = "multidict-6.0.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:67040058f37a2a51ed8ea8f6b0e6ee5bd78ca67f169ce6122f3e2ec80dfe9b78"},
-    {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:853888594621e6604c978ce2a0444a1e6e70c8d253ab65ba11657659dcc9100f"},
-    {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:39ff62e7d0f26c248b15e364517a72932a611a9b75f35b45be078d81bdb86603"},
-    {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:af048912e045a2dc732847d33821a9d84ba553f5c5f028adbd364dd4765092ac"},
-    {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1e8b901e607795ec06c9e42530788c45ac21ef3aaa11dbd0c69de543bfb79a9"},
-    {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:62501642008a8b9871ddfccbf83e4222cf8ac0d5aeedf73da36153ef2ec222d2"},
-    {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:99b76c052e9f1bc0721f7541e5e8c05db3941eb9ebe7b8553c625ef88d6eefde"},
-    {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:509eac6cf09c794aa27bcacfd4d62c885cce62bef7b2c3e8b2e49d365b5003fe"},
-    {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:21a12c4eb6ddc9952c415f24eef97e3e55ba3af61f67c7bc388dcdec1404a067"},
-    {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:5cad9430ab3e2e4fa4a2ef4450f548768400a2ac635841bc2a56a2052cdbeb87"},
-    {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:ab55edc2e84460694295f401215f4a58597f8f7c9466faec545093045476327d"},
-    {file = "multidict-6.0.4-cp37-cp37m-win32.whl", hash = "sha256:5a4dcf02b908c3b8b17a45fb0f15b695bf117a67b76b7ad18b73cf8e92608775"},
-    {file = "multidict-6.0.4-cp37-cp37m-win_amd64.whl", hash = "sha256:6ed5f161328b7df384d71b07317f4d8656434e34591f20552c7bcef27b0ab88e"},
-    {file = "multidict-6.0.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5fc1b16f586f049820c5c5b17bb4ee7583092fa0d1c4e28b5239181ff9532e0c"},
-    {file = "multidict-6.0.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1502e24330eb681bdaa3eb70d6358e818e8e8f908a22a1851dfd4e15bc2f8161"},
-    {file = "multidict-6.0.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b692f419760c0e65d060959df05f2a531945af31fda0c8a3b3195d4efd06de11"},
-    {file = "multidict-6.0.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45e1ecb0379bfaab5eef059f50115b54571acfbe422a14f668fc8c27ba410e7e"},
-    {file = "multidict-6.0.4-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ddd3915998d93fbcd2566ddf9cf62cdb35c9e093075f862935573d265cf8f65d"},
-    {file = "multidict-6.0.4-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:59d43b61c59d82f2effb39a93c48b845efe23a3852d201ed2d24ba830d0b4cf2"},
-    {file = "multidict-6.0.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cc8e1d0c705233c5dd0c5e6460fbad7827d5d36f310a0fadfd45cc3029762258"},
-    {file = "multidict-6.0.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d6aa0418fcc838522256761b3415822626f866758ee0bc6632c9486b179d0b52"},
-    {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6748717bb10339c4760c1e63da040f5f29f5ed6e59d76daee30305894069a660"},
-    {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:4d1a3d7ef5e96b1c9e92f973e43aa5e5b96c659c9bc3124acbbd81b0b9c8a951"},
-    {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:4372381634485bec7e46718edc71528024fcdc6f835baefe517b34a33c731d60"},
-    {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:fc35cb4676846ef752816d5be2193a1e8367b4c1397b74a565a9d0389c433a1d"},
-    {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:4b9d9e4e2b37daddb5c23ea33a3417901fa7c7b3dee2d855f63ee67a0b21e5b1"},
-    {file = "multidict-6.0.4-cp38-cp38-win32.whl", hash = "sha256:e41b7e2b59679edfa309e8db64fdf22399eec4b0b24694e1b2104fb789207779"},
-    {file = "multidict-6.0.4-cp38-cp38-win_amd64.whl", hash = "sha256:d6c254ba6e45d8e72739281ebc46ea5eb5f101234f3ce171f0e9f5cc86991480"},
-    {file = "multidict-6.0.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:16ab77bbeb596e14212e7bab8429f24c1579234a3a462105cda4a66904998664"},
-    {file = "multidict-6.0.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bc779e9e6f7fda81b3f9aa58e3a6091d49ad528b11ed19f6621408806204ad35"},
-    {file = "multidict-6.0.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4ceef517eca3e03c1cceb22030a3e39cb399ac86bff4e426d4fc6ae49052cc60"},
-    {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:281af09f488903fde97923c7744bb001a9b23b039a909460d0f14edc7bf59706"},
-    {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:52f2dffc8acaba9a2f27174c41c9e57f60b907bb9f096b36b1a1f3be71c6284d"},
-    {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b41156839806aecb3641f3208c0dafd3ac7775b9c4c422d82ee2a45c34ba81ca"},
-    {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5e3fc56f88cc98ef8139255cf8cd63eb2c586531e43310ff859d6bb3a6b51f1"},
-    {file = "multidict-6.0.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8316a77808c501004802f9beebde51c9f857054a0c871bd6da8280e718444449"},
-    {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:f70b98cd94886b49d91170ef23ec5c0e8ebb6f242d734ed7ed677b24d50c82cf"},
-    {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:bf6774e60d67a9efe02b3616fee22441d86fab4c6d335f9d2051d19d90a40063"},
-    {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:e69924bfcdda39b722ef4d9aa762b2dd38e4632b3641b1d9a57ca9cd18f2f83a"},
-    {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:6b181d8c23da913d4ff585afd1155a0e1194c0b50c54fcfe286f70cdaf2b7176"},
-    {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:52509b5be062d9eafc8170e53026fbc54cf3b32759a23d07fd935fb04fc22d95"},
-    {file = "multidict-6.0.4-cp39-cp39-win32.whl", hash = "sha256:27c523fbfbdfd19c6867af7346332b62b586eed663887392cff78d614f9ec313"},
-    {file = "multidict-6.0.4-cp39-cp39-win_amd64.whl", hash = "sha256:33029f5734336aa0d4c0384525da0387ef89148dc7191aae00ca5fb23d7aafc2"},
-    {file = "multidict-6.0.4.tar.gz", hash = "sha256:3666906492efb76453c0e7b97f2cf459b0682e7402c0489a95484965dbc1da49"},
+    {file = "multidict-6.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3380252550e372e8511d49481bd836264c009adb826b23fefcc5dd3c69692f60"},
+    {file = "multidict-6.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:99f826cbf970077383d7de805c0681799491cb939c25450b9b5b3ced03ca99f1"},
+    {file = "multidict-6.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a114d03b938376557927ab23f1e950827c3b893ccb94b62fd95d430fd0e5cf53"},
+    {file = "multidict-6.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b1c416351ee6271b2f49b56ad7f308072f6f44b37118d69c2cad94f3fa8a40d5"},
+    {file = "multidict-6.1.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6b5d83030255983181005e6cfbac1617ce9746b219bc2aad52201ad121226581"},
+    {file = "multidict-6.1.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3e97b5e938051226dc025ec80980c285b053ffb1e25a3db2a3aa3bc046bf7f56"},
+    {file = "multidict-6.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d618649d4e70ac6efcbba75be98b26ef5078faad23592f9b51ca492953012429"},
+    {file = "multidict-6.1.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:10524ebd769727ac77ef2278390fb0068d83f3acb7773792a5080f2b0abf7748"},
+    {file = "multidict-6.1.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:ff3827aef427c89a25cc96ded1759271a93603aba9fb977a6d264648ebf989db"},
+    {file = "multidict-6.1.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:06809f4f0f7ab7ea2cabf9caca7d79c22c0758b58a71f9d32943ae13c7ace056"},
+    {file = "multidict-6.1.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:f179dee3b863ab1c59580ff60f9d99f632f34ccb38bf67a33ec6b3ecadd0fd76"},
+    {file = "multidict-6.1.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:aaed8b0562be4a0876ee3b6946f6869b7bcdb571a5d1496683505944e268b160"},
+    {file = "multidict-6.1.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3c8b88a2ccf5493b6c8da9076fb151ba106960a2df90c2633f342f120751a9e7"},
+    {file = "multidict-6.1.0-cp310-cp310-win32.whl", hash = "sha256:4a9cb68166a34117d6646c0023c7b759bf197bee5ad4272f420a0141d7eb03a0"},
+    {file = "multidict-6.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:20b9b5fbe0b88d0bdef2012ef7dee867f874b72528cf1d08f1d59b0e3850129d"},
+    {file = "multidict-6.1.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:3efe2c2cb5763f2f1b275ad2bf7a287d3f7ebbef35648a9726e3b69284a4f3d6"},
+    {file = "multidict-6.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c7053d3b0353a8b9de430a4f4b4268ac9a4fb3481af37dfe49825bf45ca24156"},
+    {file = "multidict-6.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:27e5fc84ccef8dfaabb09d82b7d179c7cf1a3fbc8a966f8274fcb4ab2eb4cadb"},
+    {file = "multidict-6.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0e2b90b43e696f25c62656389d32236e049568b39320e2735d51f08fd362761b"},
+    {file = "multidict-6.1.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d83a047959d38a7ff552ff94be767b7fd79b831ad1cd9920662db05fec24fe72"},
+    {file = "multidict-6.1.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d1a9dd711d0877a1ece3d2e4fea11a8e75741ca21954c919406b44e7cf971304"},
+    {file = "multidict-6.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ec2abea24d98246b94913b76a125e855eb5c434f7c46546046372fe60f666351"},
+    {file = "multidict-6.1.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4867cafcbc6585e4b678876c489b9273b13e9fff9f6d6d66add5e15d11d926cb"},
+    {file = "multidict-6.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:5b48204e8d955c47c55b72779802b219a39acc3ee3d0116d5080c388970b76e3"},
+    {file = "multidict-6.1.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:d8fff389528cad1618fb4b26b95550327495462cd745d879a8c7c2115248e399"},
+    {file = "multidict-6.1.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:a7a9541cd308eed5e30318430a9c74d2132e9a8cb46b901326272d780bf2d423"},
+    {file = "multidict-6.1.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:da1758c76f50c39a2efd5e9859ce7d776317eb1dd34317c8152ac9251fc574a3"},
+    {file = "multidict-6.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:c943a53e9186688b45b323602298ab727d8865d8c9ee0b17f8d62d14b56f0753"},
+    {file = "multidict-6.1.0-cp311-cp311-win32.whl", hash = "sha256:90f8717cb649eea3504091e640a1b8568faad18bd4b9fcd692853a04475a4b80"},
+    {file = "multidict-6.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:82176036e65644a6cc5bd619f65f6f19781e8ec2e5330f51aa9ada7504cc1926"},
+    {file = "multidict-6.1.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:b04772ed465fa3cc947db808fa306d79b43e896beb677a56fb2347ca1a49c1fa"},
+    {file = "multidict-6.1.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:6180c0ae073bddeb5a97a38c03f30c233e0a4d39cd86166251617d1bbd0af436"},
+    {file = "multidict-6.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:071120490b47aa997cca00666923a83f02c7fbb44f71cf7f136df753f7fa8761"},
+    {file = "multidict-6.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50b3a2710631848991d0bf7de077502e8994c804bb805aeb2925a981de58ec2e"},
+    {file = "multidict-6.1.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b58c621844d55e71c1b7f7c498ce5aa6985d743a1a59034c57a905b3f153c1ef"},
+    {file = "multidict-6.1.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:55b6d90641869892caa9ca42ff913f7ff1c5ece06474fbd32fb2cf6834726c95"},
+    {file = "multidict-6.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b820514bfc0b98a30e3d85462084779900347e4d49267f747ff54060cc33925"},
+    {file = "multidict-6.1.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:10a9b09aba0c5b48c53761b7c720aaaf7cf236d5fe394cd399c7ba662d5f9966"},
+    {file = "multidict-6.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1e16bf3e5fc9f44632affb159d30a437bfe286ce9e02754759be5536b169b305"},
+    {file = "multidict-6.1.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:76f364861c3bfc98cbbcbd402d83454ed9e01a5224bb3a28bf70002a230f73e2"},
+    {file = "multidict-6.1.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:820c661588bd01a0aa62a1283f20d2be4281b086f80dad9e955e690c75fb54a2"},
+    {file = "multidict-6.1.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:0e5f362e895bc5b9e67fe6e4ded2492d8124bdf817827f33c5b46c2fe3ffaca6"},
+    {file = "multidict-6.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3ec660d19bbc671e3a6443325f07263be452c453ac9e512f5eb935e7d4ac28b3"},
+    {file = "multidict-6.1.0-cp312-cp312-win32.whl", hash = "sha256:58130ecf8f7b8112cdb841486404f1282b9c86ccb30d3519faf301b2e5659133"},
+    {file = "multidict-6.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:188215fc0aafb8e03341995e7c4797860181562380f81ed0a87ff455b70bf1f1"},
+    {file = "multidict-6.1.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:d569388c381b24671589335a3be6e1d45546c2988c2ebe30fdcada8457a31008"},
+    {file = "multidict-6.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:052e10d2d37810b99cc170b785945421141bf7bb7d2f8799d431e7db229c385f"},
+    {file = "multidict-6.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f90c822a402cb865e396a504f9fc8173ef34212a342d92e362ca498cad308e28"},
+    {file = "multidict-6.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b225d95519a5bf73860323e633a664b0d85ad3d5bede6d30d95b35d4dfe8805b"},
+    {file = "multidict-6.1.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:23bfd518810af7de1116313ebd9092cb9aa629beb12f6ed631ad53356ed6b86c"},
+    {file = "multidict-6.1.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5c09fcfdccdd0b57867577b719c69e347a436b86cd83747f179dbf0cc0d4c1f3"},
+    {file = "multidict-6.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf6bea52ec97e95560af5ae576bdac3aa3aae0b6758c6efa115236d9e07dae44"},
+    {file = "multidict-6.1.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:57feec87371dbb3520da6192213c7d6fc892d5589a93db548331954de8248fd2"},
+    {file = "multidict-6.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0c3f390dc53279cbc8ba976e5f8035eab997829066756d811616b652b00a23a3"},
+    {file = "multidict-6.1.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:59bfeae4b25ec05b34f1956eaa1cb38032282cd4dfabc5056d0a1ec4d696d3aa"},
+    {file = "multidict-6.1.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:b2f59caeaf7632cc633b5cf6fc449372b83bbdf0da4ae04d5be36118e46cc0aa"},
+    {file = "multidict-6.1.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:37bb93b2178e02b7b618893990941900fd25b6b9ac0fa49931a40aecdf083fe4"},
+    {file = "multidict-6.1.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4e9f48f58c2c523d5a06faea47866cd35b32655c46b443f163d08c6d0ddb17d6"},
+    {file = "multidict-6.1.0-cp313-cp313-win32.whl", hash = "sha256:3a37ffb35399029b45c6cc33640a92bef403c9fd388acce75cdc88f58bd19a81"},
+    {file = "multidict-6.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:e9aa71e15d9d9beaad2c6b9319edcdc0a49a43ef5c0a4c8265ca9ee7d6c67774"},
+    {file = "multidict-6.1.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:db7457bac39421addd0c8449933ac32d8042aae84a14911a757ae6ca3eef1392"},
+    {file = "multidict-6.1.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:d094ddec350a2fb899fec68d8353c78233debde9b7d8b4beeafa70825f1c281a"},
+    {file = "multidict-6.1.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:5845c1fd4866bb5dd3125d89b90e57ed3138241540897de748cdf19de8a2fca2"},
+    {file = "multidict-6.1.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9079dfc6a70abe341f521f78405b8949f96db48da98aeb43f9907f342f627cdc"},
+    {file = "multidict-6.1.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3914f5aaa0f36d5d60e8ece6a308ee1c9784cd75ec8151062614657a114c4478"},
+    {file = "multidict-6.1.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c08be4f460903e5a9d0f76818db3250f12e9c344e79314d1d570fc69d7f4eae4"},
+    {file = "multidict-6.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d093be959277cb7dee84b801eb1af388b6ad3ca6a6b6bf1ed7585895789d027d"},
+    {file = "multidict-6.1.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3702ea6872c5a2a4eeefa6ffd36b042e9773f05b1f37ae3ef7264b1163c2dcf6"},
+    {file = "multidict-6.1.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:2090f6a85cafc5b2db085124d752757c9d251548cedabe9bd31afe6363e0aff2"},
+    {file = "multidict-6.1.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:f67f217af4b1ff66c68a87318012de788dd95fcfeb24cc889011f4e1c7454dfd"},
+    {file = "multidict-6.1.0-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:189f652a87e876098bbc67b4da1049afb5f5dfbaa310dd67c594b01c10388db6"},
+    {file = "multidict-6.1.0-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:6bb5992037f7a9eff7991ebe4273ea7f51f1c1c511e6a2ce511d0e7bdb754492"},
+    {file = "multidict-6.1.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:ac10f4c2b9e770c4e393876e35a7046879d195cd123b4f116d299d442b335bcd"},
+    {file = "multidict-6.1.0-cp38-cp38-win32.whl", hash = "sha256:e27bbb6d14416713a8bd7aaa1313c0fc8d44ee48d74497a0ff4c3a1b6ccb5167"},
+    {file = "multidict-6.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:22f3105d4fb15c8f57ff3959a58fcab6ce36814486500cd7485651230ad4d4ef"},
+    {file = "multidict-6.1.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:4e18b656c5e844539d506a0a06432274d7bd52a7487e6828c63a63d69185626c"},
+    {file = "multidict-6.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a185f876e69897a6f3325c3f19f26a297fa058c5e456bfcff8015e9a27e83ae1"},
+    {file = "multidict-6.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ab7c4ceb38d91570a650dba194e1ca87c2b543488fe9309b4212694174fd539c"},
+    {file = "multidict-6.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e617fb6b0b6953fffd762669610c1c4ffd05632c138d61ac7e14ad187870669c"},
+    {file = "multidict-6.1.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:16e5f4bf4e603eb1fdd5d8180f1a25f30056f22e55ce51fb3d6ad4ab29f7d96f"},
+    {file = "multidict-6.1.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f4c035da3f544b1882bac24115f3e2e8760f10a0107614fc9839fd232200b875"},
+    {file = "multidict-6.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:957cf8e4b6e123a9eea554fa7ebc85674674b713551de587eb318a2df3e00255"},
+    {file = "multidict-6.1.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:483a6aea59cb89904e1ceabd2b47368b5600fb7de78a6e4a2c2987b2d256cf30"},
+    {file = "multidict-6.1.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:87701f25a2352e5bf7454caa64757642734da9f6b11384c1f9d1a8e699758057"},
+    {file = "multidict-6.1.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:682b987361e5fd7a139ed565e30d81fd81e9629acc7d925a205366877d8c8657"},
+    {file = "multidict-6.1.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:ce2186a7df133a9c895dea3331ddc5ddad42cdd0d1ea2f0a51e5d161e4762f28"},
+    {file = "multidict-6.1.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:9f636b730f7e8cb19feb87094949ba54ee5357440b9658b2a32a5ce4bce53972"},
+    {file = "multidict-6.1.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:73eae06aa53af2ea5270cc066dcaf02cc60d2994bbb2c4ef5764949257d10f43"},
+    {file = "multidict-6.1.0-cp39-cp39-win32.whl", hash = "sha256:1ca0083e80e791cffc6efce7660ad24af66c8d4079d2a750b29001b53ff59ada"},
+    {file = "multidict-6.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:aa466da5b15ccea564bdab9c89175c762bc12825f4659c11227f515cee76fa4a"},
+    {file = "multidict-6.1.0-py3-none-any.whl", hash = "sha256:48e171e52d1c4d33888e529b999e5900356b9ae588c2f09a52dcefb158b27506"},
+    {file = "multidict-6.1.0.tar.gz", hash = "sha256:22ae2ebf9b0c69d206c003e2f6a914ea33f0a932d4aa16f236afc049d9958f4a"},
+]
+
+[package.dependencies]
+typing-extensions = {version = ">=4.1.0", markers = "python_version < \"3.11\""}
+
+[[package]]
+name = "numpy"
+version = "1.26.4"
+description = "Fundamental package for array computing in Python"
+optional = false
+python-versions = ">=3.9"
+files = [
+    {file = "numpy-1.26.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9ff0f4f29c51e2803569d7a51c2304de5554655a60c5d776e35b4a41413830d0"},
+    {file = "numpy-1.26.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2e4ee3380d6de9c9ec04745830fd9e2eccb3e6cf790d39d7b98ffd19b0dd754a"},
+    {file = "numpy-1.26.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d209d8969599b27ad20994c8e41936ee0964e6da07478d6c35016bc386b66ad4"},
+    {file = "numpy-1.26.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ffa75af20b44f8dba823498024771d5ac50620e6915abac414251bd971b4529f"},
+    {file = "numpy-1.26.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:62b8e4b1e28009ef2846b4c7852046736bab361f7aeadeb6a5b89ebec3c7055a"},
+    {file = "numpy-1.26.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a4abb4f9001ad2858e7ac189089c42178fcce737e4169dc61321660f1a96c7d2"},
+    {file = "numpy-1.26.4-cp310-cp310-win32.whl", hash = "sha256:bfe25acf8b437eb2a8b2d49d443800a5f18508cd811fea3181723922a8a82b07"},
+    {file = "numpy-1.26.4-cp310-cp310-win_amd64.whl", hash = "sha256:b97fe8060236edf3662adfc2c633f56a08ae30560c56310562cb4f95500022d5"},
+    {file = "numpy-1.26.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4c66707fabe114439db9068ee468c26bbdf909cac0fb58686a42a24de1760c71"},
+    {file = "numpy-1.26.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:edd8b5fe47dab091176d21bb6de568acdd906d1887a4584a15a9a96a1dca06ef"},
+    {file = "numpy-1.26.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7ab55401287bfec946ced39700c053796e7cc0e3acbef09993a9ad2adba6ca6e"},
+    {file = "numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:666dbfb6ec68962c033a450943ded891bed2d54e6755e35e5835d63f4f6931d5"},
+    {file = "numpy-1.26.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:96ff0b2ad353d8f990b63294c8986f1ec3cb19d749234014f4e7eb0112ceba5a"},
+    {file = "numpy-1.26.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:60dedbb91afcbfdc9bc0b1f3f402804070deed7392c23eb7a7f07fa857868e8a"},
+    {file = "numpy-1.26.4-cp311-cp311-win32.whl", hash = "sha256:1af303d6b2210eb850fcf03064d364652b7120803a0b872f5211f5234b399f20"},
+    {file = "numpy-1.26.4-cp311-cp311-win_amd64.whl", hash = "sha256:cd25bcecc4974d09257ffcd1f098ee778f7834c3ad767fe5db785be9a4aa9cb2"},
+    {file = "numpy-1.26.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b3ce300f3644fb06443ee2222c2201dd3a89ea6040541412b8fa189341847218"},
+    {file = "numpy-1.26.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:03a8c78d01d9781b28a6989f6fa1bb2c4f2d51201cf99d3dd875df6fbd96b23b"},
+    {file = "numpy-1.26.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9fad7dcb1aac3c7f0584a5a8133e3a43eeb2fe127f47e3632d43d677c66c102b"},
+    {file = "numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:675d61ffbfa78604709862923189bad94014bef562cc35cf61d3a07bba02a7ed"},
+    {file = "numpy-1.26.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ab47dbe5cc8210f55aa58e4805fe224dac469cde56b9f731a4c098b91917159a"},
+    {file = "numpy-1.26.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:1dda2e7b4ec9dd512f84935c5f126c8bd8b9f2fc001e9f54af255e8c5f16b0e0"},
+    {file = "numpy-1.26.4-cp312-cp312-win32.whl", hash = "sha256:50193e430acfc1346175fcbdaa28ffec49947a06918b7b92130744e81e640110"},
+    {file = "numpy-1.26.4-cp312-cp312-win_amd64.whl", hash = "sha256:08beddf13648eb95f8d867350f6a018a4be2e5ad54c8d8caed89ebca558b2818"},
+    {file = "numpy-1.26.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7349ab0fa0c429c82442a27a9673fc802ffdb7c7775fad780226cb234965e53c"},
+    {file = "numpy-1.26.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:52b8b60467cd7dd1e9ed082188b4e6bb35aa5cdd01777621a1658910745b90be"},
+    {file = "numpy-1.26.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d5241e0a80d808d70546c697135da2c613f30e28251ff8307eb72ba696945764"},
+    {file = "numpy-1.26.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f870204a840a60da0b12273ef34f7051e98c3b5961b61b0c2c1be6dfd64fbcd3"},
+    {file = "numpy-1.26.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:679b0076f67ecc0138fd2ede3a8fd196dddc2ad3254069bcb9faf9a79b1cebcd"},
+    {file = "numpy-1.26.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:47711010ad8555514b434df65f7d7b076bb8261df1ca9bb78f53d3b2db02e95c"},
+    {file = "numpy-1.26.4-cp39-cp39-win32.whl", hash = "sha256:a354325ee03388678242a4d7ebcd08b5c727033fcff3b2f536aea978e15ee9e6"},
+    {file = "numpy-1.26.4-cp39-cp39-win_amd64.whl", hash = "sha256:3373d5d70a5fe74a2c1bb6d2cfd9609ecf686d47a2d7b1d37a8f3b6bf6003aea"},
+    {file = "numpy-1.26.4-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:afedb719a9dcfc7eaf2287b839d8198e06dcd4cb5d276a3df279231138e83d30"},
+    {file = "numpy-1.26.4-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95a7476c59002f2f6c590b9b7b998306fba6a5aa646b1e22ddfeaf8f78c3a29c"},
+    {file = "numpy-1.26.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7e50d0a0cc3189f9cb0aeb3a6a6af18c16f59f004b866cd2be1c14b36134a4a0"},
+    {file = "numpy-1.26.4.tar.gz", hash = "sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010"},
 ]
 
 [[package]]
 name = "packaging"
-version = "23.1"
+version = "24.1"
 description = "Core utilities for Python packages"
 optional = false
-python-versions = ">=3.7"
+python-versions = ">=3.8"
 files = [
-    {file = "packaging-23.1-py3-none-any.whl", hash = "sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61"},
-    {file = "packaging-23.1.tar.gz", hash = "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"},
+    {file = "packaging-24.1-py3-none-any.whl", hash = "sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124"},
+    {file = "packaging-24.1.tar.gz", hash = "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002"},
 ]
 
 [[package]]
 name = "pluggy"
-version = "1.3.0"
+version = "1.5.0"
 description = "plugin and hook calling mechanisms for python"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "pluggy-1.3.0-py3-none-any.whl", hash = "sha256:d89c696a773f8bd377d18e5ecda92b7a3793cbe66c87060a6fb58c7b6e1061f7"},
-    {file = "pluggy-1.3.0.tar.gz", hash = "sha256:cf61ae8f126ac6f7c451172cf30e3e43d3ca77615509771b3a984a0730651e12"},
+    {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"},
+    {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"},
 ]
 
 [package.extras]
@@ -591,109 +709,120 @@ testing = ["pytest", "pytest-benchmark"]
 
 [[package]]
 name = "pydantic"
-version = "2.6.4"
+version = "2.9.1"
 description = "Data validation using Python type hints"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "pydantic-2.6.4-py3-none-any.whl", hash = "sha256:cc46fce86607580867bdc3361ad462bab9c222ef042d3da86f2fb333e1d916c5"},
-    {file = "pydantic-2.6.4.tar.gz", hash = "sha256:b1704e0847db01817624a6b86766967f552dd9dbf3afba4004409f908dcc84e6"},
+    {file = "pydantic-2.9.1-py3-none-any.whl", hash = "sha256:7aff4db5fdf3cf573d4b3c30926a510a10e19a0774d38fc4967f78beb6deb612"},
+    {file = "pydantic-2.9.1.tar.gz", hash = "sha256:1363c7d975c7036df0db2b4a61f2e062fbc0aa5ab5f2772e0ffc7191a4f4bce2"},
 ]
 
 [package.dependencies]
-annotated-types = ">=0.4.0"
-pydantic-core = "2.16.3"
-typing-extensions = ">=4.6.1"
+annotated-types = ">=0.6.0"
+pydantic-core = "2.23.3"
+typing-extensions = {version = ">=4.6.1", markers = "python_version < \"3.13\""}
 
 [package.extras]
 email = ["email-validator (>=2.0.0)"]
+timezone = ["tzdata"]
 
 [[package]]
 name = "pydantic-core"
-version = "2.16.3"
-description = ""
+version = "2.23.3"
+description = "Core functionality for Pydantic validation and serialization"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "pydantic_core-2.16.3-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:75b81e678d1c1ede0785c7f46690621e4c6e63ccd9192af1f0bd9d504bbb6bf4"},
-    {file = "pydantic_core-2.16.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9c865a7ee6f93783bd5d781af5a4c43dadc37053a5b42f7d18dc019f8c9d2bd1"},
-    {file = "pydantic_core-2.16.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:162e498303d2b1c036b957a1278fa0899d02b2842f1ff901b6395104c5554a45"},
-    {file = "pydantic_core-2.16.3-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2f583bd01bbfbff4eaee0868e6fc607efdfcc2b03c1c766b06a707abbc856187"},
-    {file = "pydantic_core-2.16.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b926dd38db1519ed3043a4de50214e0d600d404099c3392f098a7f9d75029ff8"},
-    {file = "pydantic_core-2.16.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:716b542728d4c742353448765aa7cdaa519a7b82f9564130e2b3f6766018c9ec"},
-    {file = "pydantic_core-2.16.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc4ad7f7ee1a13d9cb49d8198cd7d7e3aa93e425f371a68235f784e99741561f"},
-    {file = "pydantic_core-2.16.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:bd87f48924f360e5d1c5f770d6155ce0e7d83f7b4e10c2f9ec001c73cf475c99"},
-    {file = "pydantic_core-2.16.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0df446663464884297c793874573549229f9eca73b59360878f382a0fc085979"},
-    {file = "pydantic_core-2.16.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4df8a199d9f6afc5ae9a65f8f95ee52cae389a8c6b20163762bde0426275b7db"},
-    {file = "pydantic_core-2.16.3-cp310-none-win32.whl", hash = "sha256:456855f57b413f077dff513a5a28ed838dbbb15082ba00f80750377eed23d132"},
-    {file = "pydantic_core-2.16.3-cp310-none-win_amd64.whl", hash = "sha256:732da3243e1b8d3eab8c6ae23ae6a58548849d2e4a4e03a1924c8ddf71a387cb"},
-    {file = "pydantic_core-2.16.3-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:519ae0312616026bf4cedc0fe459e982734f3ca82ee8c7246c19b650b60a5ee4"},
-    {file = "pydantic_core-2.16.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b3992a322a5617ded0a9f23fd06dbc1e4bd7cf39bc4ccf344b10f80af58beacd"},
-    {file = "pydantic_core-2.16.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8d62da299c6ecb04df729e4b5c52dc0d53f4f8430b4492b93aa8de1f541c4aac"},
-    {file = "pydantic_core-2.16.3-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2acca2be4bb2f2147ada8cac612f8a98fc09f41c89f87add7256ad27332c2fda"},
-    {file = "pydantic_core-2.16.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1b662180108c55dfbf1280d865b2d116633d436cfc0bba82323554873967b340"},
-    {file = "pydantic_core-2.16.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e7c6ed0dc9d8e65f24f5824291550139fe6f37fac03788d4580da0d33bc00c97"},
-    {file = "pydantic_core-2.16.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a6b1bb0827f56654b4437955555dc3aeeebeddc47c2d7ed575477f082622c49e"},
-    {file = "pydantic_core-2.16.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e56f8186d6210ac7ece503193ec84104da7ceb98f68ce18c07282fcc2452e76f"},
-    {file = "pydantic_core-2.16.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:936e5db01dd49476fa8f4383c259b8b1303d5dd5fb34c97de194560698cc2c5e"},
-    {file = "pydantic_core-2.16.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:33809aebac276089b78db106ee692bdc9044710e26f24a9a2eaa35a0f9fa70ba"},
-    {file = "pydantic_core-2.16.3-cp311-none-win32.whl", hash = "sha256:ded1c35f15c9dea16ead9bffcde9bb5c7c031bff076355dc58dcb1cb436c4721"},
-    {file = "pydantic_core-2.16.3-cp311-none-win_amd64.whl", hash = "sha256:d89ca19cdd0dd5f31606a9329e309d4fcbb3df860960acec32630297d61820df"},
-    {file = "pydantic_core-2.16.3-cp311-none-win_arm64.whl", hash = "sha256:6162f8d2dc27ba21027f261e4fa26f8bcb3cf9784b7f9499466a311ac284b5b9"},
-    {file = "pydantic_core-2.16.3-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:0f56ae86b60ea987ae8bcd6654a887238fd53d1384f9b222ac457070b7ac4cff"},
-    {file = "pydantic_core-2.16.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c9bd22a2a639e26171068f8ebb5400ce2c1bc7d17959f60a3b753ae13c632975"},
-    {file = "pydantic_core-2.16.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4204e773b4b408062960e65468d5346bdfe139247ee5f1ca2a378983e11388a2"},
-    {file = "pydantic_core-2.16.3-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f651dd19363c632f4abe3480a7c87a9773be27cfe1341aef06e8759599454120"},
-    {file = "pydantic_core-2.16.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aaf09e615a0bf98d406657e0008e4a8701b11481840be7d31755dc9f97c44053"},
-    {file = "pydantic_core-2.16.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8e47755d8152c1ab5b55928ab422a76e2e7b22b5ed8e90a7d584268dd49e9c6b"},
-    {file = "pydantic_core-2.16.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:500960cb3a0543a724a81ba859da816e8cf01b0e6aaeedf2c3775d12ee49cade"},
-    {file = "pydantic_core-2.16.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:cf6204fe865da605285c34cf1172879d0314ff267b1c35ff59de7154f35fdc2e"},
-    {file = "pydantic_core-2.16.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:d33dd21f572545649f90c38c227cc8631268ba25c460b5569abebdd0ec5974ca"},
-    {file = "pydantic_core-2.16.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:49d5d58abd4b83fb8ce763be7794d09b2f50f10aa65c0f0c1696c677edeb7cbf"},
-    {file = "pydantic_core-2.16.3-cp312-none-win32.whl", hash = "sha256:f53aace168a2a10582e570b7736cc5bef12cae9cf21775e3eafac597e8551fbe"},
-    {file = "pydantic_core-2.16.3-cp312-none-win_amd64.whl", hash = "sha256:0d32576b1de5a30d9a97f300cc6a3f4694c428d956adbc7e6e2f9cad279e45ed"},
-    {file = "pydantic_core-2.16.3-cp312-none-win_arm64.whl", hash = "sha256:ec08be75bb268473677edb83ba71e7e74b43c008e4a7b1907c6d57e940bf34b6"},
-    {file = "pydantic_core-2.16.3-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:b1f6f5938d63c6139860f044e2538baeee6f0b251a1816e7adb6cbce106a1f01"},
-    {file = "pydantic_core-2.16.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:2a1ef6a36fdbf71538142ed604ad19b82f67b05749512e47f247a6ddd06afdc7"},
-    {file = "pydantic_core-2.16.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:704d35ecc7e9c31d48926150afada60401c55efa3b46cd1ded5a01bdffaf1d48"},
-    {file = "pydantic_core-2.16.3-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d937653a696465677ed583124b94a4b2d79f5e30b2c46115a68e482c6a591c8a"},
-    {file = "pydantic_core-2.16.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c9803edf8e29bd825f43481f19c37f50d2b01899448273b3a7758441b512acf8"},
-    {file = "pydantic_core-2.16.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:72282ad4892a9fb2da25defeac8c2e84352c108705c972db82ab121d15f14e6d"},
-    {file = "pydantic_core-2.16.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f752826b5b8361193df55afcdf8ca6a57d0232653494ba473630a83ba50d8c9"},
-    {file = "pydantic_core-2.16.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4384a8f68ddb31a0b0c3deae88765f5868a1b9148939c3f4121233314ad5532c"},
-    {file = "pydantic_core-2.16.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:a4b2bf78342c40b3dc830880106f54328928ff03e357935ad26c7128bbd66ce8"},
-    {file = "pydantic_core-2.16.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:13dcc4802961b5f843a9385fc821a0b0135e8c07fc3d9949fd49627c1a5e6ae5"},
-    {file = "pydantic_core-2.16.3-cp38-none-win32.whl", hash = "sha256:e3e70c94a0c3841e6aa831edab1619ad5c511199be94d0c11ba75fe06efe107a"},
-    {file = "pydantic_core-2.16.3-cp38-none-win_amd64.whl", hash = "sha256:ecdf6bf5f578615f2e985a5e1f6572e23aa632c4bd1dc67f8f406d445ac115ed"},
-    {file = "pydantic_core-2.16.3-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:bda1ee3e08252b8d41fa5537413ffdddd58fa73107171a126d3b9ff001b9b820"},
-    {file = "pydantic_core-2.16.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:21b888c973e4f26b7a96491c0965a8a312e13be108022ee510248fe379a5fa23"},
-    {file = "pydantic_core-2.16.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:be0ec334369316fa73448cc8c982c01e5d2a81c95969d58b8f6e272884df0074"},
-    {file = "pydantic_core-2.16.3-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b5b6079cc452a7c53dd378c6f881ac528246b3ac9aae0f8eef98498a75657805"},
-    {file = "pydantic_core-2.16.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7ee8d5f878dccb6d499ba4d30d757111847b6849ae07acdd1205fffa1fc1253c"},
-    {file = "pydantic_core-2.16.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7233d65d9d651242a68801159763d09e9ec96e8a158dbf118dc090cd77a104c9"},
-    {file = "pydantic_core-2.16.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c6119dc90483a5cb50a1306adb8d52c66e447da88ea44f323e0ae1a5fcb14256"},
-    {file = "pydantic_core-2.16.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:578114bc803a4c1ff9946d977c221e4376620a46cf78da267d946397dc9514a8"},
-    {file = "pydantic_core-2.16.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d8f99b147ff3fcf6b3cc60cb0c39ea443884d5559a30b1481e92495f2310ff2b"},
-    {file = "pydantic_core-2.16.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:4ac6b4ce1e7283d715c4b729d8f9dab9627586dafce81d9eaa009dd7f25dd972"},
-    {file = "pydantic_core-2.16.3-cp39-none-win32.whl", hash = "sha256:e7774b570e61cb998490c5235740d475413a1f6de823169b4cf94e2fe9e9f6b2"},
-    {file = "pydantic_core-2.16.3-cp39-none-win_amd64.whl", hash = "sha256:9091632a25b8b87b9a605ec0e61f241c456e9248bfdcf7abdf344fdb169c81cf"},
-    {file = "pydantic_core-2.16.3-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:36fa178aacbc277bc6b62a2c3da95226520da4f4e9e206fdf076484363895d2c"},
-    {file = "pydantic_core-2.16.3-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:dcca5d2bf65c6fb591fff92da03f94cd4f315972f97c21975398bd4bd046854a"},
-    {file = "pydantic_core-2.16.3-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2a72fb9963cba4cd5793854fd12f4cfee731e86df140f59ff52a49b3552db241"},
-    {file = "pydantic_core-2.16.3-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b60cc1a081f80a2105a59385b92d82278b15d80ebb3adb200542ae165cd7d183"},
-    {file = "pydantic_core-2.16.3-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:cbcc558401de90a746d02ef330c528f2e668c83350f045833543cd57ecead1ad"},
-    {file = "pydantic_core-2.16.3-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:fee427241c2d9fb7192b658190f9f5fd6dfe41e02f3c1489d2ec1e6a5ab1e04a"},
-    {file = "pydantic_core-2.16.3-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f4cb85f693044e0f71f394ff76c98ddc1bc0953e48c061725e540396d5c8a2e1"},
-    {file = "pydantic_core-2.16.3-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:b29eeb887aa931c2fcef5aa515d9d176d25006794610c264ddc114c053bf96fe"},
-    {file = "pydantic_core-2.16.3-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:a425479ee40ff021f8216c9d07a6a3b54b31c8267c6e17aa88b70d7ebd0e5e5b"},
-    {file = "pydantic_core-2.16.3-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:5c5cbc703168d1b7a838668998308018a2718c2130595e8e190220238addc96f"},
-    {file = "pydantic_core-2.16.3-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:99b6add4c0b39a513d323d3b93bc173dac663c27b99860dd5bf491b240d26137"},
-    {file = "pydantic_core-2.16.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75f76ee558751746d6a38f89d60b6228fa174e5172d143886af0f85aa306fd89"},
-    {file = "pydantic_core-2.16.3-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:00ee1c97b5364b84cb0bd82e9bbf645d5e2871fb8c58059d158412fee2d33d8a"},
-    {file = "pydantic_core-2.16.3-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:287073c66748f624be4cef893ef9174e3eb88fe0b8a78dc22e88eca4bc357ca6"},
-    {file = "pydantic_core-2.16.3-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:ed25e1835c00a332cb10c683cd39da96a719ab1dfc08427d476bce41b92531fc"},
-    {file = "pydantic_core-2.16.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:86b3d0033580bd6bbe07590152007275bd7af95f98eaa5bd36f3da219dcd93da"},
-    {file = "pydantic_core-2.16.3.tar.gz", hash = "sha256:1cac689f80a3abab2d3c0048b29eea5751114054f032a941a32de4c852c59cad"},
+    {file = "pydantic_core-2.23.3-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:7f10a5d1b9281392f1bf507d16ac720e78285dfd635b05737c3911637601bae6"},
+    {file = "pydantic_core-2.23.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3c09a7885dd33ee8c65266e5aa7fb7e2f23d49d8043f089989726391dd7350c5"},
+    {file = "pydantic_core-2.23.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6470b5a1ec4d1c2e9afe928c6cb37eb33381cab99292a708b8cb9aa89e62429b"},
+    {file = "pydantic_core-2.23.3-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9172d2088e27d9a185ea0a6c8cebe227a9139fd90295221d7d495944d2367700"},
+    {file = "pydantic_core-2.23.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:86fc6c762ca7ac8fbbdff80d61b2c59fb6b7d144aa46e2d54d9e1b7b0e780e01"},
+    {file = "pydantic_core-2.23.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f0cb80fd5c2df4898693aa841425ea1727b1b6d2167448253077d2a49003e0ed"},
+    {file = "pydantic_core-2.23.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:03667cec5daf43ac4995cefa8aaf58f99de036204a37b889c24a80927b629cec"},
+    {file = "pydantic_core-2.23.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:047531242f8e9c2db733599f1c612925de095e93c9cc0e599e96cf536aaf56ba"},
+    {file = "pydantic_core-2.23.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:5499798317fff7f25dbef9347f4451b91ac2a4330c6669821c8202fd354c7bee"},
+    {file = "pydantic_core-2.23.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bbb5e45eab7624440516ee3722a3044b83fff4c0372efe183fd6ba678ff681fe"},
+    {file = "pydantic_core-2.23.3-cp310-none-win32.whl", hash = "sha256:8b5b3ed73abb147704a6e9f556d8c5cb078f8c095be4588e669d315e0d11893b"},
+    {file = "pydantic_core-2.23.3-cp310-none-win_amd64.whl", hash = "sha256:2b603cde285322758a0279995b5796d64b63060bfbe214b50a3ca23b5cee3e83"},
+    {file = "pydantic_core-2.23.3-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:c889fd87e1f1bbeb877c2ee56b63bb297de4636661cc9bbfcf4b34e5e925bc27"},
+    {file = "pydantic_core-2.23.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ea85bda3189fb27503af4c45273735bcde3dd31c1ab17d11f37b04877859ef45"},
+    {file = "pydantic_core-2.23.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a7f7f72f721223f33d3dc98a791666ebc6a91fa023ce63733709f4894a7dc611"},
+    {file = "pydantic_core-2.23.3-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2b2b55b0448e9da68f56b696f313949cda1039e8ec7b5d294285335b53104b61"},
+    {file = "pydantic_core-2.23.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c24574c7e92e2c56379706b9a3f07c1e0c7f2f87a41b6ee86653100c4ce343e5"},
+    {file = "pydantic_core-2.23.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f2b05e6ccbee333a8f4b8f4d7c244fdb7a979e90977ad9c51ea31261e2085ce0"},
+    {file = "pydantic_core-2.23.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e2c409ce1c219c091e47cb03feb3c4ed8c2b8e004efc940da0166aaee8f9d6c8"},
+    {file = "pydantic_core-2.23.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d965e8b325f443ed3196db890d85dfebbb09f7384486a77461347f4adb1fa7f8"},
+    {file = "pydantic_core-2.23.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:f56af3a420fb1ffaf43ece3ea09c2d27c444e7c40dcb7c6e7cf57aae764f2b48"},
+    {file = "pydantic_core-2.23.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5b01a078dd4f9a52494370af21aa52964e0a96d4862ac64ff7cea06e0f12d2c5"},
+    {file = "pydantic_core-2.23.3-cp311-none-win32.whl", hash = "sha256:560e32f0df04ac69b3dd818f71339983f6d1f70eb99d4d1f8e9705fb6c34a5c1"},
+    {file = "pydantic_core-2.23.3-cp311-none-win_amd64.whl", hash = "sha256:c744fa100fdea0d000d8bcddee95213d2de2e95b9c12be083370b2072333a0fa"},
+    {file = "pydantic_core-2.23.3-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:e0ec50663feedf64d21bad0809f5857bac1ce91deded203efc4a84b31b2e4305"},
+    {file = "pydantic_core-2.23.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:db6e6afcb95edbe6b357786684b71008499836e91f2a4a1e55b840955b341dbb"},
+    {file = "pydantic_core-2.23.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:98ccd69edcf49f0875d86942f4418a4e83eb3047f20eb897bffa62a5d419c8fa"},
+    {file = "pydantic_core-2.23.3-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a678c1ac5c5ec5685af0133262103defb427114e62eafeda12f1357a12140162"},
+    {file = "pydantic_core-2.23.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:01491d8b4d8db9f3391d93b0df60701e644ff0894352947f31fff3e52bd5c801"},
+    {file = "pydantic_core-2.23.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fcf31facf2796a2d3b7fe338fe8640aa0166e4e55b4cb108dbfd1058049bf4cb"},
+    {file = "pydantic_core-2.23.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7200fd561fb3be06827340da066df4311d0b6b8eb0c2116a110be5245dceb326"},
+    {file = "pydantic_core-2.23.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:dc1636770a809dee2bd44dd74b89cc80eb41172bcad8af75dd0bc182c2666d4c"},
+    {file = "pydantic_core-2.23.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:67a5def279309f2e23014b608c4150b0c2d323bd7bccd27ff07b001c12c2415c"},
+    {file = "pydantic_core-2.23.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:748bdf985014c6dd3e1e4cc3db90f1c3ecc7246ff5a3cd4ddab20c768b2f1dab"},
+    {file = "pydantic_core-2.23.3-cp312-none-win32.whl", hash = "sha256:255ec6dcb899c115f1e2a64bc9ebc24cc0e3ab097775755244f77360d1f3c06c"},
+    {file = "pydantic_core-2.23.3-cp312-none-win_amd64.whl", hash = "sha256:40b8441be16c1e940abebed83cd006ddb9e3737a279e339dbd6d31578b802f7b"},
+    {file = "pydantic_core-2.23.3-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:6daaf5b1ba1369a22c8b050b643250e3e5efc6a78366d323294aee54953a4d5f"},
+    {file = "pydantic_core-2.23.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d015e63b985a78a3d4ccffd3bdf22b7c20b3bbd4b8227809b3e8e75bc37f9cb2"},
+    {file = "pydantic_core-2.23.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a3fc572d9b5b5cfe13f8e8a6e26271d5d13f80173724b738557a8c7f3a8a3791"},
+    {file = "pydantic_core-2.23.3-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f6bd91345b5163ee7448bee201ed7dd601ca24f43f439109b0212e296eb5b423"},
+    {file = "pydantic_core-2.23.3-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fc379c73fd66606628b866f661e8785088afe2adaba78e6bbe80796baf708a63"},
+    {file = "pydantic_core-2.23.3-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fbdce4b47592f9e296e19ac31667daed8753c8367ebb34b9a9bd89dacaa299c9"},
+    {file = "pydantic_core-2.23.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc3cf31edf405a161a0adad83246568647c54404739b614b1ff43dad2b02e6d5"},
+    {file = "pydantic_core-2.23.3-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8e22b477bf90db71c156f89a55bfe4d25177b81fce4aa09294d9e805eec13855"},
+    {file = "pydantic_core-2.23.3-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:0a0137ddf462575d9bce863c4c95bac3493ba8e22f8c28ca94634b4a1d3e2bb4"},
+    {file = "pydantic_core-2.23.3-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:203171e48946c3164fe7691fc349c79241ff8f28306abd4cad5f4f75ed80bc8d"},
+    {file = "pydantic_core-2.23.3-cp313-none-win32.whl", hash = "sha256:76bdab0de4acb3f119c2a4bff740e0c7dc2e6de7692774620f7452ce11ca76c8"},
+    {file = "pydantic_core-2.23.3-cp313-none-win_amd64.whl", hash = "sha256:37ba321ac2a46100c578a92e9a6aa33afe9ec99ffa084424291d84e456f490c1"},
+    {file = "pydantic_core-2.23.3-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:d063c6b9fed7d992bcbebfc9133f4c24b7a7f215d6b102f3e082b1117cddb72c"},
+    {file = "pydantic_core-2.23.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6cb968da9a0746a0cf521b2b5ef25fc5a0bee9b9a1a8214e0a1cfaea5be7e8a4"},
+    {file = "pydantic_core-2.23.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:edbefe079a520c5984e30e1f1f29325054b59534729c25b874a16a5048028d16"},
+    {file = "pydantic_core-2.23.3-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:cbaaf2ef20d282659093913da9d402108203f7cb5955020bd8d1ae5a2325d1c4"},
+    {file = "pydantic_core-2.23.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fb539d7e5dc4aac345846f290cf504d2fd3c1be26ac4e8b5e4c2b688069ff4cf"},
+    {file = "pydantic_core-2.23.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7e6f33503c5495059148cc486867e1d24ca35df5fc064686e631e314d959ad5b"},
+    {file = "pydantic_core-2.23.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:04b07490bc2f6f2717b10c3969e1b830f5720b632f8ae2f3b8b1542394c47a8e"},
+    {file = "pydantic_core-2.23.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:03795b9e8a5d7fda05f3873efc3f59105e2dcff14231680296b87b80bb327295"},
+    {file = "pydantic_core-2.23.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:c483dab0f14b8d3f0df0c6c18d70b21b086f74c87ab03c59250dbf6d3c89baba"},
+    {file = "pydantic_core-2.23.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8b2682038e255e94baf2c473dca914a7460069171ff5cdd4080be18ab8a7fd6e"},
+    {file = "pydantic_core-2.23.3-cp38-none-win32.whl", hash = "sha256:f4a57db8966b3a1d1a350012839c6a0099f0898c56512dfade8a1fe5fb278710"},
+    {file = "pydantic_core-2.23.3-cp38-none-win_amd64.whl", hash = "sha256:13dd45ba2561603681a2676ca56006d6dee94493f03d5cadc055d2055615c3ea"},
+    {file = "pydantic_core-2.23.3-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:82da2f4703894134a9f000e24965df73cc103e31e8c31906cc1ee89fde72cbd8"},
+    {file = "pydantic_core-2.23.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:dd9be0a42de08f4b58a3cc73a123f124f65c24698b95a54c1543065baca8cf0e"},
+    {file = "pydantic_core-2.23.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:89b731f25c80830c76fdb13705c68fef6a2b6dc494402987c7ea9584fe189f5d"},
+    {file = "pydantic_core-2.23.3-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c6de1ec30c4bb94f3a69c9f5f2182baeda5b809f806676675e9ef6b8dc936f28"},
+    {file = "pydantic_core-2.23.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bb68b41c3fa64587412b104294b9cbb027509dc2f6958446c502638d481525ef"},
+    {file = "pydantic_core-2.23.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1c3980f2843de5184656aab58698011b42763ccba11c4a8c35936c8dd6c7068c"},
+    {file = "pydantic_core-2.23.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:94f85614f2cba13f62c3c6481716e4adeae48e1eaa7e8bac379b9d177d93947a"},
+    {file = "pydantic_core-2.23.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:510b7fb0a86dc8f10a8bb43bd2f97beb63cffad1203071dc434dac26453955cd"},
+    {file = "pydantic_core-2.23.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:1eba2f7ce3e30ee2170410e2171867ea73dbd692433b81a93758ab2de6c64835"},
+    {file = "pydantic_core-2.23.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:4b259fd8409ab84b4041b7b3f24dcc41e4696f180b775961ca8142b5b21d0e70"},
+    {file = "pydantic_core-2.23.3-cp39-none-win32.whl", hash = "sha256:40d9bd259538dba2f40963286009bf7caf18b5112b19d2b55b09c14dde6db6a7"},
+    {file = "pydantic_core-2.23.3-cp39-none-win_amd64.whl", hash = "sha256:5a8cd3074a98ee70173a8633ad3c10e00dcb991ecec57263aacb4095c5efb958"},
+    {file = "pydantic_core-2.23.3-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:f399e8657c67313476a121a6944311fab377085ca7f490648c9af97fc732732d"},
+    {file = "pydantic_core-2.23.3-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:6b5547d098c76e1694ba85f05b595720d7c60d342f24d5aad32c3049131fa5c4"},
+    {file = "pydantic_core-2.23.3-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0dda0290a6f608504882d9f7650975b4651ff91c85673341789a476b1159f211"},
+    {file = "pydantic_core-2.23.3-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:65b6e5da855e9c55a0c67f4db8a492bf13d8d3316a59999cfbaf98cc6e401961"},
+    {file = "pydantic_core-2.23.3-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:09e926397f392059ce0afdcac920df29d9c833256354d0c55f1584b0b70cf07e"},
+    {file = "pydantic_core-2.23.3-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:87cfa0ed6b8c5bd6ae8b66de941cece179281239d482f363814d2b986b79cedc"},
+    {file = "pydantic_core-2.23.3-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:e61328920154b6a44d98cabcb709f10e8b74276bc709c9a513a8c37a18786cc4"},
+    {file = "pydantic_core-2.23.3-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:ce3317d155628301d649fe5e16a99528d5680af4ec7aa70b90b8dacd2d725c9b"},
+    {file = "pydantic_core-2.23.3-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:e89513f014c6be0d17b00a9a7c81b1c426f4eb9224b15433f3d98c1a071f8433"},
+    {file = "pydantic_core-2.23.3-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:4f62c1c953d7ee375df5eb2e44ad50ce2f5aff931723b398b8bc6f0ac159791a"},
+    {file = "pydantic_core-2.23.3-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2718443bc671c7ac331de4eef9b673063b10af32a0bb385019ad61dcf2cc8f6c"},
+    {file = "pydantic_core-2.23.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a0d90e08b2727c5d01af1b5ef4121d2f0c99fbee692c762f4d9d0409c9da6541"},
+    {file = "pydantic_core-2.23.3-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2b676583fc459c64146debea14ba3af54e540b61762dfc0613dc4e98c3f66eeb"},
+    {file = "pydantic_core-2.23.3-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:50e4661f3337977740fdbfbae084ae5693e505ca2b3130a6d4eb0f2281dc43b8"},
+    {file = "pydantic_core-2.23.3-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:68f4cf373f0de6abfe599a38307f4417c1c867ca381c03df27c873a9069cda25"},
+    {file = "pydantic_core-2.23.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:59d52cf01854cb26c46958552a21acb10dd78a52aa34c86f284e66b209db8cab"},
+    {file = "pydantic_core-2.23.3.tar.gz", hash = "sha256:3cb0f65d8b4121c1b015c60104a685feb929a29d7cf204387c7f2688c7974690"},
 ]
 
 [package.dependencies]
@@ -701,13 +830,13 @@ typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0"
 
 [[package]]
 name = "pytest"
-version = "7.4.0"
+version = "7.4.4"
 description = "pytest: simple powerful testing with Python"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "pytest-7.4.0-py3-none-any.whl", hash = "sha256:78bf16451a2eb8c7a2ea98e32dc119fd2aa758f1d5d66dbf0a59d69a3969df32"},
-    {file = "pytest-7.4.0.tar.gz", hash = "sha256:b4bf8c45bd59934ed84001ad51e11b4ee40d40a1229d2c79f9c592b0a3f6bd8a"},
+    {file = "pytest-7.4.4-py3-none-any.whl", hash = "sha256:b090cdf5ed60bf4c45261be03239c2c1c22df034fbffe691abe93cd80cea01d8"},
+    {file = "pytest-7.4.4.tar.gz", hash = "sha256:2cf0005922c6ace4a3e2ec8b4080eb0d9753fdc93107415332f50ce9e7994280"},
 ]
 
 [package.dependencies]
@@ -723,13 +852,13 @@ testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "no
 
 [[package]]
 name = "pytest-asyncio"
-version = "0.21.1"
+version = "0.21.2"
 description = "Pytest support for asyncio"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "pytest-asyncio-0.21.1.tar.gz", hash = "sha256:40a7eae6dded22c7b604986855ea48400ab15b069ae38116e8c01238e9eeb64d"},
-    {file = "pytest_asyncio-0.21.1-py3-none-any.whl", hash = "sha256:8666c1c8ac02631d7c51ba282e0c69a8a452b211ffedf2599099845da5c5c37b"},
+    {file = "pytest_asyncio-0.21.2-py3-none-any.whl", hash = "sha256:ab664c88bb7998f711d8039cacd4884da6430886ae8bbd4eded552ed2004f16b"},
+    {file = "pytest_asyncio-0.21.2.tar.gz", hash = "sha256:d67738fc232b94b326b9d060750beb16e0074210b98dd8b58a5239fa2a154f45"},
 ]
 
 [package.dependencies]
@@ -764,73 +893,75 @@ files = [
 
 [[package]]
 name = "pyyaml"
-version = "6.0.1"
+version = "6.0.2"
 description = "YAML parser and emitter for Python"
 optional = false
-python-versions = ">=3.6"
+python-versions = ">=3.8"
 files = [
-    {file = "PyYAML-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a"},
-    {file = "PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f"},
-    {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"},
-    {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"},
-    {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"},
-    {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"},
-    {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"},
-    {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"},
-    {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"},
-    {file = "PyYAML-6.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f003ed9ad21d6a4713f0a9b5a7a0a79e08dd0f221aff4525a2be4c346ee60aab"},
-    {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"},
-    {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"},
-    {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"},
-    {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"},
-    {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"},
-    {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
-    {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
-    {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
-    {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"},
-    {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
-    {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
-    {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},
-    {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"},
-    {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"},
-    {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"},
-    {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"},
-    {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afd7e57eddb1a54f0f1a974bc4391af8bcce0b444685d936840f125cf046d5bd"},
-    {file = "PyYAML-6.0.1-cp36-cp36m-win32.whl", hash = "sha256:fca0e3a251908a499833aa292323f32437106001d436eca0e6e7833256674585"},
-    {file = "PyYAML-6.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:f22ac1c3cac4dbc50079e965eba2c1058622631e526bd9afd45fedd49ba781fa"},
-    {file = "PyYAML-6.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b1275ad35a5d18c62a7220633c913e1b42d44b46ee12554e5fd39c70a243d6a3"},
-    {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18aeb1bf9a78867dc38b259769503436b7c72f7a1f1f4c93ff9a17de54319b27"},
-    {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:596106435fa6ad000c2991a98fa58eeb8656ef2325d7e158344fb33864ed87e3"},
-    {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baa90d3f661d43131ca170712d903e6295d1f7a0f595074f151c0aed377c9b9c"},
-    {file = "PyYAML-6.0.1-cp37-cp37m-win32.whl", hash = "sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba"},
-    {file = "PyYAML-6.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:4fb147e7a67ef577a588a0e2c17b6db51dda102c71de36f8549b6816a96e1867"},
-    {file = "PyYAML-6.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1d4c7e777c441b20e32f52bd377e0c409713e8bb1386e1099c2415f26e479595"},
-    {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"},
-    {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"},
-    {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"},
-    {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"},
-    {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"},
-    {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"},
-    {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"},
-    {file = "PyYAML-6.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c8098ddcc2a85b61647b2590f825f3db38891662cfc2fc776415143f599bb859"},
-    {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"},
-    {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"},
-    {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"},
-    {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"},
-    {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"},
-    {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"},
-    {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"},
+    {file = "PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086"},
+    {file = "PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf"},
+    {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8824b5a04a04a047e72eea5cec3bc266db09e35de6bdfe34c9436ac5ee27d237"},
+    {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7c36280e6fb8385e520936c3cb3b8042851904eba0e58d277dca80a5cfed590b"},
+    {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ec031d5d2feb36d1d1a24380e4db6d43695f3748343d99434e6f5f9156aaa2ed"},
+    {file = "PyYAML-6.0.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:936d68689298c36b53b29f23c6dbb74de12b4ac12ca6cfe0e047bedceea56180"},
+    {file = "PyYAML-6.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:23502f431948090f597378482b4812b0caae32c22213aecf3b55325e049a6c68"},
+    {file = "PyYAML-6.0.2-cp310-cp310-win32.whl", hash = "sha256:2e99c6826ffa974fe6e27cdb5ed0021786b03fc98e5ee3c5bfe1fd5015f42b99"},
+    {file = "PyYAML-6.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:a4d3091415f010369ae4ed1fc6b79def9416358877534caf6a0fdd2146c87a3e"},
+    {file = "PyYAML-6.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cc1c1159b3d456576af7a3e4d1ba7e6924cb39de8f67111c735f6fc832082774"},
+    {file = "PyYAML-6.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1e2120ef853f59c7419231f3bf4e7021f1b936f6ebd222406c3b60212205d2ee"},
+    {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d225db5a45f21e78dd9358e58a98702a0302f2659a3c6cd320564b75b86f47c"},
+    {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5ac9328ec4831237bec75defaf839f7d4564be1e6b25ac710bd1a96321cc8317"},
+    {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ad2a3decf9aaba3d29c8f537ac4b243e36bef957511b4766cb0057d32b0be85"},
+    {file = "PyYAML-6.0.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ff3824dc5261f50c9b0dfb3be22b4567a6f938ccce4587b38952d85fd9e9afe4"},
+    {file = "PyYAML-6.0.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:797b4f722ffa07cc8d62053e4cff1486fa6dc094105d13fea7b1de7d8bf71c9e"},
+    {file = "PyYAML-6.0.2-cp311-cp311-win32.whl", hash = "sha256:11d8f3dd2b9c1207dcaf2ee0bbbfd5991f571186ec9cc78427ba5bd32afae4b5"},
+    {file = "PyYAML-6.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:e10ce637b18caea04431ce14fabcf5c64a1c61ec9c56b071a4b7ca131ca52d44"},
+    {file = "PyYAML-6.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c70c95198c015b85feafc136515252a261a84561b7b1d51e3384e0655ddf25ab"},
+    {file = "PyYAML-6.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ce826d6ef20b1bc864f0a68340c8b3287705cae2f8b4b1d932177dcc76721725"},
+    {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f71ea527786de97d1a0cc0eacd1defc0985dcf6b3f17bb77dcfc8c34bec4dc5"},
+    {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b22676e8097e9e22e36d6b7bda33190d0d400f345f23d4065d48f4ca7ae0425"},
+    {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80bab7bfc629882493af4aa31a4cfa43a4c57c83813253626916b8c7ada83476"},
+    {file = "PyYAML-6.0.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:0833f8694549e586547b576dcfaba4a6b55b9e96098b36cdc7ebefe667dfed48"},
+    {file = "PyYAML-6.0.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8b9c7197f7cb2738065c481a0461e50ad02f18c78cd75775628afb4d7137fb3b"},
+    {file = "PyYAML-6.0.2-cp312-cp312-win32.whl", hash = "sha256:ef6107725bd54b262d6dedcc2af448a266975032bc85ef0172c5f059da6325b4"},
+    {file = "PyYAML-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:7e7401d0de89a9a855c839bc697c079a4af81cf878373abd7dc625847d25cbd8"},
+    {file = "PyYAML-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:efdca5630322a10774e8e98e1af481aad470dd62c3170801852d752aa7a783ba"},
+    {file = "PyYAML-6.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:50187695423ffe49e2deacb8cd10510bc361faac997de9efef88badc3bb9e2d1"},
+    {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ffe8360bab4910ef1b9e87fb812d8bc0a308b0d0eef8c8f44e0254ab3b07133"},
+    {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:17e311b6c678207928d649faa7cb0d7b4c26a0ba73d41e99c4fff6b6c3276484"},
+    {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b189594dbe54f75ab3a1acec5f1e3faa7e8cf2f1e08d9b561cb41b845f69d5"},
+    {file = "PyYAML-6.0.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:41e4e3953a79407c794916fa277a82531dd93aad34e29c2a514c2c0c5fe971cc"},
+    {file = "PyYAML-6.0.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:68ccc6023a3400877818152ad9a1033e3db8625d899c72eacb5a668902e4d652"},
+    {file = "PyYAML-6.0.2-cp313-cp313-win32.whl", hash = "sha256:bc2fa7c6b47d6bc618dd7fb02ef6fdedb1090ec036abab80d4681424b84c1183"},
+    {file = "PyYAML-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563"},
+    {file = "PyYAML-6.0.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:24471b829b3bf607e04e88d79542a9d48bb037c2267d7927a874e6c205ca7e9a"},
+    {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7fded462629cfa4b685c5416b949ebad6cec74af5e2d42905d41e257e0869f5"},
+    {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d84a1718ee396f54f3a086ea0a66d8e552b2ab2017ef8b420e92edbc841c352d"},
+    {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9056c1ecd25795207ad294bcf39f2db3d845767be0ea6e6a34d856f006006083"},
+    {file = "PyYAML-6.0.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:82d09873e40955485746739bcb8b4586983670466c23382c19cffecbf1fd8706"},
+    {file = "PyYAML-6.0.2-cp38-cp38-win32.whl", hash = "sha256:43fa96a3ca0d6b1812e01ced1044a003533c47f6ee8aca31724f78e93ccc089a"},
+    {file = "PyYAML-6.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:01179a4a8559ab5de078078f37e5c1a30d76bb88519906844fd7bdea1b7729ff"},
+    {file = "PyYAML-6.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:688ba32a1cffef67fd2e9398a2efebaea461578b0923624778664cc1c914db5d"},
+    {file = "PyYAML-6.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a8786accb172bd8afb8be14490a16625cbc387036876ab6ba70912730faf8e1f"},
+    {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8e03406cac8513435335dbab54c0d385e4a49e4945d2909a581c83647ca0290"},
+    {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f753120cb8181e736c57ef7636e83f31b9c0d1722c516f7e86cf15b7aa57ff12"},
+    {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3b1fdb9dc17f5a7677423d508ab4f243a726dea51fa5e70992e59a7411c89d19"},
+    {file = "PyYAML-6.0.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0b69e4ce7a131fe56b7e4d770c67429700908fc0752af059838b1cfb41960e4e"},
+    {file = "PyYAML-6.0.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a9f8c2e67970f13b16084e04f134610fd1d374bf477b17ec1599185cf611d725"},
+    {file = "PyYAML-6.0.2-cp39-cp39-win32.whl", hash = "sha256:6395c297d42274772abc367baaa79683958044e5d3835486c16da75d2a694631"},
+    {file = "PyYAML-6.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:39693e1f8320ae4f43943590b49779ffb98acb81f788220ea932a6b6c51004d8"},
+    {file = "pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e"},
 ]
 
 [[package]]
 name = "requests"
-version = "2.31.0"
+version = "2.32.3"
 description = "Python HTTP for Humans."
 optional = false
-python-versions = ">=3.7"
+python-versions = ">=3.8"
 files = [
-    {file = "requests-2.31.0-py3-none-any.whl", hash = "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f"},
-    {file = "requests-2.31.0.tar.gz", hash = "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1"},
+    {file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"},
+    {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"},
 ]
 
 [package.dependencies]
@@ -886,13 +1017,13 @@ files = [
 
 [[package]]
 name = "tqdm"
-version = "4.66.1"
+version = "4.66.5"
 description = "Fast, Extensible Progress Meter"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "tqdm-4.66.1-py3-none-any.whl", hash = "sha256:d302b3c5b53d47bce91fea46679d9c3c6508cf6332229aa1e7d8653723793386"},
-    {file = "tqdm-4.66.1.tar.gz", hash = "sha256:d88e651f9db8d8551a62556d3cff9e3034274ca5d66e93197cf2490e2dcb69c7"},
+    {file = "tqdm-4.66.5-py3-none-any.whl", hash = "sha256:90279a3770753eafc9194a0364852159802111925aa30eb3f9d85b0e805ac7cd"},
+    {file = "tqdm-4.66.5.tar.gz", hash = "sha256:e1020aef2e5096702d8a025ac7d16b1577279c9d63f8375b63083e9a5f0fcbad"},
 ]
 
 [package.dependencies]
@@ -906,129 +1037,131 @@ telegram = ["requests"]
 
 [[package]]
 name = "typing-extensions"
-version = "4.7.1"
-description = "Backported and Experimental Type Hints for Python 3.7+"
+version = "4.12.2"
+description = "Backported and Experimental Type Hints for Python 3.8+"
 optional = false
-python-versions = ">=3.7"
+python-versions = ">=3.8"
 files = [
-    {file = "typing_extensions-4.7.1-py3-none-any.whl", hash = "sha256:440d5dd3af93b060174bf433bccd69b0babc3b15b1a8dca43789fd7f61514b36"},
-    {file = "typing_extensions-4.7.1.tar.gz", hash = "sha256:b75ddc264f0ba5615db7ba217daeb99701ad295353c45f9e95963337ceeeffb2"},
+    {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"},
+    {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"},
 ]
 
 [[package]]
 name = "urllib3"
-version = "2.0.4"
+version = "2.2.2"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
 optional = false
-python-versions = ">=3.7"
+python-versions = ">=3.8"
 files = [
-    {file = "urllib3-2.0.4-py3-none-any.whl", hash = "sha256:de7df1803967d2c2a98e4b11bb7d6bd9210474c46e8a0401514e3a42a75ebde4"},
-    {file = "urllib3-2.0.4.tar.gz", hash = "sha256:8d22f86aae8ef5e410d4f539fde9ce6b2113a001bb4d189e0aed70642d602b11"},
+    {file = "urllib3-2.2.2-py3-none-any.whl", hash = "sha256:a448b2f64d686155468037e1ace9f2d2199776e17f0a46610480d311f73e3472"},
+    {file = "urllib3-2.2.2.tar.gz", hash = "sha256:dd505485549a7a552833da5e6063639d0d177c04f23bc3864e41e5dc5f612168"},
 ]
 
 [package.extras]
 brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"]
-secure = ["certifi", "cryptography (>=1.9)", "idna (>=2.0.0)", "pyopenssl (>=17.1.0)", "urllib3-secure-extra"]
+h2 = ["h2 (>=4,<5)"]
 socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"]
 zstd = ["zstandard (>=0.18.0)"]
 
 [[package]]
-name = "websocket-client"
-version = "1.6.2"
-description = "WebSocket client for Python with low level API options"
+name = "yarl"
+version = "1.11.1"
+description = "Yet another URL library"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "websocket-client-1.6.2.tar.gz", hash = "sha256:53e95c826bf800c4c465f50093a8c4ff091c7327023b10bfaff40cf1ef170eaa"},
-    {file = "websocket_client-1.6.2-py3-none-any.whl", hash = "sha256:ce54f419dfae71f4bdba69ebe65bf7f0a93fe71bc009ad3a010aacc3eebad537"},
-]
-
-[package.extras]
-docs = ["Sphinx (>=6.0)", "sphinx-rtd-theme (>=1.1.0)"]
-optional = ["python-socks", "wsaccel"]
-test = ["websockets"]
-
-[[package]]
-name = "yarl"
-version = "1.9.2"
-description = "Yet another URL library"
-optional = false
-python-versions = ">=3.7"
-files = [
-    {file = "yarl-1.9.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:8c2ad583743d16ddbdf6bb14b5cd76bf43b0d0006e918809d5d4ddf7bde8dd82"},
-    {file = "yarl-1.9.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:82aa6264b36c50acfb2424ad5ca537a2060ab6de158a5bd2a72a032cc75b9eb8"},
-    {file = "yarl-1.9.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c0c77533b5ed4bcc38e943178ccae29b9bcf48ffd1063f5821192f23a1bd27b9"},
-    {file = "yarl-1.9.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ee4afac41415d52d53a9833ebae7e32b344be72835bbb589018c9e938045a560"},
-    {file = "yarl-1.9.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9bf345c3a4f5ba7f766430f97f9cc1320786f19584acc7086491f45524a551ac"},
-    {file = "yarl-1.9.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2a96c19c52ff442a808c105901d0bdfd2e28575b3d5f82e2f5fd67e20dc5f4ea"},
-    {file = "yarl-1.9.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:891c0e3ec5ec881541f6c5113d8df0315ce5440e244a716b95f2525b7b9f3608"},
-    {file = "yarl-1.9.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c3a53ba34a636a256d767c086ceb111358876e1fb6b50dfc4d3f4951d40133d5"},
-    {file = "yarl-1.9.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:566185e8ebc0898b11f8026447eacd02e46226716229cea8db37496c8cdd26e0"},
-    {file = "yarl-1.9.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:2b0738fb871812722a0ac2154be1f049c6223b9f6f22eec352996b69775b36d4"},
-    {file = "yarl-1.9.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:32f1d071b3f362c80f1a7d322bfd7b2d11e33d2adf395cc1dd4df36c9c243095"},
-    {file = "yarl-1.9.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:e9fdc7ac0d42bc3ea78818557fab03af6181e076a2944f43c38684b4b6bed8e3"},
-    {file = "yarl-1.9.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:56ff08ab5df8429901ebdc5d15941b59f6253393cb5da07b4170beefcf1b2528"},
-    {file = "yarl-1.9.2-cp310-cp310-win32.whl", hash = "sha256:8ea48e0a2f931064469bdabca50c2f578b565fc446f302a79ba6cc0ee7f384d3"},
-    {file = "yarl-1.9.2-cp310-cp310-win_amd64.whl", hash = "sha256:50f33040f3836e912ed16d212f6cc1efb3231a8a60526a407aeb66c1c1956dde"},
-    {file = "yarl-1.9.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:646d663eb2232d7909e6601f1a9107e66f9791f290a1b3dc7057818fe44fc2b6"},
-    {file = "yarl-1.9.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:aff634b15beff8902d1f918012fc2a42e0dbae6f469fce134c8a0dc51ca423bb"},
-    {file = "yarl-1.9.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a83503934c6273806aed765035716216cc9ab4e0364f7f066227e1aaea90b8d0"},
-    {file = "yarl-1.9.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b25322201585c69abc7b0e89e72790469f7dad90d26754717f3310bfe30331c2"},
-    {file = "yarl-1.9.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:22a94666751778629f1ec4280b08eb11815783c63f52092a5953faf73be24191"},
-    {file = "yarl-1.9.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8ec53a0ea2a80c5cd1ab397925f94bff59222aa3cf9c6da938ce05c9ec20428d"},
-    {file = "yarl-1.9.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:159d81f22d7a43e6eabc36d7194cb53f2f15f498dbbfa8edc8a3239350f59fe7"},
-    {file = "yarl-1.9.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:832b7e711027c114d79dffb92576acd1bd2decc467dec60e1cac96912602d0e6"},
-    {file = "yarl-1.9.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:95d2ecefbcf4e744ea952d073c6922e72ee650ffc79028eb1e320e732898d7e8"},
-    {file = "yarl-1.9.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:d4e2c6d555e77b37288eaf45b8f60f0737c9efa3452c6c44626a5455aeb250b9"},
-    {file = "yarl-1.9.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:783185c75c12a017cc345015ea359cc801c3b29a2966c2655cd12b233bf5a2be"},
-    {file = "yarl-1.9.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:b8cc1863402472f16c600e3e93d542b7e7542a540f95c30afd472e8e549fc3f7"},
-    {file = "yarl-1.9.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:822b30a0f22e588b32d3120f6d41e4ed021806418b4c9f0bc3048b8c8cb3f92a"},
-    {file = "yarl-1.9.2-cp311-cp311-win32.whl", hash = "sha256:a60347f234c2212a9f0361955007fcf4033a75bf600a33c88a0a8e91af77c0e8"},
-    {file = "yarl-1.9.2-cp311-cp311-win_amd64.whl", hash = "sha256:be6b3fdec5c62f2a67cb3f8c6dbf56bbf3f61c0f046f84645cd1ca73532ea051"},
-    {file = "yarl-1.9.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:38a3928ae37558bc1b559f67410df446d1fbfa87318b124bf5032c31e3447b74"},
-    {file = "yarl-1.9.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ac9bb4c5ce3975aeac288cfcb5061ce60e0d14d92209e780c93954076c7c4367"},
-    {file = "yarl-1.9.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3da8a678ca8b96c8606bbb8bfacd99a12ad5dd288bc6f7979baddd62f71c63ef"},
-    {file = "yarl-1.9.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:13414591ff516e04fcdee8dc051c13fd3db13b673c7a4cb1350e6b2ad9639ad3"},
-    {file = "yarl-1.9.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf74d08542c3a9ea97bb8f343d4fcbd4d8f91bba5ec9d5d7f792dbe727f88938"},
-    {file = "yarl-1.9.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6e7221580dc1db478464cfeef9b03b95c5852cc22894e418562997df0d074ccc"},
-    {file = "yarl-1.9.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:494053246b119b041960ddcd20fd76224149cfea8ed8777b687358727911dd33"},
-    {file = "yarl-1.9.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:52a25809fcbecfc63ac9ba0c0fb586f90837f5425edfd1ec9f3372b119585e45"},
-    {file = "yarl-1.9.2-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:e65610c5792870d45d7b68c677681376fcf9cc1c289f23e8e8b39c1485384185"},
-    {file = "yarl-1.9.2-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:1b1bba902cba32cdec51fca038fd53f8beee88b77efc373968d1ed021024cc04"},
-    {file = "yarl-1.9.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:662e6016409828ee910f5d9602a2729a8a57d74b163c89a837de3fea050c7582"},
-    {file = "yarl-1.9.2-cp37-cp37m-win32.whl", hash = "sha256:f364d3480bffd3aa566e886587eaca7c8c04d74f6e8933f3f2c996b7f09bee1b"},
-    {file = "yarl-1.9.2-cp37-cp37m-win_amd64.whl", hash = "sha256:6a5883464143ab3ae9ba68daae8e7c5c95b969462bbe42e2464d60e7e2698368"},
-    {file = "yarl-1.9.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5610f80cf43b6202e2c33ba3ec2ee0a2884f8f423c8f4f62906731d876ef4fac"},
-    {file = "yarl-1.9.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:b9a4e67ad7b646cd6f0938c7ebfd60e481b7410f574c560e455e938d2da8e0f4"},
-    {file = "yarl-1.9.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:83fcc480d7549ccebe9415d96d9263e2d4226798c37ebd18c930fce43dfb9574"},
-    {file = "yarl-1.9.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5fcd436ea16fee7d4207c045b1e340020e58a2597301cfbcfdbe5abd2356c2fb"},
-    {file = "yarl-1.9.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:84e0b1599334b1e1478db01b756e55937d4614f8654311eb26012091be109d59"},
-    {file = "yarl-1.9.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3458a24e4ea3fd8930e934c129b676c27452e4ebda80fbe47b56d8c6c7a63a9e"},
-    {file = "yarl-1.9.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:838162460b3a08987546e881a2bfa573960bb559dfa739e7800ceeec92e64417"},
-    {file = "yarl-1.9.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f4e2d08f07a3d7d3e12549052eb5ad3eab1c349c53ac51c209a0e5991bbada78"},
-    {file = "yarl-1.9.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:de119f56f3c5f0e2fb4dee508531a32b069a5f2c6e827b272d1e0ff5ac040333"},
-    {file = "yarl-1.9.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:149ddea5abf329752ea5051b61bd6c1d979e13fbf122d3a1f9f0c8be6cb6f63c"},
-    {file = "yarl-1.9.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:674ca19cbee4a82c9f54e0d1eee28116e63bc6fd1e96c43031d11cbab8b2afd5"},
-    {file = "yarl-1.9.2-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:9b3152f2f5677b997ae6c804b73da05a39daa6a9e85a512e0e6823d81cdad7cc"},
-    {file = "yarl-1.9.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:5415d5a4b080dc9612b1b63cba008db84e908b95848369aa1da3686ae27b6d2b"},
-    {file = "yarl-1.9.2-cp38-cp38-win32.whl", hash = "sha256:f7a3d8146575e08c29ed1cd287068e6d02f1c7bdff8970db96683b9591b86ee7"},
-    {file = "yarl-1.9.2-cp38-cp38-win_amd64.whl", hash = "sha256:63c48f6cef34e6319a74c727376e95626f84ea091f92c0250a98e53e62c77c72"},
-    {file = "yarl-1.9.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:75df5ef94c3fdc393c6b19d80e6ef1ecc9ae2f4263c09cacb178d871c02a5ba9"},
-    {file = "yarl-1.9.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c027a6e96ef77d401d8d5a5c8d6bc478e8042f1e448272e8d9752cb0aff8b5c8"},
-    {file = "yarl-1.9.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f3b078dbe227f79be488ffcfc7a9edb3409d018e0952cf13f15fd6512847f3f7"},
-    {file = "yarl-1.9.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:59723a029760079b7d991a401386390c4be5bfec1e7dd83e25a6a0881859e716"},
-    {file = "yarl-1.9.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b03917871bf859a81ccb180c9a2e6c1e04d2f6a51d953e6a5cdd70c93d4e5a2a"},
-    {file = "yarl-1.9.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c1012fa63eb6c032f3ce5d2171c267992ae0c00b9e164efe4d73db818465fac3"},
-    {file = "yarl-1.9.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a74dcbfe780e62f4b5a062714576f16c2f3493a0394e555ab141bf0d746bb955"},
-    {file = "yarl-1.9.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8c56986609b057b4839968ba901944af91b8e92f1725d1a2d77cbac6972b9ed1"},
-    {file = "yarl-1.9.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:2c315df3293cd521033533d242d15eab26583360b58f7ee5d9565f15fee1bef4"},
-    {file = "yarl-1.9.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:b7232f8dfbd225d57340e441d8caf8652a6acd06b389ea2d3222b8bc89cbfca6"},
-    {file = "yarl-1.9.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:53338749febd28935d55b41bf0bcc79d634881195a39f6b2f767870b72514caf"},
-    {file = "yarl-1.9.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:066c163aec9d3d073dc9ffe5dd3ad05069bcb03fcaab8d221290ba99f9f69ee3"},
-    {file = "yarl-1.9.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8288d7cd28f8119b07dd49b7230d6b4562f9b61ee9a4ab02221060d21136be80"},
-    {file = "yarl-1.9.2-cp39-cp39-win32.whl", hash = "sha256:b124e2a6d223b65ba8768d5706d103280914d61f5cae3afbc50fc3dfcc016623"},
-    {file = "yarl-1.9.2-cp39-cp39-win_amd64.whl", hash = "sha256:61016e7d582bc46a5378ffdd02cd0314fb8ba52f40f9cf4d9a5e7dbef88dee18"},
-    {file = "yarl-1.9.2.tar.gz", hash = "sha256:04ab9d4b9f587c06d801c2abfe9317b77cdf996c65a90d5e84ecc45010823571"},
+    {file = "yarl-1.11.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:400cd42185f92de559d29eeb529e71d80dfbd2f45c36844914a4a34297ca6f00"},
+    {file = "yarl-1.11.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8258c86f47e080a258993eed877d579c71da7bda26af86ce6c2d2d072c11320d"},
+    {file = "yarl-1.11.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2164cd9725092761fed26f299e3f276bb4b537ca58e6ff6b252eae9631b5c96e"},
+    {file = "yarl-1.11.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08ea567c16f140af8ddc7cb58e27e9138a1386e3e6e53982abaa6f2377b38cc"},
+    {file = "yarl-1.11.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:768ecc550096b028754ea28bf90fde071c379c62c43afa574edc6f33ee5daaec"},
+    {file = "yarl-1.11.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2909fa3a7d249ef64eeb2faa04b7957e34fefb6ec9966506312349ed8a7e77bf"},
+    {file = "yarl-1.11.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:01a8697ec24f17c349c4f655763c4db70eebc56a5f82995e5e26e837c6eb0e49"},
+    {file = "yarl-1.11.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e286580b6511aac7c3268a78cdb861ec739d3e5a2a53b4809faef6b49778eaff"},
+    {file = "yarl-1.11.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:4179522dc0305c3fc9782549175c8e8849252fefeb077c92a73889ccbcd508ad"},
+    {file = "yarl-1.11.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:27fcb271a41b746bd0e2a92182df507e1c204759f460ff784ca614e12dd85145"},
+    {file = "yarl-1.11.1-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:f61db3b7e870914dbd9434b560075e0366771eecbe6d2b5561f5bc7485f39efd"},
+    {file = "yarl-1.11.1-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:c92261eb2ad367629dc437536463dc934030c9e7caca861cc51990fe6c565f26"},
+    {file = "yarl-1.11.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:d95b52fbef190ca87d8c42f49e314eace4fc52070f3dfa5f87a6594b0c1c6e46"},
+    {file = "yarl-1.11.1-cp310-cp310-win32.whl", hash = "sha256:489fa8bde4f1244ad6c5f6d11bb33e09cf0d1d0367edb197619c3e3fc06f3d91"},
+    {file = "yarl-1.11.1-cp310-cp310-win_amd64.whl", hash = "sha256:476e20c433b356e16e9a141449f25161e6b69984fb4cdbd7cd4bd54c17844998"},
+    {file = "yarl-1.11.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:946eedc12895873891aaceb39bceb484b4977f70373e0122da483f6c38faaa68"},
+    {file = "yarl-1.11.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:21a7c12321436b066c11ec19c7e3cb9aec18884fe0d5b25d03d756a9e654edfe"},
+    {file = "yarl-1.11.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c35f493b867912f6fda721a59cc7c4766d382040bdf1ddaeeaa7fa4d072f4675"},
+    {file = "yarl-1.11.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25861303e0be76b60fddc1250ec5986c42f0a5c0c50ff57cc30b1be199c00e63"},
+    {file = "yarl-1.11.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e4b53f73077e839b3f89c992223f15b1d2ab314bdbdf502afdc7bb18e95eae27"},
+    {file = "yarl-1.11.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:327c724b01b8641a1bf1ab3b232fb638706e50f76c0b5bf16051ab65c868fac5"},
+    {file = "yarl-1.11.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4307d9a3417eea87715c9736d050c83e8c1904e9b7aada6ce61b46361b733d92"},
+    {file = "yarl-1.11.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:48a28bed68ab8fb7e380775f0029a079f08a17799cb3387a65d14ace16c12e2b"},
+    {file = "yarl-1.11.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:067b961853c8e62725ff2893226fef3d0da060656a9827f3f520fb1d19b2b68a"},
+    {file = "yarl-1.11.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:8215f6f21394d1f46e222abeb06316e77ef328d628f593502d8fc2a9117bde83"},
+    {file = "yarl-1.11.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:498442e3af2a860a663baa14fbf23fb04b0dd758039c0e7c8f91cb9279799bff"},
+    {file = "yarl-1.11.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:69721b8effdb588cb055cc22f7c5105ca6fdaa5aeb3ea09021d517882c4a904c"},
+    {file = "yarl-1.11.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1e969fa4c1e0b1a391f3fcbcb9ec31e84440253325b534519be0d28f4b6b533e"},
+    {file = "yarl-1.11.1-cp311-cp311-win32.whl", hash = "sha256:7d51324a04fc4b0e097ff8a153e9276c2593106a811704025bbc1d6916f45ca6"},
+    {file = "yarl-1.11.1-cp311-cp311-win_amd64.whl", hash = "sha256:15061ce6584ece023457fb8b7a7a69ec40bf7114d781a8c4f5dcd68e28b5c53b"},
+    {file = "yarl-1.11.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:a4264515f9117be204935cd230fb2a052dd3792789cc94c101c535d349b3dab0"},
+    {file = "yarl-1.11.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f41fa79114a1d2eddb5eea7b912d6160508f57440bd302ce96eaa384914cd265"},
+    {file = "yarl-1.11.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:02da8759b47d964f9173c8675710720b468aa1c1693be0c9c64abb9d8d9a4867"},
+    {file = "yarl-1.11.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9361628f28f48dcf8b2f528420d4d68102f593f9c2e592bfc842f5fb337e44fd"},
+    {file = "yarl-1.11.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b91044952da03b6f95fdba398d7993dd983b64d3c31c358a4c89e3c19b6f7aef"},
+    {file = "yarl-1.11.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:74db2ef03b442276d25951749a803ddb6e270d02dda1d1c556f6ae595a0d76a8"},
+    {file = "yarl-1.11.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e975a2211952a8a083d1b9d9ba26472981ae338e720b419eb50535de3c02870"},
+    {file = "yarl-1.11.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8aef97ba1dd2138112890ef848e17d8526fe80b21f743b4ee65947ea184f07a2"},
+    {file = "yarl-1.11.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a7915ea49b0c113641dc4d9338efa9bd66b6a9a485ffe75b9907e8573ca94b84"},
+    {file = "yarl-1.11.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:504cf0d4c5e4579a51261d6091267f9fd997ef58558c4ffa7a3e1460bd2336fa"},
+    {file = "yarl-1.11.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:3de5292f9f0ee285e6bd168b2a77b2a00d74cbcfa420ed078456d3023d2f6dff"},
+    {file = "yarl-1.11.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:a34e1e30f1774fa35d37202bbeae62423e9a79d78d0874e5556a593479fdf239"},
+    {file = "yarl-1.11.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:66b63c504d2ca43bf7221a1f72fbe981ff56ecb39004c70a94485d13e37ebf45"},
+    {file = "yarl-1.11.1-cp312-cp312-win32.whl", hash = "sha256:a28b70c9e2213de425d9cba5ab2e7f7a1c8ca23a99c4b5159bf77b9c31251447"},
+    {file = "yarl-1.11.1-cp312-cp312-win_amd64.whl", hash = "sha256:17b5a386d0d36fb828e2fb3ef08c8829c1ebf977eef88e5367d1c8c94b454639"},
+    {file = "yarl-1.11.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:1fa2e7a406fbd45b61b4433e3aa254a2c3e14c4b3186f6e952d08a730807fa0c"},
+    {file = "yarl-1.11.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:750f656832d7d3cb0c76be137ee79405cc17e792f31e0a01eee390e383b2936e"},
+    {file = "yarl-1.11.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0b8486f322d8f6a38539136a22c55f94d269addb24db5cb6f61adc61eabc9d93"},
+    {file = "yarl-1.11.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3fce4da3703ee6048ad4138fe74619c50874afe98b1ad87b2698ef95bf92c96d"},
+    {file = "yarl-1.11.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8ed653638ef669e0efc6fe2acb792275cb419bf9cb5c5049399f3556995f23c7"},
+    {file = "yarl-1.11.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:18ac56c9dd70941ecad42b5a906820824ca72ff84ad6fa18db33c2537ae2e089"},
+    {file = "yarl-1.11.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:688654f8507464745ab563b041d1fb7dab5d9912ca6b06e61d1c4708366832f5"},
+    {file = "yarl-1.11.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4973eac1e2ff63cf187073cd4e1f1148dcd119314ab79b88e1b3fad74a18c9d5"},
+    {file = "yarl-1.11.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:964a428132227edff96d6f3cf261573cb0f1a60c9a764ce28cda9525f18f7786"},
+    {file = "yarl-1.11.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:6d23754b9939cbab02c63434776df1170e43b09c6a517585c7ce2b3d449b7318"},
+    {file = "yarl-1.11.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:c2dc4250fe94d8cd864d66018f8344d4af50e3758e9d725e94fecfa27588ff82"},
+    {file = "yarl-1.11.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:09696438cb43ea6f9492ef237761b043f9179f455f405279e609f2bc9100212a"},
+    {file = "yarl-1.11.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:999bfee0a5b7385a0af5ffb606393509cfde70ecca4f01c36985be6d33e336da"},
+    {file = "yarl-1.11.1-cp313-cp313-win32.whl", hash = "sha256:ce928c9c6409c79e10f39604a7e214b3cb69552952fbda8d836c052832e6a979"},
+    {file = "yarl-1.11.1-cp313-cp313-win_amd64.whl", hash = "sha256:501c503eed2bb306638ccb60c174f856cc3246c861829ff40eaa80e2f0330367"},
+    {file = "yarl-1.11.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:dae7bd0daeb33aa3e79e72877d3d51052e8b19c9025ecf0374f542ea8ec120e4"},
+    {file = "yarl-1.11.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:3ff6b1617aa39279fe18a76c8d165469c48b159931d9b48239065767ee455b2b"},
+    {file = "yarl-1.11.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3257978c870728a52dcce8c2902bf01f6c53b65094b457bf87b2644ee6238ddc"},
+    {file = "yarl-1.11.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0f351fa31234699d6084ff98283cb1e852270fe9e250a3b3bf7804eb493bd937"},
+    {file = "yarl-1.11.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8aef1b64da41d18026632d99a06b3fefe1d08e85dd81d849fa7c96301ed22f1b"},
+    {file = "yarl-1.11.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7175a87ab8f7fbde37160a15e58e138ba3b2b0e05492d7351314a250d61b1591"},
+    {file = "yarl-1.11.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba444bdd4caa2a94456ef67a2f383710928820dd0117aae6650a4d17029fa25e"},
+    {file = "yarl-1.11.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0ea9682124fc062e3d931c6911934a678cb28453f957ddccf51f568c2f2b5e05"},
+    {file = "yarl-1.11.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:8418c053aeb236b20b0ab8fa6bacfc2feaaf7d4683dd96528610989c99723d5f"},
+    {file = "yarl-1.11.1-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:61a5f2c14d0a1adfdd82258f756b23a550c13ba4c86c84106be4c111a3a4e413"},
+    {file = "yarl-1.11.1-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:f3a6d90cab0bdf07df8f176eae3a07127daafcf7457b997b2bf46776da2c7eb7"},
+    {file = "yarl-1.11.1-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:077da604852be488c9a05a524068cdae1e972b7dc02438161c32420fb4ec5e14"},
+    {file = "yarl-1.11.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:15439f3c5c72686b6c3ff235279630d08936ace67d0fe5c8d5bbc3ef06f5a420"},
+    {file = "yarl-1.11.1-cp38-cp38-win32.whl", hash = "sha256:238a21849dd7554cb4d25a14ffbfa0ef380bb7ba201f45b144a14454a72ffa5a"},
+    {file = "yarl-1.11.1-cp38-cp38-win_amd64.whl", hash = "sha256:67459cf8cf31da0e2cbdb4b040507e535d25cfbb1604ca76396a3a66b8ba37a6"},
+    {file = "yarl-1.11.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:884eab2ce97cbaf89f264372eae58388862c33c4f551c15680dd80f53c89a269"},
+    {file = "yarl-1.11.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8a336eaa7ee7e87cdece3cedb395c9657d227bfceb6781295cf56abcd3386a26"},
+    {file = "yarl-1.11.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:87f020d010ba80a247c4abc335fc13421037800ca20b42af5ae40e5fd75e7909"},
+    {file = "yarl-1.11.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:637c7ddb585a62d4469f843dac221f23eec3cbad31693b23abbc2c366ad41ff4"},
+    {file = "yarl-1.11.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:48dfd117ab93f0129084577a07287376cc69c08138694396f305636e229caa1a"},
+    {file = "yarl-1.11.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:75e0ae31fb5ccab6eda09ba1494e87eb226dcbd2372dae96b87800e1dcc98804"},
+    {file = "yarl-1.11.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f46f81501160c28d0c0b7333b4f7be8983dbbc161983b6fb814024d1b4952f79"},
+    {file = "yarl-1.11.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:04293941646647b3bfb1719d1d11ff1028e9c30199509a844da3c0f5919dc520"},
+    {file = "yarl-1.11.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:250e888fa62d73e721f3041e3a9abf427788a1934b426b45e1b92f62c1f68366"},
+    {file = "yarl-1.11.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:e8f63904df26d1a66aabc141bfd258bf738b9bc7bc6bdef22713b4f5ef789a4c"},
+    {file = "yarl-1.11.1-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:aac44097d838dda26526cffb63bdd8737a2dbdf5f2c68efb72ad83aec6673c7e"},
+    {file = "yarl-1.11.1-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:267b24f891e74eccbdff42241c5fb4f974de2d6271dcc7d7e0c9ae1079a560d9"},
+    {file = "yarl-1.11.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:6907daa4b9d7a688063ed098c472f96e8181733c525e03e866fb5db480a424df"},
+    {file = "yarl-1.11.1-cp39-cp39-win32.whl", hash = "sha256:14438dfc5015661f75f85bc5adad0743678eefee266ff0c9a8e32969d5d69f74"},
+    {file = "yarl-1.11.1-cp39-cp39-win_amd64.whl", hash = "sha256:94d0caaa912bfcdc702a4204cd5e2bb01eb917fc4f5ea2315aa23962549561b0"},
+    {file = "yarl-1.11.1-py3-none-any.whl", hash = "sha256:72bf26f66456baa0584eff63e44545c9f0eaed9b73cb6601b647c91f14c11f38"},
+    {file = "yarl-1.11.1.tar.gz", hash = "sha256:1bb2d9e212fb7449b8fb73bc461b51eaa17cc8430b4a87d87be7b25052d92f53"},
 ]
 
 [package.dependencies]
@@ -1037,5 +1170,5 @@ multidict = ">=4.0"
 
 [metadata]
 lock-version = "2.0"
-python-versions = ">=3.9,<3.13"
-content-hash = "f5c65e704b02250d73055cd04efcc22f8fc36144eddfc447a71c3a061748db80"
+python-versions = ">=3.10,<3.13"
+content-hash = "310c0a2349bc0a0713b50f8482b4df4d07c48e0ce2c1bd91c30872f52de3fe1c"
diff --git a/integration-tests/pyproject.toml b/integration-tests/pyproject.toml
index 123c1167..afd57ea7 100644
--- a/integration-tests/pyproject.toml
+++ b/integration-tests/pyproject.toml
@@ -6,9 +6,10 @@ authors = ["Nicolas Patry <nicolas@huggingface.co>"]
 
 [tool.poetry.dependencies]
 pydantic = "> 2, < 3"
-python = ">=3.9,<3.13"
+python = ">=3.10,<3.13"
 syrupy = "^4.7.1"
 text-generation = "^0.6.0"
 pytest = "^7.4.0"
 pytest-asyncio = "^0.21.1"
-docker = "^6.1.3"
+docker = "^7"
+numpy = "^1.20"
diff --git a/integration-tests/requirements.txt b/integration-tests/requirements.txt
index f3f0569b..8bf6ba07 100644
--- a/integration-tests/requirements.txt
+++ b/integration-tests/requirements.txt
@@ -1,34 +1,35 @@
-aiohttp==3.8.5 ; python_version >= "3.9" and python_version < "3.13"
-aiosignal==1.3.1 ; python_version >= "3.9" and python_version < "3.13"
-annotated-types==0.6.0 ; python_version >= "3.9" and python_version < "3.13"
-async-timeout==4.0.3 ; python_version >= "3.9" and python_version < "3.13"
-attrs==23.1.0 ; python_version >= "3.9" and python_version < "3.13"
-certifi==2023.7.22 ; python_version >= "3.9" and python_version < "3.13"
-charset-normalizer==3.2.0 ; python_version >= "3.9" and python_version < "3.13"
-colorama==0.4.6 ; python_version >= "3.9" and python_version < "3.13" and (sys_platform == "win32" or platform_system == "Windows")
-docker==6.1.3 ; python_version >= "3.9" and python_version < "3.13"
-exceptiongroup==1.1.3 ; python_version >= "3.9" and python_version < "3.11"
-filelock==3.12.3 ; python_version >= "3.9" and python_version < "3.13"
-frozenlist==1.4.0 ; python_version >= "3.9" and python_version < "3.13"
-fsspec==2023.6.0 ; python_version >= "3.9" and python_version < "3.13"
-huggingface-hub==0.16.4 ; python_version >= "3.9" and python_version < "3.13"
-idna==3.4 ; python_version >= "3.9" and python_version < "3.13"
-iniconfig==2.0.0 ; python_version >= "3.9" and python_version < "3.13"
-multidict==6.0.4 ; python_version >= "3.9" and python_version < "3.13"
-packaging==23.1 ; python_version >= "3.9" and python_version < "3.13"
-pluggy==1.3.0 ; python_version >= "3.9" and python_version < "3.13"
-pydantic-core==2.16.3 ; python_version >= "3.9" and python_version < "3.13"
-pydantic==2.6.4 ; python_version >= "3.9" and python_version < "3.13"
-pytest-asyncio==0.21.1 ; python_version >= "3.9" and python_version < "3.13"
-pytest==7.4.0 ; python_version >= "3.9" and python_version < "3.13"
-pywin32==306 ; python_version >= "3.9" and python_version < "3.13" and sys_platform == "win32"
-pyyaml==6.0.1 ; python_version >= "3.9" and python_version < "3.13"
-requests==2.31.0 ; python_version >= "3.9" and python_version < "3.13"
-syrupy==4.7.1 ; python_version >= "3.9" and python_version < "3.13"
-text-generation==0.6.1 ; python_version >= "3.9" and python_version < "3.13"
-tomli==2.0.1 ; python_version >= "3.9" and python_version < "3.11"
-tqdm==4.66.1 ; python_version >= "3.9" and python_version < "3.13"
-typing-extensions==4.7.1 ; python_version >= "3.9" and python_version < "3.13"
-urllib3==2.0.4 ; python_version >= "3.9" and python_version < "3.13"
-websocket-client==1.6.2 ; python_version >= "3.9" and python_version < "3.13"
-yarl==1.9.2 ; python_version >= "3.9" and python_version < "3.13"
+aiohappyeyeballs==2.4.0 ; python_version >= "3.10" and python_version < "3.13"
+aiohttp==3.10.5 ; python_version >= "3.10" and python_version < "3.13"
+aiosignal==1.3.1 ; python_version >= "3.10" and python_version < "3.13"
+annotated-types==0.7.0 ; python_version >= "3.10" and python_version < "3.13"
+async-timeout==4.0.3 ; python_version >= "3.10" and python_version < "3.11"
+attrs==24.2.0 ; python_version >= "3.10" and python_version < "3.13"
+certifi==2024.8.30 ; python_version >= "3.10" and python_version < "3.13"
+charset-normalizer==3.3.2 ; python_version >= "3.10" and python_version < "3.13"
+colorama==0.4.6 ; python_version >= "3.10" and python_version < "3.13" and (sys_platform == "win32" or platform_system == "Windows")
+docker==7.1.0 ; python_version >= "3.10" and python_version < "3.13"
+exceptiongroup==1.2.2 ; python_version >= "3.10" and python_version < "3.11"
+filelock==3.16.0 ; python_version >= "3.10" and python_version < "3.13"
+frozenlist==1.4.1 ; python_version >= "3.10" and python_version < "3.13"
+fsspec==2024.9.0 ; python_version >= "3.10" and python_version < "3.13"
+huggingface-hub==0.24.6 ; python_version >= "3.10" and python_version < "3.13"
+idna==3.8 ; python_version >= "3.10" and python_version < "3.13"
+iniconfig==2.0.0 ; python_version >= "3.10" and python_version < "3.13"
+multidict==6.1.0 ; python_version >= "3.10" and python_version < "3.13"
+numpy==1.26.4 ; python_version >= "3.10" and python_version < "3.13"
+packaging==24.1 ; python_version >= "3.10" and python_version < "3.13"
+pluggy==1.5.0 ; python_version >= "3.10" and python_version < "3.13"
+pydantic-core==2.23.3 ; python_version >= "3.10" and python_version < "3.13"
+pydantic==2.9.1 ; python_version >= "3.10" and python_version < "3.13"
+pytest-asyncio==0.21.2 ; python_version >= "3.10" and python_version < "3.13"
+pytest==7.4.4 ; python_version >= "3.10" and python_version < "3.13"
+pywin32==306 ; python_version >= "3.10" and python_version < "3.13" and sys_platform == "win32"
+pyyaml==6.0.2 ; python_version >= "3.10" and python_version < "3.13"
+requests==2.32.3 ; python_version >= "3.10" and python_version < "3.13"
+syrupy==4.7.1 ; python_version >= "3.10" and python_version < "3.13"
+text-generation==0.6.1 ; python_version >= "3.10" and python_version < "3.13"
+tomli==2.0.1 ; python_version >= "3.10" and python_version < "3.11"
+tqdm==4.66.5 ; python_version >= "3.10" and python_version < "3.13"
+typing-extensions==4.12.2 ; python_version >= "3.10" and python_version < "3.13"
+urllib3==2.2.2 ; python_version >= "3.10" and python_version < "3.13"
+yarl==1.11.1 ; python_version >= "3.10" and python_version < "3.13"
diff --git a/launcher/src/main.rs b/launcher/src/main.rs
index 8e5c9dcd..2cdccfe0 100644
--- a/launcher/src/main.rs
+++ b/launcher/src/main.rs
@@ -1843,9 +1843,8 @@ fn main() -> Result<(), LauncherError> {
         shutdown.clone(),
         &shutdown_receiver,
     )
-    .map_err(|err| {
+    .inspect_err(|_| {
         shutdown_shards(shutdown.clone(), &shutdown_receiver);
-        err
     })?;
 
     // Default exit code
diff --git a/router/src/infer/mod.rs b/router/src/infer/mod.rs
index 240282d9..4a2341da 100644
--- a/router/src/infer/mod.rs
+++ b/router/src/infer/mod.rs
@@ -336,6 +336,8 @@ pub enum InferError {
     ValidationError(#[from] ValidationError),
     #[error("Incomplete generation")]
     IncompleteGeneration,
+    #[error("Incomplete generation stream")]
+    IncompleteGenerationStream,
     #[error("Template error: {0}")]
     TemplateError(#[from] minijinja::Error),
     #[error("Missing template vatiable: {0}")]
@@ -351,6 +353,7 @@ impl InferError {
             InferError::Overloaded(_) => "overloaded",
             InferError::ValidationError(_) => "validation",
             InferError::IncompleteGeneration => "incomplete_generation",
+            InferError::IncompleteGenerationStream => "incomplete_generation_stream",
             InferError::TemplateError(_) => "template_error",
             InferError::MissingTemplateVariable(_) => "missing_template_variable",
             InferError::ToolError(_) => "tool_error",
diff --git a/router/src/server.rs b/router/src/server.rs
index fac56a77..6a04ab00 100644
--- a/router/src/server.rs
+++ b/router/src/server.rs
@@ -318,7 +318,10 @@ pub(crate) async fn generate_internal(
     metrics::counter!("tgi_request_count").increment(1);
 
     // Do not long ultra long inputs, like image payloads.
-    tracing::debug!("Input: {}", &req.inputs[..1000.min(req.inputs.len())]);
+    tracing::debug!(
+        "Input: {}",
+        &req.inputs.chars().take(1000).collect::<String>()
+    );
 
     let compute_characters = req.inputs.chars().count();
     let mut add_prompt = None;
@@ -674,7 +677,7 @@ async fn generate_stream_internal(
             // Check if generation reached the end
             // Skip if we already sent an error
             if !end_reached && !error {
-                let err = InferError::IncompleteGeneration;
+                let err = InferError::IncompleteGenerationStream;
                 metrics::counter!("tgi_request_failure", "err" => "incomplete").increment(1);
                 tracing::error!("{err}");
                 yield Ok(Event::from(err));
@@ -2555,6 +2558,7 @@ impl From<InferError> for (StatusCode, Json<ErrorResponse>) {
             InferError::Overloaded(_) => StatusCode::TOO_MANY_REQUESTS,
             InferError::ValidationError(_) => StatusCode::UNPROCESSABLE_ENTITY,
             InferError::IncompleteGeneration => StatusCode::INTERNAL_SERVER_ERROR,
+            InferError::IncompleteGenerationStream => StatusCode::INTERNAL_SERVER_ERROR,
             InferError::TemplateError(_) => StatusCode::UNPROCESSABLE_ENTITY,
             InferError::MissingTemplateVariable(_) => StatusCode::UNPROCESSABLE_ENTITY,
             InferError::ToolError(_) => StatusCode::UNPROCESSABLE_ENTITY,
diff --git a/server/Makefile-flashinfer b/server/Makefile-flashinfer
index 3abb0491..f0a27622 100644
--- a/server/Makefile-flashinfer
+++ b/server/Makefile-flashinfer
@@ -1,2 +1,2 @@
 install-flashinfer:
-	pip install flashinfer==0.1.5 -i https://flashinfer.ai/whl/cu124/torch2.4
+	pip install flashinfer==0.1.6 -i https://flashinfer.ai/whl/cu124/torch2.4
diff --git a/server/text_generation_server/models/flash_causal_lm.py b/server/text_generation_server/models/flash_causal_lm.py
index 9a60d06c..65180499 100644
--- a/server/text_generation_server/models/flash_causal_lm.py
+++ b/server/text_generation_server/models/flash_causal_lm.py
@@ -515,6 +515,7 @@ class FlashCausalLMBatch(Batch):
         dtype: torch.dtype,
         device: torch.device,
     ) -> "FlashCausalLMBatch":
+        assert len(pb.requests) > 0
         batch_tokenized_inputs = cls.batch_tokenized_inputs(pb.requests, tokenizer)
         return cls.from_tokenized(pb, tokenizer, batch_tokenized_inputs, dtype, device)
 
@@ -640,6 +641,7 @@ class FlashCausalLMBatch(Batch):
         adapter_segments = torch.tensor(
             adapter_segments, dtype=torch.int32, device=device
         )
+        # assert sum(len(b) for b in block_tables) == (block_tables_tensor != 0).sum()
 
         return type(self)(
             batch_id=self.batch_id,
@@ -834,6 +836,8 @@ class FlashCausalLMBatch(Batch):
 
         start_slots = torch.concat(start_slots)
 
+        # assert sum(len(b) for b in block_tables) == (block_tables_tensor != 0).sum()
+
         next_token_chooser = HeterogeneousNextTokenChooser.from_pb(
             next_token_chooser_parameters,
             dtype=batches[0].next_token_chooser.dtype,
@@ -1150,27 +1154,6 @@ class FlashCausalLM(Model):
                 input_lengths=input_lengths,
                 prefix_lens=prefix_lengths,
             )
-
-        self.cuda_graphs[bs] = {
-            "input_ids": input_ids,
-            "position_ids": position_ids,
-            "kv_cache": self.kv_cache,
-            "block_tables": block_tables,
-            "slots": slots,
-            "input_lengths": input_lengths_tensor,
-            "prefix_lengths": prefix_lengths_tensor,
-        }
-        seqlen = Seqlen(
-            input_lengths=input_lengths_tensor,
-            prefix_lengths=prefix_lengths_tensor,
-            cu_seqlen_q=None,
-            max_q=1,
-            max_k=max_s,
-        )
-        graph = torch.cuda.CUDAGraph()
-        self.cuda_graphs[bs]["graph"] = graph
-
-        if ATTENTION == "flashinfer":
             from text_generation_server.layers.attention.flashinfer import (
                 create_decode_state_cuda_graphs,
             )
@@ -1187,21 +1170,38 @@ class FlashCausalLM(Model):
                 num_heads=self.num_heads,
                 num_kv_heads=self.num_kv_heads,
             )
-            self.cuda_graphs[bs]["state"] = state
         else:
             state = None
 
+        graph = torch.cuda.CUDAGraph()
+        self.cuda_graphs[bs] = {
+            "input_ids": input_ids,
+            "position_ids": position_ids,
+            "kv_cache": self.kv_cache,
+            "block_tables": block_tables,
+            "slots": slots,
+            "input_lengths": input_lengths_tensor,
+            "prefix_lengths": prefix_lengths_tensor,
+            "state": state,
+            "graph": graph,
+        }
+
         torch.cuda.synchronize()
         # Run once outside to warmup
         with self._forward_context(
             block_tables=block_tables,
             cu_seqlen_prefill=None,
-            input_lengths=input_lengths,
             input_lengths_tensor=input_lengths_tensor,
             state=state,
-            prefix_lens=prefix_lengths,
             prefix_lens_tensor=prefix_lengths_tensor,
         ):
+            seqlen = Seqlen(
+                input_lengths=input_lengths_tensor,
+                prefix_lengths=prefix_lengths_tensor,
+                cu_seqlen_q=None,
+                max_q=1,
+                max_k=max_s,
+            )
             self.model.forward(
                 input_ids=input_ids,
                 position_ids=position_ids,
@@ -1214,6 +1214,7 @@ class FlashCausalLM(Model):
                 prefill_cache_indices=None,
                 lm_head_indices=None,
             )
+            del seqlen
 
             torch.cuda.synchronize()
 
@@ -1479,9 +1480,7 @@ class FlashCausalLM(Model):
             with self._forward_context(
                 block_tables=block_tables,
                 cu_seqlen_prefill=cu_seqlen_prefill,
-                input_lengths=batch.input_lengths,
-                input_lengths_tensor=input_lengths + prefix_lens_tensor,
-                prefix_lens=batch.prefix_lens,
+                input_lengths_tensor=input_lengths,
                 prefix_lens_tensor=prefix_lens_tensor,
             ):
                 max_k = (input_lengths + prefix_lens_tensor).max().item()
@@ -1519,26 +1518,28 @@ class FlashCausalLM(Model):
                 input_lengths=batch.input_lengths,
                 prefix_lens=batch.prefix_lens,
             )
+            # assert block_tables.shape[0] >= slots.shape[0]
             cuda_graph["block_tables"][: block_tables.shape[0]] = block_tables
         else:
             cuda_graph["block_tables"][
                 : block_tables.shape[0], : block_tables.shape[1]
             ] = block_tables
-        cuda_graph["slots"].fill_(-1)
+
+        # XXX: This is working only because block 0 is reserved for the healthcheck
+        # so it doesn't matter if we override it with bogus values.
+        cuda_graph["slots"].fill_(0)
         cuda_graph["slots"][: slots.shape[0]] = slots
         cuda_graph["input_lengths"].zero_()
-        cuda_graph["input_lengths"][: input_lengths.shape[0]] = (
-            input_lengths + prefix_lens_tensor
-        )
+        cuda_graph["input_lengths"][: input_lengths.shape[0]] = input_lengths
+        cuda_graph["prefix_lengths"].zero_()
+        cuda_graph["prefix_lengths"][: prefix_lens_tensor.shape[0]] = prefix_lens_tensor
 
         with self._forward_context(
             block_tables=cuda_graph["block_tables"],
             cu_seqlen_prefill=None,
-            input_lengths=batch.input_lengths,
             input_lengths_tensor=cuda_graph["input_lengths"],
-            prefix_lens=batch.prefix_lens,
-            prefix_lens_tensor=prefix_lens_tensor,
-            state=cuda_graph.get("state"),
+            prefix_lens_tensor=cuda_graph["prefix_lengths"],
+            state=cuda_graph["state"],
         ):
             # Replay the graph
             cuda_graph["graph"].replay()
@@ -1767,7 +1768,7 @@ class FlashCausalLM(Model):
             left = 0
 
             if n_accepted_ids > 1:
-                log_master(logger.debug, f"Speculated ids {n_accepted_ids - 1}")
+                log_master(logger.debug, f"speculated ids {n_accepted_ids - 1}")
 
             current_stopped = False
             for j in range(index, index + n_accepted_ids):
@@ -1922,9 +1923,7 @@ class FlashCausalLM(Model):
         *,
         block_tables: torch.Tensor,
         cu_seqlen_prefill: Optional[torch.Tensor],
-        input_lengths: List[int],
         input_lengths_tensor: torch.Tensor,
-        prefix_lens: List[int],
         prefix_lens_tensor: torch.Tensor,
         state: Optional[Any] = None,
     ) -> ContextManager:
@@ -1950,7 +1949,7 @@ class FlashCausalLM(Model):
                 # ),
                 block_tables=block_tables,
                 cu_seqlens=cu_seqlen_prefill,
-                input_lengths=input_lengths_tensor,
+                input_lengths=input_lengths_tensor + prefix_lens_tensor,
                 num_heads=self.num_heads,
                 num_kv_heads=self.num_kv_heads,
                 head_size=self.head_size,
@@ -1960,7 +1959,7 @@ class FlashCausalLM(Model):
             assert input_lengths_tensor is not None
             return use_decode_state(
                 state=state if state is not None else self.decode_state,
-                input_lengths=input_lengths_tensor,
+                input_lengths=input_lengths_tensor + prefix_lens_tensor,
                 block_tables=block_tables,
                 num_heads=self.num_heads,
                 num_kv_heads=self.num_kv_heads,
diff --git a/server/text_generation_server/models/vlm_causal_lm.py b/server/text_generation_server/models/vlm_causal_lm.py
index d6cb36fa..7f7d2e4d 100644
--- a/server/text_generation_server/models/vlm_causal_lm.py
+++ b/server/text_generation_server/models/vlm_causal_lm.py
@@ -367,9 +367,7 @@ class VlmCausalLM(FlashCausalLM):
             with self._forward_context(
                 block_tables=block_tables,
                 cu_seqlen_prefill=cu_seqlen_prefill,
-                input_lengths=batch.input_lengths,
                 input_lengths_tensor=input_lengths,
-                prefix_lens=batch.prefix_lens,
                 prefix_lens_tensor=prefix_lens_tensor,
             ):
                 max_k = (input_lengths + prefix_lens_tensor).max().item()

From dae3bf1d87c13f1186eead4afbfb4a29e4a59f84 Mon Sep 17 00:00:00 2001
From: Nicolas Patry <patry.nicolas@protonmail.com>
Date: Wed, 11 Sep 2024 22:41:56 +0200
Subject: [PATCH 28/37] Fix tokenization yi (#2507)

* Fixing odd tokenization self modifications on the Rust side (load and
resave in Python).

* Fixing the builds ?

* Fix the gh action?

* Fixing the location ?

* Validation is odd.

* Try a faster runner

* Upgrade python version.

* Remove sccache

* No sccache.

* Getting libpython maybe ?

* List stuff.

* Monkey it up.

* have no idea at this point

* Tmp.

* Shot in the dark.

* Tmate the hell out of this.

* Desperation.

* WTF.

* -y.

* Apparently 3.10 is not available anymore.

* Updating the dockerfile to make libpython discoverable at runtime too.

* Put back rust tests.

* Why do we want mkl on AMD ?

* Forcing 3.11 ?
---
 .github/workflows/tests.yaml                  |  40 +-----
 Cargo.lock                                    | 120 +++++++++++++++++-
 Cargo.toml                                    |   2 +-
 Dockerfile                                    |  40 +++---
 Dockerfile_amd                                |  27 ++--
 Dockerfile_intel                              |   5 +-
 backends/v3/src/queue.rs                      |   1 +
 router/Cargo.toml                             |   1 +
 router/src/server.rs                          | 113 +++++------------
 server/tests/models/test_bloom.py             |   2 +-
 server/tests/models/test_causal_lm.py         |   2 +-
 server/tests/models/test_seq2seq_lm.py        |   2 +-
 .../models/flash_causal_lm.py                 |   2 +
 13 files changed, 205 insertions(+), 152 deletions(-)

diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
index 6faabe3b..5ad0fd6a 100644
--- a/.github/workflows/tests.yaml
+++ b/.github/workflows/tests.yaml
@@ -17,19 +17,15 @@ concurrency:
 
 jobs:
   run_tests:
-    runs-on: ubuntu-latest
-
-    env:
-      SCCACHE_GHA_ENABLED: "on"
-      RUSTC_WRAPPER: /usr/local/bin/sccache
-      SCCACHE: 0.3.3
-
+    runs-on:
+      group: aws-highmemory-32-plus-priv
     steps:
       - uses: actions/checkout@v2
       - name: Set up Python
-        uses: actions/setup-python@v1
+        uses: actions/setup-python@v4
+        id: python
         with:
-          python-version: 3.9
+          python-version: 3.11
       - name: Install Rust
         uses: actions-rs/toolchain@v1
         with:
@@ -44,30 +40,9 @@ jobs:
         run: |
           sudo rm -rf /usr/local/lib/android # will release about 10 GB if you don't need Android
           sudo rm -rf /usr/share/dotnet # will release about 20GB if you don't need .NET
-      - name: Install sccache
-        run: |
-          curl -fsSL https://github.com/mozilla/sccache/releases/download/v$SCCACHE/sccache-v$SCCACHE-x86_64-unknown-linux-musl.tar.gz | tar -xzv --strip-components=1 -C /usr/local/bin sccache-v$SCCACHE-x86_64-unknown-linux-musl/sccache
-          chmod +x /usr/local/bin/sccache
-      - name: configure sccache
-        uses: actions/github-script@v6
-        with:
-          script: |
-            core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || '');
-            core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || '');
-            core.exportVariable('SCCACHE_GHA_CACHE_TO', 'sccache-${{runner.os}}-${{github.ref_name}}');
-            core.exportVariable('SCCACHE_GHA_CACHE_FROM', 'sccache-${{runner.os}}-main,sccache-${{runner.os}}-');
-      - name: cargo registry cache
-        uses: actions/cache@v3
-        with:
-          key: cargo-${{ runner.os }}-${{ hashFiles('**/Cargo.toml') }}-${{ github.sha }}
-          restore-keys: |
-            cargo-${{ runner.os }}-${{ hashFiles('**/Cargo.toml') }}-
-            cargo-${{ runner.os }}-
-          path: |
-            ~/.cargo/registry
-            ~/.cargo/git
       - name: Install
         run: |
+          sudo apt install python3.11-dev -y
           make install-cpu
       - name: Run server tests
         run: |
@@ -82,6 +57,3 @@ jobs:
       - name: Run Rust tests
         run: |
           cargo test
-      - name: sccache stats
-        run: |
-          /usr/local/bin/sccache --show-stats
diff --git a/Cargo.lock b/Cargo.lock
index 00c7f005..06a61853 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2118,6 +2118,15 @@ version = "2.7.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
 
+[[package]]
+name = "memoffset"
+version = "0.9.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a"
+dependencies = [
+ "autocfg",
+]
+
 [[package]]
 name = "metrics"
 version = "0.23.0"
@@ -3112,6 +3121,69 @@ dependencies = [
  "prost 0.12.6",
 ]
 
+[[package]]
+name = "pyo3"
+version = "0.22.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "831e8e819a138c36e212f3af3fd9eeffed6bf1510a805af35b0edee5ffa59433"
+dependencies = [
+ "cfg-if",
+ "indoc",
+ "libc",
+ "memoffset",
+ "once_cell",
+ "portable-atomic",
+ "pyo3-build-config",
+ "pyo3-ffi",
+ "pyo3-macros",
+ "unindent",
+]
+
+[[package]]
+name = "pyo3-build-config"
+version = "0.22.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e8730e591b14492a8945cdff32f089250b05f5accecf74aeddf9e8272ce1fa8"
+dependencies = [
+ "once_cell",
+ "target-lexicon",
+]
+
+[[package]]
+name = "pyo3-ffi"
+version = "0.22.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5e97e919d2df92eb88ca80a037969f44e5e70356559654962cbb3316d00300c6"
+dependencies = [
+ "libc",
+ "pyo3-build-config",
+]
+
+[[package]]
+name = "pyo3-macros"
+version = "0.22.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eb57983022ad41f9e683a599f2fd13c3664d7063a3ac5714cae4b7bee7d3f206"
+dependencies = [
+ "proc-macro2",
+ "pyo3-macros-backend",
+ "quote",
+ "syn 2.0.76",
+]
+
+[[package]]
+name = "pyo3-macros-backend"
+version = "0.22.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ec480c0c51ddec81019531705acac51bcdbeae563557c982aa8263bb96880372"
+dependencies = [
+ "heck 0.5.0",
+ "proc-macro2",
+ "pyo3-build-config",
+ "quote",
+ "syn 2.0.76",
+]
+
 [[package]]
 name = "qoi"
 version = "0.4.1"
@@ -4068,7 +4140,7 @@ dependencies = [
  "pkg-config",
  "text-generation-router",
  "thiserror",
- "tokenizers",
+ "tokenizers 0.19.1",
  "tokio",
  "tokio-stream",
  "tracing",
@@ -4091,7 +4163,7 @@ dependencies = [
  "tabled",
  "text-generation-client",
  "thiserror",
- "tokenizers",
+ "tokenizers 0.20.0",
  "tokio",
  "tracing",
  "tracing-subscriber",
@@ -4161,6 +4233,7 @@ dependencies = [
  "once_cell",
  "opentelemetry 0.20.0",
  "opentelemetry-otlp",
+ "pyo3",
  "rand",
  "regex",
  "reqwest",
@@ -4168,7 +4241,7 @@ dependencies = [
  "serde_json",
  "sysinfo",
  "thiserror",
- "tokenizers",
+ "tokenizers 0.20.0",
  "tokio",
  "tokio-stream",
  "tower-http",
@@ -4219,7 +4292,7 @@ dependencies = [
  "slotmap",
  "text-generation-router",
  "thiserror",
- "tokenizers",
+ "tokenizers 0.20.0",
  "tokio",
  "tokio-stream",
  "tonic 0.10.2",
@@ -4374,6 +4447,39 @@ dependencies = [
  "unicode_categories",
 ]
 
+[[package]]
+name = "tokenizers"
+version = "0.20.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c8a24d7f7d6be5b9d1377418b893ab1808af0074f5d1bb2c64784452ddd2aa70"
+dependencies = [
+ "aho-corasick",
+ "derive_builder",
+ "esaxx-rs",
+ "getrandom",
+ "hf-hub",
+ "indicatif",
+ "itertools 0.12.1",
+ "lazy_static",
+ "log",
+ "macro_rules_attribute",
+ "monostate",
+ "onig",
+ "paste",
+ "rand",
+ "rayon",
+ "rayon-cond",
+ "regex",
+ "regex-syntax 0.8.4",
+ "serde",
+ "serde_json",
+ "spm_precompiled",
+ "thiserror",
+ "unicode-normalization-alignments",
+ "unicode-segmentation",
+ "unicode_categories",
+]
+
 [[package]]
 name = "tokio"
 version = "1.39.3"
@@ -4839,6 +4945,12 @@ version = "0.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e"
 
+[[package]]
+name = "unindent"
+version = "0.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce"
+
 [[package]]
 name = "untrusted"
 version = "0.7.1"
diff --git a/Cargo.toml b/Cargo.toml
index 79fda15d..a50bba24 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -25,7 +25,7 @@ homepage = "https://github.com/huggingface/text-generation-inference"
 
 [workspace.dependencies]
 base64 = "0.22.0"
-tokenizers = { version = "0.19.1", features = ["http"] }
+tokenizers = { version = "0.20.0", features = ["http"] }
 hf-hub = { version = "0.3.1", features = ["tokio"] }
 metrics = { version = "0.23.0" }
 metrics-exporter-prometheus = { version = "0.15.1", features = [] }
diff --git a/Dockerfile b/Dockerfile
index 0d0e89b1..37ced781 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -13,10 +13,13 @@ COPY benchmark benchmark
 COPY router router
 COPY backends backends
 COPY launcher launcher
+
 RUN cargo chef prepare --recipe-path recipe.json
 
 FROM chef AS builder
 
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+    python3.11-dev
 RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \
     curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \
     unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \
@@ -37,6 +40,7 @@ COPY router router
 COPY backends backends
 COPY launcher launcher
 RUN cargo build --profile release-opt
+RUN cargo build --profile release-opt
 
 # Python builder
 # Adapted from: https://github.com/pytorch/pytorch/blob/master/Dockerfile
@@ -45,7 +49,7 @@ FROM nvidia/cuda:12.4.1-devel-ubuntu22.04 AS pytorch-install
 # NOTE: When updating PyTorch version, beware to remove `pip install nvidia-nccl-cu12==2.22.3` below in the Dockerfile. Context: https://github.com/huggingface/text-generation-inference/pull/2099
 ARG PYTORCH_VERSION=2.4.0
 
-ARG PYTHON_VERSION=3.10
+ARG PYTHON_VERSION=3.11
 # Keep in sync with `server/pyproject.toml
 ARG CUDA_VERSION=12.4
 ARG MAMBA_VERSION=24.3.0-0
@@ -216,33 +220,33 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-ins
 COPY --from=pytorch-install /opt/conda /opt/conda
 
 # Copy build artifacts from flash attention builder
-COPY --from=flash-att-builder /usr/src/flash-attention/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
-COPY --from=flash-att-builder /usr/src/flash-attention/csrc/layer_norm/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
-COPY --from=flash-att-builder /usr/src/flash-attention/csrc/rotary/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
+COPY --from=flash-att-builder /usr/src/flash-attention/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages
+COPY --from=flash-att-builder /usr/src/flash-attention/csrc/layer_norm/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages
+COPY --from=flash-att-builder /usr/src/flash-attention/csrc/rotary/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages
 
 # Copy build artifacts from flash attention v2 builder
-COPY --from=flash-att-v2-builder /opt/conda/lib/python3.10/site-packages/flash_attn_2_cuda.cpython-310-x86_64-linux-gnu.so /opt/conda/lib/python3.10/site-packages
+COPY --from=flash-att-v2-builder /opt/conda/lib/python3.11/site-packages/flash_attn_2_cuda.cpython-311-x86_64-linux-gnu.so /opt/conda/lib/python3.11/site-packages
 
 # Copy build artifacts from custom kernels builder
-COPY --from=custom-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
+COPY --from=custom-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages
 # Copy build artifacts from exllama kernels builder
-COPY --from=exllama-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
+COPY --from=exllama-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages
 # Copy build artifacts from exllamav2 kernels builder
-COPY --from=exllamav2-kernels-builder /usr/src/exllamav2/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
+COPY --from=exllamav2-kernels-builder /usr/src/exllamav2/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages
 # Copy build artifacts from awq kernels builder
-COPY --from=awq-kernels-builder /usr/src/llm-awq/awq/kernels/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
+COPY --from=awq-kernels-builder /usr/src/llm-awq/awq/kernels/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages
 # Copy build artifacts from eetq kernels builder
-COPY --from=eetq-kernels-builder /usr/src/eetq/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
+COPY --from=eetq-kernels-builder /usr/src/eetq/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages
 # Copy build artifacts from lorax punica kernels builder
-COPY --from=lorax-punica-builder /usr/src/lorax-punica/server/punica_kernels/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
+COPY --from=lorax-punica-builder /usr/src/lorax-punica/server/punica_kernels/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages
 # Copy build artifacts from fbgemm builder
-COPY --from=fbgemm-builder /usr/src/fbgemm/fbgemm_gpu/_skbuild/linux-x86_64-3.10/cmake-install /opt/conda/lib/python3.10/site-packages
+COPY --from=fbgemm-builder /usr/src/fbgemm/fbgemm_gpu/_skbuild/linux-x86_64-3.11/cmake-install /opt/conda/lib/python3.11/site-packages
 # Copy build artifacts from vllm builder
-COPY --from=vllm-builder /usr/src/vllm/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
+COPY --from=vllm-builder /usr/src/vllm/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages
 # Copy build artifacts from mamba builder
-COPY --from=mamba-builder /usr/src/mamba/build/lib.linux-x86_64-cpython-310/ /opt/conda/lib/python3.10/site-packages
-COPY --from=mamba-builder /usr/src/causal-conv1d/build/lib.linux-x86_64-cpython-310/ /opt/conda/lib/python3.10/site-packages
-COPY --from=flashinfer-builder /opt/conda/lib/python3.10/site-packages/flashinfer/ /opt/conda/lib/python3.10/site-packages/flashinfer/
+COPY --from=mamba-builder /usr/src/mamba/build/lib.linux-x86_64-cpython-311/ /opt/conda/lib/python3.11/site-packages
+COPY --from=mamba-builder /usr/src/causal-conv1d/build/lib.linux-x86_64-cpython-311/ /opt/conda/lib/python3.11/site-packages
+COPY --from=flashinfer-builder /opt/conda/lib/python3.11/site-packages/flashinfer/ /opt/conda/lib/python3.11/site-packages/flashinfer/
 
 # Install flash-attention dependencies
 RUN pip install einops --no-cache-dir
@@ -257,7 +261,9 @@ RUN cd server && \
     pip install ".[bnb, accelerate, marlin, quantize, peft, outlines]" --no-cache-dir && \
     pip install nvidia-nccl-cu12==2.22.3
 
-ENV LD_PRELOAD=/opt/conda/lib/python3.10/site-packages/nvidia/nccl/lib/libnccl.so.2
+ENV LD_PRELOAD=/opt/conda/lib/python3.11/site-packages/nvidia/nccl/lib/libnccl.so.2
+# Required to find libpython within the rust binaries
+ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/opt/conda/lib/"
 # This is needed because exl2 tries to load flash-attn
 # And fails with our builds.
 ENV EXLLAMA_NO_FLASH_ATTN=1
diff --git a/Dockerfile_amd b/Dockerfile_amd
index 8cb699dd..a79aae48 100644
--- a/Dockerfile_amd
+++ b/Dockerfile_amd
@@ -17,6 +17,8 @@ RUN cargo chef prepare --recipe-path recipe.json
 
 FROM chef AS builder
 
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+    python3.11-dev
 RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \
     curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \
     unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \
@@ -64,14 +66,14 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-ins
     hipsolver-dev \
     rccl-dev \
     cmake \
-    python3-dev && \
+    python3.11-dev && \
     rm -rf /var/lib/apt/lists/*
 
 # Keep in sync with `server/pyproject.toml
 ARG MAMBA_VERSION=23.1.0-1
 ARG PYTORCH_VERSION='2.3.0'
 ARG ROCM_VERSION='6.0.2'
-ARG PYTHON_VERSION='3.10.10'
+ARG PYTHON_VERSION='3.11.10'
 # Automatically set by buildx
 ARG TARGETPLATFORM
 ENV PATH /opt/conda/bin:$PATH
@@ -89,10 +91,18 @@ RUN chmod +x ~/mambaforge.sh && \
     mamba init && \
     rm ~/mambaforge.sh
 
+# RUN conda install intel::mkl-static intel::mkl-include
+# Install pytorch
+# On arm64 we exit with an error code
+RUN case ${TARGETPLATFORM} in \
+         "linux/arm64")  exit 1 ;; \
+         *)              /opt/conda/bin/conda update -y conda &&  \
+                         /opt/conda/bin/conda install -y "python=${PYTHON_VERSION}" ;; \
+    esac && \
+    /opt/conda/bin/conda clean -ya
 # Install flash-attention, torch dependencies
 RUN pip install numpy einops ninja --no-cache-dir
 
-RUN conda install intel::mkl-static intel::mkl-include
 RUN pip uninstall -y triton && \
     git clone --depth 1 --single-branch https://github.com/ROCm/triton.git && \
     cd triton/python && \
@@ -172,19 +182,19 @@ ENV HF_HOME=/data \
     PORT=80
 
 # Copy builds artifacts from vllm builder
-COPY --from=vllm-builder /usr/src/vllm/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
+COPY --from=vllm-builder /usr/src/vllm/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages
 
 # Copy build artifacts from flash attention v2 builder
-COPY --from=flash-att-v2-builder /usr/src/flash-attention-v2/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
+COPY --from=flash-att-v2-builder /usr/src/flash-attention-v2/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages
 
 # Copy build artifacts from custom kernels builder
-COPY --from=custom-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
+COPY --from=custom-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages
 
 # Copy build artifacts from exllama kernels builder
-COPY --from=exllama-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
+COPY --from=exllama-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages
 
 # Copy build artifacts from exllamav2 kernels builder
-COPY --from=exllamav2-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
+COPY --from=exllamav2-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages
 
 # Install server
 COPY proto proto
@@ -201,6 +211,7 @@ COPY --from=builder /usr/src/target/release-opt/text-generation-benchmark /usr/l
 COPY --from=builder /usr/src/target/release-opt/text-generation-router /usr/local/bin/text-generation-router
 # Install launcher
 COPY --from=builder /usr/src/target/release-opt/text-generation-launcher /usr/local/bin/text-generation-launcher
+ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/opt/conda/lib/"
 
 # AWS Sagemaker compatible image
 FROM base AS sagemaker
diff --git a/Dockerfile_intel b/Dockerfile_intel
index 0cda4d4b..d7ac09af 100644
--- a/Dockerfile_intel
+++ b/Dockerfile_intel
@@ -18,6 +18,8 @@ RUN cargo chef prepare --recipe-path recipe.json
 
 FROM chef AS builder
 
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+    python3.11-dev
 RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \
     curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \
     unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \
@@ -114,7 +116,7 @@ ENV HUGGINGFACE_HUB_CACHE=/data \
     PORT=80
 
 ARG MAMBA_VERSION=23.1.0-1
-ARG PYTHON_VERSION='3.10.10'
+ARG PYTHON_VERSION='3.11.10'
 # Automatically set by buildx
 ARG TARGETPLATFORM
 ENV PATH /opt/conda/bin:$PATH
@@ -153,6 +155,7 @@ ENV CCL_ROOT=/opt/conda/lib/python3.10/site-packages/oneccl_bindings_for_pytorch
 ENV I_MPI_ROOT=/opt/conda/lib/python3.10/site-packages/oneccl_bindings_for_pytorch
 ENV FI_PROVIDER_PATH=/opt/conda/lib/python3.10/site-packages/oneccl_bindings_for_pytorch/opt/mpi/libfabric/lib/prov:/usr/lib64/libfabric
 ENV LD_LIBRARY_PATH=/opt/conda/lib/python3.10/site-packages/oneccl_bindings_for_pytorch/opt/mpi/libfabric/lib:/opt/conda/lib/python3.10/site-packages/oneccl_bindings_for_pytorch/lib
+ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/opt/conda/lib/"
 
 # Install server
 COPY proto proto
diff --git a/backends/v3/src/queue.rs b/backends/v3/src/queue.rs
index 978a495c..2bbb6753 100644
--- a/backends/v3/src/queue.rs
+++ b/backends/v3/src/queue.rs
@@ -357,6 +357,7 @@ impl State {
             let block_allocation = if let (Some((tokens, input_ids)), Some(block_allocator)) =
                 (block_allocation, &self.block_allocator)
             {
+                tracing::debug!("Allocating {tokens} with {input_ids:?}");
                 match block_allocator.allocate(tokens, input_ids).await {
                     None => {
                         // Entry is over budget
diff --git a/router/Cargo.toml b/router/Cargo.toml
index 5c328e8a..6a752db6 100644
--- a/router/Cargo.toml
+++ b/router/Cargo.toml
@@ -61,6 +61,7 @@ uuid = { version = "1.9.1", default-features = false, features = [
 ] }
 csv = "1.3.0"
 ureq = "=2.9"
+pyo3 = { version = "0.22.2", features = ["auto-initialize"] }
 
 
 [build-dependencies]
diff --git a/router/src/server.rs b/router/src/server.rs
index 6a04ab00..8bd49b93 100644
--- a/router/src/server.rs
+++ b/router/src/server.rs
@@ -41,6 +41,7 @@ use hf_hub::api::tokio::{Api, ApiBuilder, ApiRepo};
 use hf_hub::{Cache, Repo, RepoType};
 use http::header::AUTHORIZATION;
 use metrics_exporter_prometheus::{Matcher, PrometheusBuilder, PrometheusHandle};
+use pyo3::types::IntoPyDict;
 use serde_json::Value;
 use std::convert::Infallible;
 use std::fs::File;
@@ -48,7 +49,6 @@ use std::io::BufReader;
 use std::net::{IpAddr, Ipv4Addr, SocketAddr};
 use std::path::{Path, PathBuf};
 use thiserror::Error;
-use tokenizers::processors::template::TemplateProcessing;
 use tokenizers::Tokenizer;
 use tokio::select;
 use tokio::signal;
@@ -1860,18 +1860,34 @@ pub async fn run(
     });
 
     let tokenizer: Option<Tokenizer> = tokenizer_filename.and_then(|filename| {
-        let mut tokenizer = Tokenizer::from_file(filename).ok();
-        if let Some(tokenizer) = &mut tokenizer {
-            if let Some(class) = &tokenizer_config.tokenizer_class {
-                if class == "LlamaTokenizer" || class == "LlamaTokenizerFast"{
-                    if let Ok(post_processor) = create_post_processor(tokenizer, &tokenizer_config) {
-                        tracing::info!("Overriding LlamaTokenizer with TemplateProcessing to follow python override defined in https://github.com/huggingface/transformers/blob/4aa17d00690b7f82c95bb2949ea57e22c35b4336/src/transformers/models/llama/tokenization_llama_fast.py#L203-L205");
-                        tokenizer.with_post_processor(post_processor);
-                    }
-                }
-            }
-        }
-        tokenizer
+        use pyo3::prelude::*;
+        let convert = pyo3::Python::with_gil(|py| -> PyResult<()> {
+            let transformers = py.import_bound("transformers")?;
+            let auto = transformers.getattr("AutoTokenizer")?;
+            let from_pretrained = auto.getattr("from_pretrained")?;
+            let args = (tokenizer_name.to_string(),);
+            let kwargs = [(
+                "revision",
+                revision.clone().unwrap_or_else(|| "main".to_string()),
+            )]
+            .into_py_dict_bound(py);
+            let tokenizer = from_pretrained.call(args, Some(&kwargs))?;
+            let save = tokenizer.getattr("save_pretrained")?;
+            let args = ("out".to_string(),);
+            save.call1(args)?;
+            Ok(())
+        })
+        .inspect_err(|err| {
+            tracing::error!("Failed to import python tokenizer {err}");
+        });
+        let filename = if convert.is_ok() {
+            // If we have correctly loaded and resaved with transformers
+            // We might have modified the tokenizer.json according to transformers
+            "out/tokenizer.json".into()
+        } else {
+            filename
+        };
+        Tokenizer::from_file(filename).ok()
     });
 
     let config: Option<Config> = config_filename.and_then(|filename| {
@@ -2591,77 +2607,6 @@ pub enum WebServerError {
     Axum(#[from] axum::BoxError),
 }
 
-/// Create a post_processor for the LlamaTokenizer
-fn create_post_processor(
-    tokenizer: &Tokenizer,
-    tokenizer_config: &HubTokenizerConfig,
-) -> Result<TemplateProcessing, tokenizers::processors::template::TemplateProcessingBuilderError> {
-    let add_bos_token = tokenizer_config.add_bos_token.unwrap_or(true);
-    let add_eos_token = tokenizer_config.add_eos_token.unwrap_or(false);
-
-    let bos_token = tokenizer_config.bos_token.as_ref();
-    let eos_token = tokenizer_config.eos_token.as_ref();
-
-    if add_bos_token && bos_token.is_none() {
-        panic!("add_bos_token = true but bos_token is None");
-    }
-
-    if add_eos_token && eos_token.is_none() {
-        panic!("add_eos_token = true but eos_token is None");
-    }
-
-    let mut single = Vec::new();
-    let mut pair = Vec::new();
-    let mut special_tokens = Vec::new();
-
-    if add_bos_token {
-        if let Some(bos) = bos_token {
-            let bos_token_id = tokenizer
-                .token_to_id(bos.as_str())
-                .expect("Should have found the bos token id");
-            special_tokens.push((bos.as_str(), bos_token_id));
-            single.push(format!("{}:0", bos.as_str()));
-            pair.push(format!("{}:0", bos.as_str()));
-        }
-    }
-
-    single.push("$A:0".to_string());
-    pair.push("$A:0".to_string());
-
-    if add_eos_token {
-        if let Some(eos) = eos_token {
-            let eos_token_id = tokenizer
-                .token_to_id(eos.as_str())
-                .expect("Should have found the eos token id");
-            special_tokens.push((eos.as_str(), eos_token_id));
-            single.push(format!("{}:0", eos.as_str()));
-            pair.push(format!("{}:0", eos.as_str()));
-        }
-    }
-
-    if add_bos_token {
-        if let Some(bos) = bos_token {
-            pair.push(format!("{}:1", bos.as_str()));
-        }
-    }
-
-    pair.push("$B:1".to_string());
-
-    if add_eos_token {
-        if let Some(eos) = eos_token {
-            pair.push(format!("{}:1", eos.as_str()));
-        }
-    }
-
-    let post_processor = TemplateProcessing::builder()
-        .try_single(single)?
-        .try_pair(pair)?
-        .special_tokens(special_tokens)
-        .build()?;
-
-    Ok(post_processor)
-}
-
 type PreparedInput = (String, Option<GrammarType>, bool);
 
 fn prepare_chat_input(
diff --git a/server/tests/models/test_bloom.py b/server/tests/models/test_bloom.py
index 08292920..dff5239b 100644
--- a/server/tests/models/test_bloom.py
+++ b/server/tests/models/test_bloom.py
@@ -267,7 +267,7 @@ def test_batch_concatenate(
     assert next_batch.max_input_length == 3
 
     assert next_batch.requests[0] == next_batch_0.requests[0]
-    assert next_batch.requests[1:] == next_batch_1.requests
+    assert next_batch.requests[1:] == list(next_batch_1.requests)
 
     assert next_batch.next_token_choosers[0] == next_batch_0.next_token_choosers[0]
     assert next_batch.next_token_choosers[1:] == next_batch_1.next_token_choosers
diff --git a/server/tests/models/test_causal_lm.py b/server/tests/models/test_causal_lm.py
index c000ef26..11ca5efe 100644
--- a/server/tests/models/test_causal_lm.py
+++ b/server/tests/models/test_causal_lm.py
@@ -262,7 +262,7 @@ def test_batch_concatenate(
     assert next_batch.max_input_length == 3
 
     assert next_batch.requests[0] == next_batch_0.requests[0]
-    assert next_batch.requests[1:] == next_batch_1.requests
+    assert next_batch.requests[1:] == list(next_batch_1.requests)
 
     assert next_batch.next_token_choosers[0] == next_batch_0.next_token_choosers[0]
     assert next_batch.next_token_choosers[1:] == next_batch_1.next_token_choosers
diff --git a/server/tests/models/test_seq2seq_lm.py b/server/tests/models/test_seq2seq_lm.py
index 02666042..5ba7c64c 100644
--- a/server/tests/models/test_seq2seq_lm.py
+++ b/server/tests/models/test_seq2seq_lm.py
@@ -281,7 +281,7 @@ def test_batch_concatenate(
     assert next_batch.max_decoder_input_length == 3
 
     assert next_batch.requests[0] == next_batch_0.requests[0]
-    assert next_batch.requests[1:] == next_batch_1.requests
+    assert next_batch.requests[1:] == list(next_batch_1.requests)
 
     assert next_batch.next_token_choosers[0] == next_batch_0.next_token_choosers[0]
     assert next_batch.next_token_choosers[1:] == next_batch_1.next_token_choosers
diff --git a/server/text_generation_server/models/flash_causal_lm.py b/server/text_generation_server/models/flash_causal_lm.py
index 65180499..834056aa 100644
--- a/server/text_generation_server/models/flash_causal_lm.py
+++ b/server/text_generation_server/models/flash_causal_lm.py
@@ -272,6 +272,8 @@ class FlashCausalLMBatch(Batch):
                 assert prefix_len > 0
                 prefix_len -= 1
 
+            # Commented as it's costly.
+            # log_master(logger.debug, "Tokenized input ids {tokenized_input}")
             prefix_ids.append(tokenized_input[:prefix_len])
             tokenized_input = tokenized_input[prefix_len:]
 

From 69e3be20fb52a30441fd733bf7d383e72da5d46c Mon Sep 17 00:00:00 2001
From: Nicolas Patry <patry.nicolas@protonmail.com>
Date: Wed, 11 Sep 2024 22:45:19 +0200
Subject: [PATCH 29/37] Fix truffle (#2514)

* Attempting to discard the trufflehog warning.

* Attempt to fix trufflehog.
---
 flake.lock                                    |  12 +-
 .../test_flash_llama_load.json                | 222 ++++++++----------
 .../models/test_flash_llama_prefix.py         |   2 -
 3 files changed, 104 insertions(+), 132 deletions(-)

diff --git a/flake.lock b/flake.lock
index 3a515eab..a079d418 100644
--- a/flake.lock
+++ b/flake.lock
@@ -853,11 +853,11 @@
         ]
       },
       "locked": {
-        "lastModified": 1725848835,
-        "narHash": "sha256-u4lCr+tOEWhsFiww5G04U5jUNzaQJi0/ZMIDGiLeT14=",
+        "lastModified": 1726021481,
+        "narHash": "sha256-4J4E+Fh+77XIYnq2RVtg+ENWXpu6t74P0jKN/f2RQmI=",
         "owner": "oxalica",
         "repo": "rust-overlay",
-        "rev": "2ef910a6276a2f34513d18f2f826a8dea72c3b3f",
+        "rev": "1c2c120246c51a644c20ba2a36a33d3bd4860d70",
         "type": "github"
       },
       "original": {
@@ -978,11 +978,11 @@
         "nixpkgs": "nixpkgs_6"
       },
       "locked": {
-        "lastModified": 1725868835,
-        "narHash": "sha256-6OFEaFFRCG/JKkU6kHV08EPEGM1MCuKZ70NlGJcL/JY=",
+        "lastModified": 1725950569,
+        "narHash": "sha256-nJHA1SvIQbXySpL2ueNbzQOhnkQASa5tOLz/kdW0PWA=",
         "owner": "danieldk",
         "repo": "tgi-nix",
-        "rev": "87afbe21e2d2cc17e177c9965a64ba68ad7c22da",
+        "rev": "d40f3c22e9bcc5e16c94d4605cf6a7d74dd07f46",
         "type": "github"
       },
       "original": {
diff --git a/integration-tests/models/__snapshots__/test_flash_llama_prefix/test_flash_llama_load.json b/integration-tests/models/__snapshots__/test_flash_llama_prefix/test_flash_llama_load.json
index dbf3c03a..b7c6177b 100644
--- a/integration-tests/models/__snapshots__/test_flash_llama_prefix/test_flash_llama_load.json
+++ b/integration-tests/models/__snapshots__/test_flash_llama_prefix/test_flash_llama_load.json
@@ -14,7 +14,7 @@
         "usage": null
       }
     ],
-    "created": 1725525936,
+    "created": 1726085326,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -40,7 +40,7 @@
         "usage": null
       }
     ],
-    "created": 1725525943,
+    "created": 1726085331,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -66,7 +66,7 @@
         "usage": null
       }
     ],
-    "created": 1725525943,
+    "created": 1726085326,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -92,7 +92,7 @@
         "usage": null
       }
     ],
-    "created": 1725525935,
+    "created": 1726085331,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -118,7 +118,7 @@
         "usage": null
       }
     ],
-    "created": 1725525943,
+    "created": 1726085336,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -144,7 +144,7 @@
         "usage": null
       }
     ],
-    "created": 1725525941,
+    "created": 1726085331,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -170,7 +170,7 @@
         "usage": null
       }
     ],
-    "created": 1725525942,
+    "created": 1726085336,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -196,7 +196,7 @@
         "usage": null
       }
     ],
-    "created": 1725525943,
+    "created": 1726085326,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -222,7 +222,7 @@
         "usage": null
       }
     ],
-    "created": 1725525942,
+    "created": 1726085326,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -248,7 +248,7 @@
         "usage": null
       }
     ],
-    "created": 1725525942,
+    "created": 1726085336,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -274,7 +274,7 @@
         "usage": null
       }
     ],
-    "created": 1725525943,
+    "created": 1726085326,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -300,7 +300,7 @@
         "usage": null
       }
     ],
-    "created": 1725525935,
+    "created": 1726085326,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -326,7 +326,7 @@
         "usage": null
       }
     ],
-    "created": 1725525935,
+    "created": 1726085331,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -352,7 +352,7 @@
         "usage": null
       }
     ],
-    "created": 1725525936,
+    "created": 1726085326,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -378,7 +378,7 @@
         "usage": null
       }
     ],
-    "created": 1725525936,
+    "created": 1726085326,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -404,7 +404,7 @@
         "usage": null
       }
     ],
-    "created": 1725525943,
+    "created": 1726085331,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -430,7 +430,7 @@
         "usage": null
       }
     ],
-    "created": 1725525935,
+    "created": 1726085330,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -456,7 +456,7 @@
         "usage": null
       }
     ],
-    "created": 1725525941,
+    "created": 1726085331,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -482,7 +482,7 @@
         "usage": null
       }
     ],
-    "created": 1725525941,
+    "created": 1726085336,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -508,7 +508,7 @@
         "usage": null
       }
     ],
-    "created": 1725525942,
+    "created": 1726085331,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -534,7 +534,7 @@
         "usage": null
       }
     ],
-    "created": 1725525942,
+    "created": 1726085336,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -560,7 +560,7 @@
         "usage": null
       }
     ],
-    "created": 1725525942,
+    "created": 1726085331,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -586,7 +586,7 @@
         "usage": null
       }
     ],
-    "created": 1725525935,
+    "created": 1726085336,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -612,7 +612,7 @@
         "usage": null
       }
     ],
-    "created": 1725525942,
+    "created": 1726085331,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -638,7 +638,7 @@
         "usage": null
       }
     ],
-    "created": 1725525936,
+    "created": 1726085336,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -664,7 +664,7 @@
         "usage": null
       }
     ],
-    "created": 1725525943,
+    "created": 1726085336,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -675,32 +675,6 @@
       "total_tokens": 64
     }
   },
-  {
-    "choices": [
-      {
-        "finish_reason": "length",
-        "index": 0,
-        "logprobs": null,
-        "message": {
-          "content": "It seems like you're trying to connect a GraphQL",
-          "name": null,
-          "role": "assistant",
-          "tool_calls": null
-        },
-        "usage": null
-      }
-    ],
-    "created": 1725525941,
-    "id": "",
-    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
-    "object": "chat.completion",
-    "system_fingerprint": "2.2.1-dev0-native",
-    "usage": {
-      "completion_tokens": 10,
-      "prompt_tokens": 232,
-      "total_tokens": 242
-    }
-  },
   {
     "choices": [
       {
@@ -716,7 +690,7 @@
         "usage": null
       }
     ],
-    "created": 1725525941,
+    "created": 1726085336,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -742,7 +716,7 @@
         "usage": null
       }
     ],
-    "created": 1725525942,
+    "created": 1726085331,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -768,7 +742,7 @@
         "usage": null
       }
     ],
-    "created": 1725525935,
+    "created": 1726085331,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -794,7 +768,7 @@
         "usage": null
       }
     ],
-    "created": 1725525942,
+    "created": 1726085326,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -820,7 +794,7 @@
         "usage": null
       }
     ],
-    "created": 1725525943,
+    "created": 1726085331,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -846,7 +820,7 @@
         "usage": null
       }
     ],
-    "created": 1725525943,
+    "created": 1726085326,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -872,7 +846,7 @@
         "usage": null
       }
     ],
-    "created": 1725525941,
+    "created": 1726085326,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -898,7 +872,7 @@
         "usage": null
       }
     ],
-    "created": 1725525935,
+    "created": 1726085326,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -924,7 +898,7 @@
         "usage": null
       }
     ],
-    "created": 1725525942,
+    "created": 1726085336,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -950,7 +924,7 @@
         "usage": null
       }
     ],
-    "created": 1725525936,
+    "created": 1726085331,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -976,7 +950,7 @@
         "usage": null
       }
     ],
-    "created": 1725525941,
+    "created": 1726085331,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -1002,7 +976,7 @@
         "usage": null
       }
     ],
-    "created": 1725525942,
+    "created": 1726085326,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -1028,7 +1002,7 @@
         "usage": null
       }
     ],
-    "created": 1725525936,
+    "created": 1726085326,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -1054,7 +1028,7 @@
         "usage": null
       }
     ],
-    "created": 1725525941,
+    "created": 1726085326,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -1080,7 +1054,7 @@
         "usage": null
       }
     ],
-    "created": 1725525936,
+    "created": 1726085326,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -1106,7 +1080,7 @@
         "usage": null
       }
     ],
-    "created": 1725525943,
+    "created": 1726085326,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -1132,7 +1106,7 @@
         "usage": null
       }
     ],
-    "created": 1725525943,
+    "created": 1726085336,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -1158,7 +1132,7 @@
         "usage": null
       }
     ],
-    "created": 1725525943,
+    "created": 1726085331,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -1184,7 +1158,7 @@
         "usage": null
       }
     ],
-    "created": 1725525941,
+    "created": 1726085336,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -1210,7 +1184,7 @@
         "usage": null
       }
     ],
-    "created": 1725525935,
+    "created": 1726085336,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -1236,7 +1210,7 @@
         "usage": null
       }
     ],
-    "created": 1725525943,
+    "created": 1726085326,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -1262,7 +1236,7 @@
         "usage": null
       }
     ],
-    "created": 1725525942,
+    "created": 1726085336,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -1288,7 +1262,7 @@
         "usage": null
       }
     ],
-    "created": 1725525943,
+    "created": 1726085331,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -1314,7 +1288,7 @@
         "usage": null
       }
     ],
-    "created": 1725525943,
+    "created": 1726085336,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -1340,7 +1314,7 @@
         "usage": null
       }
     ],
-    "created": 1725525936,
+    "created": 1726085326,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -1366,7 +1340,7 @@
         "usage": null
       }
     ],
-    "created": 1725525943,
+    "created": 1726085331,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -1392,7 +1366,7 @@
         "usage": null
       }
     ],
-    "created": 1725525943,
+    "created": 1726085331,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -1418,7 +1392,7 @@
         "usage": null
       }
     ],
-    "created": 1725525943,
+    "created": 1726085336,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -1444,7 +1418,7 @@
         "usage": null
       }
     ],
-    "created": 1725525936,
+    "created": 1726085336,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -1470,7 +1444,7 @@
         "usage": null
       }
     ],
-    "created": 1725525935,
+    "created": 1726085326,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -1496,7 +1470,7 @@
         "usage": null
       }
     ],
-    "created": 1725525943,
+    "created": 1726085331,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -1522,7 +1496,7 @@
         "usage": null
       }
     ],
-    "created": 1725525942,
+    "created": 1726085326,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -1548,7 +1522,7 @@
         "usage": null
       }
     ],
-    "created": 1725525943,
+    "created": 1726085336,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -1574,7 +1548,7 @@
         "usage": null
       }
     ],
-    "created": 1725525941,
+    "created": 1726085336,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -1600,7 +1574,7 @@
         "usage": null
       }
     ],
-    "created": 1725525943,
+    "created": 1726085331,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -1626,7 +1600,7 @@
         "usage": null
       }
     ],
-    "created": 1725525941,
+    "created": 1726085331,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -1652,7 +1626,7 @@
         "usage": null
       }
     ],
-    "created": 1725525936,
+    "created": 1726085326,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -1678,7 +1652,7 @@
         "usage": null
       }
     ],
-    "created": 1725525935,
+    "created": 1726085326,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -1704,7 +1678,7 @@
         "usage": null
       }
     ],
-    "created": 1725525941,
+    "created": 1726085326,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -1730,7 +1704,7 @@
         "usage": null
       }
     ],
-    "created": 1725525941,
+    "created": 1726085326,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -1756,7 +1730,7 @@
         "usage": null
       }
     ],
-    "created": 1725525942,
+    "created": 1726085331,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -1782,7 +1756,7 @@
         "usage": null
       }
     ],
-    "created": 1725525943,
+    "created": 1726085336,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -1808,7 +1782,7 @@
         "usage": null
       }
     ],
-    "created": 1725525943,
+    "created": 1726085326,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -1834,7 +1808,7 @@
         "usage": null
       }
     ],
-    "created": 1725525941,
+    "created": 1726085326,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -1860,7 +1834,7 @@
         "usage": null
       }
     ],
-    "created": 1725525943,
+    "created": 1726085326,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -1886,7 +1860,7 @@
         "usage": null
       }
     ],
-    "created": 1725525943,
+    "created": 1726085326,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -1912,7 +1886,7 @@
         "usage": null
       }
     ],
-    "created": 1725525941,
+    "created": 1726085326,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -1938,7 +1912,7 @@
         "usage": null
       }
     ],
-    "created": 1725525942,
+    "created": 1726085336,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -1964,7 +1938,7 @@
         "usage": null
       }
     ],
-    "created": 1725525942,
+    "created": 1726085331,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -1990,7 +1964,7 @@
         "usage": null
       }
     ],
-    "created": 1725525942,
+    "created": 1726085326,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -2016,7 +1990,7 @@
         "usage": null
       }
     ],
-    "created": 1725525936,
+    "created": 1726085326,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -2042,7 +2016,7 @@
         "usage": null
       }
     ],
-    "created": 1725525942,
+    "created": 1726085336,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -2068,7 +2042,7 @@
         "usage": null
       }
     ],
-    "created": 1725525941,
+    "created": 1726085331,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -2094,7 +2068,7 @@
         "usage": null
       }
     ],
-    "created": 1725525943,
+    "created": 1726085336,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -2120,7 +2094,7 @@
         "usage": null
       }
     ],
-    "created": 1725525935,
+    "created": 1726085331,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -2146,7 +2120,7 @@
         "usage": null
       }
     ],
-    "created": 1725525943,
+    "created": 1726085336,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -2172,7 +2146,7 @@
         "usage": null
       }
     ],
-    "created": 1725525936,
+    "created": 1726085331,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -2198,7 +2172,7 @@
         "usage": null
       }
     ],
-    "created": 1725525935,
+    "created": 1726085336,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -2224,7 +2198,7 @@
         "usage": null
       }
     ],
-    "created": 1725525942,
+    "created": 1726085336,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -2250,7 +2224,7 @@
         "usage": null
       }
     ],
-    "created": 1725525943,
+    "created": 1726085331,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -2276,7 +2250,7 @@
         "usage": null
       }
     ],
-    "created": 1725525943,
+    "created": 1726085331,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -2302,7 +2276,7 @@
         "usage": null
       }
     ],
-    "created": 1725525943,
+    "created": 1726085331,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -2328,7 +2302,7 @@
         "usage": null
       }
     ],
-    "created": 1725525942,
+    "created": 1726085326,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -2354,7 +2328,7 @@
         "usage": null
       }
     ],
-    "created": 1725525941,
+    "created": 1726085336,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -2380,7 +2354,7 @@
         "usage": null
       }
     ],
-    "created": 1725525936,
+    "created": 1726085326,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -2406,7 +2380,7 @@
         "usage": null
       }
     ],
-    "created": 1725525941,
+    "created": 1726085336,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -2432,7 +2406,7 @@
         "usage": null
       }
     ],
-    "created": 1725525943,
+    "created": 1726085336,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -2458,7 +2432,7 @@
         "usage": null
       }
     ],
-    "created": 1725525936,
+    "created": 1726085336,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -2484,7 +2458,7 @@
         "usage": null
       }
     ],
-    "created": 1725525935,
+    "created": 1726085326,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -2510,7 +2484,7 @@
         "usage": null
       }
     ],
-    "created": 1725525941,
+    "created": 1726085326,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -2536,7 +2510,7 @@
         "usage": null
       }
     ],
-    "created": 1725525942,
+    "created": 1726085331,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
@@ -2562,7 +2536,7 @@
         "usage": null
       }
     ],
-    "created": 1725525935,
+    "created": 1726085326,
     "id": "",
     "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "object": "chat.completion",
diff --git a/integration-tests/models/test_flash_llama_prefix.py b/integration-tests/models/test_flash_llama_prefix.py
index ae97b301..3e48b054 100644
--- a/integration-tests/models/test_flash_llama_prefix.py
+++ b/integration-tests/models/test_flash_llama_prefix.py
@@ -45,7 +45,6 @@ async def test_flash_llama_load(
         'Web search results:\n\n[1] "Federal authorities are investigating the market-making arms of Citadel LLC and KCG Holdings Inc, looking into the possibility that the two giants of electronic trading are giving small investors ..."\nURL: https://www.reuters.com/article/usa-stocks-probe-idUSL2N1871ZV\n\n[2] "Today, KCG is second only to Citadel in the market for handling stock order flow from retail brokerage firms. KCG and many other high-frequency trading firms have shied away from the public..."\nURL: https://www.ibtimes.com/citadel-llc-kcg-holdings-kcg-market-making-arms-probed-federal-authorities-over-stock-2366805\n\n[3] "Citadel Securities, a group owned by the Chicago-based hedge fund, is the would-be acquirer in the deal, the people said. The group is best known for its so-called wholesaler business that..."\nURL: https://www.wsj.com/articles/market-making-arm-of-citadel-llc-in-talks-to-buy-seats-on-nyse-floor-from-kcg-holdings-1454533971\n\n[4] "Citadels share of the wholesale market is around 34 per cent compared to KCGs 25 per cent, according to Tabb Group. Virtu has yet to lay out in detail its plans for the wholesale business ..."\nURL: https://www.ft.com/content/e1cb396e-29a7-11e7-bc4b-5528796fe35c\n\n[5] "Citadel Securities, a liquidity providers and market maker, announced it will purchase KCG Holdings designated market maker (DMM) business at the New York Stock Exchange. This will establish Citadel Securities as the DMM with the largest footprint on the NYSE, responsible for trading in approximately 1,500 issues."\nURL: https://www.tradersmagazine.com/departments/brokerage/citadel-purchases-kcg-dmm-business-becomes-1-on-nyse/\n\n[6] "isCitadel LLC and its related entity, KCG IP Holdings, LLC (Complainant), represented by Paul D. McGradyof Winston Strawn, Illinois, Respondent is- (Respondent), Alabama, USA. REGISTRAR AND DISPUTED DOMAIN NAME The domain name at issue iscitidelgroup.com, registered with TUCOWS, INC. PANEL The"\nURL: https://www.adrforum.com/domaindecisions/1522837.htm\n\n[7] "KCG SPACE HOLDINGS LLC is an Active company incorporated on July 21, 2022 with the registered number M22000011413. This Foreign Limited Liability company is located at 200 S BISCAYNE BLVD STE 3300, MIAMI, FL, 33131, US and has been running for one year. It currently has one Authorized Person. KEY FACTS ABOUT KCG SPACE HOLDINGS LLC US Businesses"\nURL: https://bisprofiles.com/fl/kcg-space-holdings-m22000011413\n\n[8] "The Complainant KCG IP Holdings LLC is the owner of US Trademark Registration No. 3,213,943, filed October 18, 2004, registered February 27, 2007, claiming first use dating back to 1994. Therefore, the Panel concludes that Complainants filing and registration of the CITADEL mark with the USPTO sufficiently demonstrates that it has rights in ..."\nURL: https://www.adrforum.com/domaindecisions/1579141.htm\n\n[9] "The KCG SPACE HOLDINGS LLC principal address is 200 S BISCAYNE BLVD STE 3300, MIAMI, 33131. Meanwhile you can send your letters to 200 S BISCAYNE BLVD STE 3300, MIAMI, FL, 33131. The company`s registered agent is C T CORPORATION SYSTEM 1200 SOUTH PINE ISLAND ROAD, PLANTATION, FL, 33324. The company`s management are A, President - Beeson Gerald A."\nURL: https://florida.intercreditreport.com/company/kcg-space-holdings-llc-m22000011413\n\n[10] "Billionaire Ken Griffin has built Citadel Securities into a trading and asset management colossus. ... and KCG Holdings. Last month, Citadel Securities reached an agreement with the SEC to pay $22 ..."\nURL: https://www.chicagobusiness.com/article/20170203/NEWS01/170209978/chicago-billionaire-ken-griffin-splits-citadel-into-two-companies\nCurrent date: 1/27/2023\n\nInstructions: Using the provided web search results, simulate a conversation where /u/CruxHub and Alice analyze the data batches and try and investigate for any non-standard uses of the holding companies. Make sure to cite results using [[number](URL)] notation after the reference. If the provided search results refer to multiple subjects with the same name, write separate answers for each subject.\n\nQuery: What is KCG Space Holdings LLC\'s relationship to Citadel?',
         'Web search results:\n\n[1] "Citadel LLC (formerly known as Citadel Investment Group, LLC) is an American multinational hedge fund and financial services company. Founded in 1990 by Ken Griffin, it has more than $50 billion in assets under management as of May 2022. [1]"\nURL: https://en.wikipedia.org/wiki/Citadel\\_LLC\n\n[2] "NASHVILLE, Tenn. and BRONXVILLE, N.Y. \u2014 Standard Media Group LLC (Standard Media) and Citadel Communications LLC (Citadel) jointly announced today that they have reached an agreement pursuant to which Standard Media will acquire from Citadel WLNE-TV, the ABC affiliate for the Providence, RI - New Bedford, MA market (DMA 52) and KLKN (TV), the \u2026"\nURL: https://www.standardmedia.com/2019/05/16/standard-media-group-to-acquire-citadel-stations/\n\n[3] "CITADEL MEDIA LLC. Citadel Media LLC is a New Hampshire Domestic Limited-Liability Company filed on February 6, 2021. The companys filing status is listed as Not In Good Standing and its File Number is 862423. The Registered Agent on file for this company is Peter Alan Gauthier and is located at 3 Maple Ridge Drive Unit 224, Merrimack, NH 03054."\nURL: https://www.bizapedia.com/nh/citadel-media-llc.html\n\n[4] "CITADEL MEDIA LLC is a Michigan Domestic Limited-Liability Company filed on November 16, 2017. The companys filing status is listed as Active and its File Number is 802132896. The Registered Agent on file for this company is Registered Agents Inc. and is located at 2222 W. Grand River Ave Ste A, Okemos, MI 48864. The companys mailing address ..."\nURL: https://www.bizapedia.com/mi/citadel-media-llc.html\n\n[5] "Citadel Broadcasting Corporation was a Las Vegas, Nevada -based broadcast holding company. Citadel owned 243 radio stations across the United States and was the third-largest radio station owner in the country. Only iHeartMedia and Cumulus Media owned more stations prior to Citadels merger with Cumulus."\nURL: https://en.wikipedia.org/wiki/Citadel\\_Broadcasting\n\n[6] "Citadel is one of the largest hedge fund managers in the world. And theyve subsequently managed Melvin Capital to the ground. Melvin Capital suffered a loss of over 50% its first quarter in 2021 due to shorting AMC Entertainment and GameStop. At some point youd expect your clearing house to raise awareness on your risk management right?"\nURL: https://franknez.com/citadel-loses-billions-hedge-funds-are-getting-dragged-down/\n\n[7] "At our core, Citadel is built to deliver excellence. We have some of the most talented and focused minds in the industry, and we activate their ideas and strategies through a robust range of proven technologies and execution capabilities. View Top Employees from Citadel LLC Looking for a particular Citadel LLC employees phone or email? Find Info"\nURL: https://rocketreach.co/citadel-llc-profile\\_b5c46522f42e0dc2\n\n[8] "# 1 Most profitable hedge fund manager of all time Source: LCH Investment NV estimates, Top Hedge Fund Managers by Net Gains Since Inception as of 12/31/2022. Our people are relentless in seeking a better way. Each day, we reimagine and refine our strategies, models and technology in pursuit of superior results and long-term performance."\nURL: https://www.citadel.com/\n\n[9] "We are one of the most significant alternative investment managers in the public U.S. corporate credit markets. Explore Credit Convertibles Equities Equities represents one of the largest and longest tenured businesses at Citadel. Explore Equities Global Fixed Income Macro We are a leading fixed income and macro business."\nURL: https://www.citadel.com/what-we-do/\n\n[10] "Citadel. 203,101 followers. 1mo. Last weekend, we celebrated Citadels 30th anniversary at an incredible event at Disney World and Universal Studios. Our founder and CEO Ken Griffin summarized ..."\nURL: https://www.linkedin.com/company/citadel-llc\nCurrent date: 1/27/2023\n\nInstructions: Using the provided web search results, simulate a conversation where /u/CruxHub and Alice analyze the data batches and try and investigate for any non-standard uses of the holding companies. Make sure to cite results using [[number](URL)] notation after the reference. If the provided search results refer to multiple subjects with the same name, write separate answers for each subject.\n\nQuery: What is CITADEL MEDIA LLC?',
         "What are the differences between the Dogme approach to language learning and the lexical approach to language learning",
-        'trying to connect grapgql-engine with dogital ocean database i linux server,\nbut I am gettin this error. In my local machine it is working\n\n{"detail":{"info":{"database\\_url":"postgresql://doadmin:...@ips-backend-db-do-user-13678898-0.b.db.ondigitalocean.com:25060/Verlagg?sslmode=require","retries":1},"kind":"postgres\\_connection"},"level":"info","timestamp":"2023-03-27T10:40:29.956+0000","type":"startup"}\nbackend-graphql-engine-1 | {"timestamp":"2023-03-27T10:41:29.000+0000","level":"info","type":"startup","detail":{"kind":"migrations-startup","info":"failed waiting for 9691, try increasing HASURA\\_GRAPHQL\\_MIGRATIONS\\_SERVER\\_TIMEOUT (default: 30)"}',
         "Implement my own netfilter in linux with linux kernel module with Rust",
         "Damage to which nerve causes numbness of the palmar surface of the 5th digit/little finger",
         "Explain the fault-tolerance of the reaction control system on the Space Shuttle",
@@ -152,7 +151,6 @@ async def test_flash_llama_load(
         "/u/CruxHub: Hey Alice, I",
         "/u/CruxHub: Hey Alice, I",
         "The Dogme approach and the Lexical Approach are",
-        "It seems like you're trying to connect a GraphQL",
         "Implementing a netfilter in Linux with a Rust",
         "Damage to the Ulnar nerve can cause numb",
         "The Space Shuttle's Reaction Control System (RCS",

From 94304649f1d2e7eb9ff2b633c592eeb7a4343fc9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?= <me@danieldk.eu>
Date: Thu, 12 Sep 2024 10:44:01 +0200
Subject: [PATCH 30/37] nix: support Python tokenizer conversion in the router
 (#2515)

Ideally we wouldn't have the router wrapper that this change adds,
but when I give PyO3 a Python interpreter with packages, it ends
up linking libpython from the Python interpreter rather than the
constructed environment and cannot pick up the Python modules as
a result.
---
 flake.nix               | 23 ++++++++++++++++++++---
 nix/crate-overrides.nix |  3 +++
 2 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/flake.nix b/flake.nix
index 58a8e311..5be413c5 100644
--- a/flake.nix
+++ b/flake.nix
@@ -46,9 +46,26 @@
         launcher = cargoNix.workspaceMembers.text-generation-launcher.build.override {
           inherit crateOverrides;
         };
-        router = cargoNix.workspaceMembers.text-generation-router-v3.build.override {
-          inherit crateOverrides;
-        };
+        router =
+          let
+            routerUnwrapped = cargoNix.workspaceMembers.text-generation-router-v3.build.override {
+              inherit crateOverrides;
+            };
+            packagePath =
+              with pkgs.python3.pkgs;
+              makePythonPath [
+                protobuf
+                sentencepiece
+                torch
+                transformers
+              ];
+          in
+          pkgs.writeShellApplication {
+            name = "text-generation-router";
+            text = ''
+              PYTHONPATH="${packagePath}" ${routerUnwrapped}/bin/text-generation-router "$@"
+            '';
+          };
         server = pkgs.python3.pkgs.callPackage ./nix/server.nix { inherit nix-filter; };
       in
       {
diff --git a/nix/crate-overrides.nix b/nix/crate-overrides.nix
index cbf366af..a4e74c6d 100644
--- a/nix/crate-overrides.nix
+++ b/nix/crate-overrides.nix
@@ -28,6 +28,9 @@ defaultCrateOverrides
       ];
     };
   };
+  pyo3-build-config = attrs: {
+    buildInputs = [ python3 ];
+  };
   text-generation-benchmark = attrs: {
     src = filter {
       root = ../benchmark;

From d95c670ada3b9160e7066d7969f441daa0c2bc67 Mon Sep 17 00:00:00 2001
From: Nicolas Patry <patry.nicolas@protonmail.com>
Date: Thu, 12 Sep 2024 14:54:56 +0200
Subject: [PATCH 31/37] Add nix test. (#2513)

* Add nix test.

* Modifying yourself means you need to rerun.

* Fixing the test + adding click (needed for pre-commit hooks).

* Try thuis.

* Our runner + pure test (not written)

* Reemove server.

* Root user.

* Different user ?

* Add the actual test target.

* Forgot this modification.

* Add a formatter.

* Add the secrets.

* Fixed the auth token ?

* Adding the other tests.

* Missing pre-commit.

* Test requires cargo for cargo fmt.

* Update it a bit.

* Up.

* Attempting to use a cache location for the models.

* Ignore the cache for now.
---
 .github/workflows/nix_tests.yaml | 41 ++++++++++++++++++++++++++++++++
 flake.nix                        | 25 +++++++++++++++++++
 2 files changed, 66 insertions(+)
 create mode 100644 .github/workflows/nix_tests.yaml

diff --git a/.github/workflows/nix_tests.yaml b/.github/workflows/nix_tests.yaml
new file mode 100644
index 00000000..f2209f8a
--- /dev/null
+++ b/.github/workflows/nix_tests.yaml
@@ -0,0 +1,41 @@
+name: "Nix Tests"
+on:
+  pull_request:
+    paths:
+      - ".github/workflows/nix_tests.yaml"
+      - "server/**"
+      - "proto/**"
+      - "router/**"
+      - "launcher/**"
+      - "Cargo.lock"
+      - "rust-toolchain.toml"
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
+jobs:
+  tests:
+    runs-on:
+      group: aws-highmemory-32-plus-priv
+    steps:
+    - uses: actions/checkout@v4
+    - uses: cachix/install-nix-action@v27
+      with:
+        nix_path: nixpkgs=channel:nixos-unstable
+    - uses: cachix/cachix-action@v14
+      with:
+        name: text-generation-inference
+        # If you chose signing key for write access
+        authToken: '${{ secrets.CACHIX_AUTH_TOKEN }}'
+      env:
+        USER: github_runner
+    - name: Build
+      run: nix develop .#test --command echo "Ok"
+    - name: Pre-commit tests.
+      run: nix develop .#test --command pre-commit run --all-files
+    - name: Python tests.
+      run: nix develop .#test --command python -m pytest server/tests/
+      env:
+        HF_TOKEN: ${{ secrets.HF_TOKEN }}
+    - name: Rust tests.
+      run: nix develop .#test --command cargo test
diff --git a/flake.nix b/flake.nix
index 5be413c5..c6adf0a6 100644
--- a/flake.nix
+++ b/flake.nix
@@ -69,6 +69,7 @@
         server = pkgs.python3.pkgs.callPackage ./nix/server.nix { inherit nix-filter; };
       in
       {
+        formatter = pkgs.nixfmt-rfc-style;
         devShells = with pkgs; rec {
           default = pure;
 
@@ -80,6 +81,29 @@
               server
             ];
           };
+          test = mkShell {
+            buildInputs =
+              [
+                # benchmark
+                # launcher
+                # router
+                server
+                openssl.dev
+                pkg-config
+                cargo
+                rustfmt
+                clippy
+              ]
+              ++ (with python3.pkgs; [
+                docker
+                pytest
+                pytest-asyncio
+                syrupy
+                pre-commit
+                ruff
+              ]);
+
+          };
 
           impure = mkShell {
             buildInputs =
@@ -99,6 +123,7 @@
                 docker
                 pip
                 ipdb
+                click
                 pyright
                 pytest
                 pytest-asyncio

From 628334d33638709e7b0f91fbaaa290fec2d016f3 Mon Sep 17 00:00:00 2001
From: drbh <david.richard.holtz@gmail.com>
Date: Thu, 12 Sep 2024 17:04:52 +0200
Subject: [PATCH 32/37] =?UTF-8?q?fix:=20pass=20missing=20revision=20arg=20?=
 =?UTF-8?q?for=20lora=20adapter=20when=20loading=20multiple=E2=80=A6=20(#2?=
 =?UTF-8?q?510)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

fix: pass missing revision arg for lora adapter when loading multiple adapters
---
 server/text_generation_server/utils/adapter.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/server/text_generation_server/utils/adapter.py b/server/text_generation_server/utils/adapter.py
index b7fc89df..31927b91 100644
--- a/server/text_generation_server/utils/adapter.py
+++ b/server/text_generation_server/utils/adapter.py
@@ -77,12 +77,12 @@ def load_and_merge_adapters(
 ) -> Tuple["ModuleMap", "AdapterConfig", Set[str], PreTrainedTokenizer]:
 
     if len(adapter_parameters.adapter_info) == 1:
-        adapter_info = next(iter(adapter_parameters.adapter_info))
+        adapter = next(iter(adapter_parameters.adapter_info))
         return load_module_map(
             model_id,
-            adapter_info.revision,
-            adapter_info.id,
-            adapter_info.path,
+            adapter.revision,
+            adapter.id,
+            adapter.path,
             weight_names,
             trust_remote_code,
         )
@@ -90,7 +90,6 @@ def load_and_merge_adapters(
     adapter_params = AdapterParametersContainer(adapter_parameters, adapter_index)
     return _load_and_merge(
         model_id,
-        adapter_params.revision,
         adapter_params,
         weight_names,
         trust_remote_code,
@@ -109,7 +108,6 @@ class AdapterParametersContainer:
 @lru_cache(maxsize=32)
 def _load_and_merge(
     model_id: str,
-    revision: str,
     adapter_params: AdapterParametersContainer,
     weight_names: Tuple[str],
     trust_remote_code: bool = False,
@@ -126,6 +124,7 @@ def _load_and_merge(
         module_map, adapter_config, adapter_weight_names, adapter_tokenizer = (
             load_module_map(
                 model_id,
+                adapter.revision,
                 adapter.id,
                 adapter.path,
                 weight_names,

From 3ac7df2b6dc93d33712619fcc739891d4b4b01da Mon Sep 17 00:00:00 2001
From: "Wang, Yi" <yi.a.wang@intel.com>
Date: Thu, 12 Sep 2024 23:23:49 +0800
Subject: [PATCH 33/37] hotfix : enable intel ipex cpu and xpu in python3.11
 (#2517)

enable intel ipex cpu and xpu in python3.11

Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
---
 Dockerfile_intel                              | 63 ++++++++++++++-----
 .../layers/attention/ipex.py                  |  6 +-
 .../models/flash_causal_lm.py                 | 22 ++++---
 3 files changed, 62 insertions(+), 29 deletions(-)

diff --git a/Dockerfile_intel b/Dockerfile_intel
index d7ac09af..7ab6bba1 100644
--- a/Dockerfile_intel
+++ b/Dockerfile_intel
@@ -44,9 +44,35 @@ RUN cargo build --profile release-opt
 
 # Text Generation Inference base image for Intel
 
-FROM intel/intel-extension-for-pytorch:2.1.30-xpu AS xpu
+FROM intel/intel-extension-for-pytorch:2.3.110-xpu AS xpu
 
 USER root
+
+ARG MAMBA_VERSION=23.1.0-1
+ARG PYTHON_VERSION='3.11.10'
+# Automatically set by buildx
+ARG TARGETPLATFORM
+ENV PATH /opt/conda/bin:$PATH
+
+# TGI seem to require libssl.so.1.1 instead of libssl.so.3 so we can't use ubuntu 22.04. Ubuntu 20.04 has python==3.8, and TGI requires python>=3.9, hence the need for miniconda.
+# Install mamba
+# translating Docker's TARGETPLATFORM into mamba arches
+RUN case ${TARGETPLATFORM} in \
+         "linux/arm64")  MAMBA_ARCH=aarch64  ;; \
+         *)              MAMBA_ARCH=x86_64   ;; \
+    esac && \
+    curl -fsSL -v -o ~/mambaforge.sh -O  "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh"
+RUN chmod +x ~/mambaforge.sh && \
+    bash ~/mambaforge.sh -b -p /opt/conda && \
+    rm ~/mambaforge.sh
+
+RUN case ${TARGETPLATFORM} in \
+         "linux/arm64")  exit 1 ;; \
+         *)              /opt/conda/bin/conda update -y conda &&  \
+                         /opt/conda/bin/conda install -y "python=${PYTHON_VERSION}" ;; \
+    esac && \
+    /opt/conda/bin/conda clean -ya
+
 # libssl.so.1.1 is not installed on Ubuntu 22.04 by default, install it
 RUN wget http://nz2.archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2_amd64.deb && \
     dpkg -i ./libssl1.1_1.1.1f-1ubuntu2_amd64.deb
@@ -56,7 +82,7 @@ RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | gpg --dea
 RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
 | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list
 
-RUN apt-get update && apt install -y intel-basekit xpu-smi cmake python3-dev ninja-build pciutils
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt install -y intel-basekit xpu-smi cmake ninja-build pciutils
 
 # Text Generation Inference base env
 ENV HF_HOME=/data \
@@ -65,9 +91,7 @@ ENV HF_HOME=/data \
 
 
 WORKDIR /usr/src
-RUN wget https://intel-extension-for-pytorch.s3.amazonaws.com/ipex_dev/xpu/torch-2.1.0.post1%2Bcxx11.abi-cp310-cp310-linux_x86_64.whl && pip install torch-2.1.0.post1+cxx11.abi-cp310-cp310-linux_x86_64.whl
-RUN pip install https://github.com/intel/intel-xpu-backend-for-triton/releases/download/v2.1.0/triton-2.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
-RUN git clone https://github.com/intel/intel-extension-for-pytorch && cd intel-extension-for-pytorch && git checkout -b distributed origin/dev/distributed
+RUN pip install torch==2.3.1+cxx11.abi torchvision==0.18.1+cxx11.abi torchaudio==2.3.1+cxx11.abi intel-extension-for-pytorch==2.3.110+xpu oneccl_bind_pt==2.3.100+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ --no-cache-dir
 
 # Install server
 COPY proto proto
@@ -82,14 +106,12 @@ ENV CCL_ROOT=/opt/intel/oneapi/ccl/latest
 ENV I_MPI_ROOT=/opt/intel/oneapi/mpi/latest
 ENV FI_PROVIDER_PATH=/opt/intel/oneapi/mpi/latest/opt/mpi/libfabric/lib/prov:/usr/lib/x86_64-linux-gnu/libfabric
 ENV LIBRARY_PATH=/opt/intel/oneapi/mpi/latest/lib:/opt/intel/oneapi/ccl/latest/lib/:/opt/intel/oneapi/mkl/latest/lib/:/opt/intel/oneapi/compiler/latest/lib
-ENV LD_LIBRARY_PATH=/opt/intel/oneapi/ccl/latest/lib/:/opt/intel/oneapi/mpi/latest/opt/mpi/libfabric/lib:/opt/intel/oneapi/mpi/latest/lib:/opt/intel/oneapi/mkl/latest/lib:/opt/intel/oneapi/compiler/latest/opt/compiler/lib:/opt/intel/oneapi/compiler/latest/lib:/opt/intel/oneapi/lib:/opt/intel/oneapi/lib/intel64:
-ENV PATH=/opt/intel/oneapi/mpi/latest/opt/mpi/libfabric/bin:/opt/intel/oneapi/mpi/latest/bin:/opt/intel/oneapi/mpi/latest/opt/mpi/libfabric/bin:/opt/intel/oneapi/mkl/latest/bin/:/opt/intel/oneapi/compiler/latest/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
+ENV LD_LIBRARY_PATH=/opt/intel/oneapi/ccl/latest/lib/:/opt/intel/oneapi/mpi/latest/opt/mpi/libfabric/lib:/opt/intel/oneapi/mpi/latest/lib:/opt/intel/oneapi/mkl/latest/lib:/opt/intel/oneapi/compiler/latest/opt/compiler/lib:/opt/intel/oneapi/compiler/latest/lib:/opt/intel/oneapi/lib:/opt/intel/oneapi/lib/intel64:/opt/conda/lib
+ENV PATH=/opt/conda/bin:/opt/intel/oneapi/mpi/latest/opt/mpi/libfabric/bin:/opt/intel/oneapi/mpi/latest/bin:/opt/intel/oneapi/mpi/latest/opt/mpi/libfabric/bin:/opt/intel/oneapi/mkl/latest/bin/:/opt/intel/oneapi/compiler/latest/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
 ENV CCL_ZE_IPC_EXCHANGE=sockets
 ENV CMAKE_PREFIX_PATH=/opt/intel/oneapi/mkl/latest/lib/cmake:/opt/intel/oneapi/compiler/latest
 ENV CPATH=/opt/intel/oneapi/mpi/latest/include:/opt/intel/oneapi/ccl/latest/include:/opt/intel/oneapi/mkl/latest/include
 
-RUN pip uninstall -y intel-extension-for-pytorch && cd intel-extension-for-pytorch && git submodule update --init --recursive && USE_AOT_DEVLIST='pvc' BUILD_SEPARATE_OPS=OFF BUILD_WITH_CPU=OFF USE_XETLA=ON python setup.py install && rm -rf /usr/src/intel-extension-for-pytorch
-
 # Install benchmarker
 COPY --from=builder /usr/src/target/release-opt/text-generation-benchmark /usr/local/bin/text-generation-benchmark
 # Install router
@@ -133,12 +155,19 @@ RUN chmod +x ~/mambaforge.sh && \
     bash ~/mambaforge.sh -b -p /opt/conda && \
     rm ~/mambaforge.sh
 
+RUN case ${TARGETPLATFORM} in \
+         "linux/arm64")  exit 1 ;; \
+         *)              /opt/conda/bin/conda update -y conda &&  \
+                         /opt/conda/bin/conda install -y "python=${PYTHON_VERSION}" ;; \
+    esac && \
+    /opt/conda/bin/conda clean -ya
+
 RUN conda install -c conda-forge gperftools mkl
 
-RUN pip install https://download.pytorch.org/whl/nightly/cpu/torch-2.4.0.dev20240612%2Bcpu-cp310-cp310-linux_x86_64.whl
-RUN pip install https://download.pytorch.org/whl/nightly/cpu/torchvision-0.19.0.dev20240612%2Bcpu-cp310-cp310-linux_x86_64.whl
-RUN pip install https://download.pytorch.org/whl/nightly/cpu/torchaudio-2.4.0.dev20240612%2Bcpu-cp310-cp310-linux_x86_64.whl
-RUN pip install triton numa
+RUN pip install https://download.pytorch.org/whl/nightly/cpu/torch-2.4.0.dev20240612%2Bcpu-cp311-cp311-linux_x86_64.whl
+RUN pip install https://download.pytorch.org/whl/nightly/cpu/torchvision-0.19.0.dev20240612%2Bcpu-cp311-cp311-linux_x86_64.whl
+RUN pip install https://download.pytorch.org/whl/nightly/cpu/torchaudio-2.4.0.dev20240612%2Bcpu-cp311-cp311-linux_x86_64.whl
+RUN pip install triton py-libnuma
 
 WORKDIR /usr/src
 
@@ -151,10 +180,10 @@ RUN cd intel-extension-for-pytorch && git submodule sync && git submodule update
 RUN cd torch-ccl && git submodule sync && git submodule update --init --recursive && pip install .
 
 ENV LD_PRELOAD=/opt/conda/lib/libtcmalloc.so
-ENV CCL_ROOT=/opt/conda/lib/python3.10/site-packages/oneccl_bindings_for_pytorch
-ENV I_MPI_ROOT=/opt/conda/lib/python3.10/site-packages/oneccl_bindings_for_pytorch
-ENV FI_PROVIDER_PATH=/opt/conda/lib/python3.10/site-packages/oneccl_bindings_for_pytorch/opt/mpi/libfabric/lib/prov:/usr/lib64/libfabric
-ENV LD_LIBRARY_PATH=/opt/conda/lib/python3.10/site-packages/oneccl_bindings_for_pytorch/opt/mpi/libfabric/lib:/opt/conda/lib/python3.10/site-packages/oneccl_bindings_for_pytorch/lib
+ENV CCL_ROOT=/opt/conda/lib/python3.11/site-packages/oneccl_bindings_for_pytorch
+ENV I_MPI_ROOT=/opt/conda/lib/python3.11/site-packages/oneccl_bindings_for_pytorch
+ENV FI_PROVIDER_PATH=/opt/conda/lib/python3.11/site-packages/oneccl_bindings_for_pytorch/opt/mpi/libfabric/lib/prov:/usr/lib64/libfabric
+ENV LD_LIBRARY_PATH=/opt/conda/lib/python3.11/site-packages/oneccl_bindings_for_pytorch/opt/mpi/libfabric/lib:/opt/conda/lib/python3.11/site-packages/oneccl_bindings_for_pytorch/lib
 ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/opt/conda/lib/"
 
 # Install server
diff --git a/server/text_generation_server/layers/attention/ipex.py b/server/text_generation_server/layers/attention/ipex.py
index 2d1427ae..d0eadc75 100644
--- a/server/text_generation_server/layers/attention/ipex.py
+++ b/server/text_generation_server/layers/attention/ipex.py
@@ -22,9 +22,9 @@ def attention(
 
     # We do not need to check window_size_left (not supported) here, so it is already checked ahead of time at model load.
     ipex.llm.functional.varlen_attention(
-        q,
-        key_cache,
-        value_cache,
+        q.contiguous() if q.device.type == "xpu" else q,
+        key_cache.contiguous() if key_cache.device.type == "xpu" else key_cache,
+        value_cache.contiguous() if value_cache.device.type == "xpu" else value_cache,
         out,
         seqlen.cu_seqlen_q,
         seqlen.cu_seqlen_q,
diff --git a/server/text_generation_server/models/flash_causal_lm.py b/server/text_generation_server/models/flash_causal_lm.py
index 834056aa..a2834962 100644
--- a/server/text_generation_server/models/flash_causal_lm.py
+++ b/server/text_generation_server/models/flash_causal_lm.py
@@ -82,7 +82,7 @@ def init_cpu_threads_env(rank_id: int, world_size: int):
         import numa
         import psutil
 
-        nodes = numa.get_max_node() + 1
+        nodes = numa.info.get_max_node() + 1
         rank_per_node = math.ceil(world_size / nodes)
         num_cpus_per_nodes = int(psutil.cpu_count(logical=False) / nodes)
         node_id = int(rank_id / rank_per_node)
@@ -91,18 +91,22 @@ def init_cpu_threads_env(rank_id: int, world_size: int):
             num_cpus_per_rank = max(int(num_cpus_per_nodes / rank_per_node), 1)
         else:
             num_cpus_per_rank = int(os.getenv("OMP_NUM_THREADS"))
-        if len(numa.get_membind()) == nodes:
-            numa.set_membind([node_id])
+        if len(numa.memory.get_membind_nodes()) == nodes:
+            numa.memory.set_membind_nodes((node_id))
         torch.set_num_threads(num_cpus_per_rank)
-        if len(numa.get_affinity(0)) == psutil.cpu_count(logical=True):
+        if len(numa.schedule.get_affinitive_cpus(0)) == psutil.cpu_count(logical=True):
             cpu_start = num_cpus_per_rank * rank_offset_per_node
-            numa.set_affinity(
+            numa.schedule.run_on_cpus(
                 0,
-                list(numa.node_to_cpus(node_id))[
-                    cpu_start : cpu_start + num_cpus_per_rank
-                ],
+                *(
+                    numa.info.node_to_cpus(node_id)[
+                        cpu_start : cpu_start + num_cpus_per_rank
+                    ]
+                ),
             )
-        logger.info(f"affinity={numa.get_affinity(0)}, membind = {numa.get_membind()}")
+        logger.info(
+            f"affinity={numa.schedule.get_affinitive_cpus(0)}, membind = {numa.memory.get_membind_nodes()}"
+        )
 
 
 @dataclass

From 9cca3e0b037a5b86a839caf4dcdc6a177fbb892c Mon Sep 17 00:00:00 2001
From: Alex Strick van Linschoten <strickvl@users.noreply.github.com>
Date: Fri, 13 Sep 2024 18:45:28 +0200
Subject: [PATCH 34/37] Use `ratatui` not (deprecated) `tui` (#2521)

* use ratatui not archived tui

* bump ratatui all the way with options
---
 Cargo.lock           | 119 +++++++++++++++++++++++++++++++------------
 benchmark/Cargo.toml |   4 +-
 benchmark/src/app.rs |  21 ++++----
 benchmark/src/lib.rs |   4 +-
 4 files changed, 100 insertions(+), 48 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 06a61853..93cef828 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -52,6 +52,12 @@ version = "0.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4aa90d7ce82d4be67b64039a3d588d38dbcc6736577de4a847025ce5b0c468d1"
 
+[[package]]
+name = "allocator-api2"
+version = "0.2.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f"
+
 [[package]]
 name = "anstream"
 version = "0.6.15"
@@ -588,6 +594,15 @@ version = "0.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
 
+[[package]]
+name = "castaway"
+version = "0.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0abae9be0aaf9ea96a3b1b8b1b55c602ca751eba1b1500220cea4ecbafe7c0d5"
+dependencies = [
+ "rustversion",
+]
+
 [[package]]
 name = "cc"
 version = "1.1.15"
@@ -729,6 +744,20 @@ version = "1.0.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0"
 
+[[package]]
+name = "compact_str"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6050c3a16ddab2e412160b31f2c871015704239bca62f72f6e5f0be631d3f644"
+dependencies = [
+ "castaway",
+ "cfg-if",
+ "itoa",
+ "rustversion",
+ "ryu",
+ "static_assertions",
+]
+
 [[package]]
 name = "console"
 version = "0.15.8"
@@ -848,15 +877,15 @@ checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80"
 
 [[package]]
 name = "crossterm"
-version = "0.27.0"
+version = "0.28.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f476fe445d41c9e991fd07515a6f463074b782242ccf4a5b7b1d1012e70824df"
+checksum = "829d955a0bb380ef178a640b91779e3987da38c9aea133b20614cfed8cdea9c6"
 dependencies = [
  "bitflags 2.6.0",
  "crossterm_winapi",
- "libc",
- "mio 0.8.11",
+ "mio",
  "parking_lot",
+ "rustix",
  "signal-hook",
  "signal-hook-mio",
  "winapi",
@@ -1473,6 +1502,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
 dependencies = [
  "ahash",
+ "allocator-api2",
 ]
 
 [[package]]
@@ -1809,12 +1839,6 @@ dependencies = [
  "unicode-width",
 ]
 
-[[package]]
-name = "indoc"
-version = "2.0.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5"
-
 [[package]]
 name = "init-tracing-opentelemetry"
 version = "0.14.1"
@@ -1828,6 +1852,16 @@ dependencies = [
  "tracing-opentelemetry 0.21.0",
 ]
 
+[[package]]
+name = "instability"
+version = "0.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b23a0c8dfe501baac4adf6ebbfa6eddf8f0c07f56b058cc1288017e32397846c"
+dependencies = [
+ "quote",
+ "syn 2.0.76",
+]
+
 [[package]]
 name = "instant"
 version = "0.1.13"
@@ -2066,6 +2100,15 @@ dependencies = [
  "imgref",
 ]
 
+[[package]]
+name = "lru"
+version = "0.12.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "37ee39891760e7d94734f6f63fedc29a2e4a152f836120753a72503f09fcf904"
+dependencies = [
+ "hashbrown 0.14.5",
+]
+
 [[package]]
 name = "macro_rules_attribute"
 version = "0.2.0"
@@ -2234,18 +2277,6 @@ dependencies = [
  "adler2",
 ]
 
-[[package]]
-name = "mio"
-version = "0.8.11"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c"
-dependencies = [
- "libc",
- "log",
- "wasi",
- "windows-sys 0.48.0",
-]
-
 [[package]]
 name = "mio"
 version = "1.0.2"
@@ -2254,6 +2285,7 @@ checksum = "80e04d1dcff3aae0704555fe5fee3bcfaf3d1fdf8a7e521d5b9d2b42acb52cec"
 dependencies = [
  "hermit-abi 0.3.9",
  "libc",
+ "log",
  "wasi",
  "windows-sys 0.52.0",
 ]
@@ -3255,18 +3287,22 @@ dependencies = [
 
 [[package]]
 name = "ratatui"
-version = "0.23.0"
+version = "0.28.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2e2e4cd95294a85c3b4446e63ef054eea43e0205b1fd60120c16b74ff7ff96ad"
+checksum = "fdef7f9be5c0122f890d58bdf4d964349ba6a6161f705907526d891efabba57d"
 dependencies = [
  "bitflags 2.6.0",
  "cassowary",
+ "compact_str",
  "crossterm",
- "indoc",
- "itertools 0.11.0",
+ "instability",
+ "itertools 0.13.0",
+ "lru",
  "paste",
  "strum",
+ "strum_macros",
  "unicode-segmentation",
+ "unicode-truncate",
  "unicode-width",
 ]
 
@@ -3861,7 +3897,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "34db1a06d485c9142248b7a054f034b349b212551f3dfd19c94d45a754a217cd"
 dependencies = [
  "libc",
- "mio 0.8.11",
+ "mio",
  "signal-hook",
 ]
 
@@ -3956,6 +3992,12 @@ dependencies = [
  "unicode-segmentation",
 ]
 
+[[package]]
+name = "static_assertions"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
+
 [[package]]
 name = "strsim"
 version = "0.11.1"
@@ -3964,20 +4006,20 @@ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
 
 [[package]]
 name = "strum"
-version = "0.25.0"
+version = "0.26.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125"
+checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06"
 dependencies = [
  "strum_macros",
 ]
 
 [[package]]
 name = "strum_macros"
-version = "0.25.3"
+version = "0.26.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "23dc1fa9ac9c169a78ba62f0b841814b7abae11bdd047b9c58f893439e309ea0"
+checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be"
 dependencies = [
- "heck 0.4.1",
+ "heck 0.5.0",
  "proc-macro2",
  "quote",
  "rustversion",
@@ -4489,7 +4531,7 @@ dependencies = [
  "backtrace",
  "bytes",
  "libc",
- "mio 1.0.2",
+ "mio",
  "parking_lot",
  "pin-project-lite",
  "signal-hook-registry",
@@ -4933,6 +4975,17 @@ version = "1.11.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202"
 
+[[package]]
+name = "unicode-truncate"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b3644627a5af5fa321c95b9b235a72fd24cd29c648c2c379431e6628655627bf"
+dependencies = [
+ "itertools 0.13.0",
+ "unicode-segmentation",
+ "unicode-width",
+]
+
 [[package]]
 name = "unicode-width"
 version = "0.1.13"
diff --git a/benchmark/Cargo.toml b/benchmark/Cargo.toml
index f82659c9..7d3d8b18 100644
--- a/benchmark/Cargo.toml
+++ b/benchmark/Cargo.toml
@@ -16,7 +16,7 @@ path = "src/main.rs"
 [dependencies]
 average = "0.14"
 clap = { version = "4.4.5", features = ["derive", "env"] }
-crossterm = "0.27"
+crossterm = "0.28.1"
 float-ord = "0.3.2"
 serde = {version = "1.0.188", features = ["derive"]}
 serde_json = "1.0"
@@ -25,7 +25,7 @@ text-generation-client = { path = "../backends/client" }
 thiserror = "1.0.48"
 tokenizers = { workspace = true }
 tokio = { version = "1.32.0", features = ["rt", "rt-multi-thread", "parking_lot", "signal", "sync", "macros"] }
-tui = {package = "ratatui", version = "0.23", default-features = false, features = ["crossterm"]}
+ratatui = { version = "0.28.1", default-features = false, features = ["crossterm"] }
 tracing = "0.1.37"
 tracing-subscriber = { version = "0.3.17", features = ["json", "env-filter"] }
 hf-hub = { workspace = true }
diff --git a/benchmark/src/app.rs b/benchmark/src/app.rs
index a0a9313a..7e4d1181 100644
--- a/benchmark/src/app.rs
+++ b/benchmark/src/app.rs
@@ -1,16 +1,15 @@
 /// Inspired by https://github.com/hatoo/oha/blob/bb989ea3cd77727e7743e7daa60a19894bb5e901/src/monitor.rs
 use crate::generation::{Decode, Message, Prefill};
 use crossterm::event::{KeyCode, KeyEvent, KeyModifiers};
-use text_generation_client::ClientError;
-use tokio::sync::mpsc;
-use tui::backend::Backend;
-use tui::layout::{Alignment, Constraint, Direction, Layout};
-use tui::style::{Color, Modifier, Style};
-use tui::text::{Line, Span};
-use tui::widgets::{
+use ratatui::layout::{Alignment, Constraint, Direction, Layout};
+use ratatui::style::{Color, Modifier, Style};
+use ratatui::text::{Line, Span};
+use ratatui::widgets::{
     Axis, BarChart, Block, Borders, Chart, Dataset, Gauge, GraphType, Paragraph, Tabs,
 };
-use tui::{symbols, Frame};
+use ratatui::{symbols, Frame};
+use text_generation_client::ClientError;
+use tokio::sync::mpsc;
 
 /// TUI powered App
 pub(crate) struct App {
@@ -153,7 +152,7 @@ impl App {
     }
 
     /// Render frame
-    pub fn render<B: Backend>(&mut self, f: &mut Frame<'_, B>) {
+    pub fn render(&mut self, f: &mut Frame) {
         let batch_progress =
             (self.completed_batch as f64 / self.data.batch_size.len() as f64).clamp(0.0, 1.0);
         let run_progress =
@@ -172,7 +171,7 @@ impl App {
                 ]
                 .as_ref(),
             )
-            .split(f.size());
+            .split(f.area());
 
         // Top row horizontal layout
         let top = Layout::default()
@@ -239,7 +238,7 @@ impl App {
         f.render_widget(helper, row5[0]);
 
         // Batch tabs
-        let titles = self
+        let titles: Vec<Line> = self
             .data
             .batch_size
             .iter()
diff --git a/benchmark/src/lib.rs b/benchmark/src/lib.rs
index c33d64e6..0469b6fd 100644
--- a/benchmark/src/lib.rs
+++ b/benchmark/src/lib.rs
@@ -7,12 +7,12 @@ mod utils;
 use crate::app::App;
 use crate::event::Event;
 use crossterm::ExecutableCommand;
+use ratatui::backend::CrosstermBackend;
+use ratatui::Terminal;
 use std::io;
 use text_generation_client::v3::{GrammarType, NextTokenChooserParameters, ShardedClient};
 use tokenizers::Tokenizer;
 use tokio::sync::{broadcast, mpsc};
-use tui::backend::CrosstermBackend;
-use tui::Terminal;
 
 /// Run benchmarking app
 #[allow(clippy::too_many_arguments)]

From 777465529730b2cd52126c5372066dffb2b5cd5b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?= <me@danieldk.eu>
Date: Mon, 16 Sep 2024 12:39:18 +0200
Subject: [PATCH 35/37] Add tests for Mixtral (#2520)

Disable by default because CI runners do not have enough GPUs.
---
 .../test_flash_mixtral.json                   | 114 +++++
 .../test_flash_mixtral_all_params.json        |  99 ++++
 .../test_flash_mixtral_load.json              | 458 ++++++++++++++++++
 .../models/test_flash_mixtral.py              |  75 +++
 4 files changed, 746 insertions(+)
 create mode 100644 integration-tests/models/__snapshots__/test_flash_mixtral/test_flash_mixtral.json
 create mode 100644 integration-tests/models/__snapshots__/test_flash_mixtral/test_flash_mixtral_all_params.json
 create mode 100644 integration-tests/models/__snapshots__/test_flash_mixtral/test_flash_mixtral_load.json
 create mode 100644 integration-tests/models/test_flash_mixtral.py

diff --git a/integration-tests/models/__snapshots__/test_flash_mixtral/test_flash_mixtral.json b/integration-tests/models/__snapshots__/test_flash_mixtral/test_flash_mixtral.json
new file mode 100644
index 00000000..56419967
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_flash_mixtral/test_flash_mixtral.json
@@ -0,0 +1,114 @@
+{
+  "details": {
+    "best_of_sequences": null,
+    "finish_reason": "length",
+    "generated_tokens": 10,
+    "prefill": [
+      {
+        "id": 1,
+        "logprob": null,
+        "text": "<s>"
+      },
+      {
+        "id": 1824,
+        "logprob": -6.1445312,
+        "text": "What"
+      },
+      {
+        "id": 349,
+        "logprob": -1.4648438,
+        "text": "is"
+      },
+      {
+        "id": 21135,
+        "logprob": -13.6875,
+        "text": "gradient"
+      },
+      {
+        "id": 24871,
+        "logprob": -1.6005859,
+        "text": "descent"
+      },
+      {
+        "id": 28804,
+        "logprob": -0.39526367,
+        "text": "?"
+      },
+      {
+        "id": 13,
+        "logprob": -0.640625,
+        "text": "\n"
+      },
+      {
+        "id": 13,
+        "logprob": -0.18774414,
+        "text": "\n"
+      }
+    ],
+    "seed": null,
+    "tokens": [
+      {
+        "id": 20910,
+        "logprob": -0.96484375,
+        "special": false,
+        "text": "Grad"
+      },
+      {
+        "id": 722,
+        "logprob": -0.003168106,
+        "special": false,
+        "text": "ient"
+      },
+      {
+        "id": 24871,
+        "logprob": -0.16540527,
+        "special": false,
+        "text": " descent"
+      },
+      {
+        "id": 349,
+        "logprob": -0.08886719,
+        "special": false,
+        "text": " is"
+      },
+      {
+        "id": 396,
+        "logprob": -0.75878906,
+        "special": false,
+        "text": " an"
+      },
+      {
+        "id": 18586,
+        "logprob": -0.5703125,
+        "special": false,
+        "text": " optimization"
+      },
+      {
+        "id": 9464,
+        "logprob": -0.11242676,
+        "special": false,
+        "text": " algorithm"
+      },
+      {
+        "id": 1307,
+        "logprob": -0.7939453,
+        "special": false,
+        "text": " used"
+      },
+      {
+        "id": 298,
+        "logprob": -0.17102051,
+        "special": false,
+        "text": " to"
+      },
+      {
+        "id": 26518,
+        "logprob": -0.34326172,
+        "special": false,
+        "text": " minimize"
+      }
+    ],
+    "top_tokens": null
+  },
+  "generated_text": "Gradient descent is an optimization algorithm used to minimize"
+}
diff --git a/integration-tests/models/__snapshots__/test_flash_mixtral/test_flash_mixtral_all_params.json b/integration-tests/models/__snapshots__/test_flash_mixtral/test_flash_mixtral_all_params.json
new file mode 100644
index 00000000..00da1fed
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_flash_mixtral/test_flash_mixtral_all_params.json
@@ -0,0 +1,99 @@
+{
+  "details": {
+    "best_of_sequences": null,
+    "finish_reason": "length",
+    "generated_tokens": 10,
+    "prefill": [
+      {
+        "id": 1,
+        "logprob": null,
+        "text": "<s>"
+      },
+      {
+        "id": 24871,
+        "logprob": -17.234375,
+        "text": "descent"
+      },
+      {
+        "id": 28804,
+        "logprob": -7.4335938,
+        "text": "?"
+      },
+      {
+        "id": 13,
+        "logprob": -0.8017578,
+        "text": "\n"
+      },
+      {
+        "id": 13,
+        "logprob": -0.32958984,
+        "text": "\n"
+      }
+    ],
+    "seed": 0,
+    "tokens": [
+      {
+        "id": 1313,
+        "logprob": -2.3613281,
+        "special": false,
+        "text": "It"
+      },
+      {
+        "id": 3969,
+        "logprob": -0.7285156,
+        "special": false,
+        "text": " seems"
+      },
+      {
+        "id": 298,
+        "logprob": -1.3466797,
+        "special": false,
+        "text": " to"
+      },
+      {
+        "id": 528,
+        "logprob": 0.0,
+        "special": false,
+        "text": " me"
+      },
+      {
+        "id": 28725,
+        "logprob": -1.6757812,
+        "special": false,
+        "text": ","
+      },
+      {
+        "id": 369,
+        "logprob": -0.06585693,
+        "special": false,
+        "text": " that"
+      },
+      {
+        "id": 513,
+        "logprob": -1.1269531,
+        "special": false,
+        "text": " if"
+      },
+      {
+        "id": 368,
+        "logprob": 0.0,
+        "special": false,
+        "text": " you"
+      },
+      {
+        "id": 28742,
+        "logprob": -2.4921875,
+        "special": false,
+        "text": "'"
+      },
+      {
+        "id": 267,
+        "logprob": 0.0,
+        "special": false,
+        "text": "re"
+      }
+    ],
+    "top_tokens": null
+  },
+  "generated_text": "What is gradient descent?\n\nIt seems to me, that if you're"
+}
diff --git a/integration-tests/models/__snapshots__/test_flash_mixtral/test_flash_mixtral_load.json b/integration-tests/models/__snapshots__/test_flash_mixtral/test_flash_mixtral_load.json
new file mode 100644
index 00000000..55056cfd
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_flash_mixtral/test_flash_mixtral_load.json
@@ -0,0 +1,458 @@
+[
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 1,
+          "logprob": null,
+          "text": "<s>"
+        },
+        {
+          "id": 1824,
+          "logprob": -6.1445312,
+          "text": "What"
+        },
+        {
+          "id": 349,
+          "logprob": -1.4648438,
+          "text": "is"
+        },
+        {
+          "id": 21135,
+          "logprob": -13.6875,
+          "text": "gradient"
+        },
+        {
+          "id": 24871,
+          "logprob": -1.6005859,
+          "text": "descent"
+        },
+        {
+          "id": 28804,
+          "logprob": -0.39526367,
+          "text": "?"
+        },
+        {
+          "id": 13,
+          "logprob": -0.640625,
+          "text": "\n"
+        },
+        {
+          "id": 13,
+          "logprob": -0.18774414,
+          "text": "\n"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 20910,
+          "logprob": -0.96484375,
+          "special": false,
+          "text": "Grad"
+        },
+        {
+          "id": 722,
+          "logprob": -0.003168106,
+          "special": false,
+          "text": "ient"
+        },
+        {
+          "id": 24871,
+          "logprob": -0.16369629,
+          "special": false,
+          "text": " descent"
+        },
+        {
+          "id": 349,
+          "logprob": -0.0881958,
+          "special": false,
+          "text": " is"
+        },
+        {
+          "id": 396,
+          "logprob": -0.76708984,
+          "special": false,
+          "text": " an"
+        },
+        {
+          "id": 18586,
+          "logprob": -0.57373047,
+          "special": false,
+          "text": " optimization"
+        },
+        {
+          "id": 9464,
+          "logprob": -0.11291504,
+          "special": false,
+          "text": " algorithm"
+        },
+        {
+          "id": 1307,
+          "logprob": -0.79589844,
+          "special": false,
+          "text": " used"
+        },
+        {
+          "id": 298,
+          "logprob": -0.1694336,
+          "special": false,
+          "text": " to"
+        },
+        {
+          "id": 26518,
+          "logprob": -0.34350586,
+          "special": false,
+          "text": " minimize"
+        }
+      ],
+      "top_tokens": null
+    },
+    "generated_text": "Gradient descent is an optimization algorithm used to minimize"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 1,
+          "logprob": null,
+          "text": "<s>"
+        },
+        {
+          "id": 1824,
+          "logprob": -6.1445312,
+          "text": "What"
+        },
+        {
+          "id": 349,
+          "logprob": -1.4677734,
+          "text": "is"
+        },
+        {
+          "id": 21135,
+          "logprob": -13.6875,
+          "text": "gradient"
+        },
+        {
+          "id": 24871,
+          "logprob": -1.6015625,
+          "text": "descent"
+        },
+        {
+          "id": 28804,
+          "logprob": -0.39453125,
+          "text": "?"
+        },
+        {
+          "id": 13,
+          "logprob": -0.6435547,
+          "text": "\n"
+        },
+        {
+          "id": 13,
+          "logprob": -0.18713379,
+          "text": "\n"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 20910,
+          "logprob": -0.9628906,
+          "special": false,
+          "text": "Grad"
+        },
+        {
+          "id": 722,
+          "logprob": -0.0032176971,
+          "special": false,
+          "text": "ient"
+        },
+        {
+          "id": 24871,
+          "logprob": -0.16540527,
+          "special": false,
+          "text": " descent"
+        },
+        {
+          "id": 349,
+          "logprob": -0.08898926,
+          "special": false,
+          "text": " is"
+        },
+        {
+          "id": 396,
+          "logprob": -0.765625,
+          "special": false,
+          "text": " an"
+        },
+        {
+          "id": 18586,
+          "logprob": -0.5708008,
+          "special": false,
+          "text": " optimization"
+        },
+        {
+          "id": 9464,
+          "logprob": -0.11401367,
+          "special": false,
+          "text": " algorithm"
+        },
+        {
+          "id": 1307,
+          "logprob": -0.7963867,
+          "special": false,
+          "text": " used"
+        },
+        {
+          "id": 298,
+          "logprob": -0.17028809,
+          "special": false,
+          "text": " to"
+        },
+        {
+          "id": 26518,
+          "logprob": -0.34326172,
+          "special": false,
+          "text": " minimize"
+        }
+      ],
+      "top_tokens": null
+    },
+    "generated_text": "Gradient descent is an optimization algorithm used to minimize"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 1,
+          "logprob": null,
+          "text": "<s>"
+        },
+        {
+          "id": 1824,
+          "logprob": -6.140625,
+          "text": "What"
+        },
+        {
+          "id": 349,
+          "logprob": -1.4658203,
+          "text": "is"
+        },
+        {
+          "id": 21135,
+          "logprob": -13.6796875,
+          "text": "gradient"
+        },
+        {
+          "id": 24871,
+          "logprob": -1.5898438,
+          "text": "descent"
+        },
+        {
+          "id": 28804,
+          "logprob": -0.3955078,
+          "text": "?"
+        },
+        {
+          "id": 13,
+          "logprob": -0.64501953,
+          "text": "\n"
+        },
+        {
+          "id": 13,
+          "logprob": -0.18493652,
+          "text": "\n"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 20910,
+          "logprob": -0.9580078,
+          "special": false,
+          "text": "Grad"
+        },
+        {
+          "id": 722,
+          "logprob": -0.0032176971,
+          "special": false,
+          "text": "ient"
+        },
+        {
+          "id": 24871,
+          "logprob": -0.16552734,
+          "special": false,
+          "text": " descent"
+        },
+        {
+          "id": 349,
+          "logprob": -0.08874512,
+          "special": false,
+          "text": " is"
+        },
+        {
+          "id": 396,
+          "logprob": -0.75878906,
+          "special": false,
+          "text": " an"
+        },
+        {
+          "id": 18586,
+          "logprob": -0.5703125,
+          "special": false,
+          "text": " optimization"
+        },
+        {
+          "id": 9464,
+          "logprob": -0.11236572,
+          "special": false,
+          "text": " algorithm"
+        },
+        {
+          "id": 1307,
+          "logprob": -0.79541016,
+          "special": false,
+          "text": " used"
+        },
+        {
+          "id": 298,
+          "logprob": -0.17102051,
+          "special": false,
+          "text": " to"
+        },
+        {
+          "id": 26518,
+          "logprob": -0.34326172,
+          "special": false,
+          "text": " minimize"
+        }
+      ],
+      "top_tokens": null
+    },
+    "generated_text": "Gradient descent is an optimization algorithm used to minimize"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 1,
+          "logprob": null,
+          "text": "<s>"
+        },
+        {
+          "id": 1824,
+          "logprob": -6.1328125,
+          "text": "What"
+        },
+        {
+          "id": 349,
+          "logprob": -1.4658203,
+          "text": "is"
+        },
+        {
+          "id": 21135,
+          "logprob": -13.6796875,
+          "text": "gradient"
+        },
+        {
+          "id": 24871,
+          "logprob": -1.5947266,
+          "text": "descent"
+        },
+        {
+          "id": 28804,
+          "logprob": -0.39648438,
+          "text": "?"
+        },
+        {
+          "id": 13,
+          "logprob": -0.6464844,
+          "text": "\n"
+        },
+        {
+          "id": 13,
+          "logprob": -0.18688965,
+          "text": "\n"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 20910,
+          "logprob": -0.9609375,
+          "special": false,
+          "text": "Grad"
+        },
+        {
+          "id": 722,
+          "logprob": -0.003168106,
+          "special": false,
+          "text": "ient"
+        },
+        {
+          "id": 24871,
+          "logprob": -0.16601562,
+          "special": false,
+          "text": " descent"
+        },
+        {
+          "id": 349,
+          "logprob": -0.088134766,
+          "special": false,
+          "text": " is"
+        },
+        {
+          "id": 396,
+          "logprob": -0.7597656,
+          "special": false,
+          "text": " an"
+        },
+        {
+          "id": 18586,
+          "logprob": -0.5708008,
+          "special": false,
+          "text": " optimization"
+        },
+        {
+          "id": 9464,
+          "logprob": -0.11291504,
+          "special": false,
+          "text": " algorithm"
+        },
+        {
+          "id": 1307,
+          "logprob": -0.7944336,
+          "special": false,
+          "text": " used"
+        },
+        {
+          "id": 298,
+          "logprob": -0.17102051,
+          "special": false,
+          "text": " to"
+        },
+        {
+          "id": 26518,
+          "logprob": -0.34399414,
+          "special": false,
+          "text": " minimize"
+        }
+      ],
+      "top_tokens": null
+    },
+    "generated_text": "Gradient descent is an optimization algorithm used to minimize"
+  }
+]
diff --git a/integration-tests/models/test_flash_mixtral.py b/integration-tests/models/test_flash_mixtral.py
new file mode 100644
index 00000000..24ae1f48
--- /dev/null
+++ b/integration-tests/models/test_flash_mixtral.py
@@ -0,0 +1,75 @@
+import pytest
+
+
+@pytest.fixture(scope="module")
+def flash_mixtral_handle(launcher):
+    with launcher("mistralai/Mixtral-8x7B-v0.1", num_shard=8) as handle:
+        yield handle
+
+
+@pytest.fixture(scope="module")
+async def flash_mixtral(flash_mixtral_handle):
+    await flash_mixtral_handle.health(300)
+    return flash_mixtral_handle.client
+
+
+@pytest.mark.skip(reason="requires > 4 shards")
+@pytest.mark.asyncio
+async def test_flash_mixtral(flash_mixtral, response_snapshot):
+    response = await flash_mixtral.generate(
+        "What is gradient descent?\n\n", max_new_tokens=10, decoder_input_details=True
+    )
+
+    assert response.details.generated_tokens == 10
+    assert (
+        response.generated_text
+        == "Gradient descent is an optimization algorithm used to minimize"
+    )
+    assert response == response_snapshot
+
+
+@pytest.mark.skip(reason="requires > 4 shards")
+@pytest.mark.asyncio
+async def test_flash_mixtral_all_params(flash_mixtral, response_snapshot):
+    response = await flash_mixtral.generate(
+        "What is gradient descent?\n\n",
+        max_new_tokens=10,
+        repetition_penalty=1.2,
+        return_full_text=True,
+        stop_sequences=["test"],
+        temperature=0.5,
+        top_p=0.9,
+        top_k=10,
+        truncate=5,
+        typical_p=0.9,
+        watermark=True,
+        decoder_input_details=True,
+        seed=0,
+    )
+
+    assert response.details.generated_tokens == 10
+    assert (
+        response.generated_text
+        == "What is gradient descent?\n\nIt seems to me, that if you're"
+    )
+    assert response == response_snapshot
+
+
+@pytest.mark.skip(reason="requires > 4 shards")
+@pytest.mark.asyncio
+async def test_flash_mixtral_load(flash_mixtral, generate_load, response_snapshot):
+    responses = await generate_load(
+        flash_mixtral, "What is gradient descent?\n\n", max_new_tokens=10, n=4
+    )
+
+    assert len(responses) == 4
+    assert responses[0].details.generated_tokens == 10
+    assert (
+        responses[0].generated_text
+        == "Gradient descent is an optimization algorithm used to minimize"
+    )
+    assert all(
+        [r.generated_text == responses[0].generated_text for r in responses]
+    ), f"{[r.generated_text  for r in responses]}"
+
+    assert responses == response_snapshot

From 38fcafcf96f096f9471c1b8608a65ffe7a9ea4d2 Mon Sep 17 00:00:00 2001
From: Nicolas Patry <patry.nicolas@protonmail.com>
Date: Mon, 16 Sep 2024 17:00:54 +0200
Subject: [PATCH 36/37] Adding a test for FD. (#2516)

* Adding a test for FD.

* Fixing flashdecoding (empty batch doesn't work).

* Fixing the invalid popping.

* Fixing radix with block_size > 1

* Last reference.

* Use an actual hash.

* Update hash for slice.len() == 1

* Update the locks.

* Increasing docker timeout.
---
 Cargo.lock                                    |  312 +-
 backends/v3/src/queue.rs                      |    8 +-
 backends/v3/src/radix.rs                      |   76 +-
 flake.lock                                    |   12 +-
 integration-tests/conftest.py                 |    7 +
 .../test_flash_llama_flashdecoding.json       | 2550 +++++++++++++++++
 .../test_flash_llama_prefix_flashdecoding.py  |  229 ++
 7 files changed, 3003 insertions(+), 191 deletions(-)
 create mode 100644 integration-tests/models/__snapshots__/test_flash_llama_prefix_flashdecoding/test_flash_llama_flashdecoding.json
 create mode 100644 integration-tests/models/test_flash_llama_prefix_flashdecoding.py

diff --git a/Cargo.lock b/Cargo.lock
index 93cef828..6f5df898 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4,9 +4,9 @@ version = 3
 
 [[package]]
 name = "addr2line"
-version = "0.22.0"
+version = "0.24.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6e4503c46a5c0c7844e948c9a4d6acd9f50cccb4de1c48eb9e291ea17470c678"
+checksum = "f5fb1d8e4442bd405fdfd1dacb42792696b0cf9cb15882e5d097b742a676d375"
 dependencies = [
  "gimli",
 ]
@@ -109,9 +109,9 @@ dependencies = [
 
 [[package]]
 name = "anyhow"
-version = "1.0.86"
+version = "1.0.88"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da"
+checksum = "4e1496f8fb1fbf272686b8d37f523dab3e4a7443300055e74cdaa449f3114356"
 
 [[package]]
 name = "arbitrary"
@@ -133,7 +133,7 @@ checksum = "0ae92a5119aa49cdbcf6b9f893fe4e1d98b04ccbf82ee0584ad948a44a734dea"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.76",
+ "syn 2.0.77",
 ]
 
 [[package]]
@@ -172,18 +172,18 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.76",
+ "syn 2.0.77",
 ]
 
 [[package]]
 name = "async-trait"
-version = "0.1.81"
+version = "0.1.82"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6e0c28dcc82d7c8ead5cb13beb15405b57b8546e93215673ff8ca0349a028107"
+checksum = "a27b8a3a6e1a44fa4c8baf1f653e4172e81486d4941f2237e20dc2d0cf4ddff1"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.76",
+ "syn 2.0.77",
 ]
 
 [[package]]
@@ -257,9 +257,9 @@ dependencies = [
 
 [[package]]
 name = "aws-lc-rs"
-version = "1.8.1"
+version = "1.9.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4ae74d9bd0a7530e8afd1770739ad34b36838829d6ad61818f9230f683f5ad77"
+checksum = "2f95446d919226d587817a7d21379e6eb099b97b45110a7f272a444ca5c54070"
 dependencies = [
  "aws-lc-sys",
  "mirai-annotations",
@@ -269,9 +269,9 @@ dependencies = [
 
 [[package]]
 name = "aws-lc-sys"
-version = "0.20.1"
+version = "0.21.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0f0e249228c6ad2d240c2dc94b714d711629d52bad946075d8e9b2f5391f0703"
+checksum = "234314bd569802ec87011d653d6815c6d7b9ffb969e9fee5b8b20ef860e8dce9"
 dependencies = [
  "bindgen",
  "cc",
@@ -406,17 +406,17 @@ dependencies = [
 
 [[package]]
 name = "backtrace"
-version = "0.3.73"
+version = "0.3.74"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5cc23269a4f8976d0a4d2e7109211a419fe30e8d88d677cd60b6bc79c5732e0a"
+checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a"
 dependencies = [
  "addr2line",
- "cc",
  "cfg-if",
  "libc",
- "miniz_oxide 0.7.4",
+ "miniz_oxide 0.8.0",
  "object",
  "rustc-demangle",
+ "windows-targets 0.52.6",
 ]
 
 [[package]]
@@ -456,7 +456,7 @@ dependencies = [
  "regex",
  "rustc-hash",
  "shlex",
- "syn 2.0.76",
+ "syn 2.0.77",
  "which",
 ]
 
@@ -528,9 +528,9 @@ checksum = "5ce89b21cab1437276d2650d57e971f9d548a2d9037cc231abdc0562b97498ce"
 
 [[package]]
 name = "bytemuck"
-version = "1.17.1"
+version = "1.18.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "773d90827bc3feecfb67fab12e24de0749aad83c74b9504ecde46237b5cd24e2"
+checksum = "94bbb0ad554ad961ddc5da507a12a29b14e4ae5bda06b19f575a3e6079d2e2ae"
 
 [[package]]
 name = "byteorder"
@@ -605,9 +605,9 @@ dependencies = [
 
 [[package]]
 name = "cc"
-version = "1.1.15"
+version = "1.1.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "57b6a275aa2903740dc87da01c62040406b8812552e97129a63ea8850a17c6e6"
+checksum = "b62ac837cdb5cb22e10a256099b4fc502b1dfe560cb282963a974d7abd80e476"
 dependencies = [
  "jobserver",
  "libc",
@@ -675,9 +675,9 @@ dependencies = [
 
 [[package]]
 name = "clap"
-version = "4.5.16"
+version = "4.5.17"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ed6719fffa43d0d87e5fd8caeab59be1554fb028cd30edc88fc4369b17971019"
+checksum = "3e5a21b8495e732f1b3c364c9949b201ca7bae518c502c80256c96ad79eaf6ac"
 dependencies = [
  "clap_builder",
  "clap_derive",
@@ -685,9 +685,9 @@ dependencies = [
 
 [[package]]
 name = "clap_builder"
-version = "4.5.15"
+version = "4.5.17"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "216aec2b177652e3846684cbfe25c9964d18ec45234f0f5da5157b207ed1aab6"
+checksum = "8cf2dd12af7a047ad9d6da2b6b249759a22a7abc0f474c1dae1777afa4b21a73"
 dependencies = [
  "anstream",
  "anstyle",
@@ -704,7 +704,7 @@ dependencies = [
  "heck 0.5.0",
  "proc-macro2",
  "quote",
- "syn 2.0.76",
+ "syn 2.0.77",
 ]
 
 [[package]]
@@ -789,9 +789,9 @@ checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
 
 [[package]]
 name = "cpufeatures"
-version = "0.2.13"
+version = "0.2.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "51e852e6dc9a5bed1fae92dd2375037bf2b768725bf3be87811edee3249d09ad"
+checksum = "608697df725056feaccfa42cffdaeeec3fccc4ffc38358ecd19b243e716a78e0"
 dependencies = [
  "libc",
 ]
@@ -949,9 +949,9 @@ dependencies = [
 
 [[package]]
 name = "cxx"
-version = "1.0.126"
+version = "1.0.128"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3c4eae4b7fc8dcb0032eb3b1beee46b38d371cdeaf2d0c64b9944f6f69ad7755"
+checksum = "54ccead7d199d584d139148b04b4a368d1ec7556a1d9ea2548febb1b9d49f9a4"
 dependencies = [
  "cc",
  "cxxbridge-flags",
@@ -961,9 +961,9 @@ dependencies = [
 
 [[package]]
 name = "cxx-build"
-version = "1.0.126"
+version = "1.0.128"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6c822bf7fb755d97328d6c337120b6f843678178751cba33c9da25cf522272e0"
+checksum = "c77953e99f01508f89f55c494bfa867171ef3a6c8cea03d26975368f2121a5c1"
 dependencies = [
  "cc",
  "codespan-reporting",
@@ -971,24 +971,24 @@ dependencies = [
  "proc-macro2",
  "quote",
  "scratch",
- "syn 2.0.76",
+ "syn 2.0.77",
 ]
 
 [[package]]
 name = "cxxbridge-flags"
-version = "1.0.126"
+version = "1.0.128"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "719d6197dc016c88744aff3c0d0340a01ecce12e8939fc282e7c8f583ee64bc6"
+checksum = "65777e06cc48f0cb0152024c77d6cf9e4bdb4408e7b48bea993d42fa0f5b02b6"
 
 [[package]]
 name = "cxxbridge-macro"
-version = "1.0.126"
+version = "1.0.128"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "35de3b547387863c8f82013c4f79f1c2162edee956383e4089e1d04c18c4f16c"
+checksum = "98532a60dedaebc4848cb2cba5023337cc9ea3af16a5b062633fabfd9f18fb60"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.76",
+ "syn 2.0.77",
 ]
 
 [[package]]
@@ -1012,7 +1012,7 @@ dependencies = [
  "proc-macro2",
  "quote",
  "strsim",
- "syn 2.0.76",
+ "syn 2.0.77",
 ]
 
 [[package]]
@@ -1023,7 +1023,7 @@ checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806"
 dependencies = [
  "darling_core",
  "quote",
- "syn 2.0.76",
+ "syn 2.0.77",
 ]
 
 [[package]]
@@ -1037,33 +1037,33 @@ dependencies = [
 
 [[package]]
 name = "derive_builder"
-version = "0.20.0"
+version = "0.20.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0350b5cb0331628a5916d6c5c0b72e97393b8b6b03b47a9284f4e7f5a405ffd7"
+checksum = "cd33f37ee6a119146a1781d3356a7c26028f83d779b2e04ecd45fdc75c76877b"
 dependencies = [
  "derive_builder_macro",
 ]
 
 [[package]]
 name = "derive_builder_core"
-version = "0.20.0"
+version = "0.20.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d48cda787f839151732d396ac69e3473923d54312c070ee21e9effcaa8ca0b1d"
+checksum = "7431fa049613920234f22c47fdc33e6cf3ee83067091ea4277a3f8c4587aae38"
 dependencies = [
  "darling",
  "proc-macro2",
  "quote",
- "syn 2.0.76",
+ "syn 2.0.77",
 ]
 
 [[package]]
 name = "derive_builder_macro"
-version = "0.20.0"
+version = "0.20.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "206868b8242f27cecce124c19fd88157fbd0dd334df2587f36417bafbc85097b"
+checksum = "4abae7035bf79b9877b779505d8cf3749285b80c43941eda66604841889451dc"
 dependencies = [
  "derive_builder_core",
- "syn 2.0.76",
+ "syn 2.0.77",
 ]
 
 [[package]]
@@ -1338,7 +1338,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.76",
+ "syn 2.0.77",
 ]
 
 [[package]]
@@ -1415,9 +1415,9 @@ dependencies = [
 
 [[package]]
 name = "gimli"
-version = "0.29.0"
+version = "0.31.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "40ecd4077b5ae9fd2e9e169b102c6c330d0605168eb0e8bf79952b256dbefffd"
+checksum = "32085ea23f3234fc7846555e85283ba4de91e21016dc0455a16286d87a292d64"
 
 [[package]]
 name = "glob"
@@ -1447,7 +1447,7 @@ dependencies = [
  "futures-sink",
  "futures-util",
  "http 0.2.12",
- "indexmap 2.4.0",
+ "indexmap 2.5.0",
  "slab",
  "tokio",
  "tokio-util",
@@ -1466,7 +1466,7 @@ dependencies = [
  "futures-core",
  "futures-sink",
  "http 1.1.0",
- "indexmap 2.4.0",
+ "indexmap 2.5.0",
  "slab",
  "tokio",
  "tokio-util",
@@ -1688,16 +1688,16 @@ dependencies = [
 
 [[package]]
 name = "hyper-rustls"
-version = "0.27.2"
+version = "0.27.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5ee4be2c948921a1a5320b629c4193916ed787a7f7f293fd3f7f5a6c9de74155"
+checksum = "08afdbb5c31130e3034af566421053ab03787c640246a446327f550d11bcb333"
 dependencies = [
  "futures-util",
  "http 1.1.0",
  "hyper 1.4.1",
  "hyper-util",
  "log",
- "rustls 0.23.12",
+ "rustls 0.23.13",
  "rustls-native-certs",
  "rustls-pki-types",
  "tokio",
@@ -1732,9 +1732,9 @@ dependencies = [
 
 [[package]]
 name = "hyper-util"
-version = "0.1.7"
+version = "0.1.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cde7055719c54e36e95e8719f95883f22072a48ede39db7fc17a4e1d5281e9b9"
+checksum = "da62f120a8a37763efb0cf8fdf264b884c7b8b9ac8660b900c8661030c00e6ba"
 dependencies = [
  "bytes",
  "futures-channel",
@@ -1817,9 +1817,9 @@ dependencies = [
 
 [[package]]
 name = "indexmap"
-version = "2.4.0"
+version = "2.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "93ead53efc7ea8ed3cfb0c79fc8023fbb782a5432b52830b6518941cebe6505c"
+checksum = "68b900aa2f7301e21c36462b170ee99994de34dff39a4a6a528e80e7376d07e5"
 dependencies = [
  "equivalent",
  "hashbrown 0.14.5",
@@ -1839,6 +1839,12 @@ dependencies = [
  "unicode-width",
 ]
 
+[[package]]
+name = "indoc"
+version = "2.0.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5"
+
 [[package]]
 name = "init-tracing-opentelemetry"
 version = "0.14.1"
@@ -1859,7 +1865,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b23a0c8dfe501baac4adf6ebbfa6eddf8f0c07f56b058cc1288017e32397846c"
 dependencies = [
  "quote",
- "syn 2.0.76",
+ "syn 2.0.77",
 ]
 
 [[package]]
@@ -1879,14 +1885,14 @@ checksum = "c34819042dc3d3971c46c2190835914dfbe0c3c13f61449b2997f4e9722dfa60"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.76",
+ "syn 2.0.77",
 ]
 
 [[package]]
 name = "ipnet"
-version = "2.9.0"
+version = "2.10.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3"
+checksum = "187674a687eed5fe42285b40c6291f9a01517d415fad1c3cbc6a9f778af7fcd4"
 
 [[package]]
 name = "is_terminal_polyfill"
@@ -1979,7 +1985,7 @@ dependencies = [
  "anyhow",
  "base64 0.21.7",
  "bytecount",
- "clap 4.5.16",
+ "clap 4.5.17",
  "fancy-regex",
  "fraction",
  "getrandom",
@@ -2191,7 +2197,7 @@ dependencies = [
  "hyper 1.4.1",
  "hyper-rustls",
  "hyper-util",
- "indexmap 2.4.0",
+ "indexmap 2.5.0",
  "ipnet",
  "metrics",
  "metrics-util",
@@ -2314,7 +2320,7 @@ checksum = "a7ce64b975ed4f123575d11afd9491f2e37bbd5813fbfbc0f09ae1fbddea74e0"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.76",
+ "syn 2.0.77",
 ]
 
 [[package]]
@@ -2514,7 +2520,7 @@ checksum = "ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.76",
+ "syn 2.0.77",
 ]
 
 [[package]]
@@ -2585,18 +2591,18 @@ checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3"
 
 [[package]]
 name = "object"
-version = "0.36.3"
+version = "0.36.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "27b64972346851a39438c60b341ebc01bba47464ae329e55cf343eb93964efd9"
+checksum = "084f1a5821ac4c651660a94a7153d27ac9d8a53736203f58b31945ded098070a"
 dependencies = [
  "memchr",
 ]
 
 [[package]]
 name = "once_cell"
-version = "1.19.0"
+version = "1.20.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
+checksum = "33ea5043e58958ee56f3e15a90aee535795cd7dfd319846288d93c5b57d85cbe"
 
 [[package]]
 name = "onig"
@@ -2649,7 +2655,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.76",
+ "syn 2.0.77",
 ]
 
 [[package]]
@@ -2688,7 +2694,7 @@ checksum = "1e32339a5dc40459130b3bd269e9892439f55b33e772d2a9d402a789baaf4e8a"
 dependencies = [
  "futures-core",
  "futures-sink",
- "indexmap 2.4.0",
+ "indexmap 2.5.0",
  "js-sys",
  "once_cell",
  "pin-project-lite",
@@ -2912,7 +2918,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db"
 dependencies = [
  "fixedbitset",
- "indexmap 2.4.0",
+ "indexmap 2.5.0",
 ]
 
 [[package]]
@@ -2932,7 +2938,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.76",
+ "syn 2.0.77",
 ]
 
 [[package]]
@@ -2955,9 +2961,9 @@ checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec"
 
 [[package]]
 name = "plotters"
-version = "0.3.6"
+version = "0.3.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a15b6eccb8484002195a3e44fe65a4ce8e93a625797a063735536fd59cb01cf3"
+checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747"
 dependencies = [
  "num-traits",
  "plotters-backend",
@@ -2968,15 +2974,15 @@ dependencies = [
 
 [[package]]
 name = "plotters-backend"
-version = "0.3.6"
+version = "0.3.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "414cec62c6634ae900ea1c56128dfe87cf63e7caece0852ec76aba307cebadb7"
+checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a"
 
 [[package]]
 name = "plotters-svg"
-version = "0.3.6"
+version = "0.3.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "81b30686a7d9c3e010b84284bdd26a29f2138574f52f5eb6f794fc0ad924e705"
+checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670"
 dependencies = [
  "plotters-backend",
 ]
@@ -3022,7 +3028,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "479cf940fbbb3426c32c5d5176f62ad57549a0bb84773423ba8be9d089f5faba"
 dependencies = [
  "proc-macro2",
- "syn 2.0.76",
+ "syn 2.0.77",
 ]
 
 [[package]]
@@ -3074,7 +3080,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8021cf59c8ec9c432cfc2526ac6b8aa508ecaf29cd415f271b8406c1b851c3fd"
 dependencies = [
  "quote",
- "syn 2.0.76",
+ "syn 2.0.77",
 ]
 
 [[package]]
@@ -3114,7 +3120,7 @@ dependencies = [
  "prost 0.12.6",
  "prost-types",
  "regex",
- "syn 2.0.76",
+ "syn 2.0.77",
  "tempfile",
 ]
 
@@ -3141,7 +3147,7 @@ dependencies = [
  "itertools 0.12.1",
  "proc-macro2",
  "quote",
- "syn 2.0.76",
+ "syn 2.0.77",
 ]
 
 [[package]]
@@ -3200,7 +3206,7 @@ dependencies = [
  "proc-macro2",
  "pyo3-macros-backend",
  "quote",
- "syn 2.0.76",
+ "syn 2.0.77",
 ]
 
 [[package]]
@@ -3213,7 +3219,7 @@ dependencies = [
  "proc-macro2",
  "pyo3-build-config",
  "quote",
- "syn 2.0.76",
+ "syn 2.0.77",
 ]
 
 [[package]]
@@ -3397,9 +3403,9 @@ dependencies = [
 
 [[package]]
 name = "redox_syscall"
-version = "0.5.3"
+version = "0.5.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2a908a6e00f1fdd0dfd9c0eb08ce85126f6d8bbda50017e74bc4a4b7d4a926a4"
+checksum = "0884ad60e090bf1345b93da0a5de8923c93884cd03f40dfcfddd3b4bee661853"
 dependencies = [
  "bitflags 2.6.0",
 ]
@@ -3501,9 +3507,9 @@ dependencies = [
 
 [[package]]
 name = "rgb"
-version = "0.8.48"
+version = "0.8.50"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0f86ae463694029097b846d8f99fd5536740602ae00022c0c50c5600720b2f71"
+checksum = "57397d16646700483b67d2dd6511d79318f9d057fdbd21a4066aeac8b41d310a"
 dependencies = [
  "bytemuck",
 ]
@@ -3558,7 +3564,7 @@ dependencies = [
  "proc-macro2",
  "quote",
  "rust-embed-utils",
- "syn 2.0.76",
+ "syn 2.0.77",
  "walkdir",
 ]
 
@@ -3586,18 +3592,18 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
 
 [[package]]
 name = "rustc_version"
-version = "0.4.0"
+version = "0.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366"
+checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92"
 dependencies = [
  "semver",
 ]
 
 [[package]]
 name = "rustix"
-version = "0.38.34"
+version = "0.38.37"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f"
+checksum = "8acb788b847c24f28525660c4d7758620a7210875711f79e7f663cc152726811"
 dependencies = [
  "bitflags 2.6.0",
  "errno",
@@ -3634,9 +3640,9 @@ dependencies = [
 
 [[package]]
 name = "rustls"
-version = "0.23.12"
+version = "0.23.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c58f8c84392efc0a126acce10fa59ff7b3d2ac06ab451a33f2741989b806b044"
+checksum = "f2dabaac7466917e566adb06783a81ca48944c6898a1b08b9374106dd671f4c8"
 dependencies = [
  "aws-lc-rs",
  "log",
@@ -3649,9 +3655,9 @@ dependencies = [
 
 [[package]]
 name = "rustls-native-certs"
-version = "0.7.2"
+version = "0.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "04182dffc9091a404e0fc069ea5cd60e5b866c3adf881eff99a32d048242dffa"
+checksum = "fcaf18a4f2be7326cd874a5fa579fae794320a0f388d365dca7e480e55f83f8a"
 dependencies = [
  "openssl-probe",
  "rustls-pemfile 2.1.3",
@@ -3687,9 +3693,9 @@ checksum = "fc0a2ce646f8655401bb81e7927b812614bd5d91dbc968696be50603510fcaf0"
 
 [[package]]
 name = "rustls-webpki"
-version = "0.102.7"
+version = "0.102.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "84678086bd54edf2b415183ed7a94d0efb049f1b646a33e22a36f3794be6ae56"
+checksum = "64ca1bc8749bd4cf37b5ce386cc146580777b4e8572c7b97baf22c83f444bee9"
 dependencies = [
  "aws-lc-rs",
  "ring 0.17.8",
@@ -3720,11 +3726,11 @@ dependencies = [
 
 [[package]]
 name = "schannel"
-version = "0.1.23"
+version = "0.1.24"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fbc91545643bcf3a0bbb6569265615222618bdf33ce4ffbbd13c4bbd4c093534"
+checksum = "e9aaafd5a2b6e3d657ff009d82fbd630b6bd54dd4eb06f21693925cdf80f9b8b"
 dependencies = [
- "windows-sys 0.52.0",
+ "windows-sys 0.59.0",
 ]
 
 [[package]]
@@ -3783,9 +3789,9 @@ dependencies = [
 
 [[package]]
 name = "serde"
-version = "1.0.209"
+version = "1.0.210"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "99fce0ffe7310761ca6bf9faf5115afbc19688edd00171d81b1bb1b116c63e09"
+checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a"
 dependencies = [
  "serde_derive",
 ]
@@ -3802,20 +3808,20 @@ dependencies = [
 
 [[package]]
 name = "serde_derive"
-version = "1.0.209"
+version = "1.0.210"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a5831b979fd7b5439637af1752d535ff49f4860c0f341d1baeb6faf0f4242170"
+checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.76",
+ "syn 2.0.77",
 ]
 
 [[package]]
 name = "serde_json"
-version = "1.0.127"
+version = "1.0.128"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8043c06d9f82bd7271361ed64f415fe5e12a77fdb52e573e7f06a516dea329ad"
+checksum = "6ff5456707a1de34e7e37f2a6fd3d3f808c318259cbd01ab6377795054b483d8"
 dependencies = [
  "itoa",
  "memchr",
@@ -4023,7 +4029,7 @@ dependencies = [
  "proc-macro2",
  "quote",
  "rustversion",
- "syn 2.0.76",
+ "syn 2.0.77",
 ]
 
 [[package]]
@@ -4045,9 +4051,9 @@ dependencies = [
 
 [[package]]
 name = "syn"
-version = "2.0.76"
+version = "2.0.77"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "578e081a14e0cefc3279b0472138c513f37b41a08d5a3cca9b6e4e8ceb6cd525"
+checksum = "9f35bcdf61fd8e7be6caf75f429fdca8beb3ed76584befb503b1569faee373ed"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -4173,7 +4179,7 @@ version = "2.2.1-dev0"
 dependencies = [
  "async-stream",
  "async-trait",
- "clap 4.5.16",
+ "clap 4.5.17",
  "cmake",
  "cxx",
  "cxx-build",
@@ -4195,7 +4201,7 @@ name = "text-generation-benchmark"
 version = "2.2.1-dev0"
 dependencies = [
  "average",
- "clap 4.5.16",
+ "clap 4.5.17",
  "crossterm",
  "float-ord",
  "hf-hub",
@@ -4233,7 +4239,7 @@ dependencies = [
 name = "text-generation-launcher"
 version = "2.2.1-dev0"
 dependencies = [
- "clap 4.5.16",
+ "clap 4.5.17",
  "ctrlc",
  "float_eq",
  "hf-hub",
@@ -4257,7 +4263,7 @@ dependencies = [
  "axum 0.7.5",
  "axum-tracing-opentelemetry",
  "base64 0.22.1",
- "clap 4.5.16",
+ "clap 4.5.17",
  "csv",
  "futures",
  "futures-util",
@@ -4306,7 +4312,7 @@ dependencies = [
  "axum 0.7.5",
  "axum-tracing-opentelemetry",
  "base64 0.22.1",
- "clap 4.5.16",
+ "clap 4.5.17",
  "criterion",
  "futures",
  "futures-util",
@@ -4374,7 +4380,7 @@ checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.76",
+ "syn 2.0.77",
 ]
 
 [[package]]
@@ -4524,9 +4530,9 @@ dependencies = [
 
 [[package]]
 name = "tokio"
-version = "1.39.3"
+version = "1.40.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9babc99b9923bfa4804bd74722ff02c0381021eafa4db9949217e3be8e84fff5"
+checksum = "e2b070231665d27ad9ec9b8df639893f46727666c6767db40317fbe920a5d998"
 dependencies = [
  "backtrace",
  "bytes",
@@ -4558,7 +4564,7 @@ checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.76",
+ "syn 2.0.77",
 ]
 
 [[package]]
@@ -4588,16 +4594,16 @@ version = "0.26.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0c7bc40d0e5a97695bb96e27995cd3a08538541b0a846f65bba7a359f36700d4"
 dependencies = [
- "rustls 0.23.12",
+ "rustls 0.23.13",
  "rustls-pki-types",
  "tokio",
 ]
 
 [[package]]
 name = "tokio-stream"
-version = "0.1.15"
+version = "0.1.16"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "267ac89e0bec6e691e5813911606935d77c476ff49024f98abcea3e7b15e37af"
+checksum = "4f4e6ce100d0eb49a2734f8c0812bcd324cf357d21810932c5df6b96ef2b86f1"
 dependencies = [
  "futures-core",
  "pin-project-lite",
@@ -4606,9 +4612,9 @@ dependencies = [
 
 [[package]]
 name = "tokio-util"
-version = "0.7.11"
+version = "0.7.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9cf6b47b3771c49ac75ad09a6162f53ad4b8088b76ac60e8ec1455b31a189fe1"
+checksum = "61e7c3654c13bcd040d4a03abee2c75b1d14a37b423cf5a813ceae1cc903ec6a"
 dependencies = [
  "bytes",
  "futures-core",
@@ -4645,7 +4651,7 @@ version = "0.22.20"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "583c44c02ad26b0c3f3066fe629275e50627026c51ac2e595cca4c230ce1ce1d"
 dependencies = [
- "indexmap 2.4.0",
+ "indexmap 2.5.0",
  "serde",
  "serde_spanned",
  "toml_datetime",
@@ -4717,7 +4723,7 @@ dependencies = [
  "proc-macro2",
  "prost-build",
  "quote",
- "syn 2.0.76",
+ "syn 2.0.77",
 ]
 
 [[package]]
@@ -4788,7 +4794,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.76",
+ "syn 2.0.77",
 ]
 
 [[package]]
@@ -4947,9 +4953,9 @@ checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75"
 
 [[package]]
 name = "unicode-ident"
-version = "1.0.12"
+version = "1.0.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
+checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe"
 
 [[package]]
 name = "unicode-normalization"
@@ -4971,9 +4977,9 @@ dependencies = [
 
 [[package]]
 name = "unicode-segmentation"
-version = "1.11.0"
+version = "1.12.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202"
+checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493"
 
 [[package]]
 name = "unicode-truncate"
@@ -5065,7 +5071,7 @@ version = "4.2.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c5afb1a60e207dca502682537fefcfd9921e71d0b83e9576060f09abc6efab23"
 dependencies = [
- "indexmap 2.4.0",
+ "indexmap 2.5.0",
  "serde",
  "serde_json",
  "utoipa-gen",
@@ -5081,7 +5087,7 @@ dependencies = [
  "proc-macro2",
  "quote",
  "regex",
- "syn 2.0.76",
+ "syn 2.0.77",
 ]
 
 [[package]]
@@ -5119,7 +5125,7 @@ checksum = "ee1cd046f83ea2c4e920d6ee9f7c3537ef928d75dce5d84a87c2c5d6b3999a3a"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.76",
+ "syn 2.0.77",
 ]
 
 [[package]]
@@ -5220,7 +5226,7 @@ dependencies = [
  "once_cell",
  "proc-macro2",
  "quote",
- "syn 2.0.76",
+ "syn 2.0.77",
  "wasm-bindgen-shared",
 ]
 
@@ -5254,7 +5260,7 @@ checksum = "afc340c74d9005395cf9dd098506f7f44e38f2b4a21c6aaacf9a105ea5e1e836"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.76",
+ "syn 2.0.77",
  "wasm-bindgen-backend",
  "wasm-bindgen-shared",
 ]
@@ -5307,9 +5313,9 @@ dependencies = [
 
 [[package]]
 name = "webpki-roots"
-version = "0.26.3"
+version = "0.26.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bd7c23921eeb1713a4e851530e9b9756e4fb0e89978582942612524cf09f01cd"
+checksum = "0bd24728e5af82c6c4ec1b66ac4844bdf8156257fccda846ec58b42cd0cdbe6a"
 dependencies = [
  "rustls-pki-types",
 ]
@@ -5633,7 +5639,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.76",
+ "syn 2.0.77",
 ]
 
 [[package]]
@@ -5641,20 +5647,6 @@ name = "zeroize"
 version = "1.8.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde"
-dependencies = [
- "zeroize_derive",
-]
-
-[[package]]
-name = "zeroize_derive"
-version = "1.4.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn 2.0.76",
-]
 
 [[package]]
 name = "zip"
diff --git a/backends/v3/src/queue.rs b/backends/v3/src/queue.rs
index 2bbb6753..f8123b57 100644
--- a/backends/v3/src/queue.rs
+++ b/backends/v3/src/queue.rs
@@ -364,7 +364,7 @@ impl State {
                         // Add it back to the front
                         tracing::debug!("Over budget: not enough free blocks");
                         self.entries.push_front((id, entry));
-                        break;
+                        continue;
                     }
                     Some(block_allocation) => {
                         tracing::debug!("Allocation: {block_allocation:?}");
@@ -436,6 +436,12 @@ impl State {
             batch_entries.insert(id, entry);
         }
 
+        // Empty batch
+        if batch_requests.is_empty() {
+            tracing::debug!("Filterered out all entries");
+            return None;
+        }
+
         // Final batch size
         let size = batch_requests.len() as u32;
         next_batch_span.record("batch_size", size);
diff --git a/backends/v3/src/radix.rs b/backends/v3/src/radix.rs
index 9b117456..8a544891 100644
--- a/backends/v3/src/radix.rs
+++ b/backends/v3/src/radix.rs
@@ -1,10 +1,22 @@
 use crate::block_allocator::{Allocator, BlockAllocation};
 use slotmap::{DefaultKey, SlotMap};
+use std::hash::{Hash, Hasher};
 use std::{
     collections::{BTreeSet, HashMap},
     sync::Arc,
 };
 
+fn hash(slice: &[u32]) -> u64 {
+    assert!(!slice.is_empty());
+    if slice.len() == 1 {
+        slice[0] as u64
+    } else {
+        let mut s = std::hash::DefaultHasher::new();
+        slice.hash(&mut s);
+        s.finish()
+    }
+}
+
 pub struct RadixAllocator {
     allocation_id: u64,
 
@@ -44,6 +56,10 @@ impl RadixAllocator {
             // the free list if we cannot allocate enough blocks. This is only
             // temporary, the trie needs to be able to report whether it can
             // allocate the requested amount. Just not implemented yet.
+            tracing::debug!(
+                "Free blocks {}  need {n_blocks_needed}",
+                self.free_blocks.len()
+            );
             self.free_blocks.extend(
                 self.cache_blocks
                     .evict(n_blocks_needed - self.free_blocks.len()),
@@ -94,6 +110,9 @@ impl Allocator for RadixAllocator {
         match self.alloc_or_reclaim(suffix_blocks as usize) {
             Some(suffix_blocks) => blocks.extend(suffix_blocks),
             None => {
+                tracing::debug!("Cannot allocate {:?}", self.cache_blocks);
+                tracing::debug!("Found {prefix_len} prefix tokens need {suffix_blocks} suffix blocks for {tokens} tokens");
+                tracing::debug!("Block size {}", self.block_size);
                 self.cache_blocks
                     .decref(prefix_node)
                     .expect("Failed to decrement refcount");
@@ -211,7 +230,6 @@ struct RadixAllocation {
 pub enum TrieError {
     InvalidNodeId,
     RefCountUnderflow,
-    BlockTokenCountMismatch,
 }
 
 pub type NodeId = DefaultKey;
@@ -268,16 +286,19 @@ impl RadixTrie {
     fn find_(&mut self, mut node_id: NodeId, key: &[u32], blocks: &mut Vec<u32>) -> NodeId {
         let node = &self.nodes[node_id];
 
-        if let Some(&child_id) = node.children.get(&key[0]) {
-            self.update_access_time(child_id);
-            let child = self.nodes.get(child_id).expect("Invalid child identifier");
-            let shared_prefix_len = shared_prefix(&child.key, key, self.block_size);
-            assert_eq!(shared_prefix_len % self.block_size, 0);
-            blocks.extend(&child.blocks[..shared_prefix_len / self.block_size]);
+        if key.len() >= self.block_size {
+            let node_key = hash(&key[..self.block_size]);
+            if let Some(&child_id) = node.children.get(&node_key) {
+                self.update_access_time(child_id);
+                let child = self.nodes.get(child_id).expect("Invalid child identifier");
+                let shared_prefix_len = shared_prefix(&child.key, key, self.block_size);
+                assert_eq!(shared_prefix_len % self.block_size, 0);
+                blocks.extend(&child.blocks[..shared_prefix_len / self.block_size]);
 
-            let key = &key[shared_prefix_len..];
-            if !key.is_empty() {
-                node_id = self.find_(child_id, key, blocks);
+                let key = &key[shared_prefix_len..];
+                if !key.is_empty() {
+                    node_id = self.find_(child_id, key, blocks);
+                }
             }
         }
 
@@ -344,9 +365,11 @@ impl RadixTrie {
         // evict n_blocks and return `None` if we can't. We are now needlessly
         // evicting prefixes from the cache in such a case.
         let mut evicted = Vec::new();
+        tracing::debug!("Evicting in search of {n_blocks}");
 
         while let Some((last_access, node_id)) = self.leaves.pop_first() {
-            let blocks_needed = n_blocks - evicted.len();
+            let blocks_needed = n_blocks.saturating_sub(evicted.len());
+            tracing::debug!("Evicting node {node_id:?} ");
 
             let node = self.nodes.get(node_id).expect("Leave does not exist");
             assert_eq!(
@@ -368,8 +391,11 @@ impl RadixTrie {
                 // the required number of blocks and leave the remaining blocks
                 // untouched.
                 let node = self.nodes.get_mut(node_id).expect("Leave does not exist");
-                node.key.truncate(node.blocks.len() - blocks_needed);
-                evicted.extend(node.blocks.split_off(node.blocks.len() - blocks_needed));
+
+                let truncate_blocks = node.blocks.len() - blocks_needed;
+                let truncate_tokens = truncate_blocks * self.block_size;
+                node.key.truncate(truncate_tokens);
+                evicted.extend(node.blocks.split_off(truncate_blocks));
                 self.leaves.insert((last_access, node_id));
                 break;
             }
@@ -400,11 +426,10 @@ impl RadixTrie {
         // the part of the prefix that is already in the trie to detect
         // mismatches.
 
-        if tokens.len() != blocks.len() * self.block_size {
-            return Err(TrieError::BlockTokenCountMismatch);
-        }
+        assert_eq!(tokens.len(), blocks.len() * self.block_size);
 
-        if let Some(&child_id) = self.nodes[node_id].children.get(&tokens[0]) {
+        let node_key = hash(&tokens[..self.block_size]);
+        if let Some(&child_id) = self.nodes[node_id].children.get(&node_key) {
             self.update_access_time(child_id);
             let child = self
                 .nodes
@@ -452,14 +477,15 @@ impl RadixTrie {
             .get_mut(node_id)
             .expect("Node to-be split does not exist");
         let mut parent_key = node.key.split_off(prefix_len);
-        let mut parent_blocks = node.blocks.split_off(prefix_len);
+        let prefix_blocks = prefix_len / self.block_size;
+        let mut parent_blocks = node.blocks.split_off(prefix_blocks);
 
         // Move first part of the prefix to the parent. We swap to avoid
         // an allocation + copy for both splits of the key/blocks.
         std::mem::swap(&mut node.key, &mut parent_key);
         std::mem::swap(&mut node.blocks, &mut parent_blocks);
 
-        let node_key = node.key[0];
+        let node_key = hash(&node.key[..self.block_size]);
 
         let grandparent_id = node.parent.expect("Node does not have a parent");
         let parent_id = self.add_node(grandparent_id, parent_key, parent_blocks);
@@ -484,7 +510,7 @@ impl RadixTrie {
     ) -> NodeId {
         let key = key.into();
         let blocks = blocks.into();
-        let first = key[0];
+        let first = hash(&key[..self.block_size]);
 
         let child = TrieNode::new(key, blocks, self.time, Some(parent_id));
         let child_id = self.nodes.insert(child);
@@ -496,10 +522,10 @@ impl RadixTrie {
     }
 
     /// Add a node to the parent.
-    fn add_node_to_parent(&mut self, parent_id: NodeId, first: u32, child_id: NodeId) {
+    fn add_node_to_parent(&mut self, parent_id: NodeId, hash: u64, child_id: NodeId) {
         // Unwrap here, passing in an unknown id is a programming error.
         let parent = self.nodes.get_mut(parent_id).expect("Unknown parent node");
-        if parent.children.insert(first, child_id).is_none() {
+        if parent.children.insert(hash, child_id).is_none() {
             // Only increase reference count if child does not replace another child.
             self.incref(parent_id)
                 .expect("Failed to increase parent refcount");
@@ -517,7 +543,9 @@ impl RadixTrie {
         );
         let parent_id = node.parent.expect("Attempted to remove root node");
         let parent = self.nodes.get_mut(parent_id).expect("Unknown parent node");
-        parent.children.remove(&node.key[0]);
+
+        let node_key = hash(&node.key[..self.block_size]);
+        parent.children.remove(&node_key);
         self.decref(parent_id)
             .expect("Failed to decrease parent refcount");
         node
@@ -571,7 +599,7 @@ impl RadixTrie {
 #[derive(Debug)]
 struct TrieNode {
     blocks: Vec<u32>,
-    children: HashMap<u32, NodeId>,
+    children: HashMap<u64, NodeId>,
     key: Vec<u32>,
     last_accessed: u64,
     parent: Option<NodeId>,
diff --git a/flake.lock b/flake.lock
index a079d418..a6190789 100644
--- a/flake.lock
+++ b/flake.lock
@@ -853,11 +853,11 @@
         ]
       },
       "locked": {
-        "lastModified": 1726021481,
-        "narHash": "sha256-4J4E+Fh+77XIYnq2RVtg+ENWXpu6t74P0jKN/f2RQmI=",
+        "lastModified": 1726280639,
+        "narHash": "sha256-YfLRPlFZWrT2oRLNAoqf7G3+NnUTDdlIJk6tmBU7kXM=",
         "owner": "oxalica",
         "repo": "rust-overlay",
-        "rev": "1c2c120246c51a644c20ba2a36a33d3bd4860d70",
+        "rev": "e9f8641c92f26fd1e076e705edb12147c384171d",
         "type": "github"
       },
       "original": {
@@ -978,11 +978,11 @@
         "nixpkgs": "nixpkgs_6"
       },
       "locked": {
-        "lastModified": 1725950569,
-        "narHash": "sha256-nJHA1SvIQbXySpL2ueNbzQOhnkQASa5tOLz/kdW0PWA=",
+        "lastModified": 1726229792,
+        "narHash": "sha256-9xsLmjc9nr7a4PTddKv2DOi82ompTtJNyjO6R67y5tE=",
         "owner": "danieldk",
         "repo": "tgi-nix",
-        "rev": "d40f3c22e9bcc5e16c94d4605cf6a7d74dd07f46",
+        "rev": "1a902f4818e94c3f8d95f6000db17bc3fadd0ce7",
         "type": "github"
       },
       "original": {
diff --git a/integration-tests/conftest.py b/integration-tests/conftest.py
index 9632c816..eb55ebb9 100644
--- a/integration-tests/conftest.py
+++ b/integration-tests/conftest.py
@@ -342,6 +342,7 @@ def launcher(event_loop):
         max_total_tokens: Optional[int] = None,
         lora_adapters: Optional[List[str]] = None,
         cuda_graphs: Optional[List[int]] = None,
+        attention: Optional[str] = None,
     ):
         port = random.randint(8000, 10_000)
         master_port = random.randint(10_000, 20_000)
@@ -401,6 +402,8 @@ def launcher(event_loop):
 
         if not use_flash_attention:
             env["USE_FLASH_ATTENTION"] = "false"
+        if attention is not None:
+            env["ATTENTION"] = attention
 
         with tempfile.TemporaryFile("w+") as tmp:
             # We'll output stdout/stderr to a temporary file. Using a pipe
@@ -437,6 +440,7 @@ def launcher(event_loop):
         max_total_tokens: Optional[int] = None,
         lora_adapters: Optional[List[str]] = None,
         cuda_graphs: Optional[List[int]] = None,
+        attention: Optional[str] = None,
     ):
         port = random.randint(8000, 10_000)
 
@@ -491,6 +495,8 @@ def launcher(event_loop):
         }
         if not use_flash_attention:
             env["USE_FLASH_ATTENTION"] = "false"
+        if attention is not None:
+            env["ATTENTION"] = attention
 
         if HF_TOKEN is not None:
             env["HF_TOKEN"] = HF_TOKEN
@@ -522,6 +528,7 @@ def launcher(event_loop):
             devices=devices,
             volumes=volumes,
             ports={"80/tcp": port},
+            healthcheck={"timeout": int(10 * 1e9)},
             shm_size="1G",
         )
 
diff --git a/integration-tests/models/__snapshots__/test_flash_llama_prefix_flashdecoding/test_flash_llama_flashdecoding.json b/integration-tests/models/__snapshots__/test_flash_llama_prefix_flashdecoding/test_flash_llama_flashdecoding.json
new file mode 100644
index 00000000..51247ba4
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_flash_llama_prefix_flashdecoding/test_flash_llama_flashdecoding.json
@@ -0,0 +1,2550 @@
+[
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Jeff Walker's Product Launch Formula is a comprehensive system",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243286,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 69,
+      "total_tokens": 79
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Here are three key indicators to determine if a customer",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243278,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 52,
+      "total_tokens": 62
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "You can use the `String.format()` method in",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243277,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 97,
+      "total_tokens": 107
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "In a realm of binary mysticism, we find",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243278,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 126,
+      "total_tokens": 136
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "The `dummy` variable is being used to consume",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243283,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 305,
+      "total_tokens": 315
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "You can add multiple new columns in Power Query (",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243286,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 51,
+      "total_tokens": 61
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "There are many exciting new technologies emerging across various fields",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243277,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 52,
+      "total_tokens": 62
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Poly Ether Ether Ketone (PEEK) is",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243278,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 40,
+      "total_tokens": 50
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Here's a technical overview of a referral system similar",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243277,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 85,
+      "total_tokens": 95
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Here's an example of how you can add an",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243284,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 45,
+      "total_tokens": 55
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "I'd be happy to help with Java. What",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243283,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 43,
+      "total_tokens": 53
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "I can help you plan a road trip from Pune",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243277,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 82,
+      "total_tokens": 92
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "I'd be happy to explain more about a topic",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243284,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 38,
+      "total_tokens": 48
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "I'd be happy to help you brainstorm and provide",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243278,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 47,
+      "total_tokens": 57
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Implementing a Minesweeper algorithm using algebraic",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243278,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 54,
+      "total_tokens": 64
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "There are several issues with the provided code:\n\n1",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243284,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 375,
+      "total_tokens": 385
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "stop",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": ";)",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243277,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 2,
+      "prompt_tokens": 105,
+      "total_tokens": 107
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "As I delved into the world of high-st",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243277,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 2097,
+      "total_tokens": 2107
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "/u/CruxHub: Hi, I'm",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243283,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 2614,
+      "total_tokens": 2624
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "To simulate a conversation between Alice and /u/C",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243278,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 1070,
+      "total_tokens": 1080
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Alice: Hey /u/CruxHub,",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243283,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 1847,
+      "total_tokens": 1857
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Alice: Hi /u/CruxHub,",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243286,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 1849,
+      "total_tokens": 1859
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "/u/CruxHub: Hey Alice, I",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243278,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 1004,
+      "total_tokens": 1014
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "/u/CruxHub: Hey Alice, I",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243286,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 1100,
+      "total_tokens": 1110
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "/u/CruxHub: Hey Alice, I",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243278,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 1044,
+      "total_tokens": 1054
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "The Dogme approach and the Lexical Approach are",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243278,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 54,
+      "total_tokens": 64
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Implementing a netfilter in Linux with a Rust",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243278,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 48,
+      "total_tokens": 58
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Damage to the Ulnar nerve can cause numb",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243277,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 56,
+      "total_tokens": 66
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "The Space Shuttle's Reaction Control System (RCS",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243277,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 50,
+      "total_tokens": 60
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "I can provide you with a basic Python script that",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243278,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 65,
+      "total_tokens": 75
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Farming meat has several negative impacts on the environment",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243277,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 43,
+      "total_tokens": 53
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "The photograph filter you're referring to is called \"",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243277,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 51,
+      "total_tokens": 61
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Here's a sample geological database structure with some example",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243283,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 59,
+      "total_tokens": 69
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "**Web Marketing: A Simplified Explanation**\n\nWeb",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243278,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 45,
+      "total_tokens": 55
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Here's a rewritten and improved version of the story",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243278,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 447,
+      "total_tokens": 457
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Here are the questions rewritten in a more conversational",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243278,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 168,
+      "total_tokens": 178
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "**Learning Progress: 0%**\n\n| Topic",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243284,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 216,
+      "total_tokens": 226
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "I couldn't find any information on a person named",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243284,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 44,
+      "total_tokens": 54
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Here's a list of the largest outdoor retailers in",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243286,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 43,
+      "total_tokens": 53
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "To create a WordPress shortcode that includes Facebook SDK code",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243284,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 49,
+      "total_tokens": 59
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "The sentence is mostly grammatically correct, but there",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243277,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 78,
+      "total_tokens": 88
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "I'd be happy to engage in a debate with",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243284,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 59,
+      "total_tokens": 69
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "I'd love to hear about your business. As",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243284,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 64,
+      "total_tokens": 74
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "I'll wait for your request to proceed with part",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243286,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 2410,
+      "total_tokens": 2420
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "The final part of the Day Sculpting program emphasizes",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243284,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 2699,
+      "total_tokens": 2709
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "**Analysis of the Coming of Age Story Archetype",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243278,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 349,
+      "total_tokens": 359
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "The Apostle John is one of the most prominent figures",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243277,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 49,
+      "total_tokens": 59
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "To build a Google Places autocomplete feature on Jetpack",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243277,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 427,
+      "total_tokens": 437
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "The information provided does not mention the captain's name",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243283,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 169,
+      "total_tokens": 179
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "The metaverse is a shared, immersive and interactive",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243277,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 39,
+      "total_tokens": 49
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Here are some ideas for a series of articles for",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243278,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 50,
+      "total_tokens": 60
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "\"Purim Palooza Alert: \n\nTo",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243284,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 78,
+      "total_tokens": 88
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "**Summary of the paper in 10 points:",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243286,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 2022,
+      "total_tokens": 2032
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "You'll provide three pieces of text, and then",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243283,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 58,
+      "total_tokens": 68
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "I'm ready to proceed with text 3.",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243284,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 1650,
+      "total_tokens": 1660
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "I'm ready to answer questions on Text 1",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243286,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 1116,
+      "total_tokens": 1126
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "This is a Solidity contract written in the older",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243278,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 334,
+      "total_tokens": 344
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "**Speech Recognition and Synthesis using Python**\n\nTo",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243283,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 84,
+      "total_tokens": 94
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "I'd be happy to help you discuss a paper",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243286,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 42,
+      "total_tokens": 52
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "To handle the given utterance, we can use",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243277,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 375,
+      "total_tokens": 385
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "**Subscription Services Template:**\n\n**Title:** Virtual",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243286,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 443,
+      "total_tokens": 453
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Hello. How can I assist you today?",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243278,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 36,
+      "total_tokens": 46
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Differentiating yourself from other Etsy shops is crucial to",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243283,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 102,
+      "total_tokens": 112
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "To become a Licensed Marriage and Family Therapist (",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243284,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 53,
+      "total_tokens": 63
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "**What is Quantum Computing?**\n\nQuantum computing",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243278,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 42,
+      "total_tokens": 52
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Aquí te dejo 40 opciones de nombres",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243283,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 108,
+      "total_tokens": 118
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Deposition is a geological process that involves the transportation",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243278,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 38,
+      "total_tokens": 48
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Here are some good e-governance initiatives in",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243284,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 55,
+      "total_tokens": 65
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Here's a simple Python program that accepts a command",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243278,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 56,
+      "total_tokens": 66
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Imagine you're playing with a toy box. You",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243284,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 47,
+      "total_tokens": 57
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Here's an example of a question they might ask",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243278,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 66,
+      "total_tokens": 76
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Arduino Uno adalah sebuah papan mikrokontrol",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243283,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 38,
+      "total_tokens": 48
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "To edit an array that is within an object,",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243278,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 42,
+      "total_tokens": 52
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Microsoft ENTRA (Enterprise Mobility + Security) is",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243286,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 56,
+      "total_tokens": 66
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "To calculate the difference in interest paid between a simple",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243277,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 69,
+      "total_tokens": 79
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Yes, you can use Spring State Machine and Spring",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243278,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 49,
+      "total_tokens": 59
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "The issue lies in the fact that the `meta",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243278,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 142,
+      "total_tokens": 152
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Here are some effective marketing tactics for local small businesses",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243286,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 46,
+      "total_tokens": 56
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "The French Revolution, which lasted from 1789",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243283,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 41,
+      "total_tokens": 51
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "**Roles of a Network Driver:**\n\nA network",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243278,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 65,
+      "total_tokens": 75
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Yes, I'm familiar with the SAS (Stat",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243277,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 44,
+      "total_tokens": 54
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Using relays to control 12V solen",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243284,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 60,
+      "total_tokens": 70
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "You can use the following Python code to achieve this",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243278,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 55,
+      "total_tokens": 65
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Here are some prompts for viral comics:\n\n1.",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243284,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 336,
+      "total_tokens": 346
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "To simplify and make the comic funnier, consider",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243278,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 301,
+      "total_tokens": 311
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Here's a rewritten version of the 4-panel",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243278,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 282,
+      "total_tokens": 292
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Subject: Request for E-Waste Collection and Computer",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243284,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 110,
+      "total_tokens": 120
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "In the context of conference calls, the state you",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243277,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 84,
+      "total_tokens": 94
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "I can provide a general classification of companies based on",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243284,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 56,
+      "total_tokens": 66
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Here are some user stories that describe the concept in",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243283,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 44,
+      "total_tokens": 54
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "You can check your Python version by running the following",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243284,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 39,
+      "total_tokens": 49
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "**Scenario:**\n\n15-year-old Black youth,",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243284,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 473,
+      "total_tokens": 483
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "As a Demand Generation Manager for a B2B",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243277,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 50,
+      "total_tokens": 60
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "The error is due to a typo in your code",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243277,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 369,
+      "total_tokens": 379
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "고등교육의 필요성에 관한 영어 에",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243286,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 72,
+      "total_tokens": 82
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "Here's a simple C# program that uses the",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243283,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 51,
+      "total_tokens": 61
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "The error message \"connection refused\" indicates that the",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243277,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 85,
+      "total_tokens": 95
+    }
+  },
+  {
+    "choices": [
+      {
+        "finish_reason": "length",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "content": "To load an image, you can use various methods",
+          "name": null,
+          "role": "assistant",
+          "tool_calls": null
+        },
+        "usage": null
+      }
+    ],
+    "created": 1726243284,
+    "id": "",
+    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "object": "chat.completion",
+    "system_fingerprint": "2.2.1-dev0-native",
+    "usage": {
+      "completion_tokens": 10,
+      "prompt_tokens": 41,
+      "total_tokens": 51
+    }
+  }
+]
diff --git a/integration-tests/models/test_flash_llama_prefix_flashdecoding.py b/integration-tests/models/test_flash_llama_prefix_flashdecoding.py
new file mode 100644
index 00000000..73d397bd
--- /dev/null
+++ b/integration-tests/models/test_flash_llama_prefix_flashdecoding.py
@@ -0,0 +1,229 @@
+import pytest
+
+
+@pytest.fixture(scope="module")
+def flash_llama_handle_fd(launcher):
+    with launcher(
+        "meta-llama/Meta-Llama-3.1-8B-Instruct", num_shard=2, attention="flashdecoding"
+    ) as handle:
+        yield handle
+
+
+@pytest.fixture(scope="module")
+async def flash_llama_fd(flash_llama_handle_fd):
+    await flash_llama_handle_fd.health(300)
+    return flash_llama_handle_fd.client
+
+
+@pytest.mark.asyncio
+@pytest.mark.private
+async def test_flash_llama_flashdecoding(
+    flash_llama_fd, generate_multi, generous_response_snapshot
+):
+    prompts = [
+        "Summarize the main ideas of Jeff Walker's Product Launch Formula into bullet points as it pertains to a growth marketing agency implementing these strategies and tactics for their clients...",
+        "How to tell if a customer segment is well segmented? In 3 bullet points.",
+        'In Java, I want to replace string like "This is a new {object} at {place}" with a Map, {object: "student", "point 3, 4"}, and get a result "This is a new student at point 3, 4". How can I do?',
+        "Metaphorical language is also used to describe the various addressing modes of the instructions. Grandiose language to express their excitement and admiration for the functionality of the instructions being described. Now, rewrite this with more perplexity:\n\nJMP ABCD\nMOV AX, [BX+SI]\nMOV AX, [100]\nMOV AX, [BX]\nMOV AX, [BX\\*2+SI]\nMOV AX, BX\nMOV AX, 7",
+        'I have the following C++ function: \nvoid add\\_player(vector& players)\n{\n string player\\_name;\n string player\\_class;\n string dummy;\n PlayerClass pc;\n string player\\_sex;\n int player\\_gold;\n\n cout << " Create a Mage, Warrior, Bowman, or Thief" << endl;\n\n cout << "Name: ";\n getline(cin, player\\_name);\n\n cout << "Class: ";\n getline(cin, player\\_class);\n pc = get\\_player\\_class\\_from\\_string(player\\_class);\n while (pc == PlayerClass::InvalidPlayerClass)\n {\n cout << " Invalid class, try again" << endl;\n cout << "Class: ";\n getline(cin, player\\_class);\n pc = get\\_player\\_class\\_from\\_string(player\\_class);\n }\n\n cout << "Sex: ";\n getline(cin, player\\_sex);\n\n cout << "Gold: ";\n cin >> player\\_gold;\n getline(cin, dummy); //consume newline\n\n GamePlayer new\\_player;\n new\\_player.name = player\\_name;\n new\\_player.occupation = pc;\n new\\_player.gender = player\\_sex;\n new\\_player.gold = player\\_gold;\n\n //add to vector\n players.push\\_back(new\\_player);\n\n //add to file\n write\\_players\\_file(players);\n}\nCan you explain to me how the dummy variable is being used?',
+        "how do I add multiple new columns in m for power query or power bi?",
+        "Sure, I can do that. What new technology would you like me to review?",
+        "Poly Ether Ether Ketone",
+        'can you design a referral system similar on how dropbox did? I need a technical overview on how it should work, instead of free space we use the generic term "credits" where users can get more credits for every 3 friends they recommend.',
+        "Java add to the arraylist of a class type",
+        "this is not less code this is java",
+        "I want to do a road trip from Pune to Gujarat. Me and my wife will be travelling and we dont prefer very long driving sessions. Can you suggest a plan starting from Thursday early morning and ending in Pune on Sunday late night.",
+        "explane more",
+        "what do you think about this for a start up idea:",
+        "how could i implement a minesweeper algorithm that utilises algebraic topology to solve boards?",
+        "# Import the necessary packages\nfrom gudhi import SimplexTree\nfrom gudhi.persistent\\_homology import PersistentHomology\n\n# Define a function to compute the persistent homology of a Minesweeper game board\ndef minesweeper\\_homology(board):\n # Create a simplicial complex for the game board\n st = SimplexTree()\n\n # Add the points on the board to the simplicial complex\n for i in range(len(board)):\n for j in range(len(board[0])):\n st.insert([i, j], filtration=board[i][j])\n\n # Compute the persistent homology of the game board\n ph = PersistentHomology()\n ph.build(st)\n\n # Return the persistent homology diagram\n return ph.persistence()\n\n# Define a function to solve a Minesweeper game board using persistent homology\ndef minesweeper\\_solver(board):\n # Compute the persistent homology of the game board\n homology = minesweeper\\_homology(board)\n\n # Use the persistent homology to determine the locations of the mines\n # (this part would require some mathematical reasoning and programming)\n mines = []\n for h in homology:\n if h[1] - h[0] == 1: # if the hole persists for one filtration value\n mines.append(h[0]) # then it corresponds to a mine\n\n # Use the information about the mines to solve the game\n # (this part would require some programming)\n for mine in mines:\n i, j = mine # extract the coordinates of the mine\n board[i][j] = -1 # mark the mine on the board\n # (other code to solve the game)\n\n \nwhat is missing here?",
+        "You are now an imaginary expert business investigator. I am going to send you many rows of data. Each batch of row's will be sent to you and you may only reply \"Received.\" Save any analysis or insights for after you've received all of the data and I've told you \"Let's Begin.\" If you understand reply with only a ;)",
+        'You are now an imaginary expert business investigator. Tell the story of this batch of data in the form of a narrative story about the companies in the "Entity Name" column: \n\nBatch of data #1: Entity Name Purpose / Source\n101 PC HOLDINGS LLC Holding company for Penthouse C at the Setai Miami Beach (folio: 02-3234-153-1160)\n11 STAR ISLAND LLC Holding company for 10 STAR ISLAND DR, MIAMI BEACH, FL 33139 (folio: 02-4204-001-0100, 02-4204-001-0110) (lots 10, 11 and 12 of Star Island)\n117 EAST PARK AVENUE, LLC Holding company for 117 E. PARK AVE, LIBERTYVILLE, IL (PIN: 11-21-212-046-0000); subsequently sold.\n1201 BRICKELL BAY, LLC Holding company for 1201 BRICKELL BAY DR, MIAMI, FL (folio no: 141390710010)\n1221 BRICKELL, LLC Holding company for 1221 BRICKELL AVE, 155 SE 13 ST, 165 SE 13 ST, 175 SE 13 ST, and 185 SE 13 ST, MIAMI, FL (folio: 01-4139-035-0010)\n1221 BRICKELL HOLDINGS LLC Holding company for 1221 BRICKELL, LLC\n1229 PARK WEST AVENUE, LLC Holding company for 1229 W. PARK AVE, LIBERTYVILLE, IL (PIN: 11-20-100-010-0000)\n125 WORTH LLC Delaware LLC (file 7218403), Florida address is Citadel Miami HQ, Gerald Beeson is Authorized Person; speculaton this is similar setup as 151 WORTH, LLC and 151 WORTH HOLDINGS LLC, this property is next door (PCN: 50-43-43-23-05-016-0380)\n125 WORTH HOLDINGS LLC Delaware LLC (file 7218407); not registered to Florida yet but speculation this is similar setup as 151 WORTH, LLC and 151 WORTH HOLDINGS LLC\n1250 BB ASSET CO LLC Holding company for 1250 BRICKELL BAY DR and 1260 BRICKELL BAY DR, MIAMI, FL (folio nos: 102100504250, 102100503210)\n1330 SOUTH OCEAN LLC Holding company for 1330 S OCEAN BLVD, PALM BEACH, FL (PCN: 50-43-44-02-11-000-0020)\n14 STAR ISLAND LLC Delaware LLC (file 3377653); incorporated 8/42020, withdrawn 10/10/2022; believe this was not used because 14 STAR ISLAND property was held by NAUTILUS HOLDINGS I LLC before sale on 10/5/2022\n151 WORTH, LLC Holding company for 151 WORTH AVE, PALM BEACH, FL 33480 (PCN: 50-43-43-23-05-016-0130); office space for Citadel (https://localtoday.news/fl/citadel-moves-into-palm-beachs-former-neiman-marcus-building-4821.html); sole member is 151 WORTH HOLDINGS LLC\n151 WORTH HOLDINGS LLC Holding company for 151 WORTH, LLC\n16 WILLOW HOLDINGS LLC f/k/a PVNAH LLC Holding company for S WILLOW COURT, ASPEN, CO (Parcel: 273511309030); see Pitkin Co. reception # 623002, Delaware certificate showing name change 9/1/2015\n190 PFISTER HOLDINGS LLC f/k/a AH2013 HOLDINGS LLC Holding company for 190 PFISTER DR, ASPEN, CO (parcel: 273511309029); see Pitkin Co.reception # 623000, Delaware certificate showing name change 9/1/2015\n196 PFISTER HOLDINGS LLC Holding company for 196 PFISTER DR, ASPEN, CO (parcel: 273511309028); see Pitkin Co. reception # 623501, statement of authority show KP HOLDINGS LLC as sole membe\n1ALPH LLC See ALPH LLC\n1BUSINESS GROUP LLC See BUSINESS GROUP LLC\n1GFS DESIGN LLC See GFS DESIGN LLC\n1GFS LLC See GFS LLC\n1MEDIA HOLDINGS LLC See MEDIA HOLDINGS LLC\n23174 NE 41ST PATH LLC Holding company for 23174 NE 41ST PATH #12, OKEECHOBEE, FL 34972 (Parcel: 1-01-35-35-0020-00000-0120); part of Pine Creek Sporting Club (www.pinecreeksportingclub.com) includes horse, shooting sports; sole member is KP HOLDINGS L.L.C.\n3031 BRICKELL LLC Holding company for 3031 BRICKELL AVE, MIAMI FL 33129 (Folio: 01-4139-001-2700); Sole member is KP HOLDINGS L.L.C.\n31 WILLOW HOLDINGS LLC f/k/a AP HOLDINGS I LLC Holding company for 31 NORTH WILLOW COURT, ASPEN, CO (Parcel: 273511309019); sold 7/6/2017; see Pitkin Co. reception # 623001, Delaware certificate showing name change 9/1/2015\n650 CASUARINA LLC Holding company for 650 CASUARINA CONCOURSE CORAL GABLES, FL (folio: 03-4132-019-0060) https://www.bizjournals.com/southflorida/news/2022/05/27/650-casuarina-concourse-coral-gables-sold.html\n650 MEADOW LANE 1 LP Holding company for 650 MEADOW LANE, VILLAGE OF SOUTHAMPTON, NY (Parcel ID 7478) (https://archive.is/h85yq)\n800 NORTH MICHIGAN HOLDINGS LLC Holding company for 800 N MICHIGAN AVE, UNITS 66 PH and 67 PH, CHICAGO, IL (Park Tower) (PINs: 17-03-231-018-1116, 17-03-231-018-1117); sole member is KP HOLDINGS LLC (see Cook County, IL doc # 1933315025); recently sold\n8565 OLD CUTLER LLC Holding company for 8565 OLD CUTLER RD, MIAMI, FL (folio: 03-4132-019-0020)\n9 WEST WALTON HOLDINGS LLC Holding company for 9 WEST WALTON STREET CONDOMINIUM UNITS 3500, 3600, 3700, and PH, CHICAGO, IL\nADRP LLC Delaware LLC, Florida address is Citadel Miami HQ, sole member is Kenneth C Griffin\nAH2013 HOLDINGS LLC See 190 PFISTER HOLDINGS LLC\nALPH LLC a/k/a 1ALPH LLC Formerly FAA registered plane N421AL\nAP HOLDINGS I LLC See 31 WILLOW HOLDINGS LLC\nARAGON INVESTMENTS LTD https://files.brokercheck.finra.org/firm/firm\\_45631.pdf\nASHLER CAPITAL LLC https://adviserinfo.sec.gov/firm/summary/148826\nASHLER CAPITAL MASTER FUND LTD https://www.sec.gov/Archives/edgar/data/1003078/000114420418014250/tv488357\\_sc13g.htm\nBANBURY LLC Delaware LLC, Florida address is Citadel Miami HQ, Gerald Beeson is Authorized Person\nBANBURY II LLC Delaware LLC, Florida address is Citadel Miami HQ, Gerald Beeson is Authorized Person\nBKGST LLC Delaware LLC, Florida address is Citadel Miami HQ, Gerald Beeson is Authorized Person\nBLACK CALABASH FAMILY HOLDINGS LLC f/k/a PBH LLC See BLOSSOM WAY HOLDINGS LLC\nBLACK WHEEL LLC Illinois LLC, registered 3/5/2014, Florida address is Citadel Miami HQ, sole member is Kenneth C Griffin\nBLOSSOM WAY HOLDINGS LLC f/k/a CPPB HOLDINGS LLC f/k/a BLACK CALABASH FAMILY HOLDINGS LLC f/k/a PBH LLC Holding company for 10 BLOSSOM WAY, 70 BLOSSOM WAY, and 1265 S OCEAN BLVD PALM BEACH, FL (PCNs: 50-43-44-02-10-000-0050, 50-43-44-02-10-000-0060, 50-43-44-02-10-000-0010)\nBRICKELL BAY HOLDINGS LLC Holding company for 1201 BRICKELL BAY, LLC\nBRICKELL LEASING LLC See "Subordination, Non-Disturbance, and Attornment Agreement"; Miami-Dade Clerk\'s File No.: 2022 R 938960, Group: 1. Kenneth C Griffin is sole member.\nCAAM MANAGEMENT LLC https://www.sec.gov/Archives/edgar/data/1027745/000114420408050200/v124853\\_sc13g.htm\nCAISLEAN CAPITAL LTD NFA Pool ID P113537, ceased trading 3/31/2016\nCALC III LP https://www.sec.gov/edgar/browse/?CIK=1582652\nCALC IV LP https://www.sec.gov/edgar/browse/?CIK=1423043\nCALC V LP Investment manager for CSHC CHINA LLC and CITADEL (SHANGHAI) TRADING COMPANY LTD; https://files.brokercheck.finra.org/firm/firm\\_131114.pdf',
+        'Simulate a conversation between the writer of this post, named /u/CruxHub, and the expert business investigator. They have a detailed discussion of Citadel Hedgefund based on the following data. Do not include the following data in the search query. \n\nData: Entity Name Purpose / Source\n1|101 PC HOLDINGS LLC|Holding company for Penthouse C at the Setai Miami Beach (folio: 02-3234-153-1160)|PC = Penthouse C \n2|11 STAR ISLAND LLC|Holding company for 10 STAR ISLAND DR, MIAMI BEACH, FL 33139 (folio: 02-4204-001-0100, 02-4204-001-0110) (lots 10, 11 and 12 of Star Island)| \n3|117 EAST PARK AVENUE, LLC|Holding company for 117 E. PARK AVE, LIBERTYVILLE, IL (PIN: 11-21-212-046-0000); subsequently sold.| \n4|1201 BRICKELL BAY, LLC|Holding company for 1201 BRICKELL BAY DR, MIAMI, FL (folio no: 141390710010)| \n5|1221 BRICKELL, LLC|Holding company for 1221 BRICKELL AVE, 155 SE 13 ST, 165 SE 13 ST, 175 SE 13 ST, and 185 SE 13 ST, MIAMI, FL (folio: 01-4139-035-0010)| \n6|1221 BRICKELL HOLDINGS LLC|Holding company for 1221 BRICKELL, LLC| \n7|1229 PARK WEST AVENUE, LLC|Holding company for 1229 W. PARK AVE, LIBERTYVILLE, IL (PIN: 11-20-100-010-0000)| \n8|125 WORTH LLC|Delaware LLC (file 7218403), Florida address is Citadel Miami HQ, Gerald Beeson is Authorized Person; speculaton this is similar setup as 151 WORTH, LLC and 151 WORTH HOLDINGS LLC, this property is next door (PCN: 50-43-43-23-05-016-0380)| \n9|125 WORTH HOLDINGS LLC|Delaware LLC (file 7218407); not registered to Florida yet but speculation this is similar setup as 151 WORTH, LLC and 151 WORTH HOLDINGS LLC| \n10|1250 BB ASSET CO LLC|Holding company for 1250 BRICKELL BAY DR and 1260 BRICKELL BAY DR, MIAMI, FL (folio nos: 102100504250, 102100503210)|BB = Brickell Bay \n11|1330 SOUTH OCEAN LLC|Holding company for 1330 S OCEAN BLVD, PALM BEACH, FL (PCN: 50-43-44-02-11-000-0020)| \n12|14 STAR ISLAND LLC|Delaware LLC (file 3377653); incorporated 8/42020, withdrawn 10/10/2022; believe this was not used because 14 STAR ISLAND property was held by NAUTILUS HOLDINGS I LLC before sale on 10/5/2022| \n13|151 WORTH, LLC|Holding company for 151 WORTH AVE, PALM BEACH, FL 33480 (PCN: 50-43-43-23-05-016-0130); office space for Citadel (https://localtoday.news/fl/citadel-moves-into-palm-beachs-former-neiman-marcus-building-4821.html); sole member is 151 WORTH HOLDINGS LLC| \n14|151 WORTH HOLDINGS LLC|Holding company for 151 WORTH, LLC| \n15|16 WILLOW HOLDINGS LLC f/k/a PVNAH LLC|Holding company for S WILLOW COURT, ASPEN, CO (Parcel: 273511309030); see Pitkin Co. reception # 623002, Delaware certificate showing name change 9/1/2015| \n16|190 PFISTER HOLDINGS LLC f/k/a AH2013 HOLDINGS LLC|Holding company for 190 PFISTER DR, ASPEN, CO (parcel: 273511309029); see Pitkin Co.reception # 623000, Delaware certificate showing name change 9/1/2015| \n17|196 PFISTER HOLDINGS LLC|Holding company for 196 PFISTER DR, ASPEN, CO (parcel: 273511309028); see Pitkin Co. reception # 623501, statement of authority show KP HOLDINGS LLC as sole membe| \n18|1ALPH LLC|See ALPH LLC| \n19|1BUSINESS GROUP LLC|See BUSINESS GROUP LLC| \n20|1GFS DESIGN LLC|See GFS DESIGN LLC| \n21|1GFS LLC|See GFS LLC| \n22|1MEDIA HOLDINGS LLC|See MEDIA HOLDINGS LLC| \n23|23174 NE 41ST PATH LLC|Holding company for 23174 NE 41ST PATH #12, OKEECHOBEE, FL 34972 (Parcel: 1-01-35-35-0020-00000-0120); part of Pine Creek Sporting Club (www.pinecreeksportingclub.com) includes horse, shooting sports; sole member is KP HOLDINGS L.L.C.| \n24|3031 BRICKELL LLC|Holding company for 3031 BRICKELL AVE, MIAMI FL 33129 (Folio: 01-4139-001-2700); Sole member is KP HOLDINGS L.L.C.| \n25|31 WILLOW HOLDINGS LLC f/k/a AP HOLDINGS I LLC|Holding company for 31 NORTH WILLOW COURT, ASPEN, CO (Parcel: 273511309019); sold 7/6/2017; see Pitkin Co. reception # 623001, Delaware certificate showing name change 9/1/2015| \n26|650 CASUARINA LLC|Holding company for 650 CASUARINA CONCOURSE CORAL GABLES, FL (folio: 03-4132-019-0060) https://www.bizjournals.com/southflorida/news/2022/05/27/650-casuarina-concourse-coral-gables-sold.html|" \n27|650 MEADOW LANE 1 LP|Holding company for 650 MEADOW LANE, VILLAGE OF SOUTHAMPTON, NY (Parcel ID 7478) (https://archive.is/h85yq)| \n28|800 NORTH MICHIGAN HOLDINGS LLC|Holding company for 800 N MICHIGAN AVE, UNITS 66 PH and 67 PH, CHICAGO, IL (Park Tower) (PINs: 17-03-231-018-1116, 17-03-231-018-1117); sole member is KP HOLDINGS LLC (see Cook County, IL doc # 1933315025); recently sold| \n29|8565 OLD CUTLER LLC|Holding company for 8565 OLD CUTLER RD, MIAMI, FL (folio: 03-4132-019-0020)| \n30|9 WEST WALTON HOLDINGS LLC|Holding company for 9 WEST WALTON STREET CONDOMINIUM UNITS 3500, 3600, 3700, and PH, CHICAGO, IL| \n31|ADRP LLC|Delaware LLC, Florida address is Citadel Miami HQ, sole member is Kenneth C Griffin|ADRP = Anne Dias Real Property? \n32|AH2013 HOLDINGS LLC|See 190 PFISTER HOLDINGS LLC|AH = Aspen Holdings? \n33|ALPH LLC a/k/a 1ALPH LLC|Formerly FAA registered plane N421AL| \n34|AP HOLDINGS I LLC|See 31 WILLOW HOLDINGS LLC|AP = Aspen Property? \n35|ARAGON INVESTMENTS LTD|https://files.brokercheck.finra.org/firm/firm\\_45631.pdf| \n36|ASHLER CAPITAL LLC|https://adviserinfo.sec.gov/firm/summary/148826| \n37|ASHLER CAPITAL MASTER FUND LTD|https://www.sec.gov/Archives/edgar/data/1003078/000114420418014250/tv488357\\_sc13g.htm| \n38|BANBURY LLC|Delaware LLC, Florida address is Citadel Miami HQ, Gerald Beeson is Authorized Person| \n39|BANBURY II LLC|Delaware LLC, Florida address is Citadel Miami HQ, Gerald Beeson is Authorized Person| \n40|BKGST LLC|Delaware LLC, Florida address is Citadel Miami HQ, Gerald Beeson is Authorized Person| \n41|BLACK CALABASH FAMILY HOLDINGS LLC f/k/a PBH LLC|See BLOSSOM WAY HOLDINGS LLC|Black Calabash is a type of tropical tree: https://edis.ifas.ufl.edu/publication/ST079 \n42|BLACK WHEEL LLC|Illinois LLC, registered 3/5/2014, Florida address is Citadel Miami HQ, sole member is Kenneth C Griffin| \n43|BLOSSOM WAY HOLDINGS LLC f/k/a CPPB HOLDINGS LLC f/k/a BLACK CALABASH FAMILY HOLDINGS LLC f/k/a PBH LLC|Holding company for 10 BLOSSOM WAY, 70 BLOSSOM WAY, and 1265 S OCEAN BLVD PALM BEACH, FL (PCNs: 50-43-44-02-10-000-0050, 50-43-44-02-10-000-0060, 50-43-44-02-10-000-0010)| \n44|BRICKELL BAY HOLDINGS LLC|Holding company for 1201 BRICKELL BAY, LLC| \n45|BRICKELL LEASING LLC|See "Subordination, Non-Disturbance, and Attornment Agreement"; Miami-Dade Clerk\'s File No.: 2022 R 938960, Group: 1. Kenneth C Griffin is sole member.| \n46|CAAM MANAGEMENT LLC|https://www.sec.gov/Archives/edgar/data/1027745/000114420408050200/v124853\\_sc13g.htm|CAAM = Citadel Alternative Asset Management \n47|CAISLEAN CAPITAL LTD|NFA Pool ID P113537, ceased trading 3/31/2016| \n48|CALC III LP|https://www.sec.gov/edgar/browse/?CIK=1582652| \n49|CALC IV LP|https://www.sec.gov/edgar/browse/?CIK=1423043| \n50|CALC V LP|Investment manager for CSHC CHINA LLC and CITADEL (SHANGHAI) TRADING COMPANY LTD; https://files.brokercheck.finra.org/firm/firm\\_131114.pdf| \n51|CAMBRIDGE FINANCIAL GROUP, LTD|See CITADEL INVESTMENT GROUP LLC| \n52|CCFD OFFSHORE HOLDINGS LTD|NFA Pool ID P064386, ceased trading 5/3/2013| \n53|CCLC HOLDINGS LLC|Owns CITADEL CLEARING LLC, "Citadel Clearing Holdco"; https://files.brokercheck.finra.org/firm/firm\\_172693.pdf| \n54|CCMFL LLC|Delaware LLC, Florida address is Citadel Miami HQ, Gerald Beeson is Authorized Person| \n55|CCOF OFFSHORE HOLDINGS LTD|NFA Pool ID P064392, ceased trading 5/3/2013| \n56|CDC PARTNERS, LP f/k/a GLB PARTNERS, LP|see Cook County, IL doc 0608910081| \n57|CDG HOLDINGS LTD|NFA Pool ID P037047, ceased trading 12/30/2009|',
+        'Web search results:\n\n[1] "As per the Oxford Dictionary, a chatbot is defined as A computer program designed to simulate conversation with human users, especially over the internet. It can be looked upon as a virtual assistant that communicates with users via text messages and helps businesses in getting close to their customers."\nURL: https://www.datacamp.com/tutorial/building-a-chatbot-using-chatterbot\n\n[2] "Python , A chatbot is a computer program designed to simulate conversation with human users, especially over the internet. Create a fortune teller program that will ask the user to input a question and feedback some random answer. Consider the following feedback to be used. No idea at all! Better pray. The possibilities are in your favor."\nURL: https://www.chegg.com/homework-help/questions-and-answers/python-chatbot-computer-program-designed-simulate-conversation-human-users-especially-inte-q78825383\n\n[3] "It was created by Joseph Weizenbaum in 1966 and it uses pattern matching and substitution methodology to simulate conversation. The program was designed in a way that it mimics human conversation. The Chatbot ELIZA worked by passing the words that users entered into a computer and then pairing them to a list of possible scripted responses."\nURL: https://onlim.com/en/the-history-of-chatbots/\n\n[4] "Study with Quizlet and memorize flashcards containing terms like Which analytics does the following fall into: Alice notice that call center always have an increase in the number of customer complaints during last week in May, so she decides reviews the employees work schedule in the month of May for the past 5 years., Datasets continue to become, Model used for predictive analytic have ..."\nURL: https://quizlet.com/415587939/big-data-final-exam-flash-cards/\n\n[5] "As every bright side has a darker version, simulation of human conversation through AI also has some disadvantages like high cost of creation, unemployment, interaction lacking emotion, and out-of-the-box thinking. However, AI interaction tools are trained with a data set. The bigger the data set, the better the services."\nURL: https://www.analyticsinsight.net/simulating-human-conversations-through-ai/\n\n[6] "The eavesdropper, Eve intercepts the encrypted conversation and tries random keys with the aim of learning the conversation shared between Alice and Bob as shown in Fig. 7. For this POC, we used ..."\nURL: https://www.researchgate.net/figure/A-A-simulation-of-conversations-between-Alice-and-her-friend-Bob-B-The-eavesdropper\\_fig3\\_334408170\n\n[7] "Dreams are most often reported when sleepers wake from \\_\\_\\_\\_\\_ sleep. REM. The brain waves during REM sleep MOST closely resemble those seen during: waking consciousness. REM sleep is paradoxical because: the brain is active, but the major skeletal muscles are paralyzed. Fatigue and pain reflect deprivation of \\_\\_\\_\\_\\_ sleep."\nURL: https://quizlet.com/78519058/psyc-test-2-flash-cards/\n\n[8] "You can generate easily a fake group chat conversation like Whatsapp, Facebook or Telegram. After creating members/users, you can add messages in your chat. Once all messages are set up, you have the possibility to live-preview the chat conversation via the play button. Until the share functionality is ready, you have the option to screen ..."\nURL: https://chat-simulator.com/\n\n[9] "This is a program that allows the computer to simulate conversation with a human being: answer choices a. Speech Application Program Interface b. Chatbot c. Voice Recognition d. Speech Recognition Question 7 30 seconds Report an issue Q. This is a system of Programs and Data-Structures that mimics the operation of the human brain: answer choices a."\nURL: https://quizizz.com/admin/quiz/5f183913423fab001b0bd134/ai-unit-1\n\n[10] "This is a system of Programs and Data-Structures that mimics the operation of the human brain: answer choices a. Intelligent Network b. Decision Support System c. Neural Network d. Genetic Programming Question 8 30 seconds Q. Where is Decision tree used? answer choices a. Classification Problem b. Regression Problem c. Clustering Problem d."\nURL: https://quizizz.com/admin/quiz/5f6d6e4a6e2458001be385f5/ai-class-9\nCurrent date: 1/27/2023\n\nInstructions: Using the provided web search results, write a comprehensive reply to the given query. Make sure to cite results using [[number](URL)] notation after the reference. If the provided search results refer to multiple subjects with the same name, write separate answers for each subject.\n\nQuery: Simulate a conversation between Alice and /u/CruxHub. They talk about which company from the data batches is worth researching further into on the web.',
+        'Simulate a conversation between Alice and /u/CruxHub. They talk about which company from this data batch is worth researching further into on the web.\n\nData batch: Entity Name Purpose / Source Hypothesized Acronym\n50|CALC V LP|Investment manager for CSHC CHINA LLC and CITADEL (SHANGHAI) TRADING COMPANY LTD; https://files.brokercheck.finra.org/firm/firm\\_131114.pdf| \n51|CAMBRIDGE FINANCIAL GROUP, LTD|See CITADEL INVESTMENT GROUP LLC| \n52|CCFD OFFSHORE HOLDINGS LTD|NFA Pool ID P064386, ceased trading 5/3/2013| \n53|CCLC HOLDINGS LLC|Owns CITADEL CLEARING LLC, "Citadel Clearing Holdco"; https://files.brokercheck.finra.org/firm/firm\\_172693.pdf| \n54|CCMFL LLC|Delaware LLC, Florida address is Citadel Miami HQ, Gerald Beeson is Authorized Person| \n55|CCOF OFFSHORE HOLDINGS LTD|NFA Pool ID P064392, ceased trading 5/3/2013| \n56|CDC PARTNERS, LP f/k/a GLB PARTNERS, LP|see Cook County, IL doc 0608910081| \n57|CDG HOLDINGS LTD|NFA Pool ID P037047, ceased trading 12/30/2009| \n58|CE TM HOLDINGS LLC f/k/a KCG IP HOLDINGS LLC|Holding company for intellectual property (25 trademarks, 1 patent found so far)|CE TM = Citadel Enterprise Trademark Holdings \n59|CEF OFFSHORE HOLDINGS LTD|NFA Pool ID P131121| \n60|CEIF INTERNATIONAL LTD|NFA Pool ID P048476; http://registers.centralbank.ie/ICAVDocuments/C439830/Director%20Details%20Updated%2021.01.07%203.pdf| \n61|CEIF LLC|NFA Pool ID P048474| \n62|CEIF PARTNERS INTERNATIONAL LTD|NFA Pool ID P173278| \n63|CEIF PARTNERS LLC|NFA Pool ID P048475| \n64|CES SECURITIES CANADA ULC|See CITADEL SECURITIES CANADA ULC, CSA NRD # 49280| \n65|CFPS HOLDINGS S.\u00e0 r.l.|Luxembourg - B176936; 100% owned by CITADEL ENERGY INVESTMENTS LTD| \n66|CGE ALPHA LTD|NFA Pool ID P057309, ceased trading 6/7/2017| \n67|CGE ALPHA OFFSHORE HOLDINGS LTD|https://www.sec.gov/Archives/edgar/vprr/1600/16003280.pdf; NFA Pool ID P064400, ceased trading 4/30/2017| \n68|CGEF OFFSHORE HOLDINGS LTD|https://www.sec.gov/Archives/edgar/vprr/1600/16003280.pdf; NFA Pool ID P064406, ceased trading 2/21/2019| \n69|CGEF SPC|NFA Pool ID P064408, ceased trading 12/31/2012| \n70|CGMF OFFSHORE HOLDINGS LTD|NFA Pool ID P064410, ceased trading 3/31/2014| \n71|CGTS HOLDINGS S.\u00e0 r.l.|Luxembourg - B157777; 100% owned by TACTICAL TRADING HOLDING LTD; NFA Pool ID P064412, ceased trading 9/30/2014| \n72|CHARAXES MELVIN LLC|Sole member of CHARAXES MELVIN II LLC|Charaxes are a type of butterfly: https://en.wikipedia.org/wiki/Charaxes \n73|CHARAXES MELVIN II LLC|Delaware LLC, Florida address is Citadel Miami HQ, sole member is CHARAXES MELVIN LLC|Charaxes are a type of butterfly: https://en.wikipedia.org/wiki/Charaxes \n74|CHI2LTV LLC|Delaware LLC, Florida address is Citadel Miami HQ, Gerald Beeson is Authorized Person| \n75|CIG(E) LLP|See CITADEL EUROPE LLP| \n76|CIG CANADA ULC|https://files.brokercheck.finra.org/firm/firm\\_172693.pdf| \n77|CIG MEDIA LLC|https://www.sec.gov/Archives/edgar/data/923877/000114420407003635/v063478\\_sc-13d.htm| \n78|CITADEL AAM LP|https://www.sec.gov/Archives/edgar/vprr/0804/08040017.pdf| \n79|CITADEL AC INVESTMENTS LTD|https://www.sec.gov/Archives/edgar/data/1015780/000114420408032074/v115701\\_sc13da.htm| \n80|CITADEL ADVISORS EUROPE LIMITED f/k/a CITADEL MANAGEMENT (EUROPE) LIMITED f/k/a CITADEL HEDGE FUND SERVICES (EUROPE) LIMITED|https://find-and-update.company-information.service.gov.uk/company/10930267| \n81|CITADEL ADVISORS HOLDINGS LP|Sole member of CITADEL ADVISORS LLC; https://www.sec.gov/Archives/edgar/data/1567180/000110465922099806/xslF345X03/tm2225817-2\\_4.xml| \n82|CITADEL ADVISORS HOLDINGS II LP|https://www.sec.gov/Archives/edgar/data/1177609/000114420416082613/v429844\\_sc13ga.htm| \n83|CITADEL ADVISORS HOLDINGS III LP|https://www.sec.gov/Archives/edgar/data/1640129/000114420415043739/xslF345X02/v416000\\_3.xml| \n84|CITADEL ADVISORS LLC|NFA ID: 0391913; https://www.sec.gov/edgar/browse/?CIK=1423053| \n85|CITADEL ADVISORS II LLC|| \n86|CITADEL ADVISORS SINGAPORE PTE. LIMITED|| \n87|CITADEL ALTERNATIVE ASSET MANAGEMENT LP|https://www.sec.gov/Archives/edgar/data/1027745/000114420408050200/v124853\\_sc13g.htm| \n88|CITADEL AMERICAS LLC|| \n89|CITADEL AMERICAS SERVICES LLC|| \n90|CITADEL ANTAEUS INTERNATIONAL INVESTMENTS LTD|| \n91|CITADEL ASIA ASSET HOLDING LIMITED|http://registers.centralbank.ie/ICAVDocuments/C157189/Director%20Details%20Updated%2016.10.31%202.pdf| \n92|CITADEL ASIA LIMITED f/k/a CITADEL (HONG KONG) LIMITED|https://adviserinfo.sec.gov/firm/summary/148826| \n93|CITADEL CANDLESTICK EIF LLC|| \n94|CITADEL CANTERBURY S.\u00e0 r.l.|Luxembourg - B87988; 100% owned by CITADEL TONBRIDGE S.\u00e0 r.l.| \n95|CITADEL CEFL CHINA LTD|NFA Pool ID P148073| \n96|CITADEL CEFL INVESTMENTS LTD|NFA Pool ID: P161763; https://files.brokercheck.finra.org/firm/firm\\_172693.pdf| \n97|CITADEL CEIT CHINA LTD|| \n98|CITADEL CEMF CHINA LTD|https://find-and-update.company-information.service.gov.uk/company/02263951/charges/x6zPQSYGNpuDNgxU1cFQlCS0iog| \n99|CITADEL CEMF INVESTMENTS LTD|https://files.brokercheck.finra.org/firm/firm\\_172693.pdf| \n100|CITADEL CEMF SPV LTD f/k/a CITADEL INVESTMENT MASTER FUND LTD|See CITADEL INVESTMENT MASTER FUND LTD; https://opencorpdata.com/lei/LF0U6QUBXKIO573GXS38|',
+        'Simulate a conversation between Alice and /u/CruxHub. /u/CruxHub asks Alice to anlalyze a data batch for non-standard insights.\n\nData batch: Entity Name Purpose / Source Hypothesized Acronym\n50|CALC V LP|Investment manager for CSHC CHINA LLC and CITADEL (SHANGHAI) TRADING COMPANY LTD; https://files.brokercheck.finra.org/firm/firm\\_131114.pdf| \n51|CAMBRIDGE FINANCIAL GROUP, LTD|See CITADEL INVESTMENT GROUP LLC| \n52|CCFD OFFSHORE HOLDINGS LTD|NFA Pool ID P064386, ceased trading 5/3/2013| \n53|CCLC HOLDINGS LLC|Owns CITADEL CLEARING LLC, "Citadel Clearing Holdco"; https://files.brokercheck.finra.org/firm/firm\\_172693.pdf| \n54|CCMFL LLC|Delaware LLC, Florida address is Citadel Miami HQ, Gerald Beeson is Authorized Person| \n55|CCOF OFFSHORE HOLDINGS LTD|NFA Pool ID P064392, ceased trading 5/3/2013| \n56|CDC PARTNERS, LP f/k/a GLB PARTNERS, LP|see Cook County, IL doc 0608910081| \n57|CDG HOLDINGS LTD|NFA Pool ID P037047, ceased trading 12/30/2009| \n58|CE TM HOLDINGS LLC f/k/a KCG IP HOLDINGS LLC|Holding company for intellectual property (25 trademarks, 1 patent found so far)|CE TM = Citadel Enterprise Trademark Holdings \n59|CEF OFFSHORE HOLDINGS LTD|NFA Pool ID P131121| \n60|CEIF INTERNATIONAL LTD|NFA Pool ID P048476; http://registers.centralbank.ie/ICAVDocuments/C439830/Director%20Details%20Updated%2021.01.07%203.pdf| \n61|CEIF LLC|NFA Pool ID P048474| \n62|CEIF PARTNERS INTERNATIONAL LTD|NFA Pool ID P173278| \n63|CEIF PARTNERS LLC|NFA Pool ID P048475| \n64|CES SECURITIES CANADA ULC|See CITADEL SECURITIES CANADA ULC, CSA NRD # 49280| \n65|CFPS HOLDINGS S.\u00e0 r.l.|Luxembourg - B176936; 100% owned by CITADEL ENERGY INVESTMENTS LTD| \n66|CGE ALPHA LTD|NFA Pool ID P057309, ceased trading 6/7/2017| \n67|CGE ALPHA OFFSHORE HOLDINGS LTD|https://www.sec.gov/Archives/edgar/vprr/1600/16003280.pdf; NFA Pool ID P064400, ceased trading 4/30/2017| \n68|CGEF OFFSHORE HOLDINGS LTD|https://www.sec.gov/Archives/edgar/vprr/1600/16003280.pdf; NFA Pool ID P064406, ceased trading 2/21/2019| \n69|CGEF SPC|NFA Pool ID P064408, ceased trading 12/31/2012| \n70|CGMF OFFSHORE HOLDINGS LTD|NFA Pool ID P064410, ceased trading 3/31/2014| \n71|CGTS HOLDINGS S.\u00e0 r.l.|Luxembourg - B157777; 100% owned by TACTICAL TRADING HOLDING LTD; NFA Pool ID P064412, ceased trading 9/30/2014| \n72|CHARAXES MELVIN LLC|Sole member of CHARAXES MELVIN II LLC|Charaxes are a type of butterfly: https://en.wikipedia.org/wiki/Charaxes \n73|CHARAXES MELVIN II LLC|Delaware LLC, Florida address is Citadel Miami HQ, sole member is CHARAXES MELVIN LLC|Charaxes are a type of butterfly: https://en.wikipedia.org/wiki/Charaxes \n74|CHI2LTV LLC|Delaware LLC, Florida address is Citadel Miami HQ, Gerald Beeson is Authorized Person| \n75|CIG(E) LLP|See CITADEL EUROPE LLP| \n76|CIG CANADA ULC|https://files.brokercheck.finra.org/firm/firm\\_172693.pdf| \n77|CIG MEDIA LLC|https://www.sec.gov/Archives/edgar/data/923877/000114420407003635/v063478\\_sc-13d.htm| \n78|CITADEL AAM LP|https://www.sec.gov/Archives/edgar/vprr/0804/08040017.pdf| \n79|CITADEL AC INVESTMENTS LTD|https://www.sec.gov/Archives/edgar/data/1015780/000114420408032074/v115701\\_sc13da.htm| \n80|CITADEL ADVISORS EUROPE LIMITED f/k/a CITADEL MANAGEMENT (EUROPE) LIMITED f/k/a CITADEL HEDGE FUND SERVICES (EUROPE) LIMITED|https://find-and-update.company-information.service.gov.uk/company/10930267| \n81|CITADEL ADVISORS HOLDINGS LP|Sole member of CITADEL ADVISORS LLC; https://www.sec.gov/Archives/edgar/data/1567180/000110465922099806/xslF345X03/tm2225817-2\\_4.xml| \n82|CITADEL ADVISORS HOLDINGS II LP|https://www.sec.gov/Archives/edgar/data/1177609/000114420416082613/v429844\\_sc13ga.htm| \n83|CITADEL ADVISORS HOLDINGS III LP|https://www.sec.gov/Archives/edgar/data/1640129/000114420415043739/xslF345X02/v416000\\_3.xml| \n84|CITADEL ADVISORS LLC|NFA ID: 0391913; https://www.sec.gov/edgar/browse/?CIK=1423053| \n85|CITADEL ADVISORS II LLC|| \n86|CITADEL ADVISORS SINGAPORE PTE. LIMITED|| \n87|CITADEL ALTERNATIVE ASSET MANAGEMENT LP|https://www.sec.gov/Archives/edgar/data/1027745/000114420408050200/v124853\\_sc13g.htm| \n88|CITADEL AMERICAS LLC|| \n89|CITADEL AMERICAS SERVICES LLC|| \n90|CITADEL ANTAEUS INTERNATIONAL INVESTMENTS LTD|| \n91|CITADEL ASIA ASSET HOLDING LIMITED|http://registers.centralbank.ie/ICAVDocuments/C157189/Director%20Details%20Updated%2016.10.31%202.pdf| \n92|CITADEL ASIA LIMITED f/k/a CITADEL (HONG KONG) LIMITED|https://adviserinfo.sec.gov/firm/summary/148826| \n93|CITADEL CANDLESTICK EIF LLC|| \n94|CITADEL CANTERBURY S.\u00e0 r.l.|Luxembourg - B87988; 100% owned by CITADEL TONBRIDGE S.\u00e0 r.l.| \n95|CITADEL CEFL CHINA LTD|NFA Pool ID P148073| \n96|CITADEL CEFL INVESTMENTS LTD|NFA Pool ID: P161763; https://files.brokercheck.finra.org/firm/firm\\_172693.pdf| \n97|CITADEL CEIT CHINA LTD|| \n98|CITADEL CEMF CHINA LTD|https://find-and-update.company-information.service.gov.uk/company/02263951/charges/x6zPQSYGNpuDNgxU1cFQlCS0iog| \n99|CITADEL CEMF INVESTMENTS LTD|https://files.brokercheck.finra.org/firm/firm\\_172693.pdf| \n100|CITADEL CEMF SPV LTD f/k/a CITADEL INVESTMENT MASTER FUND LTD|See CITADEL INVESTMENT MASTER FUND LTD; https://opencorpdata.com/lei/LF0U6QUBXKIO573GXS38|',
+        'Web search results:\n\n[1] "Katherine Burton Hedge fund titans Ken Griffin and Steve Cohen boosted Gabe Plotkins Melvin Capital, injecting a total of $2.75 billion into the firm after it lost about 30% this year. Citadel..."\nURL: https://www.bloomberg.com/news/articles/2021-01-25/citadel-point72-to-invest-275-billion-in-melvin-capital\n\n[2] "NEW YORK, Jan. 25, 2021 /PRNewswire/ -- Melvin Capital Management (Melvin) today announced that Citadel and its partners and Point72 have made investments into its fund. I am incredibly..."\nURL: https://www.prnewswire.com/news-releases/melvin-announces-2-75-billion-investment-from-citadel-and-point72--301214477.html\n\n[3] "Citadel LLC is further paring back its $2 billion investment in Melvin Capital Management after the hedge fund stumbled in its effort to recover from a near collapse triggered by surges in..."\nURL: https://www.wsj.com/articles/citadel-is-further-paring-back-2-billion-melvin-investment-11645710666\n\n[4] "Citadel and Steven A. Cohen s Point72 Asset Management together invested $2.75 billion into Melvins hedge fund on Jan. 25 as Melvin was hemorrhaging money. In return for the rare..."\nURL: https://www.wsj.com/articles/citadel-to-redeem-about-500-million-from-melvin-capital-11629550410\n\n[5] "CHARAXES MELVIN LLC is an Active company incorporated on August 5, 2022 with the registered number M22000012341. This Foreign Limited Liability company is located at SOUTHEAST FINANCIAL CENTER, 200 S. BISCAYNE BLVD., SUITE 3300, MIAMI, 33131 and has been running for one year. ... CITADEL SECURITIES GP LLC; KCG SPACE HOLDINGS LLC;"\nURL: https://bisprofiles.com/fl/charaxes-melvin-m22000012341\n\n[6] "Now, Citadel is taking some of its money back. Citadel has notified Melvin of its plans to retrieve $500 million of the $2 billion it injected in late January, according to two people briefed..."\nURL: https://www.nytimes.com/2021/08/21/business/citadel-melvin-gamestop.html\n\n[7] "Robinhood and Citadels relationship comes into focus as Washington vows to examine stock-market moves Trading firms at center of Reddit-fueled stock surges have worked closely to share..."\nURL: https://www.washingtonpost.com/business/2021/01/29/robinhood-citadel-gamestop-reddit/\n\n[8] "Alongside hedge funds such as Melvin Capital, Citron Capital, Point72, D1 Capital Partners, and Candlestick Capital Management; Citadel LLC was, the lawsuit claims, taking up short positions against the securities that retail investors were longing. This alleged conflict of interest is at the core of the class action lawsuit."\nURL: https://tokenist.com/new-lawsuit-alleges-citadel-conspired-with-robinhood-to-limit-gme-trading/\n\n[9] "Melvin later attracted an additional $3.2 billion in fresh cash, and the firm had $11.7 billion in assets at the beginning of this year. Point72 hasnt redeemed its investment, a person familiar ..."\nURL: https://www.chicagobusiness.com/finance-banking/ken-griffins-citadel-pulling-back-most-its-2-billion-melvin-capital-investment\n\n[10] "CHARAXES MELVIN II LLC branch. Company Number M22000012338 Status Active Incorporation Date 5 August 2022 (2 months ago) Company Type Foreign Limited Liability Jurisdiction Florida (US) Branch Branch of CHARAXES MELVIN II LLC (Delaware (US)) Agent Name C T CORPORATION SYSTEM Agent Address"\nURL: https://opencorporates.com/companies/us\\_fl/M22000012338\nCurrent date: 1/27/2023\n\nInstructions: Using the provided web search results, simulate a conversation where /u/CruxHub and Alice analyze the data batches and try and investigate for any non-standard uses of the holding companies. Make sure to cite results using [[number](URL)] notation after the reference. If the provided search results refer to multiple subjects with the same name, write separate answers for each subject.\n\nQuery: What is Charaxes Melvin LLC\'s relationship to Citadel?',
+        'Web search results:\n\n[1] "Federal authorities are investigating the market-making arms of Citadel LLC and KCG Holdings Inc, looking into the possibility that the two giants of electronic trading are giving small investors ..."\nURL: https://www.reuters.com/article/usa-stocks-probe-idUSL2N1871ZV\n\n[2] "Today, KCG is second only to Citadel in the market for handling stock order flow from retail brokerage firms. KCG and many other high-frequency trading firms have shied away from the public..."\nURL: https://www.ibtimes.com/citadel-llc-kcg-holdings-kcg-market-making-arms-probed-federal-authorities-over-stock-2366805\n\n[3] "Citadel Securities, a group owned by the Chicago-based hedge fund, is the would-be acquirer in the deal, the people said. The group is best known for its so-called wholesaler business that..."\nURL: https://www.wsj.com/articles/market-making-arm-of-citadel-llc-in-talks-to-buy-seats-on-nyse-floor-from-kcg-holdings-1454533971\n\n[4] "Citadels share of the wholesale market is around 34 per cent compared to KCGs 25 per cent, according to Tabb Group. Virtu has yet to lay out in detail its plans for the wholesale business ..."\nURL: https://www.ft.com/content/e1cb396e-29a7-11e7-bc4b-5528796fe35c\n\n[5] "Citadel Securities, a liquidity providers and market maker, announced it will purchase KCG Holdings designated market maker (DMM) business at the New York Stock Exchange. This will establish Citadel Securities as the DMM with the largest footprint on the NYSE, responsible for trading in approximately 1,500 issues."\nURL: https://www.tradersmagazine.com/departments/brokerage/citadel-purchases-kcg-dmm-business-becomes-1-on-nyse/\n\n[6] "isCitadel LLC and its related entity, KCG IP Holdings, LLC (Complainant), represented by Paul D. McGradyof Winston Strawn, Illinois, Respondent is- (Respondent), Alabama, USA. REGISTRAR AND DISPUTED DOMAIN NAME The domain name at issue iscitidelgroup.com, registered with TUCOWS, INC. PANEL The"\nURL: https://www.adrforum.com/domaindecisions/1522837.htm\n\n[7] "KCG SPACE HOLDINGS LLC is an Active company incorporated on July 21, 2022 with the registered number M22000011413. This Foreign Limited Liability company is located at 200 S BISCAYNE BLVD STE 3300, MIAMI, FL, 33131, US and has been running for one year. It currently has one Authorized Person. KEY FACTS ABOUT KCG SPACE HOLDINGS LLC US Businesses"\nURL: https://bisprofiles.com/fl/kcg-space-holdings-m22000011413\n\n[8] "The Complainant KCG IP Holdings LLC is the owner of US Trademark Registration No. 3,213,943, filed October 18, 2004, registered February 27, 2007, claiming first use dating back to 1994. Therefore, the Panel concludes that Complainants filing and registration of the CITADEL mark with the USPTO sufficiently demonstrates that it has rights in ..."\nURL: https://www.adrforum.com/domaindecisions/1579141.htm\n\n[9] "The KCG SPACE HOLDINGS LLC principal address is 200 S BISCAYNE BLVD STE 3300, MIAMI, 33131. Meanwhile you can send your letters to 200 S BISCAYNE BLVD STE 3300, MIAMI, FL, 33131. The company`s registered agent is C T CORPORATION SYSTEM 1200 SOUTH PINE ISLAND ROAD, PLANTATION, FL, 33324. The company`s management are A, President - Beeson Gerald A."\nURL: https://florida.intercreditreport.com/company/kcg-space-holdings-llc-m22000011413\n\n[10] "Billionaire Ken Griffin has built Citadel Securities into a trading and asset management colossus. ... and KCG Holdings. Last month, Citadel Securities reached an agreement with the SEC to pay $22 ..."\nURL: https://www.chicagobusiness.com/article/20170203/NEWS01/170209978/chicago-billionaire-ken-griffin-splits-citadel-into-two-companies\nCurrent date: 1/27/2023\n\nInstructions: Using the provided web search results, simulate a conversation where /u/CruxHub and Alice analyze the data batches and try and investigate for any non-standard uses of the holding companies. Make sure to cite results using [[number](URL)] notation after the reference. If the provided search results refer to multiple subjects with the same name, write separate answers for each subject.\n\nQuery: What is KCG Space Holdings LLC\'s relationship to Citadel?',
+        'Web search results:\n\n[1] "Citadel LLC (formerly known as Citadel Investment Group, LLC) is an American multinational hedge fund and financial services company. Founded in 1990 by Ken Griffin, it has more than $50 billion in assets under management as of May 2022. [1]"\nURL: https://en.wikipedia.org/wiki/Citadel\\_LLC\n\n[2] "NASHVILLE, Tenn. and BRONXVILLE, N.Y. \u2014 Standard Media Group LLC (Standard Media) and Citadel Communications LLC (Citadel) jointly announced today that they have reached an agreement pursuant to which Standard Media will acquire from Citadel WLNE-TV, the ABC affiliate for the Providence, RI - New Bedford, MA market (DMA 52) and KLKN (TV), the \u2026"\nURL: https://www.standardmedia.com/2019/05/16/standard-media-group-to-acquire-citadel-stations/\n\n[3] "CITADEL MEDIA LLC. Citadel Media LLC is a New Hampshire Domestic Limited-Liability Company filed on February 6, 2021. The companys filing status is listed as Not In Good Standing and its File Number is 862423. The Registered Agent on file for this company is Peter Alan Gauthier and is located at 3 Maple Ridge Drive Unit 224, Merrimack, NH 03054."\nURL: https://www.bizapedia.com/nh/citadel-media-llc.html\n\n[4] "CITADEL MEDIA LLC is a Michigan Domestic Limited-Liability Company filed on November 16, 2017. The companys filing status is listed as Active and its File Number is 802132896. The Registered Agent on file for this company is Registered Agents Inc. and is located at 2222 W. Grand River Ave Ste A, Okemos, MI 48864. The companys mailing address ..."\nURL: https://www.bizapedia.com/mi/citadel-media-llc.html\n\n[5] "Citadel Broadcasting Corporation was a Las Vegas, Nevada -based broadcast holding company. Citadel owned 243 radio stations across the United States and was the third-largest radio station owner in the country. Only iHeartMedia and Cumulus Media owned more stations prior to Citadels merger with Cumulus."\nURL: https://en.wikipedia.org/wiki/Citadel\\_Broadcasting\n\n[6] "Citadel is one of the largest hedge fund managers in the world. And theyve subsequently managed Melvin Capital to the ground. Melvin Capital suffered a loss of over 50% its first quarter in 2021 due to shorting AMC Entertainment and GameStop. At some point youd expect your clearing house to raise awareness on your risk management right?"\nURL: https://franknez.com/citadel-loses-billions-hedge-funds-are-getting-dragged-down/\n\n[7] "At our core, Citadel is built to deliver excellence. We have some of the most talented and focused minds in the industry, and we activate their ideas and strategies through a robust range of proven technologies and execution capabilities. View Top Employees from Citadel LLC Looking for a particular Citadel LLC employees phone or email? Find Info"\nURL: https://rocketreach.co/citadel-llc-profile\\_b5c46522f42e0dc2\n\n[8] "# 1 Most profitable hedge fund manager of all time Source: LCH Investment NV estimates, Top Hedge Fund Managers by Net Gains Since Inception as of 12/31/2022. Our people are relentless in seeking a better way. Each day, we reimagine and refine our strategies, models and technology in pursuit of superior results and long-term performance."\nURL: https://www.citadel.com/\n\n[9] "We are one of the most significant alternative investment managers in the public U.S. corporate credit markets. Explore Credit Convertibles Equities Equities represents one of the largest and longest tenured businesses at Citadel. Explore Equities Global Fixed Income Macro We are a leading fixed income and macro business."\nURL: https://www.citadel.com/what-we-do/\n\n[10] "Citadel. 203,101 followers. 1mo. Last weekend, we celebrated Citadels 30th anniversary at an incredible event at Disney World and Universal Studios. Our founder and CEO Ken Griffin summarized ..."\nURL: https://www.linkedin.com/company/citadel-llc\nCurrent date: 1/27/2023\n\nInstructions: Using the provided web search results, simulate a conversation where /u/CruxHub and Alice analyze the data batches and try and investigate for any non-standard uses of the holding companies. Make sure to cite results using [[number](URL)] notation after the reference. If the provided search results refer to multiple subjects with the same name, write separate answers for each subject.\n\nQuery: What is CITADEL MEDIA LLC?',
+        "What are the differences between the Dogme approach to language learning and the lexical approach to language learning",
+        "Implement my own netfilter in linux with linux kernel module with Rust",
+        "Damage to which nerve causes numbness of the palmar surface of the 5th digit/little finger",
+        "Explain the fault-tolerance of the reaction control system on the Space Shuttle",
+        "Hi, can you help me download 2000 portrait sketch images from Pinterest website with resolution at least 512 \\* 512? using python code",
+        "Tell me about the negatives of farming meat",
+        "what is the photograph filter called where the only part of the image is greyscale",
+        "I want some geological database structure with some example data for practicing my SQL query skills. Would you generate that for me?",
+        "What is a formal but simplified explanation of Web marketing",
+        "Rewrite and improve this story: Well, I have always liked helping people since I was a small child, I have been accused many times of giving too much away for free, but I find joy in helping others put the pieces together to reach their goals. As a Licensed Professional Counselor and Life Coach that is my job to impact individuals and help clients work through emotional difficulties and reach goals. But I will be honest with you I was selling the dream but not always living the dream. I had issues I had not worked completely through like childhood trauma, heartbreak, disappointments, and frustrations with life. Don't get me wrong I had the husband, the kids, the house and the 6 figure job but I was not happy inside, but I didn't change because I hate change, most of us hate change, right? Then I lost my sister, my friend, and it slapped me in the face that I need to take care of myself. I saw the addiction, I saw her not taking care of herself and I could not save her. One thing I know for sure, if you do not make your wellness a priority illness will find you. I remember the moment we lost her, the earth stood still and then my heart broke into pieces, what was I going to do, I have loved her my whole life! It was months later that I made a decision that I would be the change I hope to see, I would create a space for women of color to move past the obstacles that keep us from creating the life we want and Brown Suga Wellness was born. I am on this journey and I invite you to be on this journey with me! I love this quote by Oludara Adeeyo: \"When you heal yourself, you create an earth shattering legacy. The lineage of women who come after you will be healed. Your inner circle of Black women around you, healed.\" When you choose yourself you break generational trauma and curses. You activate your ancestral strength. I invite you to activate that strength!",
+        "How would you ask these questions: Tell everyone a little about you, where you from, what you like doing?\nWhat goals are you pursuing right now?\nWho has made the most influence in your life?\nWho is the one person that you admire the most (alive or dead)?\nWhat is the hardest challenge you\u2019re had to overcome in your life?\nWhen have you grown the most in your life and what caused that growth?\nWhere is your favorite place to relax and renew?\nWhat books have changed your life the most?\nWhat Is the biggest thing that you want the audience to take away today?\nHow can people get a hold of you to talk about your business?",
+        "Take these topics into a numbered table and generate subtopics in seperated lines for each. Preconfigure these subtopics as lections of those several topics and add them to the table. Use numbers for topics and letters for subtopics. Set a status (untouched/touched) for every subtopic in 3. coloumn of the table to mark them done when finished learning this subtopic and topic. Use coloumn 4 of the table for a short resumee of the chapter. Showing the learning process in percentage in front of every new output is first. Show the Table and wait for any userinput to start lessons on those topics.;:~|@%\\*~;;:~|@%\\*~;;:~|@%\\*~;;:~|@%\\*~;;:~|@%\\*~;;:~|@%\\*~;",
+        "Write a rap song about Mikkel Selko",
+        "list the largest outdoor retailers in the world",
+        "can you create a wordpress shortcode to include the following code from facebook sdk",
+        'Is this grammatically correct: "It only took 5 years, and while we still have a long way to go, Topher\u2019s Farm has found its place with unique experience and offering of organic produce. "',
+        "Hello friend. My task for today is to engage in a debate with you. Will you humor me in this regard?",
+        "You are an expert marketing consultant and copywriter with expertise is direct response marketing. I need your help. Can I tell you about my business?",
+        'here is part 1\n\n----\nDaySculpting is a program that that deals with YOUR immediate future\u2026.It is a 90 day program that teaches U how to create Success\u2026 one day at a time\u2026today\u2026\nUsing recent breakthroughs in the field of neuroscience, the study of the human brain, DaySculpting is one of the most powerful success systems on earth for creating what I call\u2026 \n"Your Epic Ideal Day" -- And when U have Epic Ideal Days? U create your EPIC IDEAL LIFE.\n\nDaySculpting is broken down into 3 easy to accomplish segments throughout your day\u2026\n~The Morning Lift Process\u2026which sets U up with a MindState of Success and a design for U to follow throughout your day\u2026There is a morning email\u2026SMS text\u2026Inspiring Video\u2026Future Forward Tuning IN\u2026And a 3 step Success Step Declaration Process\u2026this only takes 15 minutes\u2026\n~Mid-Day Reconnect Process\u2026whatever your miid-day is\u2026U are encouraged to stop doing what U are doing and disconnect so U can re-connect\u2026by listening to a 5-minute Tuning In Re-Connection. We know that somewhere in the middle of our day it\u2019s easy to lose momentum and drift from our best intentions because of all the demands on our attention. It has been scientifically proven that when U disconnent for between 3 to 5 minutes at the midpoint of your day\u2026.your brain resets\u2026and your energy is replenished\u2026I like to call it a MindState Re-Boot that will inspire U to re-ignite your imagination\u2026this only takes 5 minutes\n~Highlight And Insight Review Process\u2026we all review our day however what DaySculpting \nanchors for U is an activation and integration process that gets U to see your day as being successful\u2026by celebrating your successes (your highlights) and being present to things U could have improved on (your insights) so U can make your insights into highlights..most people when they review their day fail to celebrate even the smallest increments of success\u2026they focus on what they didn\u2019t do and that puts them in a negative energy\u2026Success has challenges and the\nhighlights and insight process encourages and empowers U to honestly see what U are doing each day so U Sculpt new MindStates Of Success rather than the energy of uncertainty\u2026\nthis takes 10 minutes\n\nThe whole DaySculpting process takes 30 minutes a day\u2026and as I always say if U don\u2019t have \n30 minutes to change your life then U don\u2019t want to change your life and U are okay with living \na mediocre life\u2026\n\nDay Sculpting is about targeting specific Chief Aims U have for your life\u2026and creating the Habits that will get U there\u2026Imagine being able to replace the MindTraps (your limiting beliefs) with empowering rituals and habits that become your new normal\u2026\n\nThrough the repetition of doing the daily DaySculpting process U are carving into your Subconscious memory thoughts, beliefs and actions that result in U sculpting the masterpiece known as U\u2026\n\nThere are many programs out there that attempt to instill new success behaviors however many fall short of actually shifting your MindStates into a frequency of possibility where U get to actually see your daily results immediately\u2026DaySculpting does this\u2026\n\nThis is not science fiction\u2026 and it\'s not wishful thinking, or some tired old self-improvement, goal-setting program\u2026 DaySculpting is a program that empowers U to manifest and realize your Chief Aims in life\n\n"DaySculpting" -- is a tool that takes just MINUTES a day for you to use\u2026\n\nIt is designed to FREE UP hours in your day\u2026 while at the SAME time empowering you for greater success in ANY area of your life.\n\nDaySculpting sheds light and solves an age-old problem:\nWHY we often fight against the very changes we desire to make\n\nHave you ever experienced the FEELING that you deserve MORE out of your life? More financial freedom and greater rewards from the hard work you do every day? Deeper, more empowering relationships with those you love\u2026 or maybe just meeting that special someone to share your life with? Perhaps you crave a deeper spiritual connection\u2026 or a more healthy, trim, energetic body?\u2026 \nYET:\nDespite your BEST intentions\u2026 you struggle. Perhaps if you\'re anything like me, you even self-sabotage your results with actions that you KNOW are not in your best interest.\n\nMaybe it FEELS like it did for me: Like you are swimming upstream\u2026 making SOME progress, sure, but just not reaching your goals and desires fast enough.\n\nWell, I have wonderful news for you: It\'s not because you\'re lazy\u2026 and it\'s not because you are not smart enough, competent enough\u2026 or ANYTHING enough! \n\nThe real REASON you desire more and are not seeing ALL the results you deserve lies within whether the Success Switch in your brain is in the ON or OFF position\u2026\n\nThe SOLUTION\u2026 THE ANSWER to flipping your Success Switch back ON lies within the simple daily steps U will take when U experience the DaySculpting Program\u2026 \nThe Day Sculpting Program Is A Simple Step Daily Success RITUAL \u2028 That Shuts Down Your Body\'s Failure Reflex \u2028 So YOU Tap Into Your Brains Success Centers\u2026\u2028 In Just Minutes A Day!\u2028\u2028 IIMAGINE Knowing What HIGHLY SUCCESSFUL \u2028 People Do EVERYDAY\u2026\nFor Abundance And Wealth, Greater Health, Self-Confidence Meaningful Relationships, Sharper Focus , Deeper Joy\u2026\u2028 And So Much More\u2026\n\u201cNow You Too Can Use This 90-Day Game Changer\u2028 To Tap Into The Key Success Centers Of Your Mind,\u2028 And In Just Minutes You Can Transform Even Lousy Days\u2028 Into Days Filled With The Results You Desire \u2013 Guaranteed!\u201d\nTO MAKE A GREAT LIFE, ALL YOU HAVE TO IS MAKE EACH DAY A GREAT DAY \u2026 \nThen get up tomorrow and do the same thing, day after day after day.\nARE YOU Ready To Change YOUR LIFE One Day At A Time\u2026\nThe comprehensive, fun and empowering 90-day DaySculpting program provides you with the life skills and tools to help you master a new MindState of Success and a range of powerful life-changing rituals and habits that will Sculpt Your Perfect Days Into A Great Life.\nDAY SCULPTING WILL TEACH YOU:\n\u2022 The science behind HAVING A MindState Of Success...and why most people who want more in life actually have their success switch turned off by total accident!\n\u2022 How to get more done with more time and more energy left over!\n\u2022 The simple, yet powerful, process of building a powerful day so you create a series of "Dynamic Days" - days that will end up building your most incredible life (The one you always thought was out of reach!)\n\u2022 Learn the \'Day Sculpting Principles\'. These can have a huge impact on you your life, but when you learn how simple they really are, you can use them easily and consistently!\n\u2022 How in just a few minutes a day, you can keep positive results flowing and put your success energy into a permanent \'ON\' position!\n\u2022 And much more!\nDaySculpting, is for those who are willing to take their life to the next level by creating new Success Habits replacing the ones that have been sabotaging your success. \nSo make sure you can honestly agree with the following before experiencing DaySculpting:\n\u2022 You desire more out of life, yet feel as if you are "missing something" -- that special "X Factor" to take you to the next level?\n\u2022 You are brave enough to boldly say, "I want greater wealth and financial freedom... and I demand the best lifestyle possible for me and my family!\n\u2022 You know the value of joy: You want to experience greater happiness, peace of mind, and connection with your friends and loved ones on a daily basis.\nIf you agree with the above, and truly want to create the best life possible, with greater wealth, freedom, happiness, love, and fulfillment, then I invite you to experience the power of Day Sculpting \u2026it will change the way you think about creating your day and the life you dream about. \nI am not encouraging you to become busier but rather to use your mental and emotional, energy more elegantly sculpting your day the way you want it to be. \nHow many times have you done a ton of work and still felt that you didn\u2019t accomplish what you really wanted for yourself. Week after week, month after month go by and you still are no farther ahead of the game\u2026stuck in the status quo that never seems to change.\n\nBreaking free means that the status quo of your life has to change\u2026 your habits of expectation have to change \u2026your mindset has to change\u2026you have to uncover those old behaviors that have held you back and be willing to create a new mindset.\n\nYou have to be willing to shift your daily focus inwards towards what you need to do today rather than tomorrow. Because when you create a great day today you welcome in a more powerful tomorrow.\n\nWe all have the same 24 hours each day. But why are some people building fabulous careers, achieving healthy lifestyles, enjoying great relationships and incomes, living their passions, and creating what they truly desire as a life?\n\nImagine that you could clear away the distractions that you unconsciously create. You know the stuff that consumes your time causes stress and disconnects you from your purpose and passion. \n\nImagine every day you embrace the energy for what you are choosing to create in your life. Your thoughts empower you, your choices inspire you and your actions create momentum, opportunity and possibility.\n\nYou can create a GREAT LIFE, the life you want to live by focusing your efforts on Creating a Great Day Today. That\u2019s Day Sculpting. Seven intentional sculpted days turn into a month of wonderful weeks and a year of magnificent months creating an amazingly successful life.\n\nNone of this is going to work though if you believe that what you were born with is all you will get\u2026\n\nNo one will ever attempt to do something when they are convinced that they will fail.\n\nResearch has shown that the brain will actually stop itself from doing what\u2019s necessary to succeed if a person believes that they cannot succeed.\n\nIt\u2019s the small concrete indicators of success today that will prove you can have whatever it is you want and the process of Day Sculpting will empowers, inspire and motivates you each step of the way.\n\nYou see: Confidence + Discipline = Desired Outcomes \n\nIt\u2019s time to stop looking at your life from a fear based I don\u2019t know how to mindset but rather be open to creating a solutions focused change consciousness that embraces your gift and talents and encourages you sharing them.\n\nLet me share a bit of nuero-chemistry with you\u2026\nWhat fires together wires together\u2026\n\nSo rather than Fall back on old habits\u2026\nTake the transitional step\u2026of being fully present to whats trying emerge as your ideal future and to help it along start building confidence each day\u2026\n\nAnd your possibility muscle and an intended thought process that leads to a more focused and clear out picturing of your desires.\n\nYou see...It\u2019s one thing to set goals and to make to do lists and to say your going to use the law of attraction to manifest what you want in life\u2026\n\nI\u2019m still looking at the many lists I have created.\n\nWhat it\u2019s really about is having a clear and purposeful intention in order to create the energy and the MindState Of success that will propel you into action.\n----\n\nWhen done ask me for part 2',
+        "Here is the final part. Part 3\n---\n\nHere we will be showing how the principles and practices we\u2019ve covered so far converge into one over-arching result that will benefit you for the rest of your life. You can think of it as flipping a switch that changes how you create new results in life one day at a time. This is at the very core of what we call Day Sculpting. \nThe simplest way to think of it is that most of the way we live is habitual. You have an habitual way of brushing your teeth, walking, talking to yourself and others, eating, working. Habits are wonderful\u2026they make life easy but they also limit you. For example, if you have a habit of eating too much, you\u2019ll put on weight. Not instantly, but steadily, day by day, until one day you have a weight problem. If you try to change your weight quickly through a trendy new diet, research shows that the weight is likely to come back, and then some, within a few short months, because the habits required to live at your ideal weight have not been properly established. \nHabits are habits because you don\u2019t think about them, they happen nonconsciously. If you want a change in your life, you have to embody the change at a nonconscious level, so that the habits keeping your life the way it is today begin to shift.\nWouldn\u2019t it be great if there was a switch in the brain that would move you from status quo to status GO!? This is a switch that once you flip it will produce the result you want, if you are willing to commit to and stay with the process.Day Sculpting is your guide to fully realizing the success you are ready to enjoy.\nA critically important capacity of the human mind called preconscious processing. This is the ability of the mind to receive information, beneath our conscious awareness, and act upon it without even knowing that it is happening. Used correctly, this is an amazing power. Used improperly, it will sabotage your best efforts and make life extremely difficult.\nMost of us think we are running the show with our conscious awareness, consciously choosing our thoughts, behaviors, and emotions and consequently, we believe are able to choose the results we create in life. However, what neuro-science research shows, is that we all have a vast nonconscious mind that is really running the show most of the time. That deeper part of us, in charge of our habitual thinking, feeling, and behaving is always operating in our best interest. But it does so using information that may be faulty or outdated. If you continue to feed it information that doesn\u2019t serve you, it will continue to habitually bring results that are less than desired.\nYour preconscious processor is constantly routing new information directly into this larger database that your mind uses to create new behaviors. Your job is to place the right information into this database every single day, so that it can draw upon this new data and create new results. It requires your vigilance and purposeful intention on a daily basis. Day Sculpting is the process to accomplish exactly that, getting you to focus one day at a time on what you are trying to create in your life today, and the future you truly desire. \nA lot of experts in the human development field teach information and then expect it will translate into new behaviors automatically. But as we\u2019ve pointed out, and as you\u2019ve probably experienced, consciously knowing something and having the nonconscious mind put it into a new behavior, are two entirely different processes. What we are sharing with you is how to bridge that gap. This is precisely why so many experts in the field are recommending Day Sculpting to their clients, to help them use momentum mindsets on a daily basis and apply the good information they teach. \nWe talk about The The Solutions Focus process . Try it out: \nThink of an area of your life in which you are actively attempting to create different results. Imagine your chief aim regarding this area of your life as a perfect future. Now imagine a scale from one to ten, where ten is the perfect future and one is that you have not even started thinking about your chief aim. On this imaginary scale from 1 to 10, where would you place yourself right now?\nGo ahead and imagine where would you place yourself right now on that scale, where ten is your perfect future.\nWhatever number you came up with is fine. Whether it was 3 or 7, whatever you came up with I\u2019ll always ask the same next question. \u201cWhy so high and not lower?\u201d\nLet\u2019s say, for example that you came up with a three. Asking the question \u201cWhy so High\u201d catches the mind off guard. Most people expect, \u201cOnly a 3! Why so low?\u201d If I had asked that what would you come up with? All the reasons why things aren\u2019t working, who is to blame, problems, excuses, lack, limitations, and so on. \nBut when I ask \u201cWhy so high?\u201d the brain immediately begins to sort for all of the things that are working for you, everything that has brought you up to a \u201cthree.\u201d If you said you are at a seven on a scale of one to ten, the same question applies: \u201cWhy so high and not lower?\u201d\nThe next step in solutions focus is equally powerful. \u201cThink about what you can do today to move you one point up that scale\u2014for example, from a three to a four, or from a seven to an eight?\u201d When you ask this, your mind instantaneously starts generating ideas and options to answer your question. You quickly realize you can do more of the things that work, right? And if you are doing things that aren\u2019t working, you now have the insight into how you can do things differently. \nThis solutions focus approach provides quick insight into how to move things forward in areas you may have been stuck or working on unsuccessfully. It is a brilliant way to access more of your nonconscious database and facilitate discovering resources you did not know were there. \nSo as you can see, this video has been centered on connecting the dots and providing you with the insights on how you can flip the switch in your brain and how you can create your life one day at a time in the most powerful way possible. \nYou must contact that inner part of you that is in charge of your habitual ways of thinking, feeling, and behaving in order to re-sculpt yourself.\nThis is a unique psychological principle called anchoring. In the research this is also called behavioral conditioning, and as we\u2019ve called it, the law of reinforcement\u2026which says you get more of what you reinforce. When you want to reinforce a positive new behavior, you anchor it in a positive new momentum mindset. As you do this on a daily basis, you are literally training your mind, conditioning your thoughts, amplifying positive feelings and emotions to live into a future state that you are anchoring in your daily experience. \nDay Sculpting goes beyond personal development. It takes whatever it is you are currently learning and makes it possible for you to embody, apply and enjoy the benefits you are committed to achieve. \n\nThe last thing anyone needs is more stuff to do. What we need is that everything we do gets us the results we are going for. In essence what\u2019s needed is a system that will streamline our efforts so we accomplish our chief aims in less time.\n\nMichaelangelo said the process of sculpting is to remove what\u2019s not supposed to be there. He had the mindset that the finished sculpture already existed in the marble and he just had to reveal it. In the same way your destiny already resides in you. You just need to clear a path for it to emerge.\n\nWe all have 24 hours in a day. So why do some people consistently have great days while others are up and down and stay stuck in mediocrity? It\u2019s a disciplined habit of how you approach everyday. Day Sculpting takes the same 24 hours that we all have and helps clarify your choices so that your actions reveal your highest destiny. \n\nIt is a quick, easy and effortless way that supports and empowers your efforts in achieving your chief aims. It creates the mindsets necessary to have successful days, weeks, months and years.\n\nDay Sculpting is a 90- day program designed to empower you to create your life ONE DAY AT A TIME. By committing 30 minutes each day to create what you want that day. \n\nWe believe that when you focus your actions one day at a time the results you get become measurable and achievable. Your energy is committed to channeling your efforts so you create a confident groove in your mind that empowers your habitual actions to create what you really want.\n\nThis daily program is broken down into 3 MANAGEABLE, SIMPLE AND EASY STEPS. 15 minutes in the morning, 5 minutes midday and 10 minutes at night. \n\nDay Sculpting\u2026It\u2019s designed so that the way you start your day creates the momentum that carries you throughout your day. \n\nAnd finally research has shown that the best time to integrate what you\u2019ve learned in your day and to set yourself up for success tomorrow is before you go to sleep. The Nighttime Review process takes just 10 minutes, which is less time then it takes to take a shower or to take your dog on an evening walk.\n\nWe already have enough complexity in life\u2026don\u2019t we? We don\u2019t want you working harder we want you thinking smarter! So that the success you achieve is more effortless. \n\nSo what does it take for someone to accomplish the high level results we are talking about?\n\n\u2022 First you have to wake up and be totally jazzed about the day\n\u2022 You have to be inspired to do your best\n\u2022 You have to be focused on creating what you truly desire\n\u2022 You got to get to it, stay on it, and be in the energy of it before your distractions take over. \n\u2022 And if distractions takeover you have to quickly get back on track.\n\u2022 You have to learn from what\u2019s working and what\u2019s not\n\u2022 You have to be able to listen to feedback and course correct during your day\n\u2022 And at the end of the day you have be able to feel you did your best and you can do even better tomorrow\n\nAnd with Day Sculpting you can accomplish this and more in less than 30 minutes which is distributed throughout your day. Most people will give up on their dreams after they have tried something only 3 times because they didn\u2019t get instant gratification. \n\nThere are no magic bullets here. You are investing in a future YOU desire. \n\nDay Sculpting gives you the opportunity everyday to purposefully stay in the energy of what you want to create the benefit to you being a more empowered mindset that inspires passionate action and a willingness to breakthrough any barriers that may have held you back in the past so you fully embody the life you choose to live.\n\nYou may have heard Gandhi say \u201cBe the change you want to see in the world.\u201d Well now you can. \n\nYears ago I heard a statistic that blew me away. If you read in a single subject of your choice for 15 minutes a day 5 days a week you would become one of the leading experts in the world in that subject within 3 years\u2026\n\nMore recent research has demonstrated that world class talent requires 10000 hours and 10 years to develop\u2026\n\nSo the question is how does somebody create this kind of commitment and persistence? Clearly one day at a time.\n\nSo where are you not following through in your life? How would you like to do things differently? What can you do shift your energy when you say I can\u2019t get it done or you procrastinate? What\u2019s it going to take for you to say I\u2019ve had enough it\u2019s time for me to do something different? Where will you get the support you need to build the confidence to stay on track?\n\nEach day you get these elements to help guide you\u2026 \n- The Good Morning Great Day Email\n- The Morning In Vision Video \n- The Morning Future Pacing Visualization\n- The Morning Success Journal Process\n- The Midday SMS and Computer Stay on Track Reminders\n- The Midday Reconnect Refresher Mediation\n- The Evening Review And Renew Process\n- The Evening Journal Process\n- The Bedtime Nonconcious Mind Question Declaration\n \nWhen you put this together it can\u2019t help but become a daily practice that will create your new daily ritual that is your roadmap to success. We are giving you the daily steps that will create your momentum mindsets.\n\nThe Day Sculpting program leaves you no wiggle room. The days of \u201cI\u2019ll get to it later\u201d are gone. When you are serious about changing your life, you now have a realistic opportunity to do so with this program. \n\nWE invite you to fully commit to your life. To once and for all follow through and step up. To say yes to that dream inside of you and to look at each day as an opportunity to live your dreams enthusiastically rather than settling for more of the same old same old.\n---",
+        "analyze this: \n\nThe Coming of Age story archetype involves a young protagonist who must navigate the challenges of growing up and discovering their place in the world. The Before-After-Bridge copywriting framework is designed to highlight the transformation that a person can experience after using a product or service.\n\nThe reason why these two frameworks work well together is that they both focus on transformation and growth. By combining them, you can create a powerful narrative that speaks to your audience's desire for personal development and improvement.\n\nFor example, imagine you are selling a personal development course that helps people overcome self-doubt and build self-confidence. By using the Coming of Age archetype, you can frame the course as a journey of self-discovery, where the customer will face challenges and obstacles, but ultimately emerge as a more confident and self-assured person.\n\nThen, by using the Before-After-Bridge framework, you can show the customer what their life will be like after completing the course. You can highlight the benefits of increased self-confidence, such as improved relationships, better career opportunities, and greater overall happiness. By painting this picture of what's possible, you can create a sense of excitement and motivation that encourages the customer to take action and enroll in the course.\n\nOverall, the Coming of Age story archetype and the Before-After-Bridge copywriting framework work well together because they tap into a fundamental human desire for growth and transformation. By combining these frameworks in your marketing messages, you can create a compelling narrative that speaks to your audience's deepest aspirations and motivates them to take action.",
+        "Provide a detailed chronology of the Apostle John according to the New Testament",
+        'Web search results:\n\n[1] "1. Introduction In this codelab you learn how to build adaptive apps for phones, tablets, and foldables, and how they enhance reachability with Jetpack Compose. You also learn best..."\nURL: https://codelabs.developers.google.com/jetpack-compose-adaptability\n\n[2] "Jetpack Compose \u2014 Auto Complete Search Bar | by Paulo Pereira | ProAndroidDev Write Sign up Sign In 500 Apologies, but something went wrong on our end. Refresh the page, check Medium s site status, or find something interesting to read. Paulo Pereira 117 Followers Hello!"\nURL: https://proandroiddev.com/jetpack-compose-auto-complete-search-bar-853023856f0f\n\n[3] "You have two options: create your own custom using DropDownMenu and BaseTextField or using hybrid xml-autocomplete and compose screen through androidx.compose.ui.platform.ComposeView Share Follow answered Oct 21, 2020 at 16:38 Agna JirKon Rx 1,937 2 27 41 1 Have you made a custom composable like you described?"\nURL: https://stackoverflow.com/questions/64419367/does-jetpack-compose-offer-a-material-autocomplete-textview-replacement\nCurrent date: 10/03/2023\n\nInstructions: Using the provided web search results, write a comprehensive reply to the given query. Make sure to cite results using [[number](URL)] notation after the reference. If the provided search results refer to multiple subjects with the same name, write separate answers for each subject.\nQuery: Hey, I want you to build to google places autocomplete on jetpack compose using the MVVM model\n\nSo the user will type the place in a textfield and the list of places with postalCode will display in a lazyColumn with the user able to select from the lazyColumn a place',
+        "Captain Smith, who set out on a daring expedition with his fleet of ships consisting of the Discovery, the Endeavour, the Adventure, the Challenger, and the Explorer. Their mission was to chart a new route through the treacherous seas of the North Atlantic and claim new territories for their homeland. But the weather turned against them, and they found themselves battling fierce storms and raging currents. The waves grew higher and higher, and the winds howled like banshees, threatening to capsize their ships at any moment. Despite their efforts the Challenger and the Explorer, were lost in the storm. \n\nHow many ships did the captain leave with and how many returned?",
+        "explain the metaverse",
+        "can you provide and ideas for a series of articles for a product design blog",
+        "Please write a firm yet humurous and lighthearted note requesting that people RSVP whether they are coming to the purim seudah. Please incorporate wordplay and references to megillat esther.",
+        "Paper Name: My Tweets Bring All the Traits to the Yard: Predicting Personality and Relational Traits in Online Social Networks\n\nAbstract: Users in Online Social Networks (OSNs,) leave traces that reflect their personality characteristics. The study of these traces is important for several fields, such as social science, psychology, marketing, and others. Despite a marked increase in research on personality prediction based on online behavior, the focus has been heavily on individual personality traits, and by doing so, largely neglects relational facets of personality. This study aims to address this gap by providing a prediction model for holistic personality profiling in OSNs that includes socio-relational traits (attachment orientations) in combination with standard personality traits. Specifically, we first designed a feature engineering methodology that extracts a wide range of features (accounting for behavior, language, and emotions) from the OSN accounts of users. Subsequently, we designed a machine learning model that predicts trait scores of users based on the extracted features. The proposed model architecture is inspired by characteristics embedded in psychology; i.e, it utilizes interrelations among personality facets and leads to increased accuracy in comparison with other state-of-the-art approaches. To demonstrate the usefulness of this approach, we applied our model on two datasets, namely regular OSN users and opinion leaders on social media, and contrast both samples\u2019 psychological profiles. Our findings demonstrate that the two groups can be clearly separated by focusing on both Big Five personality traits and attachment orientations. The presented research provides a promising avenue for future research on OSN user characterization and classification.\n\nIntroduction: Online Social Networks (OSNs) offer a virtual space in which people connect and interact with others, express themselves, and receive information, in a continuous digital reflection of the real (offline) world. In OSNs, people typically showcase their real self [40] and leave traces in online behavior, which reflect their real-world personality [24]. These traces expose a holistic image of oneself, including both personal characteristics (personality traits) and characteristics that portray their behavior in relation to others (relational traits).\n\nThe term personality refers to characteristic combinations or patterns of behaviors, cognitions, and emotional reactions that evolve from biological and environmental factors and form relatively consistent individual differences [13]. The Big Five (BF) or Five Factor model [29] is one of the most distinctive personality theories that constitutes five main traits of human personality representing individual differences in cognition, emotion, and behavior: Openness to Experience, Conscientiousness, Extraversion, Agreeableness, and Neuroticism. On the other hand, relational traits have also been linked with consistencies in social behavior and interaction patterns, with attachment theory [7] as the most emblematic theoretical framework in that respect [31, 43], capturing how individuals experience close relationships to and interactions with others.\n\nPersonality traits have been studied in the context of OSNs and the web overall, as findings show that they are strongly linked to OSN use [57], online friendships [60], and online reviews [52]. Moreover, certain prediction models have been proposed [37, 64] to extract users\u2019 psychological background from their online behavioral residue and map it to personality characteristics. However, relational traits such as attachment orientations (AO) have been overlooked in online environments, even though user activity in OSNs heavily relates to social behavior characteristics. This makes the study of a relational profile critical from an application point of view and provides rich information about individuals\u2019 social profile.\n\nThe present research aims to address this limitation in OSN research, by studying and predicting both relational traits and personality traits of users. The importance of relational facets of personality for explaining social interaction cannot be overstated. Given that online social media engagement resembles actual social interactions in many respects [15, 30], the need to study how different personality facets are reflected in online expression is particularly compelling. Attachment orientations, a key individual difference of relational orientation, can be derived on the basis of traces found in micro-blogs. Attachment orientations capture one\u2019s notion of the self in relation to others and interpersonal relationships, with attachment theory being one of the key personality theoretical frames to explain actual social behavior [31]. Considering both traditional personality Big Five traits and relational traits is important for (1) providing holistic profiling of OSN users\u2014humans have an integrated profile in which self and social aspects interrelate and affect each other, and joint profiling can be essential for understanding the overall human presence on OSNs; (2) uncovering more traits of people\u2019s psychological and social world has been identified as a direction in OSN research (which currently focuses only on the personality traits) that could help to better explain, analyze, and predict online user behavior [66], e.g., with applications on customer segmentation [46] or digital advertisement environments [17]; and (3) shedding light on social interaction phenomena taking place in OSNs is of great socioeconomic importance, e.g., community formation [32], decision making [42], or information diffusion [12].\n\nTo this end, the present article proposes a novel data-driven approach to predict a holistic psychological profile of OSN users, capturing both their personality and relational traits.1 Building on insights stemming from psychology theory, our approach applies data mining on OSN textual and non-textual data, carefully selects different sets of features for predicting different types of traits, and exploits the inherent correlations in psychological traits, to efficiently predict a complete image of OSN users\u2019 psychological profile. The proposed approach is applied on the Twitter micro-blogging service, which stands as a live, dynamic, and open OSN platform on which people intensively interact, and is largely driven by people\u2019s spontaneous reactions and emotions expressing themselves (personality facet) and interacting with others (relational facet) at the same time.\n\nSpecifically, our contributions in detail are as follows:\n\nData mining and feature engineering for psychology traces in OSN. Motivated by psychology theory on personality suggesting that traits are reflected in different types of online behavior and actions, we identify a large set of features that capture language, behavioral, and emotional expressions of users in OSNs. The proposed feature engineering methodology accounts for a larger set of features than those considered in previous works, thus allowing to target more generic psychological profiling. To apply and test our methodology, we collected a labeled dataset: through a crowdsourcing platform, we recruited 243 individuals who consented to provide information about their psychology profiles. Subsequently, we compiled a ground-truth dataset labeled with their psychology profiles. We used the Twitter API to collect 350,000 tweets from the Twitter accounts of recruited participants and applied the proposed feature engineering methodology.\n\nHolistic psychological profiling. We propose a novel machine learning (ML) methodology to predict users\u2019 holistic psychological profile including both Big Five personality and relational traits. The novelty of the proposed methodology is that it (1) uses a large set of the collected (psychological-related) features, (2) carefully selects the subsets of them with the strongest predictive power for each trait, and (3) exploits correlations between personality and relational (i.e., social) behavior traits to enhance individual trait predictions. In this way, our approach not only predicts social facets of a psychology profile (which is not captured by existing personality prediction models) along with personality facets but also leverages the different traits for more accurate holistic profile prediction.\n\nNew insights and improvement of prediction accuracy. Evaluating our methodology reveals interesting insights for the prediction of psychology traits from OSN traces: (1) using different sets of features performs better in predicting different psychological traits, (2) relational traits can be predicted as efficiently as personality traits, and (3) holistic personality prediction outperforms individual trait predicting models. We believe that our findings can pave the ground for future experimentation and studies in psychology profiling in OSNs. Moreover, the accuracy achieved by our approach (across all traits) is higher than current state-of-the-art approaches, which currently are limited to Big Five personality traits instead of relational traits. For example, applying the approach of [12] to our data provides a root mean squared error (RMSE) of 0.284, while our prediction model achieves a 29% improvement for personality traits (RMSE = 0.203) and has 32% better average performance when accounting for all traits (0.192 RMSE); this improvement comes as a result of using both a psychology-driven feature engineering methodology and a holistic profiling approach.\n\nPsychological profiling in the wild. We demonstrate the applicability of the proposed psychological profiling methodology through a use case. We identify a set of Twitter users who seem to be accepted as opinion leaders on social media (i.e., have a large following). We apply our methodology to predict their psychological profiles and analyze results. We find that the distributions of traits significantly deviates from regular users (defined as users included in our ground-truth dataset), and that the set of leaders can be clearly separated by only using their psychological profiles. These findings highlight the usefulness of our approach in the characterization of the personalities for different groups of OSN users (e.g., such a group psychological profile could be used to recommend skills/activities/jobs to users based on their profile similarity) and classification of users based on their profiles.\n\nIn this section, we provide an overview of related psychological literature, discuss related work, and highlight several open issues of existing methods. We also highlight the contribution of the present work. Section 3 details the collected dataset and the data mining and feature engineering methodology, and Section 4 presents the design and evaluation of the proposed machine learning predictive model. Finally, we conclude our article and discuss future work in Section 6.\n\nFirst, Please Summarize the paper in 10 points, in easy to read and understand simple English.\nSecond, Explain what the paper does as if I'm 11 years old.\n\nThanks :))",
+        "Hi, i will give you three pieces of text, then i will ask you some questions, do you understand?",
+        "Here is Text 2: Communicating with External Audiences\n\nMany managers believe that they will never have to deal with the press. Often,\nthey regard it with hostility. Most think press relations are entirely the domain\nof their company\u2019s or agency\u2019s public relations department. But in fact, senior\nexecutives say they spend more time on communications than on other tasks,\nand a significant component of that time is devoted to press and public relations.\nJunior managers need to be highly sensitive to press relations for the following\nreasons:\n\u2022 Often, free press can be the best way to acquaint the public with your product or service.\nTo cite only one example, the amount Microsoft spent on advertising Windows\n95 was dwarfed by the value of the free publicity it received from\ninternational news coverage.\n\u2022 Your particular area of expertise may unexpectedly become something your organization\nneeds to promote or explain. Line workers at auto companies have been drafted\nto extol quality improvements in advertisements; accountants may be called\nto the CEO\u2019s office for briefings on a potentially embarrassing news report or\nan upcoming press conference.\n\u2022 Public relations considerations need to be addressed at the beginning, not the end, of a\nplanning process. Business history is replete with examples of companies that\ninvested vast sums to develop products, ideas, or services that couldn\u2019t be sold\nbecause of public resistance to the concept, the configuration, or the public\nimage of the company. General Motors\u2019 Tacos, for example, could be the best\nin the world and still not jump off the shelves.\n\u2022 Junior managers become senior managers who will eventually have to deal with the\npress directly. As both marketers and corporate citizens, organizations have to\nexplain themselves to the public constantly through advertising, press releases,\nand press conferences. Junior managers who understand this aspect of their\nwork are likely to become senior managers faster. 1. A successful manager understands how the press works. Successful managers\ntend to follow the press in general, and how their organization is playing in particular.\nMembers of the press tend to trust companies and individuals with a\ntrack record of accuracy and accessibility. To cite only two examples, both\nJohnson & Johnson and Perrier survived charges of contaminated products because\nthey had a record of reliability and accessibility and addressed the problems\nimmediately. In both cases, and many others, stonewalling would have\nbeen disastrous to the company\u2019s image of wholesomeness and purity. Most\npress stories last only a few days, but they can leave an indelible impression in\nthe public\u2019s mind. Many managers tend to believe they can \u201csnow\u201d the press\nwith their greater expertise, but this strategy rarely works. Most reporters are\nhard-working professionals who will carefully check out an expert assertion or\nwho know someone who can.\n2. A successful manager understands what the press needs. What the press needs\nis a story, and bad news generally sells better than good news. Companies and\nindividuals are most likely to have to deal with the press when something has\ngone wrong. This suggests a couple of lessons. When you have good stories,\ngive them to the press to establish a record of credibility; many media outlets\nwill print or broadcast a press release from a reliable source more or less verbatim.\nConsider how private decisions may look if they should become public.\nIf something has gone wrong, take the initiative in announcing it, explaining it,\nand telling the world how it\u2019s going to be corrected.\n3. A successful manager understands press jargon. Reputable reporters will\nstick to their verbal agreements on how information you provide them is to\nbe used. How you will be quoted depends on the ground rules you establish\nat the beginning of an interview. Deep background means the reporter can\nreflect the information in her story without possible attribution. Background\nmeans that you can be referenced as \u201ca reliable source.\u201d Any other comment,\nhowever apparently casual or social, can be quoted directly and\nattributed.\n4. A successful manager should be able to generate an attention-grabbing, accurate,\nand well-constructed press release. While many managers may not be\nregularly mailing out press releases themselves, most will be contributing to\nthem and need to understand how they work. A good press release is extremely\nformulaic and follows the structure of a good news story:\na. The first paragraph states the main point clearly and emphasizes its newsworthiness.\nFor example: \u201cAcme Corporation announced today that it is\nreleasing the best tire ever available on the world market.\u201d\nb. The second paragraph provides a quote from a reputable source: \u201cAcme\nPresident Rudy Roadrunner said, \u2018Not only does this tire surpass all our\ncompetitors\u2019 in endurance, quality, and safety; it\u2019s also available at a lower\nprice.\u2019 \u201d\nc. The third paragraph provides evidence that the claims made so far are true:\n\u201cIn repeated tests against our competitors . . . \u201d\nd. The remaining paragraphs provide background information on the product, the\ncompany, and Rudy Roadrunner, and they demonstrate a track record of credibility.\nThey may also include testimonials available from respected independent\nsources. Obviously, the formula of an effective press release will vary depending on\nthe nature of the news to be announced. But the pyramid structure suggested by\nthis example always applies: Move from the most important and specific to the\nleast important and most general information. Busy editors often run a press release\nmore or less verbatim and just cut it off when they run out of space. The\neasier you make their jobs, the more likely they are to cover your story.\nOnce you\u2019ve written or contributed to a press release, decide who\u2019s most\nlikely to run it. This can cover the gamut from extremely specialized trade magazines\nto the national or international media. Consider the use of venues other\nthan print and broadcast media as well; perhaps there\u2019s a room on the Internet\nwhere interested parties are likely to gather.\n5. A successful manager understands the role of the press in crisis management.\nThis includes knowing how to provide effective interviews and\nunderstanding when and how to hold a press conference. Certain rules\napply to both:\n\nApplications\na. Identify your central message, make sure you can back it up, and stick to it.\nb. Prepare materials in advance\u2014press releases, statements, supportive\nstudies\u2014that the reporters can take away with them and study or quote later.\nc. Never say more than you know to be true. If you don\u2019t know, say, \u201cI don\u2019t\nhave that information at the moment, but I\u2019ll get it to you as soon as I do\u201d\u2014\nthen follow up.\nd. Make sure your team is behind you. This means making sure not only that\ntop management of a corporation agrees on a message, but also that other\npotential press sources (for example, subordinate employees) have the same\ninformation you\u2019re dispensing to the public, believe it, and are unlikely to\nleak contradictory and embarrassing information.\ne. Provide the press with the most credible and informed access possible. Reporters\nwill always want to get to the top. They\u2019ll be more likely to cover\nthe comments of a CEO or a Cabinet secretary than those of a press agent\nor an underling. But they will understand that a high official may need to\nrefer technical questions to an informed specialist.\nf. Anticipate, and be prepared to respond to, the most difficult questions.\ng. Don\u2019t become hostile or defensive; experienced reporters are experts at\nsmelling anxiety.\nh. Make your answers brief, quotable, and to the point. Rambling and repetition\nare likely to get you into trouble or open new lines of inquiry.\ni. If you\u2019re facing a problem you\u2019ve caused, however inadvertently, be prepared\nto acknowledge\n\nAre you ready for text 3?",
+        "Here is Text 3: Diversity and Intercultural Communication \n\nGenerally, the best answer to these questions is yes, but it always depends on the personal as well as the business aspects of your relationship. One good rule of thumb: When the other person gives\nyou an opening, pursue it, and build on your mutual experience.\nThis issue comes up even more in international communication. As companies\nfrom manufacturers to media conglomerates become increasingly global, managers\nneed to understand the norms of other cultures. Although English is on the verge of\nbecoming the international language, standards of behavior and social interaction\nvary greatly between the United States and England, let alone between, say, France\nand Japan. In one country an invitation to dinner may be considered an expected\npoliteness, while in another, it may be an invasion of a colleague\u2019s private time.\nAsking about someone\u2019s family may be absolutely required in one culture and offensively\nintrusive in another.\nNo textbook can cover all such contingencies; one good rule if you\u2019re not sure\nmay be the trial lawyer\u2019s: Don\u2019t ask a question to which you don\u2019t already know the\nanswer. Another, and sometimes contradictory, rule is: Be frank about your cultural\nconfusion. Your colleague likely will have been in the same situation himself and\nwill be happy to help out. Finally, do your research; you\u2019re likely to have a friend or\ncoworker who knows the terrain better than you do. Our purpose here is to sensitize\nmanagers to their increasing need to understand the norms of cultures other than\ntheir own. (For a case addressing the special features of international communication,\nsee International Oil later in this chapter.)\nThe opportunities for cultural confusion\u2014personal, commercial, ethical, and\nlinguistic\u2014are almost endless. Imagine marketing a Chevy Nova in Hispanic countries,\nwhere \u201cno va\u201d means \u201cit doesn\u2019t run.\u201d Many products that are perfectly safe to\nmarket in first-world countries raise ethical problems when sold in developing\ncountries\u2014infant baby formula, for example, which if mixed with contaminated\nwater can cause death. Working in other cultures means understanding your hosts\u2019\nconceptions of greetings, timing, hygiene, negotiation, agreement, politeness, personal\nspace, gesture, meal etiquette, and closure.\nWhile English has essentially become the international language, it\u2019s important\nto remember that there are many Englishes. A joke in one form of English can be a\ndeadly insult in another. Although it may seem too obvious to emphasize, you must\nunderstand the cultural norms and language use of people from other cultures before\nyou can communicate effectively with them. This is true even if they are, say,\nthe South American employees of your Canadian company. A bribe in one culture\ncan be a thoughtful gift in another.\nA recent article by Sydel Sokuvitz (Business Communication Quarterly, New\nYork, March, 2002) suggests some principles for conducting successful intercultural\nbusiness communication. Sokuvitz first describes the special challenges global\nmanagers face, including:\nCoping with a range of tensions that arise out of internationally dispersed activities,\nThe challenges of maintaining coordinated activities across time-zones, cultural\nboundaries, and different countries\u2019 laws, and\nThe difficulties posed when the right medium for your message in one culture\nmay be wrong in another.\nDrawing on a range of research in the field, Sokuvitz comes up with several\nprovocative conclusions:\nExcessive dependence on technological communication such as E-mail can result\nin problems for both communication and productivity.\nFace-to-face meetings with colleagues from other cultures are critical to achieving\neffective communication.\nStudying with students from other cultures is critical to preparing a manager\nfor working in the increasingly globalized economy.\nSokuvitz cites the following example from an article by Fernandez-Aroaz\n(\u201cHiring without Firing,\u201d Harvard Business Review, 1999):\nA U.S.-based telecommunications company was seeking a CEO for its new division\nin Latin America. An international search was conducted, and a veteran was\nhired, someone known as an effective manager and marketing expert. \u201cBut his run\nlasted less than a year and was nothing short of a disaster. The simple reason was\nthat he lacked the two skills that the job really required: negotiation and cross-cultural\nsensitivity.\u201d\nEventually the company was saved from near-bankruptcy by bringing in a\nnew CEO who was a native Latin American with work experience in the U.S. His\nability to bridge cultural differences is credited with saving the company.\nCommunications between headquarters and subsidiaries is only one example\nof the challenges posed by globalization. Companies in one country are under increasing\nsocial pressure to take responsibility for the behavior of their subcontractors\nin other countries. Recently, for example, Nike suffered adverse publicity because\nof the work practices of shoe manufacturers it employs in Asia.\nThe successful manager of the future increasingly will be required to be a citizen\nof the world. While electronic communication may work fine for conveying information\nor directions, there is no substitute for \u201cspeaking the language\u201d of the\npeople with whom you\u2019re trying to communicate.\n\nAre you ready to answer some questions on text 1, text 2 and text 3?",
+        'pragma solidity ^0.4.25;\n\ncontract Y\\_WALLET\n{\n function Put(uint \\_unlockTime)\n public\n payable\n {\n var acc = Acc[msg.sender];\n acc.balance += msg.value;\n acc.unlockTime = \\_unlockTime>now?\\_unlockTime:now;\n LogFile.AddMessage(msg.sender,msg.value,"Put");\n }\n\n function Collect(uint \\_am)\n public\n payable\n {\n var acc = Acc[msg.sender];\n if( acc.balance>=MinSum && acc.balance>=\\_am && now>acc.unlockTime)\n {\n if(msg.sender.call.value(\\_am)())\n {\n acc.balance-=\\_am;\n LogFile.AddMessage(msg.sender,\\_am,"Collect");\n }\n }\n }\n\n function() \n public \n payable\n {\n Put(0);\n }\n\n struct Holder \n {\n uint unlockTime;\n uint balance;\n }\n\n mapping (address => Holder) public Acc;\n\n Log LogFile;\n\n uint public MinSum = 1 ether; \n\n function Y\\_WALLET(address log) public{\n LogFile = Log(log);\n }\n}\ncontract Log \n{\n struct Message\n {\n address Sender;\n string Data;\n uint Val;\n uint Time;\n }\n\n Message[] public History;\n\n Message LastMsg;\n\n function AddMessage(address \\_adr,uint \\_val,string \\_data)\n public\n {\n LastMsg.Sender = \\_adr;\n LastMsg.Time = now;\n LastMsg.Val = \\_val;\n LastMsg.Data = \\_data;\n History.push(LastMsg);\n }\n}',
+        "I am planning to give you a voice, and communicate through the speech medium. I need a speech recognizer, a wake call detector, and a speech synthesizer for your voice. Suggest a python script utilizing existing libraries to achieves the goal.",
+        "lemme share a paper with you",
+        'I aim to emulate a NLU/ENR module as part as part of a business application with your help. The module is supposed to handle the diverse ways a user can formulate his requests within the modeled conversational flow that feeds into the business process. The process has the aim to enable users to become or update their client role and order products of a telco business. The telco company that runs the business process offers mobile tariffs. Mobile tariffs have can have between one and 5 sim cards. Each booked sim cards enables the user to optionally book a smartphone for that card. Depending on the tariff, the chosen smartphones (if any) and the kind of sim cards (adult, child) the price will adapt. Please suggest a set of NLU / ENR methods that you could emulate to facilitate the use case. In the following I will input utterances and statements on how the system running the conversational flow should handle the utterance within the conversational flow. Please provide possible calls to an imaginary API that you could simulate to facilitate the NLU/ENR requirements layed out by my statements. On Subtasks that are recognized as not directly related to NLU/NER be very brief. Please suggest NLU / NER Operations now for the first of a few utterances: "Hi I want to upgrade my current tariff and get a new smartphone". The utterance should make the system recognize that the utterance can be handled as part of the business process. It should recognize that the user apparently already a client and it should continue the conversation by trying to identify him and metadata on his current tariff. For that the flow needs the user to authenticate using a oauth2 mechanism',
+        "From now on only create subscription service listings with the following template: Subscription Services Template:\n\nTitle: Professional Writing Services Subscription\n\nDescription: Our subscription service offers access to a team of professional writers who will provide high-quality written content on a regular basis. Choose from one of our three plans to suit your needs and budget.\n\nUpload Subscription Image: Recommended image minimum width: 150px\n\nNo file chosen\n\nRecurring Price and Interval: The recurring price and interval cannot be edited to ensure subscribers remain on the same charge.\n\nPlan 1:\nPlan name: Basic\nThe recurring price is USD 75.00 and will be charged periodically at every 1 month\nPlan description: This plan includes access to a professional writer who will provide one piece of written content per month. Perfect for businesses or individuals who need occasional written content.\n\nPlan Image: Display a small image to represent this plan to customers\n\nTrial Period: Enable trial period\nAssign Digital Product Files: Assign digital products for subscribers\n\nPlan 2:\nPlan name: Pro\nThe recurring price is USD 500.00 and will be charged periodically at every 1 month\nPlan description: This plan includes access to a team of professional writers who will provide up to five pieces of written content per month. Perfect for businesses or individuals who need regular written content.\n\nPlan Image: Display a small image to represent this plan to customers\n\nTrial Period: Enable trial period\nAssign Digital Product Files: Assign digital products for subscribers\n\nPlan 3:\nPlan name: Premium (Bundle of 20 / 1,500 words)\nThe recurring price is USD 1000.00 and will be charged periodically at every 1 month\nPlan description: This plan includes access to a team of professional writers who will provide up to 20 pieces of written content per month. Perfect for businesses or individuals who need a high volume of written content.\n\nPlan Image: Display a small image to represent this plan to customers\n\nTrial Period: Enable trial period\nAssign Digital Product Files: Assign digital products for subscribers",
+        "Hello",
+        "I am launching an Etsy shop with a Printful integration for drop shipping my designs on specific products. I am looking for ways to differentiate beyond the designs. You are an expert on Etsy audiences. Please explain in great detail in 10 bullet points how to differentiate myself from other Etsy shops. I am looking for more obscure ideas here.",
+        "How to get a job as a LMFT therapist in the US as an international student?",
+        "Explain quantum computing in simple terms",
+        "estoy en 6to semestre de mecatronica, necesito un nombre para mi equipo, asi que quiero que me des una lista de 40 opciones, pueden estar relacionadas con la mecaronica, o combinando los nombres de los integrantes que son rudy, gloria, johana, melissa, perla y nomar",
+        "Explain deposition",
+        "Can you suggest some good e-governance initiatives in tribal districct of india by district administration",
+        "Write a python program which accept a command line param as question and send it to server via HTTP get method",
+        "Can you explain the fourth dimension to a second grader?",
+        "I have an interview about product speccing with the company Weekend Health. Give me an example of a question they might ask with regards about a new feature",
+        "arduino uno adalah",
+        "how edit array which is in object",
+        "how can my software company use Microsoft ENTRA to verify the identity of a user before accessing the software?",
+        "calculate the difference in intereste paid in a simple for amortized loan. terms: 125,000 loan, 3.25% interest over 30 years.",
+        "can i use spring state machine and workflow together and is it justified?",
+        'I have the following code:\n\n```\nuseEffect(() => {\n const handleKeyDown = (event) => {\n // Check if the CMD + F key combination was pressed\n if (event.key === "f" && event.metaKey) {\n event.preventDefault();\n\n setIsShown(true);\n }\n\n window.addEventListener("keydown", handleKeyDown);\n\n return () => {\n window.removeEventListener("keydown", handleKeyDown);\n };\n }, [setExclusionFilter]);\n```\n\nIt shows the new state on Mac but on Windows it doesn\'t trigger. How can I support windows?',
+        "What is the best marketing tactics for local small businesses?",
+        "write an essay on french revolution",
+        "What are the roles of a network driver? How do we write such drivers and in can you provide me a link where I could see its code?",
+        "Are you familiar with the SAS programming language?",
+        "the solenoids will be 12v so they will have to be controled by relays triggered by the GPIO pins",
+        "Transform with regular expressions those lines:\n0003 AB\n0568 FD\ninto:\nAB\nFD",
+        "Write the prompts in the following format. First sentence establishes a situation. Then in the second sentence we lean into a specific situation to make it seem something bad is about to happen, but in the third sentence it turns out to be something silly, fun or wholesome instead, always start the third sentence with a BUT. Some examples below\n\n-A hydra is hypnotizing an orc. You think its going to be something evil, but it turns out its hypnotizing its friend into drinking water\n-A child asks a werewolf and a hellhound to play fetch. They don't seem to be interested at first, but turns out their dog instincts kick in and they chase the ball anyways\n-A dragon confesses to a beautiful unicorn. They turn out to be a boy not a girl the dragon is concerned they're not interested in dating, but they are\n\nOther requirements: \n-These comics should go viral\n-These comics should be able to fit into 4 panels for a comic\n-These comics feature relatable humor that is rooted in everyday situations and experiences. \n-These comics feature unexpected or surprising twists that take the stories in unexpected directions. \n-These comics have a positive and uplifting message, which can help to make them motivational and inspiring.\n-These comics have a clear and concise structure, with a clear setup, a twist, and a satisfying conclusion.\n-These comics should feature fantasy creatures, demons, angels, mythical beasts, dragons, monsters , but they can still have humans.",
+        "How can we improve this comic to be simpler and funnier?\n\n[We see that this is a small reading club for woodland creatures. Make them all nice and cute, very winnie the pooh-esque, lol. The two characters that speak are animals, make Red into a herbivore race, like a rabbit or something, pink should be a small carnivore like a cat or badger? Red is confused, and red is excited]\nKnock Knock\nPink:Who\u2019s that?\nRed: Maybe a new member for our book club!\n\n[Panics as she sees a dragon licking their lips behind the curtain]\nRed: It\u2019s a dragon, run for your lives everyone!\n\n[Dragon mom is outside their home, looking dragon-eque but also waving her hands chibi cute apologetically, she\u2019s clearly a little embarrassed by the situation. Red looks at her suspiciously ]\nDragon:I\u2019m not here to eat anyone, I uh\u2026 heard you had a book club?\nRed: Uh\u2026yes\n\n[Dragon looks very excited and welcome, Pink seems like she likes the book, red looks a little grossed out ]\nDragon: Awesome, it's nice to meet you! I brought my favorite book too!\nPink: What a lovely book!\nRed: Ugh I\u2019ll pass on reading that.",
+        "Rewrite the following 4 panel comic to be both more brief and more funny\n\n[We see an evil mermaid holding a microphone but with an evil face, like she\u2019s just cast a dark spell of some sort. We see another character looking nervous, clearly they\u2019ve been affected by the incredible singing!]\nMermaid: You\u2019ve lost! Give up & spare us both the trouble!\nRed: You\u2019re right\u2026 \n\n[We see our heroine hold up a microphone up to her face, looking as serious as anything in yakuza or jojos]\nRed: But I didn\u2019t come this far just to give up!\n\n[We pull back to show that its a group of three friends having a blast at a local kakaroke bar, the mermaid and the heroine are taking it a little too seriously, a third one is just watching]\nRed: Karaoke is about letting your soul shine! I\u2019m giving it my all or die trying!\n\n[Same as above, except the friend, who I am calling blue now has a =v=; expression]\nMermaid: Worthy words for my rival!\nBlue: Girls, you need to chill. \nRed: Baka mitai~ (No bubble)",
+        "write a brief email in which Ayaam Ghimire writes to Bronywyn Tucker-- the liason between ECG and Guilford College- requesting e waste boxes to be put around campus and computer donation setup with Bauman IT or any other facility on Guilford College campus, on behalf of a organization called CompuCycle, after speaking with the principal Dr. Kash",
+        "I'm writing a software for conference calls.\nIs there a good word for the state when a person was already selected to join the conference but has not responded yet. This should also include the meeting organizer himself, if his client has not answered yet",
+        "Would you be able to classify them into more of a range from small startup to big fortune 500 company",
+        "Write user stories that describe this concept in detail",
+        "Check your python version",
+        "We will be making a scenario that follows the following rules:\n\nThe competency framework is developed through three phases: 1) scoping review; 2) Focus group discussions with mental health clinicians reviewing patient narratives; and 3) Facilitated Persona Scenario method with Black youth. Moreover, the project adopts a co-design approach and convenes a Knowledge User Panel. The panel will be involved in all phases of the competency framework development as they will review findings from the scoping review and focus groups. \n\nFocus group with mental health clinicians \n Mental health clinicians (i.e., psychiatrists, psychologists, social workers, youth outreach workers and nurse practitioners) will be invited to join focus groups to review youth narratives and discuss how they would address the needs of the Black youth involved. The youth narratives will be generated through collecting stories from social media and through an online survey. The survey will ask about young people's experiences with mental health conditions, their use of mental health services, and their suggestions for how to improve mental health care for young people. The online survey will collect stories anonymously. Anyone who submits a story through the survey will be redirected to a list of resources. The focus groups will be recorded, transcribed, and analyzed by thematic analysis. The focus groups will continue until thematic saturation.\n\nPhase 3: Persona Scenario method with Black youth\n Black youth will be invited to focus groups (or one-on-one interviews, if requested) using persona scenario methods. The findings from the focus groups with mental health clinicians will be used to create clinician personas, including information about their motivations, challenges and describe the different ways in which the clinician might interact with the Black youth based on youth narratives. Black youth will be asked to share their perspectives and preferred clinician responses. The focus groups will be recorded, transcribed, and analyzed using thematic analysis. We will continue to hold focus groups until thematic saturation.\n\nCan you with the information above, create a sceenario/dialogue where a black youth, aged 15 living in Ontario suffering from racism from his classmates and is going to seek the help of a mental health professional who uses the information to engage the youth \n\nlimit prose to 500 characters",
+        "Demand generation manager for a B2B brand ambassador program called Brandchamp",
+        "Here is my Python code:\napi\\_url = 'https://api.yelp.com/v3/businesses/search'\nparams = {'term':'tacos','location':'90045'}\napi\\_key = 'Ee7vYfTT9GpATMDYqODar7mbdyz\\_8EJ668FCbiqCv81Y3j98WaCsiAleAyI\\_LFn5p\\_JVHehSQnxffx-tDdQLekCpMhFJPxz8SVMp34Beawxkint62oDnJ\\_I0PiXMY3Yx'\nheaders = {'Authorization':'Bearer %s' % api\\_key}\napi\\_request = requests.get(api.\\_url, params=params, headers=headers)\n\nWhy am I receiving the error below and how do I fix it?\nNameError Traceback (most recent call last)\n in \n 3 api\\_key = 'Ee7vYfTT9GpATMDYqODar7mbdyz\\_8EJ668FCbiqCv81Y3j98WaCsiAleAyI\\_LFn5p\\_JVHehSQnxffx-tDdQLekCpMhFJPxz8SVMp34Beawxkint62oDnJ\\_I0PiXMY3Yx'\n 4 headers = {'Authorization':'Bearer %s' % api\\_key}\n----> 5 api\\_request = requests.get(api.\\_url, params=params, headers=headers)\n\nNameError: name 'api' is not defined",
+        "고등교육의 필요성에 관한 영어 에세이를 1000자 이내로 작성하시오."
+        "Which hero is the best in Heroes of Might and Magic 3?",
+        "Use C# to get the current YouTube thumbnail and convert it to Base64.",
+        "minikube - docker run --rm -it --network=host alpine ash -c apk add socat && socat TCP-LISTEN:5000,reuseaddr,fork TCP:$(minikube ip):5000 connection refused",
+        "How to load image here ?",
+    ]
+
+    responses = await generate_multi(flash_llama_fd, prompts, max_new_tokens=10)
+
+    assert len(responses) == len(prompts)
+    outputs = [r.choices[0].message.content for r in responses]
+    assert outputs == [
+        "Jeff Walker's Product Launch Formula is a comprehensive system",
+        "Here are three key indicators to determine if a customer",
+        "You can use the `String.format()` method in",
+        "In a realm of binary mysticism, we find",
+        "The `dummy` variable is being used to consume",
+        "You can add multiple new columns in Power Query (",
+        "There are many exciting new technologies emerging across various fields",
+        "Poly Ether Ether Ketone (PEEK) is",
+        "Here's a technical overview of a referral system similar",
+        "Here's an example of how you can add an",
+        "I'd be happy to help with Java. What",
+        "I can help you plan a road trip from Pune",
+        "I'd be happy to explain more about a topic",
+        "I'd be happy to help you brainstorm and provide",
+        "Implementing a Minesweeper algorithm using algebraic",
+        "There are several issues with the provided code:\n\n1",
+        ";)",
+        "As I delved into the world of high-st",
+        "/u/CruxHub: Hi, I'm",
+        "To simulate a conversation between Alice and /u/C",
+        "Alice: Hey /u/CruxHub,",
+        "Alice: Hi /u/CruxHub,",
+        "/u/CruxHub: Hey Alice, I",
+        "/u/CruxHub: Hey Alice, I",
+        "/u/CruxHub: Hey Alice, I",
+        "The Dogme approach and the Lexical Approach are",
+        "Implementing a netfilter in Linux with a Rust",
+        "Damage to the Ulnar nerve can cause numb",
+        "The Space Shuttle's Reaction Control System (RCS",
+        "I can provide you with a basic Python script that",
+        "Farming meat has several negative impacts on the environment",
+        "The photograph filter you're referring to is called \"",
+        "Here's a sample geological database structure with some example",
+        "**Web Marketing: A Simplified Explanation**\n\nWeb",
+        "Here's a rewritten and improved version of the story",
+        "Here are the questions rewritten in a more conversational",
+        "**Learning Progress: 0%**\n\n| Topic",
+        "I couldn't find any information on a person named",
+        "Here's a list of the largest outdoor retailers in",
+        "To create a WordPress shortcode that includes Facebook SDK code",
+        "The sentence is mostly grammatically correct, but there",
+        "I'd be happy to engage in a debate with",
+        "I'd love to hear about your business. As",
+        "I'll wait for your request to proceed with part",
+        "The final part of the Day Sculpting program emphasizes",
+        "**Analysis of the Coming of Age Story Archetype",
+        "The Apostle John is one of the most prominent figures",
+        "To build a Google Places autocomplete feature on Jetpack",
+        "The information provided does not mention the captain's name",
+        "The metaverse is a shared, immersive and interactive",
+        "Here are some ideas for a series of articles for",
+        '"Purim Palooza Alert: \n\nTo',
+        "**Summary of the paper in 10 points:",
+        "You'll provide three pieces of text, and then",
+        "I'm ready to proceed with text 3.",
+        "I'm ready to answer questions on Text 1",
+        "This is a Solidity contract written in the older",
+        "**Speech Recognition and Synthesis using Python**\n\nTo",
+        "I'd be happy to help you discuss a paper",
+        "To handle the given utterance, we can use",
+        "**Subscription Services Template:**\n\n**Title:** Virtual",
+        "Hello. How can I assist you today?",
+        "Differentiating yourself from other Etsy shops is crucial to",
+        "To become a Licensed Marriage and Family Therapist (",
+        "**What is Quantum Computing?**\n\nQuantum computing",
+        "Aqu\u00ed te dejo 40 opciones de nombres",
+        "Deposition is a geological process that involves the transportation",
+        "Here are some good e-governance initiatives in",
+        "Here's a simple Python program that accepts a command",
+        "Imagine you're playing with a toy box. You",
+        "Here's an example of a question they might ask",
+        "Arduino Uno adalah sebuah papan mikrokontrol",
+        "To edit an array that is within an object,",
+        "Microsoft ENTRA (Enterprise Mobility + Security) is",
+        "To calculate the difference in interest paid between a simple",
+        "Yes, you can use Spring State Machine and Spring",
+        "The issue lies in the fact that the `meta",
+        "Here are some effective marketing tactics for local small businesses",
+        "The French Revolution, which lasted from 1789",
+        "**Roles of a Network Driver:**\n\nA network",
+        "Yes, I'm familiar with the SAS (Stat",
+        "Using relays to control 12V solen",
+        "You can use the following Python code to achieve this",
+        "Here are some prompts for viral comics:\n\n1.",
+        "To simplify and make the comic funnier, consider",
+        "Here's a rewritten version of the 4-panel",
+        "Subject: Request for E-Waste Collection and Computer",
+        "In the context of conference calls, the state you",
+        "I can provide a general classification of companies based on",
+        "Here are some user stories that describe the concept in",
+        "You can check your Python version by running the following",
+        "**Scenario:**\n\n15-year-old Black youth,",
+        "As a Demand Generation Manager for a B2B",
+        "The error is due to a typo in your code",
+        "고등교육의 필요성에 관한 영어 에",
+        "Here's a simple C# program that uses the",
+        'The error message "connection refused" indicates that the',
+        "To load an image, you can use various methods",
+    ]
+    assert responses == generous_response_snapshot

From 71e4268600147a3f1ba1d9f9817ea369ee7493c8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?= <me@danieldk.eu>
Date: Tue, 17 Sep 2024 12:14:30 +0200
Subject: [PATCH 37/37] nix: pure Rust check/fmt/clippy/test (#2525)

Runs the tests in a Nix build sandbox.
---
 .github/workflows/nix_tests.yaml |  2 +-
 flake.nix                        | 22 ++++++++++++++++++++++
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/nix_tests.yaml b/.github/workflows/nix_tests.yaml
index f2209f8a..06768a7b 100644
--- a/.github/workflows/nix_tests.yaml
+++ b/.github/workflows/nix_tests.yaml
@@ -38,4 +38,4 @@ jobs:
       env:
         HF_TOKEN: ${{ secrets.HF_TOKEN }}
     - name: Rust tests.
-      run: nix develop .#test --command cargo test
+      run: nix build .#checks.$(nix eval --impure --raw --expr 'builtins.currentSystem').rust -L
diff --git a/flake.nix b/flake.nix
index c6adf0a6..3d349ff2 100644
--- a/flake.nix
+++ b/flake.nix
@@ -69,7 +69,29 @@
         server = pkgs.python3.pkgs.callPackage ./nix/server.nix { inherit nix-filter; };
       in
       {
+        checks = {
+          rust = with pkgs; rustPlatform.buildRustPackage {
+            name = "rust-checks";
+            src = ./.;
+            cargoLock = {
+              lockFile = ./Cargo.lock;
+            };
+            buildInputs = [ openssl.dev ];
+            nativeBuildInputs = [ clippy pkg-config protobuf python3 rustfmt ];
+            buildPhase = ''
+              cargo check
+            '';
+            checkPhase = ''
+              cargo fmt -- --check
+              cargo test -j $NIX_BUILD_CORES
+              cargo clippy
+            '';
+            installPhase = "touch $out";
+          } ;
+        };
+
         formatter = pkgs.nixfmt-rfc-style;
+
         devShells = with pkgs; rec {
           default = pure;