From 82f87ada6f08114ae198abb0829d087f311cf5bc Mon Sep 17 00:00:00 2001
From: Jacob Keisling <jacob@keisling.me>
Date: Tue, 23 Jan 2024 08:55:05 -0600
Subject: [PATCH 01/31] Disable `decoder_input_details` on OpenAI-compatible
 chat streaming, pass temp and top-p from API (#1470)

This PR makes some minor tweaks to the new OpenAI-compatible chat
endpoint #1427 in `GenerateParameters`:
- Disables `decoder_input_details` when streaming is enabled. Previously,
every streaming chat request failed, because
[`decoder_input_details == true` is not supported when streaming
tokens](https://github.com/huggingface/text-generation-inference/blob/98e5faff9daec6170cc2b0f963f2d73cf846b341/router/src/validation.rs#L406).
- Passes through `temperature` and `top_p` hyperparameters from the API
request to `GenerateParameters`

## Testing

```bash
curl localhost:8080/v1/chat/completions \
    -X POST \
    -d '{
  "model": "",
  "messages": [
    {
      "role": "system",
      "content": "You are a helpful assistant."
    },
    {
      "role": "user",
      "content": "What is deep learning?"
    }
  ],
  "stream": true,
  "max_tokens": 20
}' \
    -H 'Content-Type: application/json'
```

The request above should now succeed. Currently, the most recent release
from `main` returns an error:
```
data:{"error":"Input validation error: `decoder_input_details` == true is not supported when streaming tokens","error_type":"validation"}
```

It's my first time contributing to this project, so I could be missing
something. Would especially appreciate @drbh's eyes on this one
---
 router/src/lib.rs    | 12 ++++++++++++
 router/src/server.rs |  6 +++---
 2 files changed, 15 insertions(+), 3 deletions(-)
diff --git a/router/src/lib.rs b/router/src/lib.rs
index 983079d6..894ab466 100644
--- a/router/src/lib.rs
+++ b/router/src/lib.rs
@@ -365,6 +365,18 @@ pub(crate) struct ChatRequest {
 
     #[schema(nullable = true, example = 42)]
     pub seed: Option<u64>,
+
+    /// What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while
+    /// lower values like 0.2 will make it more focused and deterministic.
+    ///
+    /// We generally recommend altering this or `top_p` but not both.
+    #[serde(default)]
+    pub temperature: Option<f32>,
+
+    /// An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the
+    /// tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.
+    #[serde(default)]
+    pub top_p: Option<f32>,
 }
 
 #[derive(Clone, Serialize, Deserialize)]
diff --git a/router/src/server.rs b/router/src/server.rs
index cf1d94a6..aa1ad202 100644
--- a/router/src/server.rs
+++ b/router/src/server.rs
@@ -592,10 +592,10 @@ async fn chat_completions(
         inputs: inputs.to_string(),
         parameters: GenerateParameters {
             best_of: None,
-            temperature: None,
+            temperature: req.temperature,
             repetition_penalty,
             top_k: None,
-            top_p: None,
+            top_p: req.top_p,
             typical_p: None,
             do_sample: true,
             max_new_tokens,
@@ -604,7 +604,7 @@ async fn chat_completions(
             truncate: None,
             watermark: false,
             details: true,
-            decoder_input_details: true,
+            decoder_input_details: !stream,
             seed,
             top_n_tokens: None,
         },

From 7e542d4d05513575f9eb950f961c5b4c574c9c29 Mon Sep 17 00:00:00 2001
From: Nicolas Patry <patry.nicolas@protonmail.com>
Date: Wed, 24 Jan 2024 13:08:41 +0100
Subject: [PATCH 02/31] Fixing non divisible embeddings. (#1476)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

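Use a ceiling-divided block size when sharding embeddings (in
`utils/layers.py` and `utils/weights.py`) so that a vocabulary size that is
not divisible by the world size is fully covered; the last rank may then
hold a smaller shard, and the null index follows the actual shard size.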


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
---
 .../test_idefics/test_idefics.json            |  55 +++--
 .../test_idefics/test_idefics_load.json       | 214 +++++++++---------
 server/tests/utils/test_layers.py             |  64 ++++++
 server/text_generation_server/utils/layers.py |   4 +-
 .../text_generation_server/utils/weights.py   |   2 +-
 5 files changed, 199 insertions(+), 140 deletions(-)
 create mode 100644 server/tests/utils/test_layers.py

diff --git a/integration-tests/models/__snapshots__/test_idefics/test_idefics.json b/integration-tests/models/__snapshots__/test_idefics/test_idefics.json
index 2c5d05f6..90fb6dcc 100644
--- a/integration-tests/models/__snapshots__/test_idefics/test_idefics.json
+++ b/integration-tests/models/__snapshots__/test_idefics/test_idefics.json
@@ -11,92 +11,92 @@
       },
       {
         "id": 4911,
-        "logprob": -5.7851562,
+        "logprob": -6.9765625,
         "text": "User"
       },
       {
         "id": 29901,
-        "logprob": -0.006996155,
+        "logprob": -0.0059432983,
         "text": ":"
       },
       {
         "id": 32000,
-        "logprob": -0.81347656,
+        "logprob": -0.8408203,
         "text": "<fake_token_around_image>"
       },
       {
         "id": 32001,
-        "logprob": -6.687641e-05,
+        "logprob": -9.906292e-05,
         "text": "<image>"
       },
       {
         "id": 32000,
-        "logprob": -3.5762787e-07,
+        "logprob": -2.3841858e-07,
         "text": "<fake_token_around_image>"
       },
       {
         "id": 1815,
-        "logprob": -4.2148438,
+        "logprob": -4.1679688,
         "text": "Can"
       },
       {
         "id": 366,
-        "logprob": -0.014137268,
+        "logprob": -0.014099121,
         "text": "you"
       },
       {
         "id": 2649,
-        "logprob": -4.4335938,
+        "logprob": -4.4609375,
         "text": "tell"
       },
       {
         "id": 592,
-        "logprob": -0.2919922,
+        "logprob": -0.29882812,
         "text": "me"
       },
       {
         "id": 263,
-        "logprob": -4.2070312,
+        "logprob": -4.1445312,
         "text": "a"
       },
       {
         "id": 1407,
-        "logprob": -9.421875,
+        "logprob": -9.3828125,
         "text": "very"
       },
       {
         "id": 3273,
-        "logprob": -1.8720703,
+        "logprob": -1.9736328,
         "text": "short"
       },
       {
         "id": 5828,
-        "logprob": -0.26489258,
+        "logprob": -0.2800293,
         "text": "story"
       },
       {
         "id": 2729,
-        "logprob": -3.7441406,
+        "logprob": -3.5625,
         "text": "based"
       },
       {
         "id": 373,
-        "logprob": -0.0005393028,
+        "logprob": -0.0006427765,
         "text": "on"
       },
       {
         "id": 278,
-        "logprob": -0.140625,
+        "logprob": -0.13952637,
         "text": "the"
       },
       {
         "id": 1967,
-        "logprob": -0.06756592,
+        "logprob": -0.068115234,
         "text": "image"
       },
       {
         "id": 29973,
-        "logprob": -0.15454102,
+        "logprob": -0.16357422,
         "text": "?"
       }
     ],
@@ -104,25 +104,25 @@
     "tokens": [
       {
         "id": 32002,
-        "logprob": -0.0019140244,
+        "logprob": -0.0026474,
         "special": true,
         "text": "<end_of_utterance>"
       },
       {
         "id": 29871,
-        "logprob": -8.404255e-05,
+        "logprob": -8.547306e-05,
         "special": false,
         "text": " "
       },
       {
         "id": 13,
-        "logprob": -1.7642975e-05,
+        "logprob": -1.7881393e-05,
         "special": false,
         "text": "\n"
       },
       {
         "id": 7900,
-        "logprob": -2.9802322e-06,
+        "logprob": -3.0994415e-06,
         "special": false,
         "text": "Ass"
       },
@@ -140,30 +140,29 @@
       },
       {
         "id": 319,
-        "logprob": -0.91064453,
+        "logprob": -0.92529297,
         "special": false,
         "text": " A"
       },
       {
         "id": 696,
-        "logprob": -1.2412109,
+        "logprob": -1.1269531,
         "special": false,
         "text": " ro"
       },
       {
         "id": 15664,
-        "logprob": -0.0002439022,
+        "logprob": -0.00029492378,
         "special": false,
         "text": "oster"
       },
       {
         "id": 15028,
-        "logprob": -1.1630859,
+        "logprob": -1.1855469,
         "special": false,
         "text": " stands"
       }
-    ],
-    "top_tokens": null
+    ]
   },
   "generated_text": " \nAssistant: A rooster stands"
 }
diff --git a/integration-tests/models/__snapshots__/test_idefics/test_idefics_load.json b/integration-tests/models/__snapshots__/test_idefics/test_idefics_load.json
index f258e38d..21d6161b 100644
--- a/integration-tests/models/__snapshots__/test_idefics/test_idefics_load.json
+++ b/integration-tests/models/__snapshots__/test_idefics/test_idefics_load.json
@@ -12,92 +12,92 @@
         },
         {
           "id": 4911,
-          "logprob": -5.7851562,
+          "logprob": -6.9804688,
           "text": "User"
         },
         {
           "id": 29901,
-          "logprob": -0.006996155,
+          "logprob": -0.006122589,
           "text": ":"
         },
         {
           "id": 32000,
-          "logprob": -0.81347656,
+          "logprob": -0.8417969,
           "text": "<fake_token_around_image>"
         },
         {
           "id": 32001,
-          "logprob": -6.687641e-05,
+          "logprob": -9.918213e-05,
           "text": "<image>"
         },
         {
           "id": 32000,
-          "logprob": -3.5762787e-07,
+          "logprob": -2.3841858e-07,
           "text": "<fake_token_around_image>"
         },
         {
           "id": 1815,
-          "logprob": -4.2148438,
+          "logprob": -4.1679688,
           "text": "Can"
         },
         {
           "id": 366,
-          "logprob": -0.014137268,
+          "logprob": -0.014091492,
           "text": "you"
         },
         {
           "id": 2649,
-          "logprob": -4.4335938,
+          "logprob": -4.4726562,
           "text": "tell"
         },
         {
           "id": 592,
-          "logprob": -0.2919922,
+          "logprob": -0.2998047,
           "text": "me"
         },
         {
           "id": 263,
-          "logprob": -4.2070312,
+          "logprob": -4.15625,
           "text": "a"
         },
         {
           "id": 1407,
-          "logprob": -9.421875,
+          "logprob": -9.3828125,
           "text": "very"
         },
         {
           "id": 3273,
-          "logprob": -1.8720703,
+          "logprob": -1.9716797,
           "text": "short"
         },
         {
           "id": 5828,
-          "logprob": -0.26489258,
+          "logprob": -0.27734375,
           "text": "story"
         },
         {
           "id": 2729,
-          "logprob": -3.7441406,
+          "logprob": -3.5605469,
           "text": "based"
         },
         {
           "id": 373,
-          "logprob": -0.0005393028,
+          "logprob": -0.00064468384,
           "text": "on"
         },
         {
           "id": 278,
-          "logprob": -0.140625,
+          "logprob": -0.14160156,
           "text": "the"
         },
         {
           "id": 1967,
-          "logprob": -0.06756592,
+          "logprob": -0.06915283,
           "text": "image"
         },
         {
           "id": 29973,
-          "logprob": -0.15454102,
+          "logprob": -0.16381836,
           "text": "?"
         }
       ],
@@ -105,19 +105,19 @@
       "tokens": [
         {
           "id": 32002,
-          "logprob": -0.0019140244,
+          "logprob": -0.0026664734,
           "special": true,
           "text": "<end_of_utterance>"
         },
         {
           "id": 29871,
-          "logprob": -8.392334e-05,
+          "logprob": -8.583069e-05,
           "special": false,
           "text": " "
         },
         {
           "id": 13,
-          "logprob": -1.7881393e-05,
+          "logprob": -1.8119812e-05,
           "special": false,
           "text": "\n"
         },
@@ -135,36 +135,35 @@
         },
         {
           "id": 29901,
-          "logprob": -3.0994415e-06,
+          "logprob": -3.2186508e-06,
           "special": false,
           "text": ":"
         },
         {
           "id": 319,
-          "logprob": -0.9057617,
+          "logprob": -0.9301758,
           "special": false,
           "text": " A"
         },
         {
           "id": 696,
-          "logprob": -1.2294922,
+          "logprob": -1.1279297,
           "special": false,
           "text": " ro"
         },
         {
           "id": 15664,
-          "logprob": -0.00024533272,
+          "logprob": -0.0002939701,
           "special": false,
           "text": "oster"
         },
         {
           "id": 15028,
-          "logprob": -1.1640625,
+          "logprob": -1.1865234,
           "special": false,
           "text": " stands"
         }
-      ],
-      "top_tokens": null
+      ]
     },
     "generated_text": " \nAssistant: A rooster stands"
   },
@@ -181,92 +180,92 @@
         },
         {
           "id": 4911,
-          "logprob": -5.7773438,
+          "logprob": -6.9804688,
           "text": "User"
         },
         {
           "id": 29901,
-          "logprob": -0.0070114136,
+          "logprob": -0.006122589,
           "text": ":"
         },
         {
           "id": 32000,
-          "logprob": -0.8208008,
+          "logprob": -0.8417969,
           "text": "<fake_token_around_image>"
         },
         {
           "id": 32001,
-          "logprob": -6.699562e-05,
+          "logprob": -9.942055e-05,
           "text": "<image>"
         },
         {
           "id": 32000,
-          "logprob": -3.5762787e-07,
+          "logprob": -2.3841858e-07,
           "text": "<fake_token_around_image>"
         },
         {
           "id": 1815,
-          "logprob": -4.2265625,
+          "logprob": -4.1679688,
           "text": "Can"
         },
         {
           "id": 366,
-          "logprob": -0.014175415,
+          "logprob": -0.014091492,
           "text": "you"
         },
         {
           "id": 2649,
-          "logprob": -4.4296875,
+          "logprob": -4.4726562,
           "text": "tell"
         },
         {
           "id": 592,
-          "logprob": -0.29516602,
+          "logprob": -0.2998047,
           "text": "me"
         },
         {
           "id": 263,
-          "logprob": -4.2109375,
+          "logprob": -4.15625,
           "text": "a"
         },
         {
           "id": 1407,
-          "logprob": -9.4296875,
+          "logprob": -9.3828125,
           "text": "very"
         },
         {
           "id": 3273,
-          "logprob": -1.8720703,
+          "logprob": -1.9716797,
           "text": "short"
         },
         {
           "id": 5828,
-          "logprob": -0.26879883,
+          "logprob": -0.27734375,
           "text": "story"
         },
         {
           "id": 2729,
-          "logprob": -3.7675781,
+          "logprob": -3.5605469,
           "text": "based"
         },
         {
           "id": 373,
-          "logprob": -0.0005354881,
+          "logprob": -0.0006451607,
           "text": "on"
         },
         {
           "id": 278,
-          "logprob": -0.13671875,
+          "logprob": -0.14160156,
           "text": "the"
         },
         {
           "id": 1967,
-          "logprob": -0.06719971,
+          "logprob": -0.06915283,
           "text": "image"
         },
         {
           "id": 29973,
-          "logprob": -0.15551758,
+          "logprob": -0.16381836,
           "text": "?"
         }
       ],
@@ -274,19 +273,19 @@
       "tokens": [
         {
           "id": 32002,
-          "logprob": -0.0019130707,
+          "logprob": -0.0026664734,
           "special": true,
           "text": "<end_of_utterance>"
         },
         {
           "id": 29871,
-          "logprob": -8.392334e-05,
+          "logprob": -8.571148e-05,
           "special": false,
           "text": " "
         },
         {
           "id": 13,
-          "logprob": -1.7881393e-05,
+          "logprob": -1.8119812e-05,
           "special": false,
           "text": "\n"
         },
@@ -310,30 +309,29 @@
         },
         {
           "id": 319,
-          "logprob": -0.9013672,
+          "logprob": -0.9301758,
           "special": false,
           "text": " A"
         },
         {
           "id": 696,
-          "logprob": -1.2324219,
+          "logprob": -1.1279297,
           "special": false,
           "text": " ro"
         },
         {
           "id": 15664,
-          "logprob": -0.0002477169,
+          "logprob": -0.0002939701,
           "special": false,
           "text": "oster"
         },
         {
           "id": 15028,
-          "logprob": -1.1660156,
+          "logprob": -1.1865234,
           "special": false,
           "text": " stands"
         }
-      ],
-      "top_tokens": null
+      ]
     },
     "generated_text": " \nAssistant: A rooster stands"
   },
@@ -350,92 +348,92 @@
         },
         {
           "id": 4911,
-          "logprob": -5.7773438,
+          "logprob": -6.9804688,
           "text": "User"
         },
         {
           "id": 29901,
-          "logprob": -0.0070114136,
+          "logprob": -0.006122589,
           "text": ":"
         },
         {
           "id": 32000,
-          "logprob": -0.8208008,
+          "logprob": -0.8417969,
           "text": "<fake_token_around_image>"
         },
         {
           "id": 32001,
-          "logprob": -6.699562e-05,
+          "logprob": -9.918213e-05,
           "text": "<image>"
         },
         {
           "id": 32000,
-          "logprob": -3.5762787e-07,
+          "logprob": -2.3841858e-07,
           "text": "<fake_token_around_image>"
         },
         {
           "id": 1815,
-          "logprob": -4.2265625,
+          "logprob": -4.1679688,
           "text": "Can"
         },
         {
           "id": 366,
-          "logprob": -0.014175415,
+          "logprob": -0.014091492,
           "text": "you"
         },
         {
           "id": 2649,
-          "logprob": -4.4296875,
+          "logprob": -4.4726562,
           "text": "tell"
         },
         {
           "id": 592,
-          "logprob": -0.29516602,
+          "logprob": -0.2998047,
           "text": "me"
         },
         {
           "id": 263,
-          "logprob": -4.2109375,
+          "logprob": -4.15625,
           "text": "a"
         },
         {
           "id": 1407,
-          "logprob": -9.4296875,
+          "logprob": -9.3828125,
           "text": "very"
         },
         {
           "id": 3273,
-          "logprob": -1.8720703,
+          "logprob": -1.9716797,
           "text": "short"
         },
         {
           "id": 5828,
-          "logprob": -0.26879883,
+          "logprob": -0.27734375,
           "text": "story"
         },
         {
           "id": 2729,
-          "logprob": -3.7675781,
+          "logprob": -3.5605469,
           "text": "based"
         },
         {
           "id": 373,
-          "logprob": -0.0005354881,
+          "logprob": -0.00064468384,
           "text": "on"
         },
         {
           "id": 278,
-          "logprob": -0.13671875,
+          "logprob": -0.14160156,
           "text": "the"
         },
         {
           "id": 1967,
-          "logprob": -0.06719971,
+          "logprob": -0.06915283,
           "text": "image"
         },
         {
           "id": 29973,
-          "logprob": -0.15551758,
+          "logprob": -0.16381836,
           "text": "?"
         }
       ],
@@ -443,19 +441,19 @@
       "tokens": [
         {
           "id": 32002,
-          "logprob": -0.001912117,
+          "logprob": -0.0026664734,
           "special": true,
           "text": "<end_of_utterance>"
         },
         {
           "id": 29871,
-          "logprob": -8.392334e-05,
+          "logprob": -8.59499e-05,
           "special": false,
           "text": " "
         },
         {
           "id": 13,
-          "logprob": -1.7762184e-05,
+          "logprob": -1.8119812e-05,
           "special": false,
           "text": "\n"
         },
@@ -479,30 +477,29 @@
         },
         {
           "id": 319,
-          "logprob": -0.9013672,
+          "logprob": -0.9301758,
           "special": false,
           "text": " A"
         },
         {
           "id": 696,
-          "logprob": -1.2324219,
+          "logprob": -1.1279297,
           "special": false,
           "text": " ro"
         },
         {
           "id": 15664,
-          "logprob": -0.0002477169,
+          "logprob": -0.0002939701,
           "special": false,
           "text": "oster"
         },
         {
           "id": 15028,
-          "logprob": -1.1660156,
+          "logprob": -1.1865234,
           "special": false,
           "text": " stands"
         }
-      ],
-      "top_tokens": null
+      ]
     },
     "generated_text": " \nAssistant: A rooster stands"
   },
@@ -519,92 +516,92 @@
         },
         {
           "id": 4911,
-          "logprob": -5.7773438,
+          "logprob": -6.9804688,
           "text": "User"
         },
         {
           "id": 29901,
-          "logprob": -0.0070114136,
+          "logprob": -0.006122589,
           "text": ":"
         },
         {
           "id": 32000,
-          "logprob": -0.8208008,
+          "logprob": -0.8417969,
           "text": "<fake_token_around_image>"
         },
         {
           "id": 32001,
-          "logprob": -6.699562e-05,
+          "logprob": -9.942055e-05,
           "text": "<image>"
         },
         {
           "id": 32000,
-          "logprob": -3.5762787e-07,
+          "logprob": -2.3841858e-07,
           "text": "<fake_token_around_image>"
         },
         {
           "id": 1815,
-          "logprob": -4.2265625,
+          "logprob": -4.1679688,
           "text": "Can"
         },
         {
           "id": 366,
-          "logprob": -0.014175415,
+          "logprob": -0.014091492,
           "text": "you"
         },
         {
           "id": 2649,
-          "logprob": -4.4296875,
+          "logprob": -4.4726562,
           "text": "tell"
         },
         {
           "id": 592,
-          "logprob": -0.29516602,
+          "logprob": -0.2998047,
           "text": "me"
         },
         {
           "id": 263,
-          "logprob": -4.2109375,
+          "logprob": -4.15625,
           "text": "a"
         },
         {
           "id": 1407,
-          "logprob": -9.4296875,
+          "logprob": -9.3828125,
           "text": "very"
         },
         {
           "id": 3273,
-          "logprob": -1.8720703,
+          "logprob": -1.9716797,
           "text": "short"
         },
         {
           "id": 5828,
-          "logprob": -0.26879883,
+          "logprob": -0.27734375,
           "text": "story"
         },
         {
           "id": 2729,
-          "logprob": -3.7675781,
+          "logprob": -3.5605469,
           "text": "based"
         },
         {
           "id": 373,
-          "logprob": -0.0005354881,
+          "logprob": -0.0006451607,
           "text": "on"
         },
         {
           "id": 278,
-          "logprob": -0.13671875,
+          "logprob": -0.14160156,
           "text": "the"
         },
         {
           "id": 1967,
-          "logprob": -0.06719971,
+          "logprob": -0.06915283,
           "text": "image"
         },
         {
           "id": 29973,
-          "logprob": -0.15551758,
+          "logprob": -0.16381836,
           "text": "?"
         }
       ],
@@ -612,19 +609,19 @@
       "tokens": [
         {
           "id": 32002,
-          "logprob": -0.001912117,
+          "logprob": -0.0026664734,
           "special": true,
           "text": "<end_of_utterance>"
         },
         {
           "id": 29871,
-          "logprob": -8.392334e-05,
+          "logprob": -8.571148e-05,
           "special": false,
           "text": " "
         },
         {
           "id": 13,
-          "logprob": -1.7762184e-05,
+          "logprob": -1.8119812e-05,
           "special": false,
           "text": "\n"
         },
@@ -648,30 +645,29 @@
         },
         {
           "id": 319,
-          "logprob": -0.9013672,
+          "logprob": -0.9301758,
           "special": false,
           "text": " A"
         },
         {
           "id": 696,
-          "logprob": -1.2324219,
+          "logprob": -1.1279297,
           "special": false,
           "text": " ro"
         },
         {
           "id": 15664,
-          "logprob": -0.0002477169,
+          "logprob": -0.0002939701,
           "special": false,
           "text": "oster"
         },
         {
           "id": 15028,
-          "logprob": -1.1660156,
+          "logprob": -1.1865234,
           "special": false,
           "text": " stands"
         }
-      ],
-      "top_tokens": null
+      ]
     },
     "generated_text": " \nAssistant: A rooster stands"
   }
diff --git a/server/tests/utils/test_layers.py b/server/tests/utils/test_layers.py
new file mode 100644
index 00000000..0a9fecd1
--- /dev/null
+++ b/server/tests/utils/test_layers.py
@@ -0,0 +1,64 @@
+import torch
+from text_generation_server.utils.layers import (
+    TensorParallelEmbedding,
+)
+
+class ProcessGroup:
+    def __init__(self, rank: int, world_size: int):
+        self._rank = rank
+        self.world_size = world_size
+
+    def size(self)->int:
+        return self.world_size
+
+    def rank(self)->int:
+        return self._rank
+
+class Weights:
+    def __init__(self, rank: int, world_size: int, vocab_size: int, hidden_dim: int):
+        self.weight = torch.arange(vocab_size*hidden_dim).float().view(vocab_size, hidden_dim)
+        self.process_group = ProcessGroup(rank, world_size)
+
+
+    def get_partial_sharded(self, name:str, dim: int):
+        assert dim == 0
+
+        rank = self.process_group.rank()
+        world_size = self.process_group.size()
+        size = self.weight.shape[dim]
+
+        block_size = (size + world_size - 1) // world_size
+        start = rank * block_size
+        stop = (rank + 1) * block_size
+        return self.weight[start:stop]
+
+    def get_shape(self, name: str):
+        return self.weight.shape
+
+def test_weight_hub_files_offline_error():
+
+    vocab_size= 17
+    weights = Weights(rank=0, world_size=1, vocab_size = vocab_size,hidden_dim = 256)
+    embeddings = TensorParallelEmbedding("", weights)
+
+    input_ids = torch.arange(vocab_size)
+    output = embeddings.forward(input_ids)
+    assert embeddings.min_id == 0
+    assert embeddings.max_id == 17
+    torch.testing.assert_close(output, torch.arange(256 * 17).float().view(17, 256))
+
+    weights_0_2 = Weights(rank=0, world_size=2, vocab_size = vocab_size,hidden_dim = 256)
+    weights_1_2 = Weights(rank=1, world_size=2, vocab_size = vocab_size,hidden_dim = 256)
+    embeddings_0_2 = TensorParallelEmbedding("", weights_0_2, reduce=False)
+    assert embeddings_0_2.min_id == 0
+    assert embeddings_0_2.max_id == 9
+    torch.testing.assert_close(embeddings_0_2.weight , torch.cat([torch.arange(9 * 256), torch.zeros(256)], dim=0).view(10, 256).float())
+    embeddings_1_2 = TensorParallelEmbedding("", weights_1_2, reduce=False)
+    assert embeddings_1_2.min_id == 9
+    assert embeddings_1_2.max_id == 17
+    torch.testing.assert_close(embeddings_1_2.weight , torch.cat([torch.arange(8 * 256) + 9 * 256, torch.zeros(256)], dim=0).view(9, 256).float())
+    output_tp_0 = embeddings_0_2.forward(input_ids)
+    output_tp_1 = embeddings_1_2.forward(input_ids)
+
+    torch.testing.assert_close(output, output_tp_0 + output_tp_1)
+
diff --git a/server/text_generation_server/utils/layers.py b/server/text_generation_server/utils/layers.py
index d4fa2559..5a0de0d7 100644
--- a/server/text_generation_server/utils/layers.py
+++ b/server/text_generation_server/utils/layers.py
@@ -507,10 +507,10 @@ class TensorParallelEmbedding(nn.Module):
         world_size = process_group.size()
         rank = process_group.rank()
 
-        block_size = num_embeddings // world_size
+        block_size = (num_embeddings + world_size - 1) // world_size
         self.min_id = rank * block_size
         self.max_id = min(num_embeddings, (rank + 1) * block_size)
-        self.null_idx = block_size
+        self.null_idx = weight.shape[0]  # Usually block_size, might be less in non even vocab_size.
         self.process_group = weights.process_group
         self.reduce = reduce
 
diff --git a/server/text_generation_server/utils/weights.py b/server/text_generation_server/utils/weights.py
index c4e82a6d..186733f3 100644
--- a/server/text_generation_server/utils/weights.py
+++ b/server/text_generation_server/utils/weights.py
@@ -92,7 +92,7 @@ class Weights:
         rank = self.process_group.rank()
 
         size = slice_.get_shape()[dim]
-        block_size = size // world_size
+        block_size = (size + world_size - 1) // world_size
         start = rank * block_size
         stop = (rank + 1) * block_size
 

From 7872b8c55b6cdbf97e30ba6e4cd700f2de7e9bc4 Mon Sep 17 00:00:00 2001
From: drbh <david.richard.holtz@gmail.com>
Date: Wed, 24 Jan 2024 11:41:28 -0500
Subject: [PATCH 03/31] Add messages api compatibility docs (#1478)

This PR adds a new page to the docs that describes the Messages API and
how to use it.

Additionally, this page will contain cloud-provider-specific information
for enabling and using this feature. This PR includes SageMaker-specific
information and an example.
---
 docs/source/_toctree.yml    |   2 +
 docs/source/messages_api.md | 134 ++++++++++++++++++++++++++++++++++++
 router/src/main.rs          |   6 +-
 router/src/server.rs        |   4 +-
 4 files changed, 141 insertions(+), 5 deletions(-)
 create mode 100644 docs/source/messages_api.md

diff --git a/docs/source/_toctree.yml b/docs/source/_toctree.yml
index 6fa50a6a..d57a594d 100644
--- a/docs/source/_toctree.yml
+++ b/docs/source/_toctree.yml
@@ -7,6 +7,8 @@
     title: Installation
   - local: supported_models
     title: Supported Models and Hardware
+  - local: messages_api
+    title: Messages API
   title: Getting started
 - sections:
   - local: basic_tutorials/consuming_tgi
diff --git a/docs/source/messages_api.md b/docs/source/messages_api.md
new file mode 100644
index 00000000..899de865
--- /dev/null
+++ b/docs/source/messages_api.md
@@ -0,0 +1,134 @@
+# Messages API
+
+_The Messages API is compatible with the OpenAI Chat Completion API_
+
+Text Generation Inference (TGI) now supports the Messages API, which is fully compatible with the OpenAI Chat Completion API. This means you can use OpenAI's client libraries to interact with TGI's Messages API. Below are some examples of how to utilize this compatibility.
+
+## Making a Request
+
+You can make a request to TGI's Messages API using `curl`. Here's an example:
+
+```bash
+curl localhost:3000/v1/chat/completions \
+    -X POST \
+    -d '{
+  "model": "tgi",
+  "messages": [
+    {
+      "role": "system",
+      "content": "You are a helpful assistant."
+    },
+    {
+      "role": "user",
+      "content": "What is deep learning?"
+    }
+  ],
+  "stream": true,
+  "max_tokens": 20
+}' \
+    -H 'Content-Type: application/json'
+```
+
+## Streaming
+
+You can also use OpenAI's Python client library to make a streaming request. Here's how:
+
+```python
+from openai import OpenAI
+
+# init the client but point it to TGI
+client = OpenAI(
+    base_url="http://localhost:3000/v1",
+    api_key="-"
+)
+
+chat_completion = client.chat.completions.create(
+    model="tgi",
+    messages=[
+        {"role": "system", "content": "You are a helpful assistant." },
+        {"role": "user", "content": "What is deep learning?"}
+    ],
+    stream=True
+)
+
+# iterate and print stream
+for message in chat_completion:
+    print(message)
+```
+
+## Synchronous
+
+If you prefer to make a synchronous request, you can do so like this:
+
+```python
+from openai import OpenAI
+
+# init the client but point it to TGI
+client = OpenAI(
+    base_url="http://localhost:3000/v1",
+    api_key="-"
+)
+
+chat_completion = client.chat.completions.create(
+    model="tgi",
+    messages=[
+        {"role": "system", "content": "You are a helpful assistant." },
+        {"role": "user", "content": "What is deep learning?"}
+    ],
+    stream=False
+)
+
+print(chat_completion)
+```
+
+## Cloud Providers
+
+TGI can be deployed on various cloud providers for scalable and robust text generation. One such provider is Amazon SageMaker, which has recently added support for TGI. Here's how you can deploy TGI on Amazon SageMaker:
+
+## Amazon SageMaker
+
+To enable the Messages API in Amazon SageMaker you need to set the environment variable `MESSAGES_API_ENABLED=true`. 
+
+This will modify the `/invocations` route to accept Messages dictionaries consisting of a role and content. See the example below on how to deploy a model (here `HuggingFaceH4/zephyr-7b-beta`) with the new Messages API.
+
+```python
+import json
+import sagemaker
+import boto3
+from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri
+
+try:
+	role = sagemaker.get_execution_role()
+except ValueError:
+	iam = boto3.client('iam')
+	role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']
+
+# Hub Model configuration. https://huggingface.co/models
+hub = {
+	'HF_MODEL_ID':'HuggingFaceH4/zephyr-7b-beta',
+	'SM_NUM_GPUS': json.dumps(1),
+    'MESSAGES_API_ENABLED': True
+}
+
+# create Hugging Face Model Class
+huggingface_model = HuggingFaceModel(
+	image_uri=get_huggingface_llm_image_uri("huggingface",version="1.4.0"),
+	env=hub,
+	role=role, 
+)
+
+# deploy model to SageMaker Inference
+predictor = huggingface_model.deploy(
+	initial_instance_count=1,
+	instance_type="ml.g5.2xlarge",
+	container_startup_health_check_timeout=300,
+  )
+  
+# send request
+predictor.predict({
+"messages": [
+        {"role": "system", "content": "You are a helpful assistant." },
+        {"role": "user", "content": "What is deep learning?"}
+    ]
+})
+```
\ No newline at end of file
diff --git a/router/src/main.rs b/router/src/main.rs
index bf987eb6..b6190908 100644
--- a/router/src/main.rs
+++ b/router/src/main.rs
@@ -72,7 +72,7 @@ struct Args {
     #[clap(long, env)]
     ngrok_edge: Option<String>,
     #[clap(long, env, default_value_t = false)]
-    chat_enabled_api: bool,
+    messages_api_enabled: bool,
 }
 
 #[tokio::main]
@@ -104,7 +104,7 @@ async fn main() -> Result<(), RouterError> {
         ngrok,
         ngrok_authtoken,
         ngrok_edge,
-        chat_enabled_api,
+        messages_api_enabled,
     } = args;
 
     // Launch Tokio runtime
@@ -348,7 +348,7 @@ async fn main() -> Result<(), RouterError> {
         ngrok_authtoken,
         ngrok_edge,
         tokenizer_config,
-        chat_enabled_api,
+        messages_api_enabled,
     )
     .await?;
     Ok(())
diff --git a/router/src/server.rs b/router/src/server.rs
index aa1ad202..ff48b4f0 100644
--- a/router/src/server.rs
+++ b/router/src/server.rs
@@ -708,7 +708,7 @@ pub async fn run(
     ngrok_authtoken: Option<String>,
     ngrok_edge: Option<String>,
     tokenizer_config: HubTokenizerConfig,
-    chat_enabled_api: bool,
+    messages_api_enabled: bool,
 ) -> Result<(), axum::BoxError> {
     // OpenAPI documentation
     #[derive(OpenApi)]
@@ -872,7 +872,7 @@ pub async fn run(
         .route("/metrics", get(metrics));
 
     // Conditional AWS Sagemaker route
-    let aws_sagemaker_route = if chat_enabled_api {
+    let aws_sagemaker_route = if messages_api_enabled {
         Router::new().route("/invocations", post(chat_completions)) // Use 'chat_completions' for OAI_ENABLED
     } else {
         Router::new().route("/invocations", post(compat_generate)) // Use 'compat_generate' otherwise

From 86c8335f1b280f6514ae0a6d22754e503cf3d414 Mon Sep 17 00:00:00 2001
From: Nicolas Patry <patry.nicolas@protonmail.com>
Date: Thu, 25 Jan 2024 14:19:03 +0100
Subject: [PATCH 04/31] Add a new `/tokenize` route to get the tokenized input
 (#1471)

# What does this PR do?


Ideally this is done client side, but this is a recurring request,
therefore we implemented it.

- Runs only if rust tokenizer is present (not encumbering the main
inference pipeline is important).
- Returns simple results, ID, text (gotten with offsets from the
original string) and offsets (so users can do things like highlighting
text).

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
---
 docs/openapi.json        | 884 +--------------------------------------
 router/src/infer.rs      |  22 +
 router/src/lib.rs        |  12 +
 router/src/server.rs     |  55 ++-
 router/src/validation.rs |  45 +-
 5 files changed, 115 insertions(+), 903 deletions(-)

diff --git a/docs/openapi.json b/docs/openapi.json
index df2d427f..4454259b 100644
--- a/docs/openapi.json
+++ b/docs/openapi.json
@@ -1,883 +1 @@
-{
-  "openapi": "3.0.3",
-  "info": {
-    "title": "Text Generation Inference",
-    "description": "Text Generation Webserver",
-    "contact": {
-      "name": "Olivier Dehaene"
-    },
-    "license": {
-      "name": "Apache 2.0",
-      "url": "https://www.apache.org/licenses/LICENSE-2.0"
-    },
-    "version": "1.3.4"
-  },
-  "paths": {
-    "/": {
-      "post": {
-        "tags": [
-          "Text Generation Inference"
-        ],
-        "summary": "Generate tokens if `stream == false` or a stream of token if `stream == true`",
-        "description": "Generate tokens if `stream == false` or a stream of token if `stream == true`",
-        "operationId": "compat_generate",
-        "requestBody": {
-          "content": {
-            "application/json": {
-              "schema": {
-                "$ref": "#/components/schemas/CompatGenerateRequest"
-              }
-            }
-          },
-          "required": true
-        },
-        "responses": {
-          "200": {
-            "description": "Generated Text",
-            "content": {
-              "application/json": {
-                "schema": {
-                  "$ref": "#/components/schemas/GenerateResponse"
-                }
-              },
-              "text/event-stream": {
-                "schema": {
-                  "$ref": "#/components/schemas/StreamResponse"
-                }
-              }
-            }
-          },
-          "422": {
-            "description": "Input validation error",
-            "content": {
-              "application/json": {
-                "schema": {
-                  "$ref": "#/components/schemas/ErrorResponse"
-                },
-                "example": {
-                  "error": "Input validation error"
-                }
-              }
-            }
-          },
-          "424": {
-            "description": "Generation Error",
-            "content": {
-              "application/json": {
-                "schema": {
-                  "$ref": "#/components/schemas/ErrorResponse"
-                },
-                "example": {
-                  "error": "Request failed during generation"
-                }
-              }
-            }
-          },
-          "429": {
-            "description": "Model is overloaded",
-            "content": {
-              "application/json": {
-                "schema": {
-                  "$ref": "#/components/schemas/ErrorResponse"
-                },
-                "example": {
-                  "error": "Model is overloaded"
-                }
-              }
-            }
-          },
-          "500": {
-            "description": "Incomplete generation",
-            "content": {
-              "application/json": {
-                "schema": {
-                  "$ref": "#/components/schemas/ErrorResponse"
-                },
-                "example": {
-                  "error": "Incomplete generation"
-                }
-              }
-            }
-          }
-        }
-      }
-    },
-    "/generate": {
-      "post": {
-        "tags": [
-          "Text Generation Inference"
-        ],
-        "summary": "Generate tokens",
-        "description": "Generate tokens",
-        "operationId": "generate",
-        "requestBody": {
-          "content": {
-            "application/json": {
-              "schema": {
-                "$ref": "#/components/schemas/GenerateRequest"
-              }
-            }
-          },
-          "required": true
-        },
-        "responses": {
-          "200": {
-            "description": "Generated Text",
-            "content": {
-              "application/json": {
-                "schema": {
-                  "$ref": "#/components/schemas/GenerateResponse"
-                }
-              }
-            }
-          },
-          "422": {
-            "description": "Input validation error",
-            "content": {
-              "application/json": {
-                "schema": {
-                  "$ref": "#/components/schemas/ErrorResponse"
-                },
-                "example": {
-                  "error": "Input validation error"
-                }
-              }
-            }
-          },
-          "424": {
-            "description": "Generation Error",
-            "content": {
-              "application/json": {
-                "schema": {
-                  "$ref": "#/components/schemas/ErrorResponse"
-                },
-                "example": {
-                  "error": "Request failed during generation"
-                }
-              }
-            }
-          },
-          "429": {
-            "description": "Model is overloaded",
-            "content": {
-              "application/json": {
-                "schema": {
-                  "$ref": "#/components/schemas/ErrorResponse"
-                },
-                "example": {
-                  "error": "Model is overloaded"
-                }
-              }
-            }
-          },
-          "500": {
-            "description": "Incomplete generation",
-            "content": {
-              "application/json": {
-                "schema": {
-                  "$ref": "#/components/schemas/ErrorResponse"
-                },
-                "example": {
-                  "error": "Incomplete generation"
-                }
-              }
-            }
-          }
-        }
-      }
-    },
-    "/generate_stream": {
-      "post": {
-        "tags": [
-          "Text Generation Inference"
-        ],
-        "summary": "Generate a stream of token using Server-Sent Events",
-        "description": "Generate a stream of token using Server-Sent Events",
-        "operationId": "generate_stream",
-        "requestBody": {
-          "content": {
-            "application/json": {
-              "schema": {
-                "$ref": "#/components/schemas/GenerateRequest"
-              }
-            }
-          },
-          "required": true
-        },
-        "responses": {
-          "200": {
-            "description": "Generated Text",
-            "content": {
-              "text/event-stream": {
-                "schema": {
-                  "$ref": "#/components/schemas/StreamResponse"
-                }
-              }
-            }
-          },
-          "422": {
-            "description": "Input validation error",
-            "content": {
-              "text/event-stream": {
-                "schema": {
-                  "$ref": "#/components/schemas/ErrorResponse"
-                },
-                "example": {
-                  "error": "Input validation error"
-                }
-              }
-            }
-          },
-          "424": {
-            "description": "Generation Error",
-            "content": {
-              "text/event-stream": {
-                "schema": {
-                  "$ref": "#/components/schemas/ErrorResponse"
-                },
-                "example": {
-                  "error": "Request failed during generation"
-                }
-              }
-            }
-          },
-          "429": {
-            "description": "Model is overloaded",
-            "content": {
-              "text/event-stream": {
-                "schema": {
-                  "$ref": "#/components/schemas/ErrorResponse"
-                },
-                "example": {
-                  "error": "Model is overloaded"
-                }
-              }
-            }
-          },
-          "500": {
-            "description": "Incomplete generation",
-            "content": {
-              "text/event-stream": {
-                "schema": {
-                  "$ref": "#/components/schemas/ErrorResponse"
-                },
-                "example": {
-                  "error": "Incomplete generation"
-                }
-              }
-            }
-          }
-        }
-      }
-    },
-    "/health": {
-      "get": {
-        "tags": [
-          "Text Generation Inference"
-        ],
-        "summary": "Health check method",
-        "description": "Health check method",
-        "operationId": "health",
-        "responses": {
-          "200": {
-            "description": "Everything is working fine"
-          },
-          "503": {
-            "description": "Text generation inference is down",
-            "content": {
-              "application/json": {
-                "schema": {
-                  "$ref": "#/components/schemas/ErrorResponse"
-                },
-                "example": {
-                  "error": "unhealthy",
-                  "error_type": "healthcheck"
-                }
-              }
-            }
-          }
-        }
-      }
-    },
-    "/info": {
-      "get": {
-        "tags": [
-          "Text Generation Inference"
-        ],
-        "summary": "Text Generation Inference endpoint info",
-        "description": "Text Generation Inference endpoint info",
-        "operationId": "get_model_info",
-        "responses": {
-          "200": {
-            "description": "Served model info",
-            "content": {
-              "application/json": {
-                "schema": {
-                  "$ref": "#/components/schemas/Info"
-                }
-              }
-            }
-          }
-        }
-      }
-    },
-    "/metrics": {
-      "get": {
-        "tags": [
-          "Text Generation Inference"
-        ],
-        "summary": "Prometheus metrics scrape endpoint",
-        "description": "Prometheus metrics scrape endpoint",
-        "operationId": "metrics",
-        "responses": {
-          "200": {
-            "description": "Prometheus Metrics",
-            "content": {
-              "text/plain": {
-                "schema": {
-                  "type": "string"
-                }
-              }
-            }
-          }
-        }
-      }
-    }
-  },
-  "components": {
-    "schemas": {
-      "BestOfSequence": {
-        "type": "object",
-        "required": [
-          "generated_text",
-          "finish_reason",
-          "generated_tokens",
-          "prefill",
-          "tokens"
-        ],
-        "properties": {
-          "finish_reason": {
-            "$ref": "#/components/schemas/FinishReason"
-          },
-          "generated_text": {
-            "type": "string",
-            "example": "test"
-          },
-          "generated_tokens": {
-            "type": "integer",
-            "format": "int32",
-            "example": 1,
-            "minimum": 0
-          },
-          "prefill": {
-            "type": "array",
-            "items": {
-              "$ref": "#/components/schemas/PrefillToken"
-            }
-          },
-          "seed": {
-            "type": "integer",
-            "format": "int64",
-            "example": 42,
-            "nullable": true,
-            "minimum": 0
-          },
-          "tokens": {
-            "type": "array",
-            "items": {
-              "$ref": "#/components/schemas/Token"
-            }
-          },
-          "top_tokens": {
-            "type": "array",
-            "items": {
-              "type": "array",
-              "items": {
-                "$ref": "#/components/schemas/Token"
-              }
-            }
-          }
-        }
-      },
-      "CompatGenerateRequest": {
-        "type": "object",
-        "required": [
-          "inputs"
-        ],
-        "properties": {
-          "inputs": {
-            "type": "string",
-            "example": "My name is Olivier and I"
-          },
-          "parameters": {
-            "$ref": "#/components/schemas/GenerateParameters"
-          },
-          "stream": {
-            "type": "boolean",
-            "default": "false"
-          }
-        }
-      },
-      "Details": {
-        "type": "object",
-        "required": [
-          "finish_reason",
-          "generated_tokens",
-          "prefill",
-          "tokens"
-        ],
-        "properties": {
-          "best_of_sequences": {
-            "type": "array",
-            "items": {
-              "$ref": "#/components/schemas/BestOfSequence"
-            },
-            "nullable": true
-          },
-          "finish_reason": {
-            "$ref": "#/components/schemas/FinishReason"
-          },
-          "generated_tokens": {
-            "type": "integer",
-            "format": "int32",
-            "example": 1,
-            "minimum": 0
-          },
-          "prefill": {
-            "type": "array",
-            "items": {
-              "$ref": "#/components/schemas/PrefillToken"
-            }
-          },
-          "seed": {
-            "type": "integer",
-            "format": "int64",
-            "example": 42,
-            "nullable": true,
-            "minimum": 0
-          },
-          "tokens": {
-            "type": "array",
-            "items": {
-              "$ref": "#/components/schemas/Token"
-            }
-          },
-          "top_tokens": {
-            "type": "array",
-            "items": {
-              "type": "array",
-              "items": {
-                "$ref": "#/components/schemas/Token"
-              }
-            }
-          }
-        }
-      },
-      "ErrorResponse": {
-        "type": "object",
-        "required": [
-          "error",
-          "error_type"
-        ],
-        "properties": {
-          "error": {
-            "type": "string"
-          },
-          "error_type": {
-            "type": "string"
-          }
-        }
-      },
-      "FinishReason": {
-        "type": "string",
-        "enum": [
-          "length",
-          "eos_token",
-          "stop_sequence"
-        ]
-      },
-      "GenerateParameters": {
-        "type": "object",
-        "properties": {
-          "best_of": {
-            "type": "integer",
-            "default": "null",
-            "example": 1,
-            "nullable": true,
-            "minimum": 0,
-            "exclusiveMinimum": 0
-          },
-          "decoder_input_details": {
-            "type": "boolean",
-            "default": "true"
-          },
-          "details": {
-            "type": "boolean",
-            "default": "true"
-          },
-          "do_sample": {
-            "type": "boolean",
-            "default": "false",
-            "example": true
-          },
-          "max_new_tokens": {
-            "type": "integer",
-            "format": "int32",
-            "default": "20",
-            "example": "20",
-            "nullable": true,
-            "minimum": 0
-          },
-          "repetition_penalty": {
-            "type": "number",
-            "format": "float",
-            "default": "null",
-            "example": 1.03,
-            "nullable": true,
-            "exclusiveMinimum": 0
-          },
-          "return_full_text": {
-            "type": "boolean",
-            "default": "null",
-            "example": false,
-            "nullable": true
-          },
-          "seed": {
-            "type": "integer",
-            "format": "int64",
-            "default": "null",
-            "example": "null",
-            "nullable": true,
-            "minimum": 0,
-            "exclusiveMinimum": 0
-          },
-          "stop": {
-            "type": "array",
-            "items": {
-              "type": "string"
-            },
-            "example": [
-              "photographer"
-            ],
-            "maxItems": 4
-          },
-          "temperature": {
-            "type": "number",
-            "format": "float",
-            "default": "null",
-            "example": 0.5,
-            "nullable": true,
-            "exclusiveMinimum": 0
-          },
-          "top_k": {
-            "type": "integer",
-            "format": "int32",
-            "default": "null",
-            "example": 10,
-            "nullable": true,
-            "exclusiveMinimum": 0
-          },
-          "top_n_tokens": {
-            "type": "integer",
-            "format": "int32",
-            "default": "null",
-            "example": 5,
-            "nullable": true,
-            "minimum": 0,
-            "exclusiveMinimum": 0
-          },
-          "top_p": {
-            "type": "number",
-            "format": "float",
-            "default": "null",
-            "example": 0.95,
-            "nullable": true,
-            "maximum": 1,
-            "exclusiveMinimum": 0
-          },
-          "truncate": {
-            "type": "integer",
-            "default": "null",
-            "example": "null",
-            "nullable": true,
-            "minimum": 0
-          },
-          "typical_p": {
-            "type": "number",
-            "format": "float",
-            "default": "null",
-            "example": 0.95,
-            "nullable": true,
-            "maximum": 1,
-            "exclusiveMinimum": 0
-          },
-          "watermark": {
-            "type": "boolean",
-            "default": "false",
-            "example": true
-          }
-        }
-      },
-      "GenerateRequest": {
-        "type": "object",
-        "required": [
-          "inputs"
-        ],
-        "properties": {
-          "inputs": {
-            "type": "string",
-            "example": "My name is Olivier and I"
-          },
-          "parameters": {
-            "$ref": "#/components/schemas/GenerateParameters"
-          }
-        }
-      },
-      "GenerateResponse": {
-        "type": "object",
-        "required": [
-          "generated_text"
-        ],
-        "properties": {
-          "details": {
-            "allOf": [
-              {
-                "$ref": "#/components/schemas/Details"
-              }
-            ],
-            "nullable": true
-          },
-          "generated_text": {
-            "type": "string",
-            "example": "test"
-          }
-        }
-      },
-      "Info": {
-        "type": "object",
-        "required": [
-          "model_id",
-          "model_dtype",
-          "model_device_type",
-          "max_concurrent_requests",
-          "max_best_of",
-          "max_stop_sequences",
-          "max_input_length",
-          "max_total_tokens",
-          "waiting_served_ratio",
-          "max_batch_total_tokens",
-          "max_waiting_tokens",
-          "validation_workers",
-          "version"
-        ],
-        "properties": {
-          "docker_label": {
-            "type": "string",
-            "example": "null",
-            "nullable": true
-          },
-          "max_batch_total_tokens": {
-            "type": "integer",
-            "format": "int32",
-            "example": "32000",
-            "minimum": 0
-          },
-          "max_best_of": {
-            "type": "integer",
-            "example": "2",
-            "minimum": 0
-          },
-          "max_concurrent_requests": {
-            "type": "integer",
-            "description": "Router Parameters",
-            "example": "128",
-            "minimum": 0
-          },
-          "max_input_length": {
-            "type": "integer",
-            "example": "1024",
-            "minimum": 0
-          },
-          "max_stop_sequences": {
-            "type": "integer",
-            "example": "4",
-            "minimum": 0
-          },
-          "max_total_tokens": {
-            "type": "integer",
-            "example": "2048",
-            "minimum": 0
-          },
-          "max_waiting_tokens": {
-            "type": "integer",
-            "example": "20",
-            "minimum": 0
-          },
-          "model_device_type": {
-            "type": "string",
-            "example": "cuda"
-          },
-          "model_dtype": {
-            "type": "string",
-            "example": "torch.float16"
-          },
-          "model_id": {
-            "type": "string",
-            "description": "Model info",
-            "example": "bigscience/blomm-560m"
-          },
-          "model_pipeline_tag": {
-            "type": "string",
-            "example": "text-generation",
-            "nullable": true
-          },
-          "model_sha": {
-            "type": "string",
-            "example": "e985a63cdc139290c5f700ff1929f0b5942cced2",
-            "nullable": true
-          },
-          "sha": {
-            "type": "string",
-            "example": "null",
-            "nullable": true
-          },
-          "validation_workers": {
-            "type": "integer",
-            "example": "2",
-            "minimum": 0
-          },
-          "version": {
-            "type": "string",
-            "description": "Router Info",
-            "example": "0.5.0"
-          },
-          "waiting_served_ratio": {
-            "type": "number",
-            "format": "float",
-            "example": "1.2"
-          }
-        }
-      },
-      "PrefillToken": {
-        "type": "object",
-        "required": [
-          "id",
-          "text",
-          "logprob"
-        ],
-        "properties": {
-          "id": {
-            "type": "integer",
-            "format": "int32",
-            "example": 0,
-            "minimum": 0
-          },
-          "logprob": {
-            "type": "number",
-            "format": "float",
-            "example": -0.34,
-            "nullable": true
-          },
-          "text": {
-            "type": "string",
-            "example": "test"
-          }
-        }
-      },
-      "StreamDetails": {
-        "type": "object",
-        "required": [
-          "finish_reason",
-          "generated_tokens"
-        ],
-        "properties": {
-          "finish_reason": {
-            "$ref": "#/components/schemas/FinishReason"
-          },
-          "generated_tokens": {
-            "type": "integer",
-            "format": "int32",
-            "example": 1,
-            "minimum": 0
-          },
-          "seed": {
-            "type": "integer",
-            "format": "int64",
-            "example": 42,
-            "nullable": true,
-            "minimum": 0
-          }
-        }
-      },
-      "StreamResponse": {
-        "type": "object",
-        "required": [
-          "token"
-        ],
-        "properties": {
-          "details": {
-            "allOf": [
-              {
-                "$ref": "#/components/schemas/StreamDetails"
-              }
-            ],
-            "default": "null",
-            "nullable": true
-          },
-          "generated_text": {
-            "type": "string",
-            "default": "null",
-            "example": "test",
-            "nullable": true
-          },
-          "token": {
-            "$ref": "#/components/schemas/Token"
-          },
-          "top_tokens": {
-            "type": "array",
-            "items": {
-              "$ref": "#/components/schemas/Token"
-            }
-          }
-        }
-      },
-      "Token": {
-        "type": "object",
-        "required": [
-          "id",
-          "text",
-          "logprob",
-          "special"
-        ],
-        "properties": {
-          "id": {
-            "type": "integer",
-            "format": "int32",
-            "example": 0,
-            "minimum": 0
-          },
-          "logprob": {
-            "type": "number",
-            "format": "float",
-            "example": -0.34,
-            "nullable": true
-          },
-          "special": {
-            "type": "boolean",
-            "example": "false"
-          },
-          "text": {
-            "type": "string",
-            "example": "test"
-          }
-        }
-      }
-    }
-  },
-  "tags": [
-    {
-      "name": "Text Generation Inference",
-      "description": "Hugging Face Text Generation Inference API"
-    }
-  ]
-}
+{"openapi":"3.0.3","info":{"title":"Text Generation Inference","description":"Text Generation Webserver","contact":{"name":"Olivier Dehaene"},"license":{"name":"Apache 2.0","url":"https://www.apache.org/licenses/LICENSE-2.0"},"version":"1.3.4"},"paths":{"/":{"post":{"tags":["Text Generation Inference"],"summary":"Generate tokens if `stream == false` or a stream of token if `stream == true`","description":"Generate tokens if `stream == false` or a stream of token if `stream == true`","operationId":"compat_generate","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CompatGenerateRequest"}}},"required":true},"responses":{"200":{"description":"Generated Text","content":{"application/json":{"schema":{"$ref":"#/components/schemas/GenerateResponse"}},"text/event-stream":{"schema":{"$ref":"#/components/schemas/StreamResponse"}}}},"422":{"description":"Input validation error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Input validation error"}}}},"424":{"description":"Generation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Request failed during generation"}}}},"429":{"description":"Model is overloaded","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Model is overloaded"}}}},"500":{"description":"Incomplete generation","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Incomplete generation"}}}}}}},"/generate":{"post":{"tags":["Text Generation Inference"],"summary":"Generate tokens","description":"Generate tokens","operationId":"generate","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/GenerateRequest"}}},"required":true},"responses":{"200":{"description":"Generated 
Text","content":{"application/json":{"schema":{"$ref":"#/components/schemas/GenerateResponse"}}}},"422":{"description":"Input validation error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Input validation error"}}}},"424":{"description":"Generation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Request failed during generation"}}}},"429":{"description":"Model is overloaded","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Model is overloaded"}}}},"500":{"description":"Incomplete generation","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Incomplete generation"}}}}}}},"/generate_stream":{"post":{"tags":["Text Generation Inference"],"summary":"Generate a stream of token using Server-Sent Events","description":"Generate a stream of token using Server-Sent Events","operationId":"generate_stream","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/GenerateRequest"}}},"required":true},"responses":{"200":{"description":"Generated Text","content":{"text/event-stream":{"schema":{"$ref":"#/components/schemas/StreamResponse"}}}},"422":{"description":"Input validation error","content":{"text/event-stream":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Input validation error"}}}},"424":{"description":"Generation Error","content":{"text/event-stream":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Request failed during generation"}}}},"429":{"description":"Model is overloaded","content":{"text/event-stream":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Model is overloaded"}}}},"500":{"description":"Incomplete 
generation","content":{"text/event-stream":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Incomplete generation"}}}}}}},"/health":{"get":{"tags":["Text Generation Inference"],"summary":"Health check method","description":"Health check method","operationId":"health","responses":{"200":{"description":"Everything is working fine"},"503":{"description":"Text generation inference is down","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"unhealthy","error_type":"healthcheck"}}}}}}},"/info":{"get":{"tags":["Text Generation Inference"],"summary":"Text Generation Inference endpoint info","description":"Text Generation Inference endpoint info","operationId":"get_model_info","responses":{"200":{"description":"Served model info","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Info"}}}}}}},"/metrics":{"get":{"tags":["Text Generation Inference"],"summary":"Prometheus metrics scrape endpoint","description":"Prometheus metrics scrape endpoint","operationId":"metrics","responses":{"200":{"description":"Prometheus Metrics","content":{"text/plain":{"schema":{"type":"string"}}}}}}},"/tokenize":{"post":{"tags":["Text Generation Inference"],"summary":"Tokenize inputs","description":"Tokenize inputs","operationId":"tokenize","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/TokenizeRequest"}}},"required":true},"responses":{"200":{"description":"Tokenized ids","content":{"application/json":{"schema":{"$ref":"#/components/schemas/TokenizeResponse"}}}},"404":{"description":"No tokenizer found","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"No fast tokenizer available"}}}}}}},"/v1/chat/completions":{"post":{"tags":["Text Generation Inference"],"summary":"Generate tokens","description":"Generate 
tokens","operationId":"chat_completions","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ChatRequest"}}},"required":true},"responses":{"200":{"description":"Generated Text","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ChatCompletionChunk"}}}},"422":{"description":"Input validation error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Input validation error"}}}},"424":{"description":"Generation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Request failed during generation"}}}},"429":{"description":"Model is overloaded","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Model is overloaded"}}}},"500":{"description":"Incomplete generation","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Incomplete generation"}}}}}}}},"components":{"schemas":{"BestOfSequence":{"type":"object","required":["generated_text","finish_reason","generated_tokens","prefill","tokens"],"properties":{"finish_reason":{"$ref":"#/components/schemas/FinishReason"},"generated_text":{"type":"string","example":"test"},"generated_tokens":{"type":"integer","format":"int32","example":1,"minimum":0},"prefill":{"type":"array","items":{"$ref":"#/components/schemas/PrefillToken"}},"seed":{"type":"integer","format":"int64","example":42,"nullable":true,"minimum":0},"tokens":{"type":"array","items":{"$ref":"#/components/schemas/Token"}},"top_tokens":{"type":"array","items":{"type":"array","items":{"$ref":"#/components/schemas/Token"}}}}},"CompatGenerateRequest":{"type":"object","required":["inputs"],"properties":{"inputs":{"type":"string","example":"My name is Olivier and 
I"},"parameters":{"$ref":"#/components/schemas/GenerateParameters"},"stream":{"type":"boolean","default":"false"}}},"Details":{"type":"object","required":["finish_reason","generated_tokens","prefill","tokens"],"properties":{"best_of_sequences":{"type":"array","items":{"$ref":"#/components/schemas/BestOfSequence"},"nullable":true},"finish_reason":{"$ref":"#/components/schemas/FinishReason"},"generated_tokens":{"type":"integer","format":"int32","example":1,"minimum":0},"prefill":{"type":"array","items":{"$ref":"#/components/schemas/PrefillToken"}},"seed":{"type":"integer","format":"int64","example":42,"nullable":true,"minimum":0},"tokens":{"type":"array","items":{"$ref":"#/components/schemas/Token"}},"top_tokens":{"type":"array","items":{"type":"array","items":{"$ref":"#/components/schemas/Token"}}}}},"ErrorResponse":{"type":"object","required":["error","error_type"],"properties":{"error":{"type":"string"},"error_type":{"type":"string"}}},"FinishReason":{"type":"string","enum":["length","eos_token","stop_sequence"]},"GenerateParameters":{"type":"object","properties":{"best_of":{"type":"integer","default":"null","example":1,"nullable":true,"minimum":0,"exclusiveMinimum":0},"decoder_input_details":{"type":"boolean","default":"true"},"details":{"type":"boolean","default":"true"},"do_sample":{"type":"boolean","default":"false","example":true},"max_new_tokens":{"type":"integer","format":"int32","default":"100","example":"20","nullable":true,"minimum":0},"repetition_penalty":{"type":"number","format":"float","default":"null","example":1.03,"nullable":true,"exclusiveMinimum":0},"return_full_text":{"type":"boolean","default":"null","example":false,"nullable":true},"seed":{"type":"integer","format":"int64","default":"null","example":"null","nullable":true,"minimum":0,"exclusiveMinimum":0},"stop":{"type":"array","items":{"type":"string"},"example":["photographer"],"maxItems":4},"temperature":{"type":"number","format":"float","default":"null","example":0.5,"nullable":true,"exclu
siveMinimum":0},"top_k":{"type":"integer","format":"int32","default":"null","example":10,"nullable":true,"exclusiveMinimum":0},"top_n_tokens":{"type":"integer","format":"int32","default":"null","example":5,"nullable":true,"minimum":0,"exclusiveMinimum":0},"top_p":{"type":"number","format":"float","default":"null","example":0.95,"nullable":true,"maximum":1,"exclusiveMinimum":0},"truncate":{"type":"integer","default":"null","example":"null","nullable":true,"minimum":0},"typical_p":{"type":"number","format":"float","default":"null","example":0.95,"nullable":true,"maximum":1,"exclusiveMinimum":0},"watermark":{"type":"boolean","default":"false","example":true}}},"GenerateRequest":{"type":"object","required":["inputs"],"properties":{"inputs":{"type":"string","example":"My name is Olivier and I"},"parameters":{"$ref":"#/components/schemas/GenerateParameters"}}},"GenerateResponse":{"type":"object","required":["generated_text"],"properties":{"details":{"allOf":[{"$ref":"#/components/schemas/Details"}],"nullable":true},"generated_text":{"type":"string","example":"test"}}},"Info":{"type":"object","required":["model_id","model_dtype","model_device_type","max_concurrent_requests","max_best_of","max_stop_sequences","max_input_length","max_total_tokens","waiting_served_ratio","max_batch_total_tokens","max_waiting_tokens","validation_workers","version"],"properties":{"docker_label":{"type":"string","example":"null","nullable":true},"max_batch_total_tokens":{"type":"integer","format":"int32","example":"32000","minimum":0},"max_best_of":{"type":"integer","example":"2","minimum":0},"max_concurrent_requests":{"type":"integer","description":"Router 
Parameters","example":"128","minimum":0},"max_input_length":{"type":"integer","example":"1024","minimum":0},"max_stop_sequences":{"type":"integer","example":"4","minimum":0},"max_total_tokens":{"type":"integer","example":"2048","minimum":0},"max_waiting_tokens":{"type":"integer","example":"20","minimum":0},"model_device_type":{"type":"string","example":"cuda"},"model_dtype":{"type":"string","example":"torch.float16"},"model_id":{"type":"string","description":"Model info","example":"bigscience/blomm-560m"},"model_pipeline_tag":{"type":"string","example":"text-generation","nullable":true},"model_sha":{"type":"string","example":"e985a63cdc139290c5f700ff1929f0b5942cced2","nullable":true},"sha":{"type":"string","example":"null","nullable":true},"validation_workers":{"type":"integer","example":"2","minimum":0},"version":{"type":"string","description":"Router Info","example":"0.5.0"},"waiting_served_ratio":{"type":"number","format":"float","example":"1.2"}}},"PrefillToken":{"type":"object","required":["id","text","logprob"],"properties":{"id":{"type":"integer","format":"int32","example":0,"minimum":0},"logprob":{"type":"number","format":"float","example":-0.34,"nullable":true},"text":{"type":"string","example":"test"}}},"StreamDetails":{"type":"object","required":["finish_reason","generated_tokens"],"properties":{"finish_reason":{"$ref":"#/components/schemas/FinishReason"},"generated_tokens":{"type":"integer","format":"int32","example":1,"minimum":0},"seed":{"type":"integer","format":"int64","example":42,"nullable":true,"minimum":0}}},"StreamResponse":{"type":"object","required":["index","token"],"properties":{"details":{"allOf":[{"$ref":"#/components/schemas/StreamDetails"}],"default":"null","nullable":true},"generated_text":{"type":"string","default":"null","example":"test","nullable":true},"index":{"type":"integer","format":"int32","minimum":0},"token":{"$ref":"#/components/schemas/Token"},"top_tokens":{"type":"array","items":{"$ref":"#/components/schemas/Token"}}}},"To
ken":{"type":"object","required":["id","text","logprob","special"],"properties":{"id":{"type":"integer","format":"int32","example":0,"minimum":0},"logprob":{"type":"number","format":"float","example":-0.34,"nullable":true},"special":{"type":"boolean","example":"false"},"text":{"type":"string","example":"test"}}}}},"tags":[{"name":"Text Generation Inference","description":"Hugging Face Text Generation Inference API"}]}
diff --git a/router/src/infer.rs b/router/src/infer.rs
index 8a9875eb..5f078ba0 100644
--- a/router/src/infer.rs
+++ b/router/src/infer.rs
@@ -165,6 +165,28 @@ impl Infer {
         ))
     }
 
+    /// Tokenize the request inputs; `None` means no fast tokenizer is available
+    #[instrument(skip_all)]
+    pub(crate) async fn tokenize(
+        &self,
+        request: GenerateRequest,
+    ) -> Result<Option<tokenizers::Encoding>, InferError> {
+        // Tokenize `inputs`, honoring the request's optional `truncate` parameter
+        let inputs = request.inputs;
+        let truncate = request.parameters.truncate;
+        let encoding = self
+            .validation
+            .tokenize(inputs, truncate)
+            .await
+            .map_err(|err| {
+                tracing::error!("Tokenization {err}");
+                err
+            })?;
+
+        // Keep only the encoding; the input string returned alongside it is dropped
+        Ok(encoding.map(|(encoding, _)| encoding))
+    }
+
     /// Apply the chat template to the chat request
     #[instrument(skip_all)]
     pub(crate) fn apply_chat_template(&self, messages: Vec<Message>) -> Result<String, InferError> {
diff --git a/router/src/lib.rs b/router/src/lib.rs
index 894ab466..2bfbbacd 100644
--- a/router/src/lib.rs
+++ b/router/src/lib.rs
@@ -444,6 +444,18 @@ pub struct Token {
     special: bool,
 }
 
+#[derive(Debug, Serialize, ToSchema)]
+pub struct SimpleToken {
+    #[schema(example = 0)]
+    id: u32, // token id, filled from `Encoding::get_ids` by the `/tokenize` handler
+    #[schema(example = "test")]
+    text: String, // piece of the input covered by this token
+    #[schema(example = 0)]
+    start: usize, // start offset into the input, from `Encoding::get_offsets`
+    #[schema(example = 2)]
+    stop: usize, // end offset (exclusive) into the input, from `Encoding::get_offsets`
+}
+
 #[derive(Serialize, ToSchema)]
 #[serde(rename_all(serialize = "snake_case"))]
 pub(crate) enum FinishReason {
diff --git a/router/src/server.rs b/router/src/server.rs
index ff48b4f0..c5ca4665 100644
--- a/router/src/server.rs
+++ b/router/src/server.rs
@@ -5,8 +5,8 @@ use crate::validation::ValidationError;
 use crate::{
     BestOfSequence, ChatCompletion, ChatCompletionChunk, ChatRequest, CompatGenerateRequest,
     Details, ErrorResponse, FinishReason, GenerateParameters, GenerateRequest, GenerateResponse,
-    HubModelInfo, HubTokenizerConfig, Infer, Info, PrefillToken, StreamDetails, StreamResponse,
-    Token, Validation,
+    HubModelInfo, HubTokenizerConfig, Infer, Info, PrefillToken, SimpleToken, StreamDetails,
+    StreamResponse, Token, Validation,
 };
 use axum::extract::Extension;
 use axum::http::{HeaderMap, Method, StatusCode};
@@ -532,7 +532,7 @@ async fn generate_stream_internal(
     path = "/v1/chat/completions",
     request_body = ChatRequest,
     responses(
-    (status = 200, description = "Generated Text", body = GenerateResponse),
+    (status = 200, description = "Generated Text", body = ChatCompletionChunk),
     (status = 424, description = "Generation Error", body = ErrorResponse,
     example = json ! ({"error": "Request failed during generation"})),
     (status = 429, description = "Model is overloaded", body = ErrorResponse,
@@ -672,6 +672,52 @@ async fn chat_completions(
     }
 }
 
+/// Tokenize inputs
+#[utoipa::path(
+    post,
+    tag = "Text Generation Inference",
+    path = "/tokenize",
+    request_body = TokenizeRequest,
+    responses(
+    (status = 200, description = "Tokenized ids", body = TokenizeResponse),
+    (status = 404, description = "No tokenizer found", body = ErrorResponse,
+    example = json ! ({"error": "No fast tokenizer available"})),
+    )
+    )]
+#[instrument(skip_all)]
+async fn tokenize(
+    Extension(infer): Extension<Infer>,
+    Json(req): Json<GenerateRequest>,
+) -> Result<Response, (StatusCode, Json<ErrorResponse>)> {
+    let input = req.inputs.clone(); // `req` itself is moved into `infer.tokenize` below
+    if let Some(encoding) = infer.tokenize(req).await? {
+        let tokens: Vec<SimpleToken> = encoding
+            .get_ids()
+            .iter()
+            .zip(encoding.get_offsets())
+            .map(|(&id, &(start, stop))| {
+                // `Tokenizer::encode` reports byte offsets, so slice `input` by bytes, not chars
+                let text = String::from_utf8_lossy(&input.as_bytes()[start..stop]).into_owned();
+                SimpleToken {
+                    id,
+                    text,
+                    start,
+                    stop,
+                }
+            })
+            .collect();
+        Ok(Json(tokens).into_response())
+    } else {
+        Err((
+            StatusCode::NOT_FOUND,
+            Json(ErrorResponse {
+                error: "No fast tokenizer or tokenizer.json for this model".to_string(),
+                error_type: "no fast tokenizer".to_string(),
+            }),
+        ))
+    }
+}
+
 /// Prometheus metrics scrape endpoint
 #[utoipa::path(
 get,
@@ -719,6 +765,8 @@ pub async fn run(
     compat_generate,
     generate,
     generate_stream,
+    chat_completions,
+    tokenize,
     metrics,
     ),
     components(
@@ -867,6 +915,7 @@ pub async fn run(
         .route("/generate", post(generate))
         .route("/generate_stream", post(generate_stream))
         .route("/v1/chat/completions", post(chat_completions))
+        .route("/tokenize", post(tokenize))
         .route("/health", get(health))
         .route("/ping", get(health))
         .route("/metrics", get(metrics));
diff --git a/router/src/validation.rs b/router/src/validation.rs
index 370e9588..750b98e5 100644
--- a/router/src/validation.rs
+++ b/router/src/validation.rs
@@ -70,12 +70,11 @@ impl Validation {
     }
 
     #[instrument(skip(self, inputs))]
-    async fn validate_input(
+    pub async fn tokenize(
         &self,
         inputs: String,
         truncate: Option<usize>,
-        max_new_tokens: Option<u32>,
-    ) -> Result<(String, usize, u32), ValidationError> {
+    ) -> Result<Option<(tokenizers::Encoding, String)>, ValidationError> {
         // If we have a fast tokenizer
         if let Some(sender) = &self.sender {
             // Create response channel
@@ -88,7 +87,24 @@ impl Validation {
 
             // Await on response channel
             // Unwrap is safe here
-            let (inputs, input_length) = response_receiver.await.unwrap()?;
+            let encoding = response_receiver.await.unwrap()?;
+            Ok(Some(encoding))
+        } else {
+            Ok(None)
+        }
+    }
+
+    #[instrument(skip(self, inputs))]
+    async fn validate_input(
+        &self,
+        inputs: String,
+        truncate: Option<usize>,
+        max_new_tokens: Option<u32>,
+    ) -> Result<(String, usize, u32), ValidationError> {
+        // If we have a fast tokenizer
+        if let Some((encoding, inputs)) = self.tokenize(inputs.clone(), truncate).await? {
+            // Number of tokens in the (possibly truncated) input
+            let input_length = encoding.len();
 
             // Get total tokens
             let max_new_tokens: u32 = if let Some(max_new_tokens) = max_new_tokens {
@@ -343,36 +359,31 @@ fn tokenizer_worker(tokenizer: Tokenizer, mut receiver: mpsc::UnboundedReceiver<
 
 /// Get input length and optionally truncate it
 fn prepare_input(
-    inputs: String,
+    mut inputs: String,
     truncate: Option<usize>,
     tokenizer: &Tokenizer,
-) -> Result<(String, usize), ValidationError> {
+) -> Result<(tokenizers::Encoding, String), ValidationError> {
     // Get the number of tokens in the input
     let mut encoding = tokenizer
         .encode(inputs.clone(), true)
         .map_err(|err| ValidationError::Tokenizer(err.to_string()))?;
 
     // Optionally truncate
-    let (inputs, input_length) = match truncate {
-        // Truncate is some and < encoding length
-        Some(truncate) if truncate < encoding.len() => {
-            // truncate encoding and decode new inputs
+    if let Some(truncate) = truncate {
+        if truncate < encoding.len() {
             encoding.truncate(truncate, 0, TruncationDirection::Left);
-            let inputs = tokenizer
+            inputs = tokenizer
                 .decode(encoding.get_ids(), false)
                 .map_err(|err| ValidationError::Tokenizer(err.to_string()))?;
-            (inputs, encoding.len())
         }
-        // Nothing to do
-        _ => (inputs, encoding.len()),
-    };
+    }
 
-    Ok((inputs, input_length))
+    Ok((encoding, inputs))
 }
 
 type TokenizerRequest = (
     (String, Option<usize>),
-    oneshot::Sender<Result<(String, usize), ValidationError>>,
+    oneshot::Sender<Result<(tokenizers::Encoding, String), ValidationError>>,
     Span,
 );
 

From 7e2a7433d3584a5a68dbf3e71def4323079f2c26 Mon Sep 17 00:00:00 2001
From: drbh <david.richard.holtz@gmail.com>
Date: Thu, 25 Jan 2024 09:37:53 -0500
Subject: [PATCH 05/31] feat: adds phi model (#1442)

This PR adds basic modeling for phi-2

run
```bash
text-generation-server \
    serve \
    microsoft/phi-2 \
    --revision 834565c23f9b28b96ccbeabe614dd906b6db551a
```


test
```bash
curl -s localhost:3000/generate \
   -X POST \
   -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":20}}' \
   -H 'Content-Type: application/json' | jq .
# {
#   "generated_text": "\nDeep learning is a subset of machine learning that uses artificial neural networks to learn from data. These"
# }
```



notes
- recently (~1 day ago) the Phi weights and model were updated to
accommodate adding [GQA/MQA attention to the
model.](https://github.com/huggingface/transformers/pull/28163) This
impl expects the original model format so a fixed revision is required
at the moment.
- this PR only includes a basic implementation of the model; it can
later be extended to support Flash and Sharded versions and to make
use of better optimizations
---
 .../test_flash_phi/test_flash_phi.json        |  84 ++++
 .../test_flash_phi_all_params.json            |  60 +++
 .../test_flash_phi/test_flash_phi_load.json   | 338 +++++++++++++++
 integration-tests/models/test_flash_phi.py    |  65 +++
 server/pyproject.toml                         |   2 +-
 .../text_generation_server/models/__init__.py |  34 ++
 .../custom_modeling/flash_phi_modeling.py     | 400 ++++++++++++++++++
 .../models/custom_modeling/phi_modeling.py    | 308 ++++++++++++++
 .../models/flash_phi.py                       | 102 +++++
 server/text_generation_server/models/phi.py   |  63 +++
 10 files changed, 1455 insertions(+), 1 deletion(-)
 create mode 100644 integration-tests/models/__snapshots__/test_flash_phi/test_flash_phi.json
 create mode 100644 integration-tests/models/__snapshots__/test_flash_phi/test_flash_phi_all_params.json
 create mode 100644 integration-tests/models/__snapshots__/test_flash_phi/test_flash_phi_load.json
 create mode 100644 integration-tests/models/test_flash_phi.py
 create mode 100644 server/text_generation_server/models/custom_modeling/flash_phi_modeling.py
 create mode 100644 server/text_generation_server/models/custom_modeling/phi_modeling.py
 create mode 100644 server/text_generation_server/models/flash_phi.py
 create mode 100644 server/text_generation_server/models/phi.py

diff --git a/integration-tests/models/__snapshots__/test_flash_phi/test_flash_phi.json b/integration-tests/models/__snapshots__/test_flash_phi/test_flash_phi.json
new file mode 100644
index 00000000..51d969b2
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_flash_phi/test_flash_phi.json
@@ -0,0 +1,84 @@
+{
+  "details": {
+    "best_of_sequences": null,
+    "finish_reason": "length",
+    "generated_tokens": 10,
+    "prefill": [
+      {
+        "id": 14402,
+        "logprob": null,
+        "text": "Test"
+      },
+      {
+        "id": 2581,
+        "logprob": -11.6171875,
+        "text": " request"
+      }
+    ],
+    "seed": null,
+    "tokens": [
+      {
+        "id": 25,
+        "logprob": -2.3203125,
+        "special": false,
+        "text": ":"
+      },
+      {
+        "id": 1391,
+        "logprob": -0.98779297,
+        "special": false,
+        "text": " {"
+      },
+      {
+        "id": 25927,
+        "logprob": -0.76660156,
+        "special": false,
+        "text": "request"
+      },
+      {
+        "id": 92,
+        "logprob": -0.7246094,
+        "special": false,
+        "text": "}"
+      },
+      {
+        "id": 4943,
+        "logprob": -0.41333008,
+        "special": false,
+        "text": "\")"
+      },
+      {
+        "id": 198,
+        "logprob": -0.11785889,
+        "special": false,
+        "text": "\n"
+      },
+      {
+        "id": 50280,
+        "logprob": -0.97265625,
+        "special": false,
+        "text": "        "
+      },
+      {
+        "id": 26209,
+        "logprob": -1.4414062,
+        "special": false,
+        "text": "response"
+      },
+      {
+        "id": 796,
+        "logprob": -0.0569458,
+        "special": false,
+        "text": " ="
+      },
+      {
+        "id": 2116,
+        "logprob": -1.1533203,
+        "special": false,
+        "text": " self"
+      }
+    ],
+    "top_tokens": null
+  },
+  "generated_text": ": {request}\")\n        response = self"
+}
diff --git a/integration-tests/models/__snapshots__/test_flash_phi/test_flash_phi_all_params.json b/integration-tests/models/__snapshots__/test_flash_phi/test_flash_phi_all_params.json
new file mode 100644
index 00000000..221ff13d
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_flash_phi/test_flash_phi_all_params.json
@@ -0,0 +1,60 @@
+{
+  "details": {
+    "best_of_sequences": null,
+    "finish_reason": "stop_sequence",
+    "generated_tokens": 6,
+    "prefill": [
+      {
+        "id": 14402,
+        "logprob": null,
+        "text": "Test"
+      },
+      {
+        "id": 2581,
+        "logprob": -11.6171875,
+        "text": " request"
+      }
+    ],
+    "seed": 0,
+    "tokens": [
+      {
+        "id": 284,
+        "logprob": -0.19421387,
+        "special": false,
+        "text": " to"
+      },
+      {
+        "id": 3758,
+        "logprob": -0.62597656,
+        "special": false,
+        "text": " send"
+      },
+      {
+        "id": 1366,
+        "logprob": -0.87060547,
+        "special": false,
+        "text": " data"
+      },
+      {
+        "id": 625,
+        "logprob": -0.88427734,
+        "special": false,
+        "text": " over"
+      },
+      {
+        "id": 257,
+        "logprob": -1.0830078,
+        "special": false,
+        "text": " a"
+      },
+      {
+        "id": 3127,
+        "logprob": -1.9462891,
+        "special": false,
+        "text": " network"
+      }
+    ],
+    "top_tokens": null
+  },
+  "generated_text": "Test request to send data over a network"
+}
diff --git a/integration-tests/models/__snapshots__/test_flash_phi/test_flash_phi_load.json b/integration-tests/models/__snapshots__/test_flash_phi/test_flash_phi_load.json
new file mode 100644
index 00000000..62f7fd32
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_flash_phi/test_flash_phi_load.json
@@ -0,0 +1,338 @@
+[
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 14402,
+          "logprob": null,
+          "text": "Test"
+        },
+        {
+          "id": 2581,
+          "logprob": -11.6171875,
+          "text": " request"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 25,
+          "logprob": -2.3203125,
+          "special": false,
+          "text": ":"
+        },
+        {
+          "id": 1391,
+          "logprob": -0.98779297,
+          "special": false,
+          "text": " {"
+        },
+        {
+          "id": 25927,
+          "logprob": -0.7729492,
+          "special": false,
+          "text": "request"
+        },
+        {
+          "id": 92,
+          "logprob": -0.7241211,
+          "special": false,
+          "text": "}"
+        },
+        {
+          "id": 4943,
+          "logprob": -0.4091797,
+          "special": false,
+          "text": "\")"
+        },
+        {
+          "id": 198,
+          "logprob": -0.119018555,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 50280,
+          "logprob": -0.9707031,
+          "special": false,
+          "text": "        "
+        },
+        {
+          "id": 26209,
+          "logprob": -1.4414062,
+          "special": false,
+          "text": "response"
+        },
+        {
+          "id": 796,
+          "logprob": -0.056854248,
+          "special": false,
+          "text": " ="
+        },
+        {
+          "id": 2116,
+          "logprob": -1.1533203,
+          "special": false,
+          "text": " self"
+        }
+      ],
+      "top_tokens": null
+    },
+    "generated_text": ": {request}\")\n        response = self"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 14402,
+          "logprob": null,
+          "text": "Test"
+        },
+        {
+          "id": 2581,
+          "logprob": -11.6171875,
+          "text": " request"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 25,
+          "logprob": -2.3203125,
+          "special": false,
+          "text": ":"
+        },
+        {
+          "id": 1391,
+          "logprob": -0.98779297,
+          "special": false,
+          "text": " {"
+        },
+        {
+          "id": 25927,
+          "logprob": -0.7729492,
+          "special": false,
+          "text": "request"
+        },
+        {
+          "id": 92,
+          "logprob": -0.7241211,
+          "special": false,
+          "text": "}"
+        },
+        {
+          "id": 4943,
+          "logprob": -0.4091797,
+          "special": false,
+          "text": "\")"
+        },
+        {
+          "id": 198,
+          "logprob": -0.119018555,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 50280,
+          "logprob": -0.9707031,
+          "special": false,
+          "text": "        "
+        },
+        {
+          "id": 26209,
+          "logprob": -1.4414062,
+          "special": false,
+          "text": "response"
+        },
+        {
+          "id": 796,
+          "logprob": -0.056854248,
+          "special": false,
+          "text": " ="
+        },
+        {
+          "id": 2116,
+          "logprob": -1.1533203,
+          "special": false,
+          "text": " self"
+        }
+      ],
+      "top_tokens": null
+    },
+    "generated_text": ": {request}\")\n        response = self"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 14402,
+          "logprob": null,
+          "text": "Test"
+        },
+        {
+          "id": 2581,
+          "logprob": -11.6171875,
+          "text": " request"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 25,
+          "logprob": -2.3203125,
+          "special": false,
+          "text": ":"
+        },
+        {
+          "id": 1391,
+          "logprob": -0.98779297,
+          "special": false,
+          "text": " {"
+        },
+        {
+          "id": 25927,
+          "logprob": -0.7729492,
+          "special": false,
+          "text": "request"
+        },
+        {
+          "id": 92,
+          "logprob": -0.7241211,
+          "special": false,
+          "text": "}"
+        },
+        {
+          "id": 4943,
+          "logprob": -0.4091797,
+          "special": false,
+          "text": "\")"
+        },
+        {
+          "id": 198,
+          "logprob": -0.119018555,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 50280,
+          "logprob": -0.9707031,
+          "special": false,
+          "text": "        "
+        },
+        {
+          "id": 26209,
+          "logprob": -1.4414062,
+          "special": false,
+          "text": "response"
+        },
+        {
+          "id": 796,
+          "logprob": -0.056854248,
+          "special": false,
+          "text": " ="
+        },
+        {
+          "id": 2116,
+          "logprob": -1.1533203,
+          "special": false,
+          "text": " self"
+        }
+      ],
+      "top_tokens": null
+    },
+    "generated_text": ": {request}\")\n        response = self"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 14402,
+          "logprob": null,
+          "text": "Test"
+        },
+        {
+          "id": 2581,
+          "logprob": -11.6171875,
+          "text": " request"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 25,
+          "logprob": -2.3203125,
+          "special": false,
+          "text": ":"
+        },
+        {
+          "id": 1391,
+          "logprob": -0.98779297,
+          "special": false,
+          "text": " {"
+        },
+        {
+          "id": 25927,
+          "logprob": -0.7729492,
+          "special": false,
+          "text": "request"
+        },
+        {
+          "id": 92,
+          "logprob": -0.7241211,
+          "special": false,
+          "text": "}"
+        },
+        {
+          "id": 4943,
+          "logprob": -0.4091797,
+          "special": false,
+          "text": "\")"
+        },
+        {
+          "id": 198,
+          "logprob": -0.119018555,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 50280,
+          "logprob": -0.9707031,
+          "special": false,
+          "text": "        "
+        },
+        {
+          "id": 26209,
+          "logprob": -1.4414062,
+          "special": false,
+          "text": "response"
+        },
+        {
+          "id": 796,
+          "logprob": -0.056854248,
+          "special": false,
+          "text": " ="
+        },
+        {
+          "id": 2116,
+          "logprob": -1.1533203,
+          "special": false,
+          "text": " self"
+        }
+      ],
+      "top_tokens": null
+    },
+    "generated_text": ": {request}\")\n        response = self"
+  }
+]
diff --git a/integration-tests/models/test_flash_phi.py b/integration-tests/models/test_flash_phi.py
new file mode 100644
index 00000000..6391f2a1
--- /dev/null
+++ b/integration-tests/models/test_flash_phi.py
@@ -0,0 +1,65 @@
+import pytest
+
+
+@pytest.fixture(scope="module")
+def flash_phi_handle(launcher):
+    with launcher("microsoft/phi-2", num_shard=1) as handle:
+        yield handle
+
+
+@pytest.fixture(scope="module")
+async def flash_phi(flash_phi_handle):
+    await flash_phi_handle.health(300)
+    return flash_phi_handle.client
+
+
+@pytest.mark.asyncio
+@pytest.mark.private
+async def test_flash_phi(flash_phi, response_snapshot):
+    response = await flash_phi.generate(
+        "Test request", max_new_tokens=10, decoder_input_details=True
+    )
+
+    assert response.details.generated_tokens == 10
+    assert response.generated_text == ": {request}\")\n        response = self"
+    assert response == response_snapshot
+
+
+@pytest.mark.asyncio
+@pytest.mark.private
+async def test_flash_phi_all_params(flash_phi, response_snapshot):
+    response = await flash_phi.generate(
+        "Test request",
+        max_new_tokens=10,
+        repetition_penalty=1.2,
+        return_full_text=True,
+        stop_sequences=["network"],
+        temperature=0.5,
+        top_p=0.9,
+        top_k=10,
+        truncate=5,
+        typical_p=0.9,
+        watermark=True,
+        decoder_input_details=True,
+        seed=0,
+    )
+
+    assert response.details.generated_tokens == 6
+    assert response.generated_text == "Test request to send data over a network"
+    assert response == response_snapshot
+
+
+@pytest.mark.asyncio
+@pytest.mark.private
+async def test_flash_phi_load(flash_phi, generate_load, response_snapshot):
+    responses = await generate_load(
+        flash_phi, "Test request", max_new_tokens=10, n=4
+    )
+
+    assert len(responses) == 4
+    assert all(
+        [r.generated_text == responses[0].generated_text for r in responses]
+    ), f"{[r.generated_text  for r in responses]}"
+    assert responses[0].generated_text == ": {request}\")\n        response = self"
+
+    assert responses == response_snapshot
diff --git a/server/pyproject.toml b/server/pyproject.toml
index 6e9be43e..d1452678 100644
--- a/server/pyproject.toml
+++ b/server/pyproject.toml
@@ -26,7 +26,7 @@ hf-transfer = "^0.1.2"
 sentencepiece = "^0.1.97"
 tokenizers = "^0.15.0"
 huggingface-hub = "^0.19.3"
-transformers = "^4.36.1"
+transformers = "^4.37.1"
 einops = "^0.6.1"
 texttable = { version = "^1.6.7", optional = true }
 datasets = { version = "^2.14.0", optional = true }
diff --git a/server/text_generation_server/models/__init__.py b/server/text_generation_server/models/__init__.py
index 39d1d58e..679e1e2f 100644
--- a/server/text_generation_server/models/__init__.py
+++ b/server/text_generation_server/models/__init__.py
@@ -18,6 +18,7 @@ from text_generation_server.models.galactica import GalacticaSharded
 from text_generation_server.models.santacoder import SantaCoder
 from text_generation_server.models.t5 import T5Sharded
 from text_generation_server.models.gpt_neox import GPTNeoxSharded
+from text_generation_server.models.phi import Phi
 
 # The flag below controls whether to allow TF32 on matmul. This flag defaults to False
 # in PyTorch 1.12 and later.
@@ -57,6 +58,7 @@ try:
     from text_generation_server.models.idefics import IDEFICSSharded
     from text_generation_server.models.flash_mistral import FlashMistral
     from text_generation_server.models.flash_mixtral import FlashMixtral
+    from text_generation_server.models.flash_phi import FlashPhi
     from text_generation_server.utils.flash_attn import HAS_FLASH_ATTN_V2_CUDA
 
 except ImportError as e:
@@ -72,6 +74,7 @@ if FLASH_ATTENTION:
     __all__.append(IDEFICSSharded)
     __all__.append(FlashMistral)
     __all__.append(FlashMixtral)
+    __all__.append(FlashPhi)
 
 
 def get_model(
@@ -227,6 +230,37 @@ def get_model(
                 dtype=dtype,
                 trust_remote_code=trust_remote_code,
             )
+        
+    elif model_type == "phi":
+        if FLASH_ATTENTION:
+            return FlashPhi(
+                model_id,
+                revision,
+                quantize=quantize,
+                dtype=dtype,
+                trust_remote_code=trust_remote_code,
+                use_medusa=use_medusa,
+            )
+        else:
+            return CausalLM(
+                model_id,
+                revision,
+                quantize=quantize,
+                dtype=dtype,
+                trust_remote_code=trust_remote_code,
+            )
+
+    elif model_type == "phi-msft":
+        if FLASH_ATTENTION:
+            raise NotImplementedError("Legacy phi-msft is not supported with Flash Attention")
+        else:
+            return Phi(
+                model_id,
+                revision,
+                quantize=quantize,
+                dtype=dtype,
+                trust_remote_code=trust_remote_code,
+            )
 
     elif model_type == "llama" or model_type == "baichuan":
         if FLASH_ATTENTION:
diff --git a/server/text_generation_server/models/custom_modeling/flash_phi_modeling.py b/server/text_generation_server/models/custom_modeling/flash_phi_modeling.py
new file mode 100644
index 00000000..d103973f
--- /dev/null
+++ b/server/text_generation_server/models/custom_modeling/flash_phi_modeling.py
@@ -0,0 +1,400 @@
+import torch
+import torch.distributed
+
+from torch import nn
+from transformers.activations import ACT2FN
+from transformers.configuration_utils import PretrainedConfig
+from typing import Optional, List, Tuple
+
+from text_generation_server.utils import paged_attention, flash_attn
+from text_generation_server.utils.layers import (
+    TensorParallelRowLinear,
+    TensorParallelColumnLinear,
+    TensorParallelEmbedding,
+    PositionRotaryEmbedding,
+    TensorParallelHead,
+    get_linear,
+    FastLayerNorm,
+)
+
+class PhiConfig(PretrainedConfig):
+    def __init__(
+        self,
+        vocab_size=51200,
+        hidden_size=2560,
+        num_hidden_layers=32,
+        num_attention_heads=32,
+        num_key_value_heads=32,
+        hidden_act="gelu_fast",    # llama uses silu
+        layer_norm_eps=1e-05,      # rms in llama,
+        pad_token_id=0,
+        bos_token_id=1,
+        eos_token_id=2,
+        tie_word_embeddings=False,
+        rope_theta=10000.0,
+        resid_pdrop=0.1,           # llama doesn't have this
+        partial_rotary_factor=0.5, # important difference between llama and phi
+        **kwargs,
+    ):
+        self.vocab_size = vocab_size
+        self.hidden_size = hidden_size
+        self.num_hidden_layers = num_hidden_layers
+        self.num_attention_heads = num_attention_heads
+        self.num_key_value_heads = num_key_value_heads
+        self.hidden_act = hidden_act
+        self.layer_norm_eps = layer_norm_eps
+        self.rope_theta = rope_theta
+        self.resid_pdrop = resid_pdrop
+        self.partial_rotary_factor = partial_rotary_factor
+
+        super().__init__(
+            pad_token_id=pad_token_id,
+            bos_token_id=bos_token_id,
+            eos_token_id=eos_token_id,
+            tie_word_embeddings=tie_word_embeddings,
+            **kwargs,
+        )
+
+# this is the same as llama except for Phi uses bias=True
+def load_attention(config, prefix, weights):
+    if config.num_attention_heads != config.num_key_value_heads:
+        return _load_gqa(config, prefix, weights)
+    else:
+        return TensorParallelColumnLinear.load_multi(
+            config,
+            prefixes=[f"{prefix}.q_proj", f"{prefix}.k_proj", f"{prefix}.v_proj"],
+            dim=0,
+            weights=weights,
+            bias=True,
+        )
+
+def _load_gqa(config, prefix: str, weights):
+    assert config.hidden_size % config.num_attention_heads == 0
+    assert config.num_attention_heads % weights.process_group.size() == 0
+    # Load the q_proj/k_proj/v_proj weights through a single get_multi_weights_col call (dim=0).
+    weight = weights.get_multi_weights_col(
+        prefixes=[f"{prefix}.q_proj", f"{prefix}.k_proj", f"{prefix}.v_proj"],
+        quantize=config.quantize,
+        dim=0,
+    )
+    # For gptq/awq the dtype/device move and the shape check below are skipped.
+    if config.quantize not in ["gptq", "awq"]:
+        weight = weight.to(dtype=weights.dtype).to(device=weights.device)
+
+        head_size = config.hidden_size // config.num_attention_heads
+        num_heads = config.num_attention_heads // weights.process_group.size()
+        num_key_value_heads = config.num_key_value_heads // weights.process_group.size()
+        assert list(weight.shape) == [
+            (num_heads + 2 * num_key_value_heads) * head_size,
+            config.hidden_size,
+        ], f"{list(weight.shape)} != {[(num_heads + 2 * num_key_value_heads) * head_size, config.hidden_size]}"
+
+    # NOTE(review): unlike llama this passes bias=True (a bool, not a loaded bias tensor) to get_linear; confirm get_linear accepts that
+    return TensorParallelColumnLinear(
+        get_linear(weight, bias=True, quantize=config.quantize)
+    )
+
+class FlashPhiAttention(torch.nn.Module):
+    def __init__(
+        self,
+        prefix: str,
+        config,
+        weights,
+    ):
+        super().__init__()
+        self.num_heads = config.num_attention_heads
+        self.hidden_size = config.hidden_size
+        self.head_size = self.hidden_size // self.num_heads
+
+        self.softmax_scale = self.head_size**-0.5
+        self.rotary_dim = int(config.partial_rotary_factor * self.head_size)
+
+        self.rotary_emb = PositionRotaryEmbedding.static(
+            config=config,
+            dim=self.rotary_dim,
+            base=config.rope_theta,
+            device=weights.device,
+        )
+
+        if self.num_heads % weights.process_group.size() != 0:
+            raise ValueError(
+                f"`num_heads` must be divisible by `num_shards` (got `num_heads`: {self.num_heads} "
+                f"and `num_shards`: {weights.process_group.size()}"
+            )
+
+        self.num_heads = self.num_heads // weights.process_group.size()
+        self.num_key_value_heads = (
+            config.num_key_value_heads // weights.process_group.size()
+        )
+
+        self.query_key_value = load_attention(config, prefix, weights)
+
+        # in llama the dense layer is called "o_proj" and has bias=False
+        self.dense = TensorParallelRowLinear.load(
+            config,
+            prefix=f"{prefix}.dense",
+            weights=weights,
+            bias=True,
+        )
+        self.num_groups = self.num_heads // self.num_key_value_heads
+        self.kv_head_mapping = torch.arange(
+            0, self.num_key_value_heads, dtype=torch.int32, device=weights.device
+        ).repeat_interleave(self.num_groups)
+
+    def forward(
+        self,
+        hidden_states,
+        cos,
+        sin,
+        cu_seqlen_prefill,
+        kv_cache,
+        block_tables,
+        slots,
+        input_lengths,
+        max_s,
+    ):
+        # Compute query, key, value and split
+        qkv = self.query_key_value(hidden_states)
+        query, kv = qkv.split(
+            [
+                self.head_size * self.num_heads,
+                2 * self.head_size * self.num_key_value_heads,
+            ],
+            dim=1,
+        )
+
+        # Reshape query and key for rotary embeddings
+        query = query.view(-1, self.num_heads, self.head_size)
+        kv = kv.view(-1, 2, self.num_key_value_heads, self.head_size)
+
+        # NOTE: this is the main difference between Llama and Phi
+        # in llama the rotary embeddings are applied to the whole query and key.
+        # Phi uses PARTIAL rotary embeddings, applied only to the first `rotary_dim` dimensions of each head (e.g. 32)
+        #
+        # Apply partial positional embeddings in place
+        self.rotary_emb(
+            query[:, :, :self.rotary_dim], kv[:, 0, :, :self.rotary_dim],
+            cos, sin
+        )
+
+        # Reshape key and value and cache
+        paged_attention.reshape_and_cache(
+            kv[:, 0], kv[:, 1], kv_cache[0], kv_cache[1], slots
+        )
+
+        # output tensor
+        attn_output = torch.empty_like(query)
+
+        # Prefill
+        if cu_seqlen_prefill is not None:
+            flash_attn.attention(
+                query,
+                torch.select(kv, dim=1, index=0),
+                torch.select(kv, dim=1, index=1),
+                attn_output,
+                cu_seqlen_prefill,
+                max_s,
+                self.softmax_scale,
+            )
+        # Decode
+        else:
+            paged_attention.attention(
+                attn_output,
+                query,
+                kv_cache[0],
+                kv_cache[1],
+                self.kv_head_mapping,
+                self.softmax_scale,
+                block_tables,
+                input_lengths,
+                max_s,
+            )
+
+        return self.dense(attn_output.view(-1, self.num_heads*self.head_size))
+
+class PhiMLP(nn.Module):
+    def __init__(self, prefix, config, weights):
+        super().__init__()
+        act = config.hidden_act
+        self.act = (
+            ACT2FN[act]
+            if "gelu" not in act
+            else lambda x: torch.nn.functional.gelu(
+                x,
+                approximate="tanh"
+                if act in ["gelu_fast", "gelu_pytorch_tanh"]
+                else "none",
+            )
+        )
+
+        # llama names these up_proj/down_proj with bias=False; Phi loads fc1 (column-parallel) and fc2 (row-parallel) with bias=True
+        self.up_proj = TensorParallelColumnLinear.load(
+            config,
+            prefix=f"{prefix}.fc1",
+            weights=weights,
+            bias=True,
+        )
+        self.down_proj = TensorParallelRowLinear.load(
+            config,
+            prefix=f"{prefix}.fc2",
+            weights=weights,
+            bias=True,
+        )
+
+    def forward(self, hidden_states):
+        # NOTE: Llama fuses gate and up projections and must `view` the result to the intermediate size.
+        # Phi has no gate projection, so the output is simply fc2(act(fc1(x))) with no `view`.
+        return self.down_proj(self.act(self.up_proj(hidden_states)))
+
+
+class FlashPhiLayer(nn.Module):
+    def __init__(self, layer_id, config, weights):
+        super().__init__()
+        prefix = f"model.layers.{layer_id}"
+        self.self_attn = FlashPhiAttention(
+            prefix=f"{prefix}.self_attn", config=config, weights=weights
+        )
+        self.mlp = PhiMLP(prefix=f"{prefix}.mlp", config=config, weights=weights)
+        self.input_layernorm = FastLayerNorm.load(
+            prefix=f"{prefix}.input_layernorm", weights=weights, eps=config.layer_norm_eps
+        )
+        self.resid_dropout = torch.nn.Dropout(config.resid_pdrop)
+
+    def forward(
+        self,
+        hidden_states,
+        residual,
+        cos,
+        sin,
+        cu_seqlen_prefill,
+        kv_cache,
+        block_tables,
+        slots,
+        input_lengths,
+        max_s,
+    ):
+        hidden_states, res = self.input_layernorm(hidden_states, residual)
+        # Self Attention
+        attn_output = self.self_attn(
+            hidden_states,
+            cos,
+            sin,
+            cu_seqlen_prefill,
+            kv_cache,
+            block_tables,
+            slots,
+            input_lengths,
+            max_s,
+        )
+
+        hidden_states = self.resid_dropout(attn_output).add(self.resid_dropout(self.mlp(hidden_states)))
+
+        return hidden_states, res
+
+class FlashPhiModel(torch.nn.Module):
+    def __init__(self, config, weights):
+        super().__init__()
+
+        process_group = weights.process_group
+        self.tp_rank = process_group.rank()
+        self.tp_world_size = process_group.size()
+        self.embed_tokens = TensorParallelEmbedding(
+            prefix="model.embed_tokens", weights=weights
+        )
+        self.layers = nn.ModuleList(
+            [
+                FlashPhiLayer(
+                    layer_id,
+                    config,
+                    weights,
+                )
+                for layer_id in range(config.num_hidden_layers)
+            ]
+        )
+        self.gradient_checkpointing = False
+
+        self.head_size = self.layers[0].self_attn.head_size
+        self.num_heads = self.layers[0].self_attn.num_heads
+        self.num_key_value_heads = self.layers[0].self_attn.num_key_value_heads
+
+        self.norm = FastLayerNorm.load(
+            prefix="model.final_layernorm",
+            weights=weights,
+            eps=config.layer_norm_eps,
+        )
+
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        position_ids: torch.Tensor,
+        cu_seqlen_prefill: Optional[torch.Tensor],
+        kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
+        block_tables: torch.Tensor,
+        slots: torch.Tensor,
+        input_lengths: torch.Tensor,
+        max_s: int,
+    ) -> torch.Tensor:
+        hidden_states = self.embed_tokens(input_ids)
+
+        # Get rotary cos and sin for this forward
+        # Avoid indexing in each layer
+        cos, sin = self.layers[0].self_attn.rotary_emb.get_cos_sin(
+            position_ids, max_s, hidden_states.dtype
+        )
+
+        residual = None
+        for i, layer in enumerate(self.layers):
+            hidden_states, residual = layer(
+                hidden_states,
+                residual,
+                cos,
+                sin,
+                cu_seqlen_prefill,
+                kv_cache[i],
+                block_tables,
+                slots,
+                input_lengths,
+                max_s,
+            )
+
+        hidden_states, _ = self.norm(hidden_states, residual)
+
+        return hidden_states
+
+class FlashPhiForCausalLM(torch.nn.Module):
+    def __init__(self, config, weights):
+        super().__init__()
+
+        self.model = FlashPhiModel(config, weights)
+        self.lm_head = TensorParallelHead.load(
+            config,
+            prefix="lm_head",
+            weights=weights,
+        )
+
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        position_ids: torch.Tensor,
+        cu_seqlen_prefill: Optional[torch.Tensor],
+        kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
+        block_tables: torch.Tensor,
+        slots: torch.Tensor,
+        input_lengths: torch.Tensor, 
+        max_s: int,
+        lm_head_indices: Optional[torch.Tensor] = None,
+    ) -> torch.Tensor:
+        hidden_states = self.model(
+            input_ids,
+            position_ids,
+            cu_seqlen_prefill,
+            kv_cache,
+            block_tables,
+            slots,
+            input_lengths,
+            max_s,
+        )
+        if lm_head_indices is not None:
+            hidden_states = hidden_states[lm_head_indices]
+
+        return self.lm_head(hidden_states)
diff --git a/server/text_generation_server/models/custom_modeling/phi_modeling.py b/server/text_generation_server/models/custom_modeling/phi_modeling.py
new file mode 100644
index 00000000..f9999537
--- /dev/null
+++ b/server/text_generation_server/models/custom_modeling/phi_modeling.py
@@ -0,0 +1,308 @@
+# implementation of the PhiModel and PhiForCausalLM classes
+
+import torch
+import torch.distributed
+
+import math
+from torch import nn
+from typing import Optional, List, Tuple, Any
+from transformers.configuration_utils import PretrainedConfig
+from transformers.modeling_outputs import CausalLMOutputWithPast
+
+from text_generation_server.utils.layers import (
+    TensorParallelRowLinear,
+    TensorParallelColumnLinear,
+    TensorParallelEmbedding,
+    TensorParallelHead,
+    FastLinear,
+)
+
+
+# PhiConfig is the configuration class for the PhiModel.
+class PhiConfig(PretrainedConfig):
+    def __init__(
+        self,
+        vocab_size=51200,
+        n_positions=2048,
+        n_embd=2560,
+        n_layer=32,
+        n_inner=None,
+        n_head=32,
+        rotary_dim=32,
+        layer_norm_epsilon=1e-5,
+        tie_word_embeddings=False,
+        pad_vocab_size_multiple=64,
+        pad_token_id=0,
+        bos_token_id=1,
+        eos_token_id=2,
+        no_bias=False,
+        **kwargs,
+    ):
+        self.vocab_size = vocab_size
+        self.n_positions = n_positions
+        self.n_embd = n_embd
+        self.n_layer = n_layer
+        self.n_inner = n_inner
+        self.n_head = n_head
+        self.rotary_dim = rotary_dim
+
+        self.layer_norm_epsilon = layer_norm_epsilon
+        self.tie_word_embeddings = tie_word_embeddings
+        self.pad_vocab_size_multiple = pad_vocab_size_multiple
+        self.pad_token_id = pad_token_id
+        self.bos_token_id = bos_token_id
+        self.eos_token_id = eos_token_id
+        self.no_bias = no_bias
+
+        super().__init__(
+            pad_token_id=pad_token_id,
+            bos_token_id=bos_token_id,
+            eos_token_id=eos_token_id,
+            tie_word_embeddings=tie_word_embeddings,
+            **kwargs,
+        )
+
+# RotaryEmbedding is a class that implements the rotary embedding.
+class RotaryEmbedding(nn.Module):
+    def __init__(self, dim, max_seq_len):
+        super().__init__()
+        inv_freq = [
+            1.0 / 10000.0 ** (i / dim)
+            for i in range(0, dim, 2)
+        ]
+        inv_freq_len = len(inv_freq)
+        inv_freq = torch.tensor(inv_freq).view(1, inv_freq_len)
+        t = torch.arange(0, max_seq_len, dtype=torch.float).view(max_seq_len, 1)
+        freqs = t.matmul(inv_freq)
+        self.sin = freqs.sin()
+        self.cos = freqs.cos()
+
+    def apply_rotary_emb_qkv(self, qkv, seqlen_offset):
+        b_size, seqlen, three, _, _headdim = qkv.shape
+        if three != 3:
+            raise Exception("unexpected shape for qkv")
+        _, rotary_dim = self.cos.shape
+        rotary_dim = rotary_dim * 2
+        q_rot = qkv[:, :, 0, :, :rotary_dim]
+        q_pass = qkv[:, :, 0, :, rotary_dim:]
+        k_rot = qkv[:, :, 1, :, :rotary_dim]
+        k_pass = qkv[:, :, 1, :, rotary_dim:]
+        q12 = torch.chunk(q_rot, 2, dim=-1)
+        k12 = torch.chunk(k_rot, 2, dim=-1)
+        q1, q2 = q12[0], q12[1]
+        k1, k2 = k12[0], k12[1]
+        c = self.cos.narrow(0, seqlen_offset, seqlen).unsqueeze(1)
+        s = self.sin.narrow(0, seqlen_offset, seqlen).unsqueeze(1)
+        q_rot = torch.cat(
+            [
+                q1 * c - q2 * s,
+                q1 * s + q2 * c,
+            ],
+            dim=-1,
+        )
+        k_rot = torch.cat(
+            [
+                k1 * c - k2 * s,
+                k1 * s + k2 * c,
+            ],
+            dim=-1,
+        )
+        q = torch.cat([q_rot, q_pass], dim=-1)
+        k = torch.cat([k_rot, k_pass], dim=-1)
+        v = qkv[:, :, 2]
+        return q, k, v
+
+
+# PhiCausalLMHead is the head of the PhiModel. It is a linear layer with a layer norm.
+class PhiCausalLMHead(nn.Module):
+    def __init__(self, config, weights):
+        super().__init__()
+        self.ln = nn.LayerNorm.load(
+            prefix="lm_head.ln",
+            weights=weights,
+            eps=config.layer_norm_epsilon,
+        )
+        self.linear = TensorParallelHead.load(
+            config=config, prefix="lm_head.linear", weights=weights
+        )
+
+    def forward(self, hidden_states):
+        hidden_states = self.ln(hidden_states)
+        hidden_states = self.linear(hidden_states)
+        return hidden_states
+
+# PhiMHA is a multi-head attention layer. This layer uses an attention mask to prevent tokens from attending to subsequent tokens.
+class PhiMHA(nn.Module):
+    def __init__(self, prefix, config, weights):
+        super().__init__()
+        self.Wqkv = TensorParallelColumnLinear.load(
+            config, prefix=f"{prefix}.Wqkv", weights=weights, bias=not config.no_bias
+        )
+        self.out_proj = TensorParallelRowLinear.load(
+            config,
+            prefix=f"{prefix}.out_proj",
+            weights=weights,
+            bias=not config.no_bias,
+        )
+        self.op_size = config.n_embd
+        self.head_dim = int(config.n_embd / config.n_head)
+        self.num_heads = config.n_head
+        self.rotary_emb = RotaryEmbedding(
+            config.rotary_dim,
+            config.n_positions,
+        )
+        self.softmax_scale = 1.0 / math.sqrt(self.head_dim)
+
+    def forward(
+        self,
+        hidden_states,
+        past_kv_cache,
+        attention_mask=None,
+    ):
+        b_size, seq_len, _n_embd = hidden_states.shape
+        qkv = self.Wqkv(hidden_states)
+        qkv = qkv.view(b_size, seq_len, 3, self.num_heads, self.head_dim)
+        seqlen_offset = 0 if past_kv_cache is None else past_kv_cache[0].shape[1]
+        q, k, v = self.rotary_emb.apply_rotary_emb_qkv(qkv, seqlen_offset)
+
+        # if there is a kv_cache, then we need to concatenate
+        if past_kv_cache is not None:
+            prev_k, prev_v = past_kv_cache
+            k = torch.cat([prev_k, k], dim=1)
+            v = torch.cat([prev_v, v], dim=1)
+
+        past_kv_cache = [k, v]
+        attn_weights = torch.einsum('bthd,bshd->bhts', q, k * self.softmax_scale)
+
+        if attention_mask is not None:
+            seqlen_k = k.shape[1]
+            seqlen_q = q.shape[1]
+            causal_mask = torch.triu(torch.full((seqlen_q, seqlen_k), -10000.0, device=attn_weights.device), 1)
+            attn_weights = attn_weights + causal_mask.to(dtype=attn_weights.dtype)
+  
+        attn_weights = torch.nn.functional.softmax(attn_weights, dim=-1)
+        attn_output = attn_weights.matmul(v.transpose(1, 2)).squeeze(0)
+        attn_output = attn_output.view((b_size, self.num_heads, seq_len, self.head_dim)).transpose(1, 2).flatten(-2)
+        return self.out_proj(attn_output), past_kv_cache
+
+# PhiMLP is a multi-layer perceptron. It contains two linear layers with a gelu activation function.
+class PhiMLP(nn.Module):
+    def __init__(self, prefix, config, weights):
+        super().__init__()
+
+        self.n_inner = config.n_inner
+        self.fc1 = FastLinear.load(
+            config=config,
+            prefix=f"{prefix}.fc1",
+            weights=weights,
+            bias=False,
+        )
+        self.fc2 = FastLinear.load(
+            config=config,
+            prefix=f"{prefix}.fc2",
+            weights=weights,
+            bias=False,
+        )
+        self.activation = torch.nn.functional.gelu
+            
+    def forward(self, hidden_states):
+        hidden_states = self.fc1(hidden_states)
+        hidden_states = self.activation(hidden_states)
+        hidden_states = self.fc2(hidden_states)
+        return hidden_states
+
+# PhiBlock is a single transformer block. It contains a layer norm, a multi-head attention layer and a multi-layer perceptron.
+class PhiBlock(nn.Module):
+    def __init__(self, layer_id, config, weights):
+        super().__init__()
+        self.layer_id = layer_id
+        self.layer_norm = nn.LayerNorm.load(prefix=f"{layer_id}.ln", weights=weights, eps=config.layer_norm_epsilon)
+        self.mixer = PhiMHA(prefix=f"{layer_id}.mixer", config=config, weights=weights)
+        self.mlp = PhiMLP(prefix=f"{layer_id}.mlp", config=config, weights=weights)
+
+    def forward(
+        self,
+        hidden_states,
+        kv_cache,
+        attention_mask,
+    ):
+        residual = hidden_states
+        hidden_states = self.layer_norm(hidden_states)
+        attn_outputs, past_kv_cache = self.mixer(hidden_states, kv_cache, attention_mask)
+        feed_forward_hidden_states = self.mlp(hidden_states)
+        out = attn_outputs + feed_forward_hidden_states + residual
+        return out, past_kv_cache
+
+# PhiModel implements the embedding layer and the transformer blocks.
+class PhiModel(nn.Module):
+    def __init__(self, config, weights):
+        super().__init__()
+        self.tp_rank = weights.process_group.rank()
+        self.tp_world_size = weights.process_group.size()
+        self.embed_tokens = TensorParallelEmbedding(
+            prefix="transformer.embd.wte", weights=weights
+        )        
+        self.blocks = nn.ModuleList(
+            [PhiBlock(f"transformer.h.{layer_id}", config, weights) for layer_id in range(config.n_layer)]
+        )
+
+    def forward(
+        self,
+        input_ids: torch.LongTensor,
+        past_key_values: Optional[List[Tuple[torch.FloatTensor]]] = None,
+        attention_mask: Optional[torch.ByteTensor] = None,
+        return_dict: Optional[bool] = None,
+        use_cache: Optional[bool] = None,
+    ) -> Tuple[torch.Tensor, List[Tuple[torch.Tensor, torch.Tensor]]]:
+        hidden_states = self.embed_tokens(input_ids)
+        seq_len = hidden_states.shape[1]
+        mask = None if seq_len <= 1 else attention_mask
+
+        past_key_values = [None] * len(self.blocks) if past_key_values is None else past_key_values
+
+        for index, block in enumerate(self.blocks):
+            hidden_states, new_key_values = block(hidden_states, past_key_values[index], mask)
+            past_key_values[index] = new_key_values
+
+        return hidden_states, past_key_values
+
+# PhiForCausalLM wraps the PhiModel and PhiCausalLMHead together and returns a CausalLMOutputWithPast object.
+class PhiForCausalLM(torch.nn.Module):
+    def __init__(self, config, weights):
+        super().__init__()
+        self.model = PhiModel(config, weights)
+        self.lm_head = PhiCausalLMHead(config, weights)
+
+    def forward(
+        self,
+        input_ids: torch.LongTensor,
+        past_key_values: Optional[List[Tuple[torch.FloatTensor]]] = None,
+        attention_mask: Optional[torch.ByteTensor] = None,
+        return_dict: Optional[bool] = None,
+        use_cache: Optional[bool] = None,
+        labels: Optional[torch.LongTensor] = None,
+    ) -> Tuple[torch.Tensor, List[Tuple[torch.Tensor, torch.Tensor]]]:
+        model_output = self.model(
+            input_ids, past_key_values, attention_mask, return_dict, use_cache
+        )
+        logits = self.lm_head(model_output[0])
+
+        loss = None
+        if labels is not None:
+            loss = nn.CrossEntropyLoss()(
+                logits[:, :-1].view(-1, logits.size(-1)),
+                labels[:, 1:].view(-1)
+            )
+
+        if not return_dict:
+            return ((loss,) + (logits,) + model_output[1:]) if loss is not None else (logits,) + model_output[1:]
+
+        return CausalLMOutputWithPast(
+            loss=loss,
+            logits=logits,
+            past_key_values=model_output[1],
+            hidden_states=None,
+            attentions=None,
+        )
+
+        
diff --git a/server/text_generation_server/models/flash_phi.py b/server/text_generation_server/models/flash_phi.py
new file mode 100644
index 00000000..1c49f2a9
--- /dev/null
+++ b/server/text_generation_server/models/flash_phi.py
@@ -0,0 +1,102 @@
+import torch
+import torch.distributed
+
+from opentelemetry import trace
+from transformers import AutoConfig, AutoTokenizer
+from typing import Optional
+
+from text_generation_server.models import FlashCausalLM
+from text_generation_server.models.custom_modeling.flash_phi_modeling import (
+    FlashPhiForCausalLM,
+    PhiConfig,
+)
+from text_generation_server.utils import (
+    initialize_torch_distributed,
+    weight_files,
+    Weights,
+)
+
+tracer = trace.get_tracer(__name__)
+
+
+class FlashPhi(FlashCausalLM):  # builds tokenizer, config, weights and FlashPhiForCausalLM, then hands them to FlashCausalLM
+    def __init__(
+        self,
+        model_id: str,
+        revision: Optional[str] = None,
+        quantize: Optional[str] = None,  # stored on config.quantize; "gptq"/"awq" trigger extra weight setup below
+        dtype: Optional[torch.dtype] = None,  # defaults to torch.float16 when None
+        trust_remote_code: bool = False,
+        use_medusa: Optional[str] = None,  # local directory or hub repo id of a Medusa head; falsy disables it
+    ):
+        self.process_group, rank, world_size = initialize_torch_distributed()
+        if torch.cuda.is_available():
+            device = torch.device(f"cuda:{rank}")  # CUDA device index equals this process's rank
+            dtype = torch.float16 if dtype is None else dtype
+        else:
+            raise NotImplementedError("FlashPhi is only available on GPU")
+
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_id,
+            revision=revision,
+            padding_side="left",
+            truncation_side="left",
+            trust_remote_code=trust_remote_code,
+        )
+
+        config = PhiConfig.from_pretrained(
+            model_id, revision=revision, trust_remote_code=trust_remote_code
+        )
+        config.quantize = quantize
+
+        torch.distributed.barrier(group=self.process_group)  # synchronize all ranks before touching weight files
+
+        filenames = weight_files(model_id, revision=revision, extension=".safetensors")
+        weights = Weights(filenames, device, dtype, process_group=self.process_group)
+        if config.quantize in ["gptq", "awq"]:
+            weights._set_gptq_params(model_id, revision)  # the same helper is used for both gptq and awq
+
+        model = FlashPhiForCausalLM(config, weights)
+        if use_medusa:  # optional: swap the LM head for a MedusaModel built around the original head
+            from text_generation_server.utils.medusa import MedusaModel
+            from huggingface_hub import hf_hub_download
+            import json
+            import os
+            from pathlib import Path
+            
+            is_local_model = (Path(use_medusa).exists() and Path(use_medusa).is_dir()) or os.getenv(
+                "WEIGHTS_CACHE_OVERRIDE", None
+            ) is not None  # local if use_medusa is an existing directory, or whenever WEIGHTS_CACHE_OVERRIDE is set
+            
+            if not is_local_model:
+                medusa_config = hf_hub_download(
+                    use_medusa, revision=revision, filename="config.json"
+                )  # NOTE(review): reuses the base model's revision for the Medusa repo — confirm intended
+                medusa_head = hf_hub_download(
+                    use_medusa, revision=revision, filename="medusa_lm_head.pt"
+                )
+            else:
+                medusa_config = str(Path(use_medusa) / "config.json")
+                medusa_head = str(Path(use_medusa) / "medusa_lm_head.pt")
+                
+            with open(medusa_config, "r") as f:
+                config = json.load(f)  # rebinds config to the Medusa config dict; the PhiConfig is not used after this point
+            medusa_sf = medusa_head[: -len(".pt")] + ".safetensors"  # NOTE(review): assumes a .safetensors file sits next to the .pt file — confirm
+            weights = Weights(
+                [medusa_sf], device, dtype, process_group=self.process_group
+            )  # rebinds weights to the Medusa head weights only
+            lm_head = model.lm_head  # keep the original head so it can be passed to MedusaModel
+            model.lm_head = MedusaModel(config, weights, lm_head)
+
+        torch.distributed.barrier(group=self.process_group)  # synchronize all ranks again once the model is built
+        super(FlashPhi, self).__init__(
+            model=model,
+            tokenizer=tokenizer,
+            num_layers=len(model.model.layers),
+            num_kv_heads=model.model.num_key_value_heads,
+            head_size=model.model.head_size,
+            dtype=dtype,
+            device=device,
+            rank=rank,
+            world_size=world_size,
+        )
diff --git a/server/text_generation_server/models/phi.py b/server/text_generation_server/models/phi.py
new file mode 100644
index 00000000..d477478a
--- /dev/null
+++ b/server/text_generation_server/models/phi.py
@@ -0,0 +1,63 @@
+import torch
+import torch.distributed
+
+from transformers import AutoConfig, AutoTokenizer
+from typing import Optional, List, Tuple
+
+from text_generation_server.models import CausalLM
+from text_generation_server.models.custom_modeling.phi_modeling import PhiConfig, PhiForCausalLM
+from text_generation_server.utils import (
+    initialize_torch_distributed,
+    weight_files,
+    Weights,
+)
+
+class Phi(CausalLM):  # non-flash Phi loader: builds PhiForCausalLM from safetensors weights on GPU or CPU
+    def __init__(
+        self,
+        model_id: str,
+        revision: Optional[str] = None,
+        quantize: Optional[str] = None,  # rejected with ValueError when CUDA is unavailable
+        dtype: Optional[torch.dtype] = None,  # defaults to float16 on CUDA and float32 on CPU
+        trust_remote_code: bool = False,
+    ):
+        self.process_group, _rank, _world_size = initialize_torch_distributed()  # rank and world size are discarded here
+        if torch.cuda.is_available():
+            device = torch.device("cuda")  # no explicit device index, unlike a per-rank f"cuda:{rank}"
+            dtype = torch.float16 if dtype is None else dtype
+        else:
+            if quantize:
+                raise ValueError("quantization is not available on CPU")
+
+            device = torch.device("cpu")
+            dtype = torch.float32 if dtype is None else dtype
+
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_id,
+            revision=revision,
+            padding_side="left",
+            truncation_side="left",
+            trust_remote_code=trust_remote_code,
+        )
+        config = PhiConfig.from_pretrained(
+            model_id, revision=revision, trust_remote_code=trust_remote_code
+        )
+
+        tokenizer.bos_token_id = config.bos_token_id  # override the tokenizer's special token ids with the config's
+        tokenizer.eos_token_id = config.eos_token_id
+        tokenizer.pad_token = tokenizer.eos_token  # pad with the EOS token
+
+        config.quantize = quantize
+        torch.distributed.barrier(group=self.process_group)  # synchronize all ranks before touching weight files
+        filenames = weight_files(model_id, revision=revision, extension=".safetensors")
+        weights = Weights(filenames, device, dtype, process_group=self.process_group)
+        model = PhiForCausalLM(config, weights)
+        torch.distributed.barrier(group=self.process_group)  # synchronize all ranks again once the model is built
+        super(CausalLM, self).__init__(  # two-argument super starts the MRO lookup after CausalLM, so CausalLM.__init__ is skipped
+            model=model,
+            tokenizer=tokenizer,
+            requires_padding=True,
+            dtype=dtype,
+            device=device,
+        )  # rank and world_size are not passed; NOTE(review): presumably the base-class defaults apply — confirm
+

From 9c320e260b404c0863358bf0b3bff5bd06cb8745 Mon Sep 17 00:00:00 2001
From: OlivierDehaene <olivier@huggingface.co>
Date: Thu, 25 Jan 2024 18:16:03 +0100
Subject: [PATCH 06/31] fix: read stderr in download (#1486)

#1186
---
 launcher/src/main.rs | 48 +++++++++++++++++++++++++++-----------------
 1 file changed, 30 insertions(+), 18 deletions(-)

diff --git a/launcher/src/main.rs b/launcher/src/main.rs
index c5553000..e635a721 100644
--- a/launcher/src/main.rs
+++ b/launcher/src/main.rs
@@ -4,7 +4,7 @@ use nix::unistd::Pid;
 use serde::Deserialize;
 use std::env;
 use std::ffi::OsString;
-use std::io::{BufRead, BufReader, Lines, Read};
+use std::io::{BufRead, BufReader, Lines};
 use std::os::unix::process::{CommandExt, ExitStatusExt};
 use std::path::Path;
 use std::process::{Child, Command, ExitStatus, Stdio};
@@ -489,6 +489,9 @@ fn shard_manager(
     // Safetensors load fast
     envs.push(("SAFETENSORS_FAST_GPU".into(), "1".into()));
 
+    // Disable progress bar
+    envs.push(("HF_HUB_DISABLE_PROGRESS_BARS".into(), "1".into()));
+
     // Enable hf transfer for insane download speeds
     let enable_hf_transfer = env::var("HF_HUB_ENABLE_HF_TRANSFER").unwrap_or("1".to_string());
     envs.push((
@@ -573,6 +576,13 @@ fn shard_manager(
     thread::spawn(move || {
         log_lines(shard_stdout_reader.lines());
     });
+    // We read stderr in another thread as it seems that lines() can block in some cases
+    let (err_sender, err_receiver) = mpsc::channel();
+    thread::spawn(move || {
+        for line in shard_stderr_reader.lines().flatten() {
+            err_sender.send(line).unwrap_or(());
+        }
+    });
 
     let mut ready = false;
     let start_time = Instant::now();
@@ -580,13 +590,6 @@ fn shard_manager(
     loop {
         // Process exited
         if let Some(exit_status) = p.try_wait().unwrap() {
-            // We read stderr in another thread as it seems that lines() can block in some cases
-            let (err_sender, err_receiver) = mpsc::channel();
-            thread::spawn(move || {
-                for line in shard_stderr_reader.lines().flatten() {
-                    err_sender.send(line).unwrap_or(());
-                }
-            });
             let mut err = String::new();
             while let Ok(line) = err_receiver.recv_timeout(Duration::from_millis(10)) {
                 err = err + "\n" + &line;
@@ -782,6 +785,9 @@ fn download_convert_model(args: &Args, running: Arc<AtomicBool>) -> Result<(), L
     // Copy current process env
     let mut envs: Vec<(OsString, OsString)> = env::vars_os().collect();
 
+    // Disable progress bar
+    envs.push(("HF_HUB_DISABLE_PROGRESS_BARS".into(), "1".into()));
+
     // If huggingface_hub_cache is set, pass it to the download process
     // Useful when running inside a docker container
     if let Some(ref huggingface_hub_cache) = args.huggingface_hub_cache {
@@ -832,12 +838,20 @@ fn download_convert_model(args: &Args, running: Arc<AtomicBool>) -> Result<(), L
         }
     };
 
-    // Redirect STDOUT to the console
-    let download_stdout = download_process.stdout.take().unwrap();
-    let stdout = BufReader::new(download_stdout);
+    let download_stdout = BufReader::new(download_process.stdout.take().unwrap());
 
     thread::spawn(move || {
-        log_lines(stdout.lines());
+        log_lines(download_stdout.lines());
+    });
+
+    let download_stderr = BufReader::new(download_process.stderr.take().unwrap());
+
+    // We read stderr in another thread as it seems that lines() can block in some cases
+    let (err_sender, err_receiver) = mpsc::channel();
+    thread::spawn(move || {
+        for line in download_stderr.lines().flatten() {
+            err_sender.send(line).unwrap_or(());
+        }
     });
 
     loop {
@@ -848,12 +862,10 @@ fn download_convert_model(args: &Args, running: Arc<AtomicBool>) -> Result<(), L
             }
 
             let mut err = String::new();
-            download_process
-                .stderr
-                .take()
-                .unwrap()
-                .read_to_string(&mut err)
-                .unwrap();
+            while let Ok(line) = err_receiver.recv_timeout(Duration::from_millis(10)) {
+                err = err + "\n" + &line;
+            }
+
             if let Some(signal) = status.signal() {
                 tracing::error!(
                     "Download process was signaled to shutdown with signal {signal}: {err}"

From 17b7b75e652394379931c058a8c2db3a000b4225 Mon Sep 17 00:00:00 2001
From: Nicolas Patry <patry.nicolas@protonmail.com>
Date: Fri, 26 Jan 2024 10:13:23 +0100
Subject: [PATCH 07/31] Update the docs

---
 README.md                       | 2 +-
 docs/source/supported_models.md | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 1b3041d3..73356f28 100644
--- a/README.md
+++ b/README.md
@@ -198,7 +198,7 @@ Be aware that the official Docker image has them enabled by default.
 
 ## Optimized architectures
 
-TGI works out of the box to serve optimized models in [this list](https://huggingface.co/docs/text-generation-inference/supported_models).
+TGI works out of the box to serve optimized implementations of all modern models. They can be found in [this list](https://huggingface.co/docs/text-generation-inference/supported_models).
 
 Other architectures are supported on a best-effort basis using:
 
diff --git a/docs/source/supported_models.md b/docs/source/supported_models.md
index dce4f2f9..004790ab 100644
--- a/docs/source/supported_models.md
+++ b/docs/source/supported_models.md
@@ -19,7 +19,9 @@ The following models are optimized and can be served with TGI, which uses custom
 - [MPT](https://huggingface.co/mosaicml/mpt-30b)
 - [Llama V2](https://huggingface.co/meta-llama)
 - [Code Llama](https://huggingface.co/codellama)
-- [Mistral](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1)
+- [Mistral](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2)
+- [Mixtral](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1)
+- [Phi](https://huggingface.co/microsoft/phi-2)
 
 If the above list lacks the model you would like to serve, depending on the model's pipeline type, you can try to initialize and serve the model anyways to see how well it performs, but performance isn't guaranteed for non-optimized models:
 

From 13dd8e2361759f977832a41e7eeff0caf1781b6b Mon Sep 17 00:00:00 2001
From: drbh <david.richard.holtz@gmail.com>
Date: Fri, 26 Jan 2024 04:41:39 -0500
Subject: [PATCH 08/31] fix: show warning with tokenizer config parsing error
 (#1488)

This tiny PR just prints the parsing error when a tokenizer config fails
to load.

This is helpful when a chat_template won't load due to formatting issues
https://github.com/huggingface/text-generation-inference/pull/1427#issuecomment-1909226388
---
 router/src/main.rs | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/router/src/main.rs b/router/src/main.rs
index b6190908..495fd5bc 100644
--- a/router/src/main.rs
+++ b/router/src/main.rs
@@ -462,7 +462,12 @@ pub async fn get_tokenizer_config(api_repo: &ApiRepo) -> Option<HubTokenizerConf
     let reader = BufReader::new(file);
 
     // Read the JSON contents of the file as an instance of 'HubTokenizerConfig'.
-    let tokenizer_config: HubTokenizerConfig = serde_json::from_reader(reader).ok()?;
+    let tokenizer_config: HubTokenizerConfig = serde_json::from_reader(reader)
+        .map_err(|e| {
+            tracing::warn!("Unable to parse tokenizer config: {}", e);
+            e
+        })
+        .ok()?;
 
     Some(tokenizer_config)
 }

From 16958fe3126387f53d2296fa40c3a15fd3a869f4 Mon Sep 17 00:00:00 2001
From: Nicolas Patry <patry.nicolas@protonmail.com>
Date: Fri, 26 Jan 2024 10:41:58 +0100
Subject: [PATCH 09/31] fix: launcher doc typos (#1473)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->

---------

Co-authored-by: Andres Restrepo <andres@thelinuxkid.com>
---
 docs/source/basic_tutorials/launcher.md | 4 ++--
 launcher/src/main.rs                    | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/docs/source/basic_tutorials/launcher.md b/docs/source/basic_tutorials/launcher.md
index 9590e463..bafe3669 100644
--- a/docs/source/basic_tutorials/launcher.md
+++ b/docs/source/basic_tutorials/launcher.md
@@ -60,9 +60,9 @@ Options:
           [env: QUANTIZE=]
 
           Possible values:
-          - awq:              4 bit quantization. Requires a specific GTPQ quantized model: https://hf.co/models?search=awq. Should replace GPTQ models whereever possible because of the better latency
+          - awq:              4 bit quantization. Requires a specific AWQ quantized model: https://hf.co/models?search=awq. Should replace GPTQ models wherever possible because of the better latency
           - eetq:             8 bit quantization, doesn't require specific model. Should be a drop-in replacement to bitsandbytes with much better performance. Kernels are from https://github.com/NetEase-FuXi/EETQ.git
-          - gptq:             4 bit quantization. Requires a specific GTPQ quantized model: https://hf.co/models?search=gptq. text-generation-inference will use exllama (faster) kernels whereever possible, and use triton kernel (wider support) when it's not. AWQ has faster kernels
+          - gptq:             4 bit quantization. Requires a specific GTPQ quantized model: https://hf.co/models?search=gptq. text-generation-inference will use exllama (faster) kernels wherever possible, and use triton kernel (wider support) when it's not. AWQ has faster kernels
           - bitsandbytes:     Bitsandbytes 8bit. Can be applied on any model, will cut the memory requirement in half, but it is known that the model will be much slower to run than the native f16
           - bitsandbytes-nf4: Bitsandbytes 4bit. Can be applied on any model, will cut the memory requirement by 4x, but it is known that the model will be much slower to run than the native f16
           - bitsandbytes-fp4: Bitsandbytes 4bit. nf4 should be preferred in most cases but maybe this one has better perplexity performance for you model
diff --git a/launcher/src/main.rs b/launcher/src/main.rs
index e635a721..09657c91 100644
--- a/launcher/src/main.rs
+++ b/launcher/src/main.rs
@@ -21,16 +21,16 @@ mod env_runtime;
 
 #[derive(Clone, Copy, Debug, ValueEnum)]
 enum Quantization {
-    /// 4 bit quantization. Requires a specific GTPQ quantized model:
+    /// 4 bit quantization. Requires a specific AWQ quantized model:
     ///   https://hf.co/models?search=awq.
-    /// Should replace GPTQ models whereever possible because of the better latency
+    /// Should replace GPTQ models wherever possible because of the better latency
     Awq,
     /// 8 bit quantization, doesn't require specific model.
     /// Should be a drop-in replacement to bitsandbytes with much better performance.
     /// Kernels are from https://github.com/NetEase-FuXi/EETQ.git
     Eetq,
     /// 4 bit quantization. Requires a specific GTPQ quantized model: https://hf.co/models?search=gptq.
-    /// text-generation-inference will use exllama (faster) kernels whereever possible, and use
+    /// text-generation-inference will use exllama (faster) kernels wherever possible, and use
     /// triton kernel (wider support) when it's not.
     /// AWQ has faster kernels.
     Gptq,

From b95732180dc52be869e8c3e752a9c54608a6c7a5 Mon Sep 17 00:00:00 2001
From: Nicolas Patry <patry.nicolas@protonmail.com>
Date: Fri, 26 Jan 2024 14:00:29 +0100
Subject: [PATCH 10/31] Reinstate exl2 with tp (#1490)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
---
 ...t_flash_starcoder_gptq_default_params.json | 26 +++++++++----------
 .../utils/gptq/exllamav2.py                   |  4 +++
 server/text_generation_server/utils/layers.py | 12 ++++-----
 3 files changed, 23 insertions(+), 19 deletions(-)

diff --git a/integration-tests/models/__snapshots__/test_flash_starcoder_gptq/test_flash_starcoder_gptq_default_params.json b/integration-tests/models/__snapshots__/test_flash_starcoder_gptq/test_flash_starcoder_gptq_default_params.json
index 5598a2ad..1ace3814 100644
--- a/integration-tests/models/__snapshots__/test_flash_starcoder_gptq/test_flash_starcoder_gptq_default_params.json
+++ b/integration-tests/models/__snapshots__/test_flash_starcoder_gptq/test_flash_starcoder_gptq_default_params.json
@@ -16,52 +16,52 @@
       },
       {
         "id": 21017,
-        "logprob": -9.09375,
+        "logprob": -9.0859375,
         "text": "ometric"
       },
       {
         "id": 81,
-        "logprob": -0.25976562,
+        "logprob": -0.25830078,
         "text": "_"
       },
       {
         "id": 6009,
-        "logprob": -2.2148438,
+        "logprob": -2.1875,
         "text": "mean"
       },
       {
         "id": 26,
-        "logprob": -0.3010254,
+        "logprob": -0.30004883,
         "text": "("
       },
       {
         "id": 62,
-        "logprob": -5.6757812,
+        "logprob": -5.6171875,
         "text": "L"
       },
       {
         "id": 44,
-        "logprob": -3.0898438,
+        "logprob": -3.078125,
         "text": ":"
       },
       {
         "id": 1682,
-        "logprob": -0.6791992,
+        "logprob": -0.68066406,
         "text": " List"
       },
       {
         "id": 77,
-        "logprob": -0.38891602,
+        "logprob": -0.38745117,
         "text": "["
       },
       {
         "id": 1808,
-        "logprob": -0.92041016,
+        "logprob": -0.9453125,
         "text": "float"
       },
       {
         "id": 10794,
-        "logprob": -2.5390625,
+        "logprob": -2.5371094,
         "text": "]):"
       }
     ],
@@ -69,7 +69,7 @@
     "tokens": [
       {
         "id": 284,
-        "logprob": 0.0,
+        "logprob": -0.051635742,
         "special": false,
         "text": "\n   "
       },
@@ -81,7 +81,7 @@
       },
       {
         "id": 11665,
-        "logprob": -1.6005859,
+        "logprob": -1.2236328,
         "special": false,
         "text": " reduce"
       },
@@ -159,7 +159,7 @@
       },
       {
         "id": 203,
-        "logprob": -0.11968994,
+        "logprob": -0.12695312,
         "special": false,
         "text": "\n"
       },
diff --git a/server/text_generation_server/utils/gptq/exllamav2.py b/server/text_generation_server/utils/gptq/exllamav2.py
index a24e834b..2b897f25 100644
--- a/server/text_generation_server/utils/gptq/exllamav2.py
+++ b/server/text_generation_server/utils/gptq/exllamav2.py
@@ -185,6 +185,10 @@ class QuantLinear(nn.Module):
             "g_idx": self.g_idx,
         }
         temp_dq = temp_dq.get_scratch_slice(self.temp_dq_size())
+
+        # We NEED to keep a pointer on Python side, otherwise the garbage collector will mess with us,
+        # and `Memory access fault by GPU node-2` will EAT you.
+        self.temp_dq = temp_dq
         self.q_handle = ext_make_q_matrix(self.q_tensors, temp_dq)
 
     def forward(self, x, force_cuda=False):
diff --git a/server/text_generation_server/utils/layers.py b/server/text_generation_server/utils/layers.py
index 5a0de0d7..c9393d99 100644
--- a/server/text_generation_server/utils/layers.py
+++ b/server/text_generation_server/utils/layers.py
@@ -35,12 +35,12 @@ except Exception:
 HAS_EXLLAMA = False
 CAN_EXLLAMA = major >= 8
 V2 = os.getenv("EXLLAMA_VERSION", "2") == "2"
-if V2 and int(os.getenv("WORLD_SIZE", "1")) > 1:
-    V2 = False
-    log_once(
-        logger.warning,
-        "Disabling exllama v2 and using v1 instead because there are issues when sharding",
-    )
+# if V2 and int(os.getenv("WORLD_SIZE", "1")) > 1:
+#     V2 = False
+#     log_once(
+#         logger.warning,
+#         "Disabling exllama v2 and using v1 instead because there are issues when sharding",
+#     )
 
 if os.getenv("DISABLE_EXLLAMA") == "True":
     HAS_EXLLAMA = False

From ac49972752bd9ea4021b3aeb0ac40a4c70e6eeea Mon Sep 17 00:00:00 2001
From: Nicolas Patry <patry.nicolas@protonmail.com>
Date: Fri, 26 Jan 2024 14:05:02 +0100
Subject: [PATCH 11/31] Add sealion mpt support (#1477)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->

---------

Co-authored-by: Choon Meng Tan <choonmeng@aisingapore.org>
Co-authored-by: David Ong Tat-Wee <13075447+ongtw@users.noreply.github.com>
---
 .../models/custom_modeling/mpt_modeling.py    | 81 ++++++++++++++-----
 1 file changed, 63 insertions(+), 18 deletions(-)

diff --git a/server/text_generation_server/models/custom_modeling/mpt_modeling.py b/server/text_generation_server/models/custom_modeling/mpt_modeling.py
index 5ccf796d..1a9aef74 100644
--- a/server/text_generation_server/models/custom_modeling/mpt_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/mpt_modeling.py
@@ -28,7 +28,6 @@ EPS = 1e-5
 
 
 def load_col(config, prefix, weights, bias):
-    assert bias == False, NotImplementedError
     assert config.quantize != "gptq", NotImplementedError
     slice_ = weights._get_slice(f"{prefix}.weight")
     rank = weights.process_group.rank()
@@ -45,7 +44,26 @@ def load_col(config, prefix, weights, bias):
     if weight.dtype != torch.int32:
         weight = weight.to(dtype=weights.dtype)
     weight = weight.to(device=weights.device)
-    bias = None
+
+    if bias:
+        bias_slice_ = weights._get_slice(f"{prefix}.bias")
+        bias_rank = weights.process_group.rank()
+        bias_size = weights.process_group.size()
+
+        bias_h = bias_slice_.get_shape()
+        bias_h = bias_h[0]
+        bias_block_size = bias_h // bias_size
+
+        bias_q_part = bias_slice_[bias_rank * bias_block_size : (bias_rank + 1) * bias_block_size]
+        bias_k_part = bias_slice_[bias_h + bias_rank * bias_block_size : bias_h + (bias_rank + 1) * bias_block_size]
+        bias_v_part = bias_slice_[2 * bias_h + bias_rank * bias_block_size : 2 * bias_h + (bias_rank + 1) * bias_block_size]
+
+        bias = torch.cat([bias_q_part, bias_k_part, bias_v_part], dim=0)
+        if bias.dtype != torch.int32:
+            bias = bias.to(dtype=weights.dtype)
+        bias = bias.to(device=weights.device)
+    else:
+        bias = None
     linear = get_linear(weight, bias, config.quantize)
     return TensorParallelColumnLinear(linear)
 
@@ -330,7 +348,12 @@ class MultiheadAttention(nn.Module):
             config, prefix=f"{prefix}.Wqkv", weights=weights, bias=not config.no_bias
         )
         if self.qk_ln:
-            raise NotImplementedError("qk_ln is not supported")
+            bias = not config.no_bias
+            hidden_size = config.d_model
+            head_dim = hidden_size // self.n_heads
+
+            self.q_ln = LPLayerNorm(d_model, bias=bias, prefix=f"{prefix}.q_ln", weights=weights)
+            self.k_ln = LPLayerNorm(self.n_heads * head_dim, prefix=f"{prefix}.k_ln", weights=weights)
         if self.attn_impl == "flash":
             self.attn_fn = flash_attn_fn
         elif self.attn_impl == "triton":
@@ -581,12 +604,20 @@ class MPTBlock(nn.Module):
                 f"""Not implemented attn {config.attn_config["attn_type"]}"""
             )
         resid_pdrop = config.resid_pdrop
-        self.norm_1 = nn.LayerNorm.load_no_bias(
-            prefix=f"{prefix}.norm_1", weights=weights, eps=EPS
-        )
-        self.norm_2 = nn.LayerNorm.load_no_bias(
-            prefix=f"{prefix}.norm_2", weights=weights, eps=EPS
-        )
+        if config.no_bias:
+            self.norm_1 = nn.LayerNorm.load_no_bias(
+                prefix=f"{prefix}.norm_1", weights=weights, eps=EPS
+            )
+            self.norm_2 = nn.LayerNorm.load_no_bias(
+                prefix=f"{prefix}.norm_2", weights=weights, eps=EPS
+            )
+        else:
+            self.norm_1 = nn.LayerNorm.load(
+                prefix=f"{prefix}.norm_1", weights=weights, eps=EPS
+            )
+            self.norm_2 = nn.LayerNorm.load(
+                prefix=f"{prefix}.norm_2", weights=weights, eps=EPS
+            )
         self.attn = MultiheadAttention(config, prefix=f"{prefix}.attn", weights=weights)
         self.ffn = MPTMLP(config, prefix=f"{prefix}.ffn", weights=weights)
         self.resid_attn_dropout = nn.Dropout(resid_pdrop)
@@ -635,6 +666,9 @@ class LPLayerNorm(torch.nn.LayerNorm):
         elementwise_affine=True,
         device=None,
         dtype=None,
+        bias: Optional[bool] = True,
+        prefix=None,
+        weights=None,
     ):
         super().__init__(
             normalized_shape=normalized_shape,
@@ -642,7 +676,14 @@ class LPLayerNorm(torch.nn.LayerNorm):
             elementwise_affine=elementwise_affine,
             device=device,
             dtype=dtype,
+            bias=bias,
         )
+        if weights is not None:
+            self.weight = nn.Parameter(weights.get_sharded(f"{prefix}.weight", dim=0))
+            if bias:
+                self.bias = nn.Parameter(weights.get_sharded(f"{prefix}.bias", dim=0))
+            self.normalized_shape = self.weight.shape
+
 
     def forward(self, x):
         module_device = x.device
@@ -755,20 +796,23 @@ class MPTModel(MPTPreTrainedModel):
             )
 
         self.wte = TensorParallelEmbedding("transformer.wte", weights)
+
         if not self.alibi:
-            # self.wpe = torch.nn.Embedding(
-            #     config.max_seq_len, config.d_model, device=config.init_device
-            # )
-            raise RuntimeError("no alibi no supported")
+           self.wpe = TensorParallelEmbedding("transformer.wpe", weights)
         self.blocks = nn.ModuleList(
             [
                 MPTBlock(config, prefix=f"transformer.blocks.{i}", weights=weights)
                 for i in range(config.n_layers)
             ]
         )
-        self.norm_f = nn.LayerNorm.load_no_bias(
-            prefix="transformer.norm_f", weights=weights, eps=EPS
-        )
+        if config.no_bias:
+            self.norm_f = nn.LayerNorm.load_no_bias(
+                prefix="transformer.norm_f", weights=weights, eps=EPS
+            )
+        else:
+            self.norm_f = nn.LayerNorm.load(
+                prefix="transformer.norm_f", weights=weights, eps=EPS
+            )
         self.is_causal = not self.prefix_lm
         self._attn_bias_initialized = False
         self.attn_bias = None
@@ -787,8 +831,9 @@ class MPTModel(MPTPreTrainedModel):
                     if config.verbose:
                         warnings.warn(f"Removing bias ({module.bias}) from {module}.")
                     module.register_parameter("bias", None)
-        if config.verbose and config.verbose > 2:
-            print(self)
+        if hasattr(self.config, "verbose"):
+            if config.verbose and config.verbose > 2:
+                print(self)
         if "verbose" not in self.config.init_config:
             self.config.init_config["verbose"] = self.config.verbose
         if self.config.init_config["verbose"] > 1:

From 4c7315dde5a7883562c004f6a2ad69eb6ed16319 Mon Sep 17 00:00:00 2001
From: Nicolas Patry <patry.nicolas@protonmail.com>
Date: Fri, 26 Jan 2024 14:06:27 +0100
Subject: [PATCH 12/31] Trying to fix that flaky test. (#1491)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
---
 router/src/lib.rs | 22 ++++------------------
 1 file changed, 4 insertions(+), 18 deletions(-)

diff --git a/router/src/lib.rs b/router/src/lib.rs
index 2bfbbacd..6c16c4b3 100644
--- a/router/src/lib.rs
+++ b/router/src/lib.rs
@@ -548,26 +548,12 @@ pub(crate) struct ErrorResponse {
 
 #[cfg(test)]
 mod tests {
-    use std::io::Write;
     use tokenizers::Tokenizer;
 
     pub(crate) async fn get_tokenizer() -> Tokenizer {
-        let filename = std::path::Path::new("tokenizer.json");
-        if !filename.exists() {
-            let content = reqwest::get("https://huggingface.co/gpt2/raw/main/tokenizer.json")
-                .await
-                .unwrap()
-                .bytes()
-                .await
-                .unwrap();
-            let tmp_filename = "tokenizer.json.temp";
-            let mut file = std::fs::File::create(tmp_filename).unwrap();
-            file.write_all(&content).unwrap();
-            // Re-check if another process has written this file maybe.
-            if !filename.exists() {
-                std::fs::rename(tmp_filename, filename).unwrap()
-            }
-        }
-        Tokenizer::from_file("tokenizer.json").unwrap()
+        let api = hf_hub::api::sync::Api::new().unwrap();
+        let repo = api.model("gpt2".to_string());
+        let filename = repo.get("tokenizer.json").unwrap();
+        Tokenizer::from_file(filename).unwrap()
     }
 }

From 50a20a83d727de52f609e9cd51d3a85c8a4ba110 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9s=20Restrepo?= <andres@thelinuxkid.com>
Date: Fri, 26 Jan 2024 08:10:07 -0500
Subject: [PATCH 13/31] fix: launcher doc typos (#1462)

# What does this PR do?

fixes launcher doc typos

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [x] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

@OlivierDehaene OR @Narsil

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->

Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>

From ebecc06161d3399aa1dace7be1a7a86efec85f8d Mon Sep 17 00:00:00 2001
From: Nicolas Patry <patry.nicolas@protonmail.com>
Date: Fri, 26 Jan 2024 16:07:31 +0100
Subject: [PATCH 14/31] Update the docs to include newer models. (#1492)

---
 docs/openapi.json    |  2 +-
 router/src/lib.rs    | 31 +++++++++++++++++++++++++------
 router/src/server.rs | 22 +++++++++++++++-------
 3 files changed, 41 insertions(+), 14 deletions(-)

diff --git a/docs/openapi.json b/docs/openapi.json
index 4454259b..9a9ed116 100644
--- a/docs/openapi.json
+++ b/docs/openapi.json
@@ -1 +1 @@
-{"openapi":"3.0.3","info":{"title":"Text Generation Inference","description":"Text Generation Webserver","contact":{"name":"Olivier Dehaene"},"license":{"name":"Apache 2.0","url":"https://www.apache.org/licenses/LICENSE-2.0"},"version":"1.3.4"},"paths":{"/":{"post":{"tags":["Text Generation Inference"],"summary":"Generate tokens if `stream == false` or a stream of token if `stream == true`","description":"Generate tokens if `stream == false` or a stream of token if `stream == true`","operationId":"compat_generate","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CompatGenerateRequest"}}},"required":true},"responses":{"200":{"description":"Generated Text","content":{"application/json":{"schema":{"$ref":"#/components/schemas/GenerateResponse"}},"text/event-stream":{"schema":{"$ref":"#/components/schemas/StreamResponse"}}}},"422":{"description":"Input validation error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Input validation error"}}}},"424":{"description":"Generation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Request failed during generation"}}}},"429":{"description":"Model is overloaded","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Model is overloaded"}}}},"500":{"description":"Incomplete generation","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Incomplete generation"}}}}}}},"/generate":{"post":{"tags":["Text Generation Inference"],"summary":"Generate tokens","description":"Generate tokens","operationId":"generate","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/GenerateRequest"}}},"required":true},"responses":{"200":{"description":"Generated 
Text","content":{"application/json":{"schema":{"$ref":"#/components/schemas/GenerateResponse"}}}},"422":{"description":"Input validation error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Input validation error"}}}},"424":{"description":"Generation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Request failed during generation"}}}},"429":{"description":"Model is overloaded","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Model is overloaded"}}}},"500":{"description":"Incomplete generation","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Incomplete generation"}}}}}}},"/generate_stream":{"post":{"tags":["Text Generation Inference"],"summary":"Generate a stream of token using Server-Sent Events","description":"Generate a stream of token using Server-Sent Events","operationId":"generate_stream","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/GenerateRequest"}}},"required":true},"responses":{"200":{"description":"Generated Text","content":{"text/event-stream":{"schema":{"$ref":"#/components/schemas/StreamResponse"}}}},"422":{"description":"Input validation error","content":{"text/event-stream":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Input validation error"}}}},"424":{"description":"Generation Error","content":{"text/event-stream":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Request failed during generation"}}}},"429":{"description":"Model is overloaded","content":{"text/event-stream":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Model is overloaded"}}}},"500":{"description":"Incomplete 
generation","content":{"text/event-stream":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Incomplete generation"}}}}}}},"/health":{"get":{"tags":["Text Generation Inference"],"summary":"Health check method","description":"Health check method","operationId":"health","responses":{"200":{"description":"Everything is working fine"},"503":{"description":"Text generation inference is down","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"unhealthy","error_type":"healthcheck"}}}}}}},"/info":{"get":{"tags":["Text Generation Inference"],"summary":"Text Generation Inference endpoint info","description":"Text Generation Inference endpoint info","operationId":"get_model_info","responses":{"200":{"description":"Served model info","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Info"}}}}}}},"/metrics":{"get":{"tags":["Text Generation Inference"],"summary":"Prometheus metrics scrape endpoint","description":"Prometheus metrics scrape endpoint","operationId":"metrics","responses":{"200":{"description":"Prometheus Metrics","content":{"text/plain":{"schema":{"type":"string"}}}}}}},"/tokenize":{"post":{"tags":["Text Generation Inference"],"summary":"Tokenize inputs","description":"Tokenize inputs","operationId":"tokenize","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/TokenizeRequest"}}},"required":true},"responses":{"200":{"description":"Tokenized ids","content":{"application/json":{"schema":{"$ref":"#/components/schemas/TokenizeResponse"}}}},"404":{"description":"No tokenizer found","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"No fast tokenizer available"}}}}}}},"/v1/chat/completions":{"post":{"tags":["Text Generation Inference"],"summary":"Generate tokens","description":"Generate 
tokens","operationId":"chat_completions","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ChatRequest"}}},"required":true},"responses":{"200":{"description":"Generated Text","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ChatCompletionChunk"}}}},"422":{"description":"Input validation error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Input validation error"}}}},"424":{"description":"Generation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Request failed during generation"}}}},"429":{"description":"Model is overloaded","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Model is overloaded"}}}},"500":{"description":"Incomplete generation","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Incomplete generation"}}}}}}}},"components":{"schemas":{"BestOfSequence":{"type":"object","required":["generated_text","finish_reason","generated_tokens","prefill","tokens"],"properties":{"finish_reason":{"$ref":"#/components/schemas/FinishReason"},"generated_text":{"type":"string","example":"test"},"generated_tokens":{"type":"integer","format":"int32","example":1,"minimum":0},"prefill":{"type":"array","items":{"$ref":"#/components/schemas/PrefillToken"}},"seed":{"type":"integer","format":"int64","example":42,"nullable":true,"minimum":0},"tokens":{"type":"array","items":{"$ref":"#/components/schemas/Token"}},"top_tokens":{"type":"array","items":{"type":"array","items":{"$ref":"#/components/schemas/Token"}}}}},"CompatGenerateRequest":{"type":"object","required":["inputs"],"properties":{"inputs":{"type":"string","example":"My name is Olivier and 
I"},"parameters":{"$ref":"#/components/schemas/GenerateParameters"},"stream":{"type":"boolean","default":"false"}}},"Details":{"type":"object","required":["finish_reason","generated_tokens","prefill","tokens"],"properties":{"best_of_sequences":{"type":"array","items":{"$ref":"#/components/schemas/BestOfSequence"},"nullable":true},"finish_reason":{"$ref":"#/components/schemas/FinishReason"},"generated_tokens":{"type":"integer","format":"int32","example":1,"minimum":0},"prefill":{"type":"array","items":{"$ref":"#/components/schemas/PrefillToken"}},"seed":{"type":"integer","format":"int64","example":42,"nullable":true,"minimum":0},"tokens":{"type":"array","items":{"$ref":"#/components/schemas/Token"}},"top_tokens":{"type":"array","items":{"type":"array","items":{"$ref":"#/components/schemas/Token"}}}}},"ErrorResponse":{"type":"object","required":["error","error_type"],"properties":{"error":{"type":"string"},"error_type":{"type":"string"}}},"FinishReason":{"type":"string","enum":["length","eos_token","stop_sequence"]},"GenerateParameters":{"type":"object","properties":{"best_of":{"type":"integer","default":"null","example":1,"nullable":true,"minimum":0,"exclusiveMinimum":0},"decoder_input_details":{"type":"boolean","default":"true"},"details":{"type":"boolean","default":"true"},"do_sample":{"type":"boolean","default":"false","example":true},"max_new_tokens":{"type":"integer","format":"int32","default":"100","example":"20","nullable":true,"minimum":0},"repetition_penalty":{"type":"number","format":"float","default":"null","example":1.03,"nullable":true,"exclusiveMinimum":0},"return_full_text":{"type":"boolean","default":"null","example":false,"nullable":true},"seed":{"type":"integer","format":"int64","default":"null","example":"null","nullable":true,"minimum":0,"exclusiveMinimum":0},"stop":{"type":"array","items":{"type":"string"},"example":["photographer"],"maxItems":4},"temperature":{"type":"number","format":"float","default":"null","example":0.5,"nullable":true,"exclu
siveMinimum":0},"top_k":{"type":"integer","format":"int32","default":"null","example":10,"nullable":true,"exclusiveMinimum":0},"top_n_tokens":{"type":"integer","format":"int32","default":"null","example":5,"nullable":true,"minimum":0,"exclusiveMinimum":0},"top_p":{"type":"number","format":"float","default":"null","example":0.95,"nullable":true,"maximum":1,"exclusiveMinimum":0},"truncate":{"type":"integer","default":"null","example":"null","nullable":true,"minimum":0},"typical_p":{"type":"number","format":"float","default":"null","example":0.95,"nullable":true,"maximum":1,"exclusiveMinimum":0},"watermark":{"type":"boolean","default":"false","example":true}}},"GenerateRequest":{"type":"object","required":["inputs"],"properties":{"inputs":{"type":"string","example":"My name is Olivier and I"},"parameters":{"$ref":"#/components/schemas/GenerateParameters"}}},"GenerateResponse":{"type":"object","required":["generated_text"],"properties":{"details":{"allOf":[{"$ref":"#/components/schemas/Details"}],"nullable":true},"generated_text":{"type":"string","example":"test"}}},"Info":{"type":"object","required":["model_id","model_dtype","model_device_type","max_concurrent_requests","max_best_of","max_stop_sequences","max_input_length","max_total_tokens","waiting_served_ratio","max_batch_total_tokens","max_waiting_tokens","validation_workers","version"],"properties":{"docker_label":{"type":"string","example":"null","nullable":true},"max_batch_total_tokens":{"type":"integer","format":"int32","example":"32000","minimum":0},"max_best_of":{"type":"integer","example":"2","minimum":0},"max_concurrent_requests":{"type":"integer","description":"Router 
Parameters","example":"128","minimum":0},"max_input_length":{"type":"integer","example":"1024","minimum":0},"max_stop_sequences":{"type":"integer","example":"4","minimum":0},"max_total_tokens":{"type":"integer","example":"2048","minimum":0},"max_waiting_tokens":{"type":"integer","example":"20","minimum":0},"model_device_type":{"type":"string","example":"cuda"},"model_dtype":{"type":"string","example":"torch.float16"},"model_id":{"type":"string","description":"Model info","example":"bigscience/blomm-560m"},"model_pipeline_tag":{"type":"string","example":"text-generation","nullable":true},"model_sha":{"type":"string","example":"e985a63cdc139290c5f700ff1929f0b5942cced2","nullable":true},"sha":{"type":"string","example":"null","nullable":true},"validation_workers":{"type":"integer","example":"2","minimum":0},"version":{"type":"string","description":"Router Info","example":"0.5.0"},"waiting_served_ratio":{"type":"number","format":"float","example":"1.2"}}},"PrefillToken":{"type":"object","required":["id","text","logprob"],"properties":{"id":{"type":"integer","format":"int32","example":0,"minimum":0},"logprob":{"type":"number","format":"float","example":-0.34,"nullable":true},"text":{"type":"string","example":"test"}}},"StreamDetails":{"type":"object","required":["finish_reason","generated_tokens"],"properties":{"finish_reason":{"$ref":"#/components/schemas/FinishReason"},"generated_tokens":{"type":"integer","format":"int32","example":1,"minimum":0},"seed":{"type":"integer","format":"int64","example":42,"nullable":true,"minimum":0}}},"StreamResponse":{"type":"object","required":["index","token"],"properties":{"details":{"allOf":[{"$ref":"#/components/schemas/StreamDetails"}],"default":"null","nullable":true},"generated_text":{"type":"string","default":"null","example":"test","nullable":true},"index":{"type":"integer","format":"int32","minimum":0},"token":{"$ref":"#/components/schemas/Token"},"top_tokens":{"type":"array","items":{"$ref":"#/components/schemas/Token"}}}},"To
ken":{"type":"object","required":["id","text","logprob","special"],"properties":{"id":{"type":"integer","format":"int32","example":0,"minimum":0},"logprob":{"type":"number","format":"float","example":-0.34,"nullable":true},"special":{"type":"boolean","example":"false"},"text":{"type":"string","example":"test"}}}}},"tags":[{"name":"Text Generation Inference","description":"Hugging Face Text Generation Inference API"}]}
+{"openapi":"3.0.3","info":{"title":"Text Generation Inference","description":"Text Generation Webserver","contact":{"name":"Olivier Dehaene"},"license":{"name":"Apache 2.0","url":"https://www.apache.org/licenses/LICENSE-2.0"},"version":"1.3.4"},"paths":{"/":{"post":{"tags":["Text Generation Inference"],"summary":"Generate tokens if `stream == false` or a stream of token if `stream == true`","description":"Generate tokens if `stream == false` or a stream of token if `stream == true`","operationId":"compat_generate","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CompatGenerateRequest"}}},"required":true},"responses":{"200":{"description":"Generated Text","content":{"application/json":{"schema":{"$ref":"#/components/schemas/GenerateResponse"}},"text/event-stream":{"schema":{"$ref":"#/components/schemas/StreamResponse"}}}},"422":{"description":"Input validation error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Input validation error"}}}},"424":{"description":"Generation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Request failed during generation"}}}},"429":{"description":"Model is overloaded","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Model is overloaded"}}}},"500":{"description":"Incomplete generation","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Incomplete generation"}}}}}}},"/generate":{"post":{"tags":["Text Generation Inference"],"summary":"Generate tokens","description":"Generate tokens","operationId":"generate","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/GenerateRequest"}}},"required":true},"responses":{"200":{"description":"Generated 
Text","content":{"application/json":{"schema":{"$ref":"#/components/schemas/GenerateResponse"}}}},"422":{"description":"Input validation error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Input validation error"}}}},"424":{"description":"Generation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Request failed during generation"}}}},"429":{"description":"Model is overloaded","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Model is overloaded"}}}},"500":{"description":"Incomplete generation","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Incomplete generation"}}}}}}},"/generate_stream":{"post":{"tags":["Text Generation Inference"],"summary":"Generate a stream of token using Server-Sent Events","description":"Generate a stream of token using Server-Sent Events","operationId":"generate_stream","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/GenerateRequest"}}},"required":true},"responses":{"200":{"description":"Generated Text","content":{"text/event-stream":{"schema":{"$ref":"#/components/schemas/StreamResponse"}}}},"422":{"description":"Input validation error","content":{"text/event-stream":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Input validation error"}}}},"424":{"description":"Generation Error","content":{"text/event-stream":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Request failed during generation"}}}},"429":{"description":"Model is overloaded","content":{"text/event-stream":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Model is overloaded"}}}},"500":{"description":"Incomplete 
generation","content":{"text/event-stream":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Incomplete generation"}}}}}}},"/health":{"get":{"tags":["Text Generation Inference"],"summary":"Health check method","description":"Health check method","operationId":"health","responses":{"200":{"description":"Everything is working fine"},"503":{"description":"Text generation inference is down","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"unhealthy","error_type":"healthcheck"}}}}}}},"/info":{"get":{"tags":["Text Generation Inference"],"summary":"Text Generation Inference endpoint info","description":"Text Generation Inference endpoint info","operationId":"get_model_info","responses":{"200":{"description":"Served model info","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Info"}}}}}}},"/metrics":{"get":{"tags":["Text Generation Inference"],"summary":"Prometheus metrics scrape endpoint","description":"Prometheus metrics scrape endpoint","operationId":"metrics","responses":{"200":{"description":"Prometheus Metrics","content":{"text/plain":{"schema":{"type":"string"}}}}}}},"/tokenize":{"post":{"tags":["Text Generation Inference"],"summary":"Tokenize inputs","description":"Tokenize inputs","operationId":"tokenize","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/GenerateRequest"}}},"required":true},"responses":{"200":{"description":"Tokenized ids","content":{"application/json":{"schema":{"$ref":"#/components/schemas/TokenizeResponse"}}}},"404":{"description":"No tokenizer found","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"No fast tokenizer available"}}}}}}},"/v1/chat/completions":{"post":{"tags":["Text Generation Inference"],"summary":"Generate tokens","description":"Generate 
tokens","operationId":"chat_completions","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ChatRequest"}}},"required":true},"responses":{"200":{"description":"Generated Text","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ChatCompletionChunk"}}}},"422":{"description":"Input validation error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Input validation error"}}}},"424":{"description":"Generation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Request failed during generation"}}}},"429":{"description":"Model is overloaded","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Model is overloaded"}}}},"500":{"description":"Incomplete generation","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Incomplete 
generation"}}}}}}}},"components":{"schemas":{"BestOfSequence":{"type":"object","required":["generated_text","finish_reason","generated_tokens","prefill","tokens"],"properties":{"finish_reason":{"$ref":"#/components/schemas/FinishReason"},"generated_text":{"type":"string","example":"test"},"generated_tokens":{"type":"integer","format":"int32","example":1,"minimum":0},"prefill":{"type":"array","items":{"$ref":"#/components/schemas/PrefillToken"}},"seed":{"type":"integer","format":"int64","example":42,"nullable":true,"minimum":0},"tokens":{"type":"array","items":{"$ref":"#/components/schemas/Token"}},"top_tokens":{"type":"array","items":{"type":"array","items":{"$ref":"#/components/schemas/Token"}}}}},"ChatCompletion":{"type":"object","required":["id","object","created","model","system_fingerprint","choices","usage"],"properties":{"choices":{"type":"array","items":{"$ref":"#/components/schemas/ChatCompletionComplete"}},"created":{"type":"integer","format":"int64","example":"1706270835","minimum":0},"id":{"type":"string"},"model":{"type":"string","example":"mistralai/Mistral-7B-Instruct-v0.2"},"object":{"type":"string"},"system_fingerprint":{"type":"string"},"usage":{"$ref":"#/components/schemas/Usage"}}},"ChatCompletionChoice":{"type":"object","required":["index","delta"],"properties":{"delta":{"$ref":"#/components/schemas/ChatCompletionDelta"},"finish_reason":{"type":"string","nullable":true},"index":{"type":"integer","format":"int32","minimum":0},"logprobs":{"type":"number","format":"float","nullable":true}}},"ChatCompletionChunk":{"type":"object","required":["id","object","created","model","system_fingerprint","choices"],"properties":{"choices":{"type":"array","items":{"$ref":"#/components/schemas/ChatCompletionChoice"}},"created":{"type":"integer","format":"int64","example":"1706270978","minimum":0},"id":{"type":"string"},"model":{"type":"string","example":"mistralai/Mistral-7B-Instruct-v0.2"},"object":{"type":"string"},"system_fingerprint":{"type":"string"}}},"Cha
tCompletionDelta":{"type":"object","required":["role","content"],"properties":{"content":{"type":"string","example":"What is Deep Learning?"},"role":{"type":"string","example":"user"}}},"ChatRequest":{"type":"object","required":["model"],"properties":{"frequency_penalty":{"type":"number","format":"float","description":"Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far,\ndecreasing the model's likelihood to repeat the same line verbatim.","example":"1.0","nullable":true},"logit_bias":{"type":"array","items":{"type":"number","format":"float"},"description":"UNUSED\nModify the likelihood of specified tokens appearing in the completion. Accepts a JSON object that maps tokens\n(specified by their token ID in the tokenizer) to an associated bias value from -100 to 100. Mathematically,\nthe bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model,\nbut values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should\nresult in a ban or exclusive selection of the relevant token.","nullable":true},"logprobs":{"type":"boolean","description":"Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each\noutput token returned in the content of message.","example":"false","nullable":true},"max_tokens":{"type":"integer","format":"int32","description":"The maximum number of tokens that can be generated in the chat completion.","example":"32","nullable":true,"minimum":0},"messages":{"type":"array","items":{"$ref":"#/components/schemas/Message"},"description":"A list of messages comprising the conversation so far."},"model":{"type":"string","description":"UNUSED\nID of the model to use. 
See the model endpoint compatibility table for details on which models work with the Chat API.","example":"mistralai/Mistral-7B-Instruct-v0.2"},"n":{"type":"integer","format":"int32","description":"UNUSED\nHow many chat completion choices to generate for each input message. Note that you will be charged based on the\nnumber of generated tokens across all of the choices. Keep n as 1 to minimize costs.","example":"2","nullable":true,"minimum":0},"presence_penalty":{"type":"number","format":"float","description":"UNUSED\nNumber between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far,\nincreasing the model's likelihood to talk about new topics","example":0.1,"nullable":true},"seed":{"type":"integer","format":"int64","example":42,"nullable":true,"minimum":0},"stream":{"type":"boolean"},"temperature":{"type":"number","format":"float","description":"What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while\nlower values like 0.2 will make it more focused and deterministic.\n\nWe generally recommend altering this or `top_p` but not both.","example":1.0,"nullable":true},"top_logprobs":{"type":"integer","format":"int32","description":"UNUSED\nAn integer between 0 and 5 specifying the number of most likely tokens to return at each token position, each with\nan associated log probability. logprobs must be set to true if this parameter is used.","example":"5","nullable":true,"minimum":0},"top_p":{"type":"number","format":"float","description":"An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the\ntokens with top_p probability mass. 
So 0.1 means only the tokens comprising the top 10% probability mass are considered.","example":0.95,"nullable":true}}},"CompatGenerateRequest":{"type":"object","required":["inputs"],"properties":{"inputs":{"type":"string","example":"My name is Olivier and I"},"parameters":{"$ref":"#/components/schemas/GenerateParameters"},"stream":{"type":"boolean","default":"false"}}},"Details":{"type":"object","required":["finish_reason","generated_tokens","prefill","tokens"],"properties":{"best_of_sequences":{"type":"array","items":{"$ref":"#/components/schemas/BestOfSequence"},"nullable":true},"finish_reason":{"$ref":"#/components/schemas/FinishReason"},"generated_tokens":{"type":"integer","format":"int32","example":1,"minimum":0},"prefill":{"type":"array","items":{"$ref":"#/components/schemas/PrefillToken"}},"seed":{"type":"integer","format":"int64","example":42,"nullable":true,"minimum":0},"tokens":{"type":"array","items":{"$ref":"#/components/schemas/Token"}},"top_tokens":{"type":"array","items":{"type":"array","items":{"$ref":"#/components/schemas/Token"}}}}},"ErrorResponse":{"type":"object","required":["error","error_type"],"properties":{"error":{"type":"string"},"error_type":{"type":"string"}}},"FinishReason":{"type":"string","enum":["length","eos_token","stop_sequence"],"example":"Length"},"GenerateParameters":{"type":"object","properties":{"best_of":{"type":"integer","default":"null","example":1,"nullable":true,"minimum":0,"exclusiveMinimum":0},"decoder_input_details":{"type":"boolean","default":"true"},"details":{"type":"boolean","default":"true"},"do_sample":{"type":"boolean","default":"false","example":true},"max_new_tokens":{"type":"integer","format":"int32","default":"100","example":"20","nullable":true,"minimum":0},"repetition_penalty":{"type":"number","format":"float","default":"null","example":1.03,"nullable":true,"exclusiveMinimum":0},"return_full_text":{"type":"boolean","default":"null","example":false,"nullable":true},"seed":{"type":"integer","format":"int64"
,"default":"null","example":"null","nullable":true,"minimum":0,"exclusiveMinimum":0},"stop":{"type":"array","items":{"type":"string"},"example":["photographer"],"maxItems":4},"temperature":{"type":"number","format":"float","default":"null","example":0.5,"nullable":true,"exclusiveMinimum":0},"top_k":{"type":"integer","format":"int32","default":"null","example":10,"nullable":true,"exclusiveMinimum":0},"top_n_tokens":{"type":"integer","format":"int32","default":"null","example":5,"nullable":true,"minimum":0,"exclusiveMinimum":0},"top_p":{"type":"number","format":"float","default":"null","example":0.95,"nullable":true,"maximum":1,"exclusiveMinimum":0},"truncate":{"type":"integer","default":"null","example":"null","nullable":true,"minimum":0},"typical_p":{"type":"number","format":"float","default":"null","example":0.95,"nullable":true,"maximum":1,"exclusiveMinimum":0},"watermark":{"type":"boolean","default":"false","example":true}}},"GenerateRequest":{"type":"object","required":["inputs"],"properties":{"inputs":{"type":"string","example":"My name is Olivier and I"},"parameters":{"$ref":"#/components/schemas/GenerateParameters"}}},"GenerateResponse":{"type":"object","required":["generated_text"],"properties":{"details":{"allOf":[{"$ref":"#/components/schemas/Details"}],"nullable":true},"generated_text":{"type":"string","example":"test"}}},"Info":{"type":"object","required":["model_id","model_dtype","model_device_type","max_concurrent_requests","max_best_of","max_stop_sequences","max_input_length","max_total_tokens","waiting_served_ratio","max_batch_total_tokens","max_waiting_tokens","validation_workers","version"],"properties":{"docker_label":{"type":"string","example":"null","nullable":true},"max_batch_total_tokens":{"type":"integer","format":"int32","example":"32000","minimum":0},"max_best_of":{"type":"integer","example":"2","minimum":0},"max_concurrent_requests":{"type":"integer","description":"Router 
Parameters","example":"128","minimum":0},"max_input_length":{"type":"integer","example":"1024","minimum":0},"max_stop_sequences":{"type":"integer","example":"4","minimum":0},"max_total_tokens":{"type":"integer","example":"2048","minimum":0},"max_waiting_tokens":{"type":"integer","example":"20","minimum":0},"model_device_type":{"type":"string","example":"cuda"},"model_dtype":{"type":"string","example":"torch.float16"},"model_id":{"type":"string","description":"Model info","example":"bigscience/blomm-560m"},"model_pipeline_tag":{"type":"string","example":"text-generation","nullable":true},"model_sha":{"type":"string","example":"e985a63cdc139290c5f700ff1929f0b5942cced2","nullable":true},"sha":{"type":"string","example":"null","nullable":true},"validation_workers":{"type":"integer","example":"2","minimum":0},"version":{"type":"string","description":"Router Info","example":"0.5.0"},"waiting_served_ratio":{"type":"number","format":"float","example":"1.2"}}},"Message":{"type":"object","required":["role","content"],"properties":{"content":{"type":"string","example":"My name is David and 
I"},"role":{"type":"string","example":"user"}}},"PrefillToken":{"type":"object","required":["id","text","logprob"],"properties":{"id":{"type":"integer","format":"int32","example":0,"minimum":0},"logprob":{"type":"number","format":"float","example":-0.34,"nullable":true},"text":{"type":"string","example":"test"}}},"SimpleToken":{"type":"object","required":["id","text","start","stop"],"properties":{"id":{"type":"integer","format":"int32","example":0,"minimum":0},"start":{"type":"integer","example":0,"minimum":0},"stop":{"type":"integer","example":2,"minimum":0},"text":{"type":"string","example":"test"}}},"StreamDetails":{"type":"object","required":["finish_reason","generated_tokens"],"properties":{"finish_reason":{"$ref":"#/components/schemas/FinishReason"},"generated_tokens":{"type":"integer","format":"int32","example":1,"minimum":0},"seed":{"type":"integer","format":"int64","example":42,"nullable":true,"minimum":0}}},"StreamResponse":{"type":"object","required":["index","token"],"properties":{"details":{"allOf":[{"$ref":"#/components/schemas/StreamDetails"}],"default":"null","nullable":true},"generated_text":{"type":"string","default":"null","example":"test","nullable":true},"index":{"type":"integer","format":"int32","minimum":0},"token":{"$ref":"#/components/schemas/Token"},"top_tokens":{"type":"array","items":{"$ref":"#/components/schemas/Token"}}}},"Token":{"type":"object","required":["id","text","logprob","special"],"properties":{"id":{"type":"integer","format":"int32","example":0,"minimum":0},"logprob":{"type":"number","format":"float","example":-0.34,"nullable":true},"special":{"type":"boolean","example":"false"},"text":{"type":"string","example":"test"}}},"TokenizeResponse":{"type":"array","items":{"$ref":"#/components/schemas/SimpleToken"}}}},"tags":[{"name":"Text Generation Inference","description":"Hugging Face Text Generation Inference API"}]}
\ No newline at end of file
diff --git a/router/src/lib.rs b/router/src/lib.rs
index 6c16c4b3..fc5670a0 100644
--- a/router/src/lib.rs
+++ b/router/src/lib.rs
@@ -188,18 +188,20 @@ fn default_parameters() -> GenerateParameters {
     }
 }
 
-#[derive(Clone, Deserialize, Serialize)]
+#[derive(Clone, Deserialize, Serialize, ToSchema)]
 pub(crate) struct ChatCompletion {
     pub id: String,
     pub object: String,
+    #[schema(example = "1706270835")]
     pub created: u64,
+    #[schema(example = "mistralai/Mistral-7B-Instruct-v0.2")]
     pub model: String,
     pub system_fingerprint: String,
     pub choices: Vec<ChatCompletionComplete>,
     pub usage: Usage,
 }
 
-#[derive(Clone, Deserialize, Serialize)]
+#[derive(Clone, Deserialize, Serialize, ToSchema)]
 pub(crate) struct ChatCompletionComplete {
     pub index: u32,
     pub message: Message,
@@ -248,17 +250,19 @@ impl ChatCompletion {
     }
 }
 
-#[derive(Clone, Deserialize, Serialize)]
+#[derive(Clone, Deserialize, Serialize, ToSchema)]
 pub(crate) struct ChatCompletionChunk {
     pub id: String,
     pub object: String,
+    #[schema(example = "1706270978")]
     pub created: u64,
+    #[schema(example = "mistralai/Mistral-7B-Instruct-v0.2")]
     pub model: String,
     pub system_fingerprint: String,
     pub choices: Vec<ChatCompletionChoice>,
 }
 
-#[derive(Clone, Deserialize, Serialize)]
+#[derive(Clone, Deserialize, Serialize, ToSchema)]
 pub(crate) struct ChatCompletionChoice {
     pub index: u32,
     pub delta: ChatCompletionDelta,
@@ -266,9 +270,11 @@ pub(crate) struct ChatCompletionChoice {
     pub finish_reason: Option<String>,
 }
 
-#[derive(Clone, Debug, Deserialize, Serialize)]
+#[derive(Clone, Debug, Deserialize, Serialize, ToSchema)]
 pub(crate) struct ChatCompletionDelta {
+    #[schema(example = "user")]
     pub role: String,
+    #[schema(example = "What is Deep Learning?")]
     pub content: String,
 }
 
@@ -311,7 +317,7 @@ fn default_request_messages() -> Vec<Message> {
 #[derive(Clone, Deserialize, ToSchema, Serialize)]
 pub(crate) struct ChatRequest {
     /// UNUSED
-    #[schema(example = "bigscience/blomm-560m")]
+    #[schema(example = "mistralai/Mistral-7B-Instruct-v0.2")]
     /// ID of the model to use. See the model endpoint compatibility table for details on which models work with the Chat API.
     pub model: String, /* NOTE: UNUSED */
 
@@ -322,6 +328,7 @@ pub(crate) struct ChatRequest {
     /// Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far,
     /// decreasing the model's likelihood to repeat the same line verbatim.
     #[serde(default)]
+    #[schema(example = "1.0")]
     pub frequency_penalty: Option<f32>,
 
     /// UNUSED
@@ -336,28 +343,33 @@ pub(crate) struct ChatRequest {
     /// Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each
     /// output token returned in the content of message.
     #[serde(default)]
+    #[schema(example = "false")]
     pub logprobs: Option<bool>,
 
     /// UNUSED
     /// An integer between 0 and 5 specifying the number of most likely tokens to return at each token position, each with
     /// an associated log probability. logprobs must be set to true if this parameter is used.
     #[serde(default)]
+    #[schema(example = "5")]
     pub top_logprobs: Option<u32>,
 
     /// The maximum number of tokens that can be generated in the chat completion.
     #[serde(default)]
+    #[schema(example = "32")]
     pub max_tokens: Option<u32>,
 
     /// UNUSED
     /// How many chat completion choices to generate for each input message. Note that you will be charged based on the
     /// number of generated tokens across all of the choices. Keep n as 1 to minimize costs.
     #[serde(default)]
+    #[schema(nullable = true, example = "2")]
     pub n: Option<u32>,
 
     /// UNUSED
     /// Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far,
     /// increasing the model's likelihood to talk about new topics
     #[serde(default)]
+    #[schema(nullable = true, example = 0.1)]
     pub presence_penalty: Option<f32>,
 
     #[serde(default = "bool::default")]
@@ -371,11 +383,13 @@ pub(crate) struct ChatRequest {
     ///
     /// We generally recommend altering this or `top_p` but not both.
     #[serde(default)]
+    #[schema(nullable = true, example = 1.0)]
     pub temperature: Option<f32>,
 
     /// An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the
     /// tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.
     #[serde(default)]
+    #[schema(nullable = true, example = 0.95)]
     pub top_p: Option<f32>,
 }
 
@@ -458,6 +472,7 @@ pub struct SimpleToken {
 
 #[derive(Serialize, ToSchema)]
 #[serde(rename_all(serialize = "snake_case"))]
+#[schema(example = "Length")]
 pub(crate) enum FinishReason {
     #[schema(rename = "length")]
     Length,
@@ -518,6 +533,10 @@ pub(crate) struct GenerateResponse {
     pub details: Option<Details>,
 }
 
+#[derive(Serialize, ToSchema)]
+#[serde(transparent)]
+pub(crate) struct TokenizeResponse(Vec<SimpleToken>);
+
 #[derive(Serialize, ToSchema)]
 pub(crate) struct StreamDetails {
     #[schema(example = "length")]
diff --git a/router/src/server.rs b/router/src/server.rs
index c5ca4665..998d6265 100644
--- a/router/src/server.rs
+++ b/router/src/server.rs
@@ -3,10 +3,10 @@ use crate::health::Health;
 use crate::infer::{InferError, InferResponse, InferStreamResponse};
 use crate::validation::ValidationError;
 use crate::{
-    BestOfSequence, ChatCompletion, ChatCompletionChunk, ChatRequest, CompatGenerateRequest,
-    Details, ErrorResponse, FinishReason, GenerateParameters, GenerateRequest, GenerateResponse,
-    HubModelInfo, HubTokenizerConfig, Infer, Info, PrefillToken, SimpleToken, StreamDetails,
-    StreamResponse, Token, Validation,
+    BestOfSequence, ChatCompletion, ChatCompletionChoice, ChatCompletionChunk, ChatCompletionDelta,
+    ChatRequest, CompatGenerateRequest, Details, ErrorResponse, FinishReason, GenerateParameters,
+    GenerateRequest, GenerateResponse, HubModelInfo, HubTokenizerConfig, Infer, Info, Message,
+    PrefillToken, SimpleToken, StreamDetails, StreamResponse, Token, TokenizeResponse, Validation,
 };
 use axum::extract::Extension;
 use axum::http::{HeaderMap, Method, StatusCode};
@@ -677,7 +677,7 @@ async fn chat_completions(
     post,
     tag = "Text Generation Inference",
     path = "/tokenize",
-    request_body = TokenizeRequest,
+    request_body = GenerateRequest,
     responses(
     (status = 200, description = "Tokenized ids", body = TokenizeResponse),
     (status = 404, description = "No tokenizer found", body = ErrorResponse,
@@ -688,7 +688,7 @@ async fn chat_completions(
 async fn tokenize(
     Extension(infer): Extension<Infer>,
     Json(req): Json<GenerateRequest>,
-) -> Result<Response, (StatusCode, Json<ErrorResponse>)> {
+) -> Result<Json<TokenizeResponse>, (StatusCode, Json<ErrorResponse>)> {
     let input = req.inputs.clone();
     let encoding = infer.tokenize(req).await?;
     if let Some(encoding) = encoding {
@@ -706,7 +706,7 @@ async fn tokenize(
                 }
             })
             .collect();
-        Ok(Json(tokens).into_response())
+        Ok(Json(TokenizeResponse(tokens)))
     } else {
         Err((
             StatusCode::NOT_FOUND,
@@ -774,10 +774,18 @@ pub async fn run(
     Info,
     CompatGenerateRequest,
     GenerateRequest,
+    ChatRequest,
+    Message,
+    ChatCompletionChoice,
+    ChatCompletionDelta,
+    ChatCompletionChunk,
+    ChatCompletion,
     GenerateParameters,
     PrefillToken,
     Token,
     GenerateResponse,
+    TokenizeResponse,
+    SimpleToken,
     BestOfSequence,
     Details,
     FinishReason,

From 650fea18341a009e86b30bb38d650dfd342029c4 Mon Sep 17 00:00:00 2001
From: fxmarty <9808326+fxmarty@users.noreply.github.com>
Date: Fri, 26 Jan 2024 16:27:44 +0100
Subject: [PATCH 15/31] GPTQ support on ROCm (#1489)

Tested with
```
CUDA_VISIBLE_DEVICES=0 text-generation-launcher --model-id TheBloke/Llama-2-7B-Chat-GPTQ --quantize gptq
EXLLAMA_VERSION=1 CUDA_VISIBLE_DEVICES=0 text-generation-launcher --model-id TheBloke/Llama-2-7B-Chat-GPTQ --quantize gptq
CUDA_VISIBLE_DEVICES="0,1" text-generation-launcher --model-id TheBloke/Llama-2-7B-Chat-GPTQ --quantize gptq
```

all with good and identical results on MI210.

---------

Co-authored-by: Felix Marty <felix@hf.co>
Co-authored-by: OlivierDehaene <olivier@huggingface.co>
Co-authored-by: OlivierDehaene <23298448+OlivierDehaene@users.noreply.github.com>
---
 .gitignore                                    | 10 +++++++
 Dockerfile_amd                                | 24 +++++++++++++--
 docs/source/supported_models.md               |  4 +--
 .../{cuda_compat.cuh => cu_compat.cuh}        |  6 ++--
 .../exllama_kernels/cuda_func/q4_matmul.cu    |  7 +++--
 .../exllama_kernels/hip_compat.cuh}           | 29 ++++++++++++++-----
 .../exllama_kernels/exllama_kernels/util.cuh  |  4 +++
 server/exllamav2_kernels/setup.py             | 11 +++++++
 .../utils/gptq/exllamav2.py                   |  5 +---
 server/text_generation_server/utils/layers.py |  2 +-
 10 files changed, 80 insertions(+), 22 deletions(-)
 rename server/exllama_kernels/exllama_kernels/{cuda_compat.cuh => cu_compat.cuh} (91%)
 rename server/{exllamav2_kernels/exllamav2_kernels/cuda/compat_gemm.cuh => exllama_kernels/exllama_kernels/hip_compat.cuh} (68%)

diff --git a/.gitignore b/.gitignore
index 20c9baee..1f9ba162 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,13 @@
 target
 router/tokenizer.json
 *__pycache__*
+
+# ROCm auto-generated files
+*.hip
+server/exllamav2_kernels/exllamav2_kernels/hip/
+server/exllama_kernels/exllama_kernels/hip/
+server/exllama_kernels/exllama_kernels/hip_func/
+*_hip.cuh
+server/exllama_kernels/exllama_kernels/hip_buffers.cuh
+server/exllama_kernels/exllama_kernels/exllama_ext_hip.cpp
+
diff --git a/Dockerfile_amd b/Dockerfile_amd
index dd331a5d..d2b6f897 100644
--- a/Dockerfile_amd
+++ b/Dockerfile_amd
@@ -75,8 +75,8 @@ RUN chmod +x ~/mambaforge.sh && \
     mamba init && \
     rm ~/mambaforge.sh
 
-# Install PyTorch nightly (2.2.0.dev2023) compiled against RoCm 5.7, as VLLM can not be compiled with RoCm 5.6.
-RUN pip install --pre torch==2.2.0.dev20231106 --index-url https://download.pytorch.org/whl/nightly/rocm5.7
+# Install PyTorch 2.2 RC compiled against ROCm 5.7, as vLLM cannot be compiled with ROCm 5.6.
+RUN pip install torch --index-url https://download.pytorch.org/whl/test/rocm5.7/
 
 FROM base AS kernel-builder
 
@@ -104,6 +104,20 @@ WORKDIR /usr/src
 COPY server/custom_kernels/ .
 RUN PYTORCH_ROCM_ARCH=gfx90a python setup.py build
 
+# Build exllama kernels
+FROM kernel-builder as exllama-kernels-builder
+WORKDIR /usr/src
+COPY server/exllama_kernels/ .
+
+RUN PYTORCH_ROCM_ARCH="gfx90a" python setup.py build
+
+# Build exllama v2 kernels
+FROM kernel-builder as exllamav2-kernels-builder
+WORKDIR /usr/src
+COPY server/exllamav2_kernels/ .
+
+RUN PYTORCH_ROCM_ARCH="gfx90a" python setup.py build
+
 FROM base as base-copy
 
 # Text Generation Inference base env
@@ -120,6 +134,12 @@ COPY --from=flash-att-v2-builder /usr/src/flash-attention-v2/build/lib.linux-x86
 # Copy build artifacts from custom kernels builder
 COPY --from=custom-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
 
+# Copy build artifacts from exllama kernels builder
+COPY --from=exllama-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
+
+# Copy build artifacts from exllamav2 kernels builder
+COPY --from=exllamav2-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
+
 # Install flash-attention dependencies
 RUN pip install einops --no-cache-dir
 
diff --git a/docs/source/supported_models.md b/docs/source/supported_models.md
index 004790ab..df5102c2 100644
--- a/docs/source/supported_models.md
+++ b/docs/source/supported_models.md
@@ -43,8 +43,8 @@ text-generation-launcher --model-id <PATH-TO-LOCAL-BLOOM>
 
 TGI optimized models are supported on NVIDIA [A100](https://www.nvidia.com/en-us/data-center/a100/), [A10G](https://www.nvidia.com/en-us/data-center/products/a10-gpu/) and [T4](https://www.nvidia.com/en-us/data-center/tesla-t4/) GPUs with CUDA 12.2+. Note that you have to install [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html) to use it. For other NVIDIA GPUs, continuous batching will still apply, but some operations like flash attention and paged attention will not be executed.
 
-TGI also has support of ROCm-enabled AMD Instinct MI210 and MI250 GPUs, with paged attention and flash attention v2 support. The following features are currently not supported in the ROCm version of TGI, and the supported may be extended in the future:
-* Quantization (GPTQ, AWQ, etc.)
+TGI also supports ROCm-enabled AMD Instinct MI210 and MI250 GPUs, with paged attention, GPTQ quantization, and flash attention v2 support. The following features are currently not supported in the ROCm version of TGI, and support may be extended in the future:
+* Loading [AWQ](https://huggingface.co/docs/transformers/quantization#awq) checkpoints.
 * Flash [layer norm kernel](https://github.com/Dao-AILab/flash-attention/tree/main/csrc/layer_norm)
 * Kernel for slinding window attention (Mistral)
 
diff --git a/server/exllama_kernels/exllama_kernels/cuda_compat.cuh b/server/exllama_kernels/exllama_kernels/cu_compat.cuh
similarity index 91%
rename from server/exllama_kernels/exllama_kernels/cuda_compat.cuh
rename to server/exllama_kernels/exllama_kernels/cu_compat.cuh
index 8dfa25de..c5258813 100644
--- a/server/exllama_kernels/exllama_kernels/cuda_compat.cuh
+++ b/server/exllama_kernels/exllama_kernels/cu_compat.cuh
@@ -43,12 +43,12 @@ __device__ __forceinline__ void atomicAdd_half2(half2* address, half2 val)
 
 //
 
-#if defined(__CUDA_ARCH__)
-#if __CUDA_ARCH__ < 700
+#if defined(__CUDA_ARCH__) || defined(USE_ROCM)
+#if __CUDA_ARCH__ < 700 || defined(USE_ROCM)
 
 __device__ __forceinline__ void atomicAdd(half* address, half val) { atomicAdd_half(address, val); }
 
-#if __CUDA_ARCH__ < 600
+#if __CUDA_ARCH__ < 600 || defined(USE_ROCM)
 __device__ __forceinline__ void atomicAdd(half2* address, half2 val) { atomicAdd_half2(address, val); }
 #endif
 
diff --git a/server/exllama_kernels/exllama_kernels/cuda_func/q4_matmul.cu b/server/exllama_kernels/exllama_kernels/cuda_func/q4_matmul.cu
index 60dc4c9d..61380f42 100644
--- a/server/exllama_kernels/exllama_kernels/cuda_func/q4_matmul.cu
+++ b/server/exllama_kernels/exllama_kernels/cuda_func/q4_matmul.cu
@@ -2,8 +2,11 @@
 #include "column_remap.cuh"
 #include "../util.cuh"
 #include "../matrix.cuh"
-#include "../cuda_compat.cuh"
+#include "../cu_compat.cuh"
 #include "../cuda_buffers.cuh"
+#if defined(USE_ROCM)
+#include "../hip_compat.cuh"
+#endif
 
 const int THREADS_X = 32;       // Block size and thread count along columns in w and out
 const int THREADS_Y = 1;        // Block size and thread count along rows in x and out
@@ -128,7 +131,7 @@ __global__ void q4_matmul_kernel
 
     if constexpr (use_half2)
     {
-        half result = __hadd(acc.x, acc.y);
+        half result = __hadd(__low2half(acc), __high2half(acc));
         atomicAdd(out_.item_ptr(x_row, w_column), result);
     }
     else
diff --git a/server/exllamav2_kernels/exllamav2_kernels/cuda/compat_gemm.cuh b/server/exllama_kernels/exllama_kernels/hip_compat.cuh
similarity index 68%
rename from server/exllamav2_kernels/exllamav2_kernels/cuda/compat_gemm.cuh
rename to server/exllama_kernels/exllama_kernels/hip_compat.cuh
index 19b1e4a6..4f2a7ae7 100644
--- a/server/exllamav2_kernels/exllamav2_kernels/cuda/compat_gemm.cuh
+++ b/server/exllama_kernels/exllama_kernels/hip_compat.cuh
@@ -1,12 +1,23 @@
-#ifndef _compat_gemm_cuh
-#define _compat_gemm_cuh
+// Adapted from turboderp exllama: https://github.com/turboderp/exllama
 
-#if defined(USE_ROCM)
+#ifndef _hip_compat_cuh
+#define _hip_compat_cuh
 
-// For some reason this include is not present anywhere in exllama_v2 codebase, but it is required
-// for symbols as hipblasHalf.
-#include <hipblas/hipblas.h>
+// Workaround for a bug in hipamd, backported from upstream, this is fixed in ROCm 5.6.
+__device__ __forceinline__ __half __compat_hrcp(__half x) {
+    return __half_raw{
+        static_cast<_Float16>(__builtin_amdgcn_rcph(static_cast<__half_raw>(x).data))};
+}
 
+__device__ __forceinline__ __half2 __compat_h2rcp(__half2 x) {
+    return _Float16_2{static_cast<_Float16>(__builtin_amdgcn_rcph(x.x)),
+        static_cast<_Float16>(__builtin_amdgcn_rcph(x.y))};
+}
+
+#define hrcp __compat_hrcp
+#define h2rcp __compat_h2rcp
+
+// Automatic conversion of hipblasHgemm doesn't convert half to hipblasHalf.
 __host__ __forceinline__ hipblasStatus_t __compat_hipblasHgemm(hipblasHandle_t    handle,
                                                                hipblasOperation_t transA,
                                                                hipblasOperation_t transB,
@@ -31,8 +42,10 @@ __host__ __forceinline__ hipblasStatus_t __compat_hipblasHgemm(hipblasHandle_t
 #define hipblasHgemm __compat_hipblasHgemm
 
 // Previous version of PyTorch were converting to rocBLAS instead of hipBLAS.
+#define rocblas_handle hipblasHandle_t
 #define rocblas_operation_none HIPBLAS_OP_N
+#define rocblas_get_stream hipblasGetStream
+#define rocblas_set_stream hipblasSetStream
 #define rocblas_hgemm __compat_hipblasHgemm
-#endif
 
-#endif
+#endif
\ No newline at end of file
diff --git a/server/exllama_kernels/exllama_kernels/util.cuh b/server/exllama_kernels/exllama_kernels/util.cuh
index 2839b10f..7b397573 100644
--- a/server/exllama_kernels/exllama_kernels/util.cuh
+++ b/server/exllama_kernels/exllama_kernels/util.cuh
@@ -8,7 +8,11 @@
 #include <cstdint>
 #include <cstdio>
 
+#if defined(USE_ROCM)
+#define cudaUnspecified hipErrorUnknown
+#else
 #define cudaUnspecified cudaErrorApiFailureBase
+#endif
 
 // React to failure on return code != cudaSuccess
 
diff --git a/server/exllamav2_kernels/setup.py b/server/exllamav2_kernels/setup.py
index 518db1df..4a16b546 100644
--- a/server/exllamav2_kernels/setup.py
+++ b/server/exllamav2_kernels/setup.py
@@ -1,5 +1,15 @@
 from setuptools import setup
 from torch.utils.cpp_extension import BuildExtension, CUDAExtension
+import torch
+
+extra_cuda_cflags = ["-lineinfo", "-O3"]
+
+if torch.version.hip:
+    extra_cuda_cflags += ["-DHIPBLAS_USE_HIP_HALF"]
+
+extra_compile_args = {
+    "nvcc": extra_cuda_cflags,
+}
 
 setup(
     name="exllamav2_kernels",
@@ -11,6 +21,7 @@ setup(
                 "exllamav2_kernels/cuda/q_matrix.cu",
                 "exllamav2_kernels/cuda/q_gemm.cu",
             ],
+            extra_compile_args=extra_compile_args,
         )
     ],
     cmdclass={"build_ext": BuildExtension},
diff --git a/server/text_generation_server/utils/gptq/exllamav2.py b/server/text_generation_server/utils/gptq/exllamav2.py
index 2b897f25..80836a95 100644
--- a/server/text_generation_server/utils/gptq/exllamav2.py
+++ b/server/text_generation_server/utils/gptq/exllamav2.py
@@ -1,12 +1,9 @@
 # Adapted from turboderp exllama: https://github.com/turboderp/exllamav2
 
-from logging import getLogger
-
 import torch
 import torch.nn as nn
-import math
 
-logger = getLogger(__name__)
+from loguru import logger
 
 try:
     from exllamav2_kernels import make_q_matrix, gemm_half_q_half
diff --git a/server/text_generation_server/utils/layers.py b/server/text_generation_server/utils/layers.py
index c9393d99..6ddfd6f4 100644
--- a/server/text_generation_server/utils/layers.py
+++ b/server/text_generation_server/utils/layers.py
@@ -33,7 +33,7 @@ except Exception:
     major = 1
 
 HAS_EXLLAMA = False
-CAN_EXLLAMA = major >= 8
+CAN_EXLLAMA = major >= 8 or IS_ROCM_SYSTEM
 V2 = os.getenv("EXLLAMA_VERSION", "2") == "2"
 # if V2 and int(os.getenv("WORLD_SIZE", "1")) > 1:
 #     V2 = False

From d9758851be6279444a81901b54e7f55af771f6ad Mon Sep 17 00:00:00 2001
From: drbh <david.richard.holtz@gmail.com>
Date: Fri, 26 Jan 2024 12:01:33 -0500
Subject: [PATCH 16/31] feat: add tokenizer-config-path to launcher args
 (#1495)

This PR adds the `--tokenizer-config-path` argument to the launcher and
passes it to the router.

Fixes:
https://github.com/huggingface/text-generation-inference/pull/1427
---
 docs/source/basic_tutorials/launcher.md |  8 ++++++++
 launcher/src/main.rs                    | 11 +++++++++++
 2 files changed, 19 insertions(+)

diff --git a/docs/source/basic_tutorials/launcher.md b/docs/source/basic_tutorials/launcher.md
index bafe3669..712b4fc4 100644
--- a/docs/source/basic_tutorials/launcher.md
+++ b/docs/source/basic_tutorials/launcher.md
@@ -354,6 +354,14 @@ Options:
           
           [env: NGROK_EDGE=]
 
+```
+## TOKENIZER_CONFIG_PATH
+```shell
+      --tokenizer-config-path <TOKENIZER_CONFIG_PATH>
+          The path to the tokenizer config file. This path is used to load the tokenizer configuration which may include a `chat_template`. If not provided, the default config will be used from the model hub
+          
+          [env: TOKENIZER_CONFIG_PATH=]
+
 ```
 ## ENV
 ```shell
diff --git a/launcher/src/main.rs b/launcher/src/main.rs
index 09657c91..f0e45141 100644
--- a/launcher/src/main.rs
+++ b/launcher/src/main.rs
@@ -368,6 +368,11 @@ struct Args {
     #[clap(long, env)]
     ngrok_edge: Option<String>,
 
+    /// The path to the tokenizer config file. This path is used to load the tokenizer configuration which may
+    /// include a `chat_template`. If not provided, the default config will be used from the model hub.
+    #[clap(long, env)]
+    tokenizer_config_path: Option<String>,
+
     /// Display a lot of information about your runtime environment
     #[clap(long, short, action)]
     env: bool,
@@ -1016,6 +1021,12 @@ fn spawn_webserver(
         args.model_id,
     ];
 
+    // Tokenizer config path
+    if let Some(ref tokenizer_config_path) = args.tokenizer_config_path {
+        router_args.push("--tokenizer-config-path".to_string());
+        router_args.push(tokenizer_config_path.to_string());
+    }
+
     // Model optional max batch total tokens
     if let Some(max_batch_total_tokens) = args.max_batch_total_tokens {
         router_args.push("--max-batch-total-tokens".to_string());

From c2d4a3b5c7bb6a8367c00f7c797bf87f4b2fcef9 Mon Sep 17 00:00:00 2001
From: OlivierDehaene <olivier@huggingface.co>
Date: Fri, 26 Jan 2024 19:04:57 +0100
Subject: [PATCH 17/31] v1.4.0 (#1494)

---
 .github/workflows/delete_doc_comment.yml      |   12 -
 Cargo.lock                                    |  420 +++---
 Cargo.toml                                    |    2 +-
 README.md                                     |    6 +-
 docs/openapi.json                             | 1294 ++++++++++++++++-
 .../basic_tutorials/gated_model_access.md     |    2 +-
 docs/source/quicktour.md                      |    6 +-
 integration-tests/models/test_flash_phi.py    |    8 +-
 integration-tests/pyproject.toml              |    2 +-
 server/poetry.lock                            | 1193 ++++++++-------
 server/pyproject.toml                         |    2 +-
 server/requirements_cuda.txt                  |   18 +-
 server/requirements_rocm.txt                  |   18 +-
 server/tests/utils/test_layers.py             |   37 +-
 server/text_generation_server/cli.py          |    2 +-
 .../text_generation_server/models/__init__.py |    6 +-
 .../custom_modeling/flash_phi_modeling.py     |   30 +-
 .../models/custom_modeling/mpt_modeling.py    |   27 +-
 .../models/custom_modeling/phi_modeling.py    |   62 +-
 .../models/flash_llama.py                     |   12 +-
 .../models/flash_phi.py                       |   12 +-
 server/text_generation_server/models/phi.py   |    7 +-
 server/text_generation_server/utils/layers.py |    4 +-
 23 files changed, 2333 insertions(+), 849 deletions(-)
 delete mode 100644 .github/workflows/delete_doc_comment.yml

diff --git a/.github/workflows/delete_doc_comment.yml b/.github/workflows/delete_doc_comment.yml
deleted file mode 100644
index 1cad807b..00000000
--- a/.github/workflows/delete_doc_comment.yml
+++ /dev/null
@@ -1,12 +0,0 @@
-name: Delete doc comment
-
-on:
-  pull_request:
-    types: [ closed ]
-
-
-jobs:
-  delete:
-    uses: huggingface/doc-builder/.github/workflows/delete_doc_comment_trigger.yml@main
-    with:
-      pr_number: ${{ github.event.number }}
\ No newline at end of file
diff --git a/Cargo.lock b/Cargo.lock
index 3baff665..7fdf301a 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -19,9 +19,9 @@ checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
 
 [[package]]
 name = "ahash"
-version = "0.8.6"
+version = "0.8.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "91429305e9f0a25f6205c5b8e0d2db09e0708a7a6df0f42212bb56c32c8ac97a"
+checksum = "77c3a9648d43b9cd48db467b3f87fdd6e146bcc88ab0180006cef2179fe11d01"
 dependencies = [
  "cfg-if",
  "once_cell",
@@ -40,9 +40,9 @@ dependencies = [
 
 [[package]]
 name = "anstream"
-version = "0.6.5"
+version = "0.6.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d664a92ecae85fd0a7392615844904654d1d5f5514837f471ddef4a057aba1b6"
+checksum = "6e2e1ebcb11de5c03c67de28a7df593d32191b44939c482e97702baaaa6ab6a5"
 dependencies = [
  "anstyle",
  "anstyle-parse",
@@ -88,9 +88,9 @@ dependencies = [
 
 [[package]]
 name = "anyhow"
-version = "1.0.76"
+version = "1.0.79"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "59d2a3357dde987206219e78ecfbbb6e8dad06cbb65292758d3270e6254f7355"
+checksum = "080e9890a082662b09c1ad45f567faeeb47f22b5fb23895fbe1e651e718e25ca"
 
 [[package]]
 name = "arc-swap"
@@ -128,18 +128,18 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.42",
+ "syn 2.0.48",
 ]
 
 [[package]]
 name = "async-trait"
-version = "0.1.75"
+version = "0.1.77"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fdf6721fb0140e4f897002dd086c06f6c27775df19cfe1fccb21181a48fd2c98"
+checksum = "c980ee35e870bd1a4d2c8294d4c04d0499e67bca1e4b5cefcc693c2fa00caea9"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.42",
+ "syn 2.0.48",
 ]
 
 [[package]]
@@ -261,9 +261,9 @@ checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8"
 
 [[package]]
 name = "base64"
-version = "0.21.5"
+version = "0.21.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "35636a1494ede3b646cc98f74f8e62c773a38a659ebc777a2cf26b9b74171df9"
+checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567"
 
 [[package]]
 name = "bitflags"
@@ -273,9 +273,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
 
 [[package]]
 name = "bitflags"
-version = "2.4.1"
+version = "2.4.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07"
+checksum = "ed570934406eb16438a4e976b1b4500774099c13b8cb96eec99f620f05090ddf"
 
 [[package]]
 name = "block-buffer"
@@ -310,6 +310,38 @@ version = "1.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223"
 
+[[package]]
+name = "camino"
+version = "1.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c59e92b5a388f549b863a7bea62612c09f24c8393560709a54558a9abdfb3b9c"
+dependencies = [
+ "serde",
+]
+
+[[package]]
+name = "cargo-platform"
+version = "0.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ceed8ef69d8518a5dda55c07425450b58a4e1946f4951eab6d7191ee86c2443d"
+dependencies = [
+ "serde",
+]
+
+[[package]]
+name = "cargo_metadata"
+version = "0.18.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2d886547e41f740c616ae73108f6eb70afe6d940c7bc697cb30f13daec073037"
+dependencies = [
+ "camino",
+ "cargo-platform",
+ "semver",
+ "serde",
+ "serde_json",
+ "thiserror",
+]
+
 [[package]]
 name = "cassowary"
 version = "0.3.0"
@@ -333,9 +365,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
 
 [[package]]
 name = "clap"
-version = "4.4.11"
+version = "4.4.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bfaff671f6b22ca62406885ece523383b9b64022e341e53e009a62ebc47a45f2"
+checksum = "1e578d6ec4194633722ccf9544794b71b1385c3c027efe0c55db226fc880865c"
 dependencies = [
  "clap_builder",
  "clap_derive",
@@ -343,9 +375,9 @@ dependencies = [
 
 [[package]]
 name = "clap_builder"
-version = "4.4.11"
+version = "4.4.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a216b506622bb1d316cd51328dce24e07bdff4a6128a47c7e7fad11878d5adbb"
+checksum = "4df4df40ec50c46000231c914968278b1eb05098cf8f1b3a518a95030e71d1c7"
 dependencies = [
  "anstream",
  "anstyle",
@@ -362,7 +394,7 @@ dependencies = [
  "heck",
  "proc-macro2",
  "quote",
- "syn 2.0.42",
+ "syn 2.0.48",
 ]
 
 [[package]]
@@ -379,15 +411,15 @@ checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7"
 
 [[package]]
 name = "console"
-version = "0.15.7"
+version = "0.15.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c926e00cc70edefdc64d3a5ff31cc65bb97a3460097762bd23afb4d8145fccf8"
+checksum = "0e1f83fc076bd6dd27517eacdf25fef6c4dfe5f1d7448bafaaf3a26f13b5e4eb"
 dependencies = [
  "encode_unicode",
  "lazy_static",
  "libc",
  "unicode-width",
- "windows-sys 0.45.0",
+ "windows-sys 0.52.0",
 ]
 
 [[package]]
@@ -408,9 +440,9 @@ checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f"
 
 [[package]]
 name = "cpufeatures"
-version = "0.2.11"
+version = "0.2.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ce420fe07aecd3e67c5f910618fe65e94158f6dcc0adf44e00d69ce2bdfe0fd0"
+checksum = "53fe5e26ff1b7aef8bca9c6080520cfb8d9333c7568e1829cef191a9723e5504"
 dependencies = [
  "libc",
 ]
@@ -426,45 +458,37 @@ dependencies = [
 
 [[package]]
 name = "crossbeam-channel"
-version = "0.5.9"
+version = "0.5.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "14c3242926edf34aec4ac3a77108ad4854bffaa2e4ddc1824124ce59231302d5"
+checksum = "176dc175b78f56c0f321911d9c8eb2b77a78a4860b9c19db83835fea1a46649b"
 dependencies = [
- "cfg-if",
  "crossbeam-utils",
 ]
 
 [[package]]
 name = "crossbeam-deque"
-version = "0.8.4"
+version = "0.8.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fca89a0e215bab21874660c67903c5f143333cab1da83d041c7ded6053774751"
+checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d"
 dependencies = [
- "cfg-if",
  "crossbeam-epoch",
  "crossbeam-utils",
 ]
 
 [[package]]
 name = "crossbeam-epoch"
-version = "0.9.16"
+version = "0.9.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2d2fe95351b870527a5d09bf563ed3c97c0cffb87cf1c78a591bf48bb218d9aa"
+checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
 dependencies = [
- "autocfg",
- "cfg-if",
  "crossbeam-utils",
- "memoffset",
 ]
 
 [[package]]
 name = "crossbeam-utils"
-version = "0.8.17"
+version = "0.8.19"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c06d96137f14f244c37f989d9fff8f95e6c18b918e71f36638f8c49112e4c78f"
-dependencies = [
- "cfg-if",
-]
+checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345"
 
 [[package]]
 name = "crossterm"
@@ -472,7 +496,7 @@ version = "0.27.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f476fe445d41c9e991fd07515a6f463074b782242ccf4a5b7b1d1012e70824df"
 dependencies = [
- "bitflags 2.4.1",
+ "bitflags 2.4.2",
  "crossterm_winapi",
  "libc",
  "mio",
@@ -503,12 +527,12 @@ dependencies = [
 
 [[package]]
 name = "ctrlc"
-version = "3.4.1"
+version = "3.4.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "82e95fbd621905b854affdc67943b043a0fbb6ed7385fd5a25650d19a8a6cfdf"
+checksum = "b467862cc8610ca6fc9a1532d7777cee0804e678ab45410897b9396495994a0b"
 dependencies = [
  "nix",
- "windows-sys 0.48.0",
+ "windows-sys 0.52.0",
 ]
 
 [[package]]
@@ -548,9 +572,9 @@ dependencies = [
 
 [[package]]
 name = "deranged"
-version = "0.3.10"
+version = "0.3.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8eb30d70a07a3b04884d2677f06bec33509dc67ca60d92949e5535352d3191dc"
+checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4"
 dependencies = [
  "powerfmt",
 ]
@@ -758,9 +782,9 @@ dependencies = [
 
 [[package]]
 name = "futures"
-version = "0.3.29"
+version = "0.3.30"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "da0290714b38af9b4a7b094b8a37086d1b4e61f2df9122c3cad2577669145335"
+checksum = "645c6916888f6cb6350d2550b80fb63e734897a8498abe35cfb732b6487804b0"
 dependencies = [
  "futures-channel",
  "futures-core",
@@ -789,9 +813,9 @@ checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d"
 
 [[package]]
 name = "futures-executor"
-version = "0.3.29"
+version = "0.3.30"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0f4fb8693db0cf099eadcca0efe2a5a22e4550f98ed16aba6c48700da29597bc"
+checksum = "a576fc72ae164fca6b9db127eaa9a9dda0d61316034f33a0a0d4eda41f02b01d"
 dependencies = [
  "futures-core",
  "futures-task",
@@ -812,7 +836,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.42",
+ "syn 2.0.48",
 ]
 
 [[package]]
@@ -866,9 +890,9 @@ dependencies = [
 
 [[package]]
 name = "getrandom"
-version = "0.2.11"
+version = "0.2.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fe9006bed769170c11f845cf00c7c1e9092aeb3f268e007c3e760ac68008070f"
+checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5"
 dependencies = [
  "cfg-if",
  "libc",
@@ -893,9 +917,9 @@ dependencies = [
 
 [[package]]
 name = "h2"
-version = "0.3.22"
+version = "0.3.24"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4d6250322ef6e60f93f9a2162799302cd6f68f79f6e5d85c8c16f14d1d958178"
+checksum = "bb2c4422095b67ee78da96fbb51a4cc413b3b25883c7717ff7ca1ab31022c9c9"
 dependencies = [
  "bytes",
  "fnv",
@@ -939,9 +963,9 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
 
 [[package]]
 name = "hermit-abi"
-version = "0.3.3"
+version = "0.3.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7"
+checksum = "5d3d0e0f38255e7fa3cf31335b3a56f05febd18025f4db5ef7a0cfb4f8da651f"
 
 [[package]]
 name = "hf-hub"
@@ -1183,9 +1207,9 @@ checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c"
 
 [[package]]
 name = "js-sys"
-version = "0.3.66"
+version = "0.3.67"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cee9c64da59eae3b50095c18d3e74f8b73c0b86d2792824ff01bbce68ba229ca"
+checksum = "9a1d36f1235bc969acba30b7f5990b864423a6068a10f7c90ae8f0112e3a59d1"
 dependencies = [
  "wasm-bindgen",
 ]
@@ -1198,9 +1222,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
 
 [[package]]
 name = "libc"
-version = "0.2.151"
+version = "0.2.152"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "302d7ab3130588088d277783b1e2d2e10c9e9e4a16dd9050e6ec93fb3e7048f4"
+checksum = "13e3bf6590cbc649f4d1a3eefc9d5d6eb746f5200ffb04e5e142700b8faa56e7"
 
 [[package]]
 name = "libm"
@@ -1214,16 +1238,16 @@ version = "0.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "85c833ca1e66078851dba29046874e38f08b2c883700aa29a03ddd3b23814ee8"
 dependencies = [
- "bitflags 2.4.1",
+ "bitflags 2.4.2",
  "libc",
  "redox_syscall",
 ]
 
 [[package]]
 name = "linux-raw-sys"
-version = "0.4.12"
+version = "0.4.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c4cd1a83af159aa67994778be9070f0ae1bd732942279cabb14f86f986a21456"
+checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c"
 
 [[package]]
 name = "lock_api"
@@ -1289,18 +1313,9 @@ checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94"
 
 [[package]]
 name = "memchr"
-version = "2.6.4"
+version = "2.7.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167"
-
-[[package]]
-name = "memoffset"
-version = "0.9.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c"
-dependencies = [
- "autocfg",
-]
+checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149"
 
 [[package]]
 name = "metrics"
@@ -1319,7 +1334,7 @@ version = "0.12.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1d4fa7ce7c4862db464a37b0b31d89bca874562f034bd7993895572783d02950"
 dependencies = [
- "base64 0.21.5",
+ "base64 0.21.7",
  "hyper",
  "indexmap 1.9.3",
  "ipnet",
@@ -1333,13 +1348,13 @@ dependencies = [
 
 [[package]]
 name = "metrics-macros"
-version = "0.7.0"
+version = "0.7.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ddece26afd34c31585c74a4db0630c376df271c285d682d1e55012197830b6df"
+checksum = "38b4faf00617defe497754acde3024865bc143d44a86799b24e191ecff91354f"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.42",
+ "syn 2.0.48",
 ]
 
 [[package]]
@@ -1375,9 +1390,9 @@ dependencies = [
 
 [[package]]
 name = "minijinja"
-version = "1.0.10"
+version = "1.0.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "208758577ef2c86cf5dd3e85730d161413ec3284e2d73b2ef65d9a24d9971bcb"
+checksum = "6fe0ff215195a22884d867b547c70a0c4815cbbcc70991f281dca604b20d10ce"
 dependencies = [
  "serde",
 ]
@@ -1411,9 +1426,9 @@ dependencies = [
 
 [[package]]
 name = "monostate"
-version = "0.1.10"
+version = "0.1.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e404e13820ea0df0eda93aa294e0c80de76a0daa6bec590d376fbec6d7810394"
+checksum = "878c2a1f1c70e5724fa28f101ca787b6a7e8ad5c5e4ae4ca3b0fa4a419fa9075"
 dependencies = [
  "monostate-impl",
  "serde",
@@ -1421,13 +1436,13 @@ dependencies = [
 
 [[package]]
 name = "monostate-impl"
-version = "0.1.10"
+version = "0.1.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "531c82a934da419bed3da09bd87d6e98c72f8d4aa755427b3b009c2b8b8c433c"
+checksum = "f686d68a09079e63b1d2c64aa305095887ce50565f00a922ebfaeeee0d9ba6ce"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.42",
+ "syn 2.0.48",
 ]
 
 [[package]]
@@ -1510,7 +1525,7 @@ version = "0.27.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2eb04e9c688eff1c89d72b407f168cf79bb9e867a9d3323ed6c01519eb9cc053"
 dependencies = [
- "bitflags 2.4.1",
+ "bitflags 2.4.2",
  "cfg-if",
  "libc",
 ]
@@ -1587,9 +1602,9 @@ checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3"
 
 [[package]]
 name = "object"
-version = "0.32.1"
+version = "0.32.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9cf5f9dd3933bd50a9e1f149ec995f39ae2c496d31fd772c1fd45ebc27e902b0"
+checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441"
 dependencies = [
  "memchr",
 ]
@@ -1624,11 +1639,11 @@ dependencies = [
 
 [[package]]
 name = "openssl"
-version = "0.10.62"
+version = "0.10.63"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8cde4d2d9200ad5909f8dac647e29482e07c3a35de8a13fce7c9c7747ad9f671"
+checksum = "15c9d69dd87a29568d4d017cfe8ec518706046a05184e5aea92d0af890b803c8"
 dependencies = [
- "bitflags 2.4.1",
+ "bitflags 2.4.2",
  "cfg-if",
  "foreign-types",
  "libc",
@@ -1645,7 +1660,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.42",
+ "syn 2.0.48",
 ]
 
 [[package]]
@@ -1656,9 +1671,9 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf"
 
 [[package]]
 name = "openssl-sys"
-version = "0.9.98"
+version = "0.9.99"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c1665caf8ab2dc9aef43d1c0023bd904633a6a05cb30b0ad59bec2ae986e57a7"
+checksum = "22e1bf214306098e4832460f797824c05d25aacdf896f64a985fb0fd992454ae"
 dependencies = [
  "cc",
  "libc",
@@ -1846,22 +1861,22 @@ dependencies = [
 
 [[package]]
 name = "pin-project"
-version = "1.1.3"
+version = "1.1.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fda4ed1c6c173e3fc7a83629421152e01d7b1f9b7f65fb301e490e8cfc656422"
+checksum = "0302c4a0442c456bd56f841aee5c3bfd17967563f6fadc9ceb9f9c23cf3807e0"
 dependencies = [
  "pin-project-internal",
 ]
 
 [[package]]
 name = "pin-project-internal"
-version = "1.1.3"
+version = "1.1.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4359fd9c9171ec6e8c62926d6faaf553a8dc3f64e1507e76da7911b4f6a04405"
+checksum = "266c042b60c9c76b8d53061e52b2e0d1116abc57cefc8c5cd671619a56ac3690"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.42",
+ "syn 2.0.48",
 ]
 
 [[package]]
@@ -1878,9 +1893,9 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
 
 [[package]]
 name = "pkg-config"
-version = "0.3.28"
+version = "0.3.29"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "69d3587f8a9e599cc7ec2c00e331f71c4e69a5f9a4b8a6efd5b07466b9736f9a"
+checksum = "2900ede94e305130c13ddd391e0ab7cbaeb783945ae07a279c268cb05109c6cb"
 
 [[package]]
 name = "portable-atomic"
@@ -1902,12 +1917,12 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
 
 [[package]]
 name = "prettyplease"
-version = "0.2.15"
+version = "0.2.16"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ae005bd773ab59b4725093fd7df83fd7892f7d8eafb48dbd7de6e024e4215f9d"
+checksum = "a41cf62165e97c7f814d2221421dbb9afcbcdb0a88068e5ea206e19951c2cbb5"
 dependencies = [
  "proc-macro2",
- "syn 2.0.42",
+ "syn 2.0.48",
 ]
 
 [[package]]
@@ -1936,9 +1951,9 @@ dependencies = [
 
 [[package]]
 name = "proc-macro2"
-version = "1.0.71"
+version = "1.0.78"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "75cb1540fadbd5b8fbccc4dddad2734eba435053f725621c070711a14bb5f4b8"
+checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae"
 dependencies = [
  "unicode-ident",
 ]
@@ -1980,7 +1995,7 @@ dependencies = [
  "prost 0.12.3",
  "prost-types",
  "regex",
- "syn 2.0.42",
+ "syn 2.0.48",
  "tempfile",
  "which",
 ]
@@ -2008,7 +2023,7 @@ dependencies = [
  "itertools 0.11.0",
  "proc-macro2",
  "quote",
- "syn 2.0.42",
+ "syn 2.0.48",
 ]
 
 [[package]]
@@ -2038,9 +2053,9 @@ dependencies = [
 
 [[package]]
 name = "quote"
-version = "1.0.33"
+version = "1.0.35"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae"
+checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef"
 dependencies = [
  "proc-macro2",
 ]
@@ -2081,7 +2096,7 @@ version = "0.23.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2e2e4cd95294a85c3b4446e63ef054eea43e0205b1fd60120c16b74ff7ff96ad"
 dependencies = [
- "bitflags 2.4.1",
+ "bitflags 2.4.2",
  "cassowary",
  "crossterm",
  "indoc",
@@ -2103,9 +2118,9 @@ dependencies = [
 
 [[package]]
 name = "rayon"
-version = "1.8.0"
+version = "1.8.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9c27db03db7734835b3f53954b534c91069375ce6ccaa2e065441e07d9b6cdb1"
+checksum = "fa7237101a77a10773db45d62004a272517633fbcc3df19d96455ede1122e051"
 dependencies = [
  "either",
  "rayon-core",
@@ -2124,9 +2139,9 @@ dependencies = [
 
 [[package]]
 name = "rayon-core"
-version = "1.12.0"
+version = "1.12.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5ce3fb6ad83f861aac485e76e1985cd109d9a3713802152be56c3b1f0e0658ed"
+checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2"
 dependencies = [
  "crossbeam-deque",
  "crossbeam-utils",
@@ -2154,13 +2169,13 @@ dependencies = [
 
 [[package]]
 name = "regex"
-version = "1.10.2"
+version = "1.10.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343"
+checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15"
 dependencies = [
  "aho-corasick",
  "memchr",
- "regex-automata 0.4.3",
+ "regex-automata 0.4.5",
  "regex-syntax 0.8.2",
 ]
 
@@ -2175,9 +2190,9 @@ dependencies = [
 
 [[package]]
 name = "regex-automata"
-version = "0.4.3"
+version = "0.4.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f"
+checksum = "5bb987efffd3c6d0d8f5f89510bb458559eab11e4f869acb20bf845e016259cd"
 dependencies = [
  "aho-corasick",
  "memchr",
@@ -2208,7 +2223,7 @@ version = "0.11.23"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "37b1ae8d9ac08420c66222fb9096fc5de435c3c48542bc5336c51892cffafb41"
 dependencies = [
- "base64 0.21.5",
+ "base64 0.21.7",
  "bytes",
  "encoding_rs",
  "futures-core",
@@ -2290,7 +2305,7 @@ dependencies = [
  "quote",
  "rust-embed-utils",
  "shellexpand",
- "syn 2.0.42",
+ "syn 2.0.48",
  "walkdir",
 ]
 
@@ -2321,11 +2336,11 @@ dependencies = [
 
 [[package]]
 name = "rustix"
-version = "0.38.28"
+version = "0.38.30"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "72e572a5e8ca657d7366229cdde4bd14c4eb5499a9573d4d366fe1b599daa316"
+checksum = "322394588aaf33c24007e8bb3238ee3e4c5c09c084ab32bc73890b99ff326bca"
 dependencies = [
- "bitflags 2.4.1",
+ "bitflags 2.4.2",
  "errno",
  "libc",
  "linux-raw-sys",
@@ -2362,7 +2377,7 @@ version = "1.0.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c"
 dependencies = [
- "base64 0.21.5",
+ "base64 0.21.7",
 ]
 
 [[package]]
@@ -2398,11 +2413,11 @@ dependencies = [
 
 [[package]]
 name = "schannel"
-version = "0.1.22"
+version = "0.1.23"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0c3733bf4cf7ea0880754e19cb5a462007c4a8c1914bff372ccc95b464f1df88"
+checksum = "fbc91545643bcf3a0bbb6569265615222618bdf33ce4ffbbd13c4bbd4c093534"
 dependencies = [
- "windows-sys 0.48.0",
+ "windows-sys 0.52.0",
 ]
 
 [[package]]
@@ -2446,35 +2461,38 @@ dependencies = [
 
 [[package]]
 name = "semver"
-version = "1.0.20"
+version = "1.0.21"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "836fa6a3e1e547f9a2c4040802ec865b5d85f4014efe00555d7090a3dcaa1090"
+checksum = "b97ed7a9823b74f99c7742f5336af7be5ecd3eeafcb1507d1fa93347b1d589b0"
+dependencies = [
+ "serde",
+]
 
 [[package]]
 name = "serde"
-version = "1.0.193"
+version = "1.0.195"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "25dd9975e68d0cb5aa1120c288333fc98731bd1dd12f561e468ea4728c042b89"
+checksum = "63261df402c67811e9ac6def069e4786148c4563f4b50fd4bf30aa370d626b02"
 dependencies = [
  "serde_derive",
 ]
 
 [[package]]
 name = "serde_derive"
-version = "1.0.193"
+version = "1.0.195"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "43576ca501357b9b071ac53cdc7da8ef0cbd9493d8df094cd821777ea6e894d3"
+checksum = "46fe8f8603d81ba86327b23a2e9cdf49e1255fb94a4c5f297f6ee0547178ea2c"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.42",
+ "syn 2.0.48",
 ]
 
 [[package]]
 name = "serde_json"
-version = "1.0.108"
+version = "1.0.111"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3d1c7e3eac408d115102c4c24ad393e0821bb3a5df4d506a80f85f7a742a526b"
+checksum = "176e46fa42316f18edd598015a5166857fc835ec732f5215eac6b7bdbf0a84f4"
 dependencies = [
  "itoa",
  "ryu",
@@ -2483,9 +2501,9 @@ dependencies = [
 
 [[package]]
 name = "serde_path_to_error"
-version = "0.1.14"
+version = "0.1.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4beec8bce849d58d06238cb50db2e1c417cfeafa4c63f692b15c82b7c80f8335"
+checksum = "ebd154a240de39fdebcf5775d2675c204d7c13cf39a4c697be6493c8e734337c"
 dependencies = [
  "itoa",
  "serde",
@@ -2588,9 +2606,9 @@ dependencies = [
 
 [[package]]
 name = "smallvec"
-version = "1.11.2"
+version = "1.13.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4dccd0940a2dcdf68d092b8cbab7dc0ad8fa938bf95787e1b916b0e3d0e8e970"
+checksum = "e6ecd384b10a64542d77071bd64bd7b231f4ed5940fba55e98c3de13824cf3d7"
 
 [[package]]
 name = "socket2"
@@ -2651,7 +2669,7 @@ dependencies = [
  "proc-macro2",
  "quote",
  "rustversion",
- "syn 2.0.42",
+ "syn 2.0.48",
 ]
 
 [[package]]
@@ -2667,9 +2685,9 @@ dependencies = [
 
 [[package]]
 name = "syn"
-version = "2.0.42"
+version = "2.0.48"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5b7d0a2c048d661a1a59fcd7355baa232f7ed34e0ee4df2eef3c1c1c0d3852d8"
+checksum = "0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -2684,16 +2702,16 @@ checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160"
 
 [[package]]
 name = "sysinfo"
-version = "0.29.11"
+version = "0.30.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cd727fc423c2060f6c92d9534cef765c65a6ed3f428a03d7def74a8c4348e666"
+checksum = "1fb4f3438c8f6389c864e61221cbc97e9bca98b4daf39a5beb7bea660f528bb2"
 dependencies = [
  "cfg-if",
  "core-foundation-sys",
  "libc",
  "ntapi",
  "once_cell",
- "winapi",
+ "windows",
 ]
 
 [[package]]
@@ -2743,20 +2761,20 @@ dependencies = [
 
 [[package]]
 name = "tempfile"
-version = "3.8.1"
+version = "3.9.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7ef1adac450ad7f4b3c28589471ade84f25f731a7a0fe30d71dfa9f60fd808e5"
+checksum = "01ce4141aa927a6d1bd34a041795abd0db1cccba5d5f24b009f694bdf3a1f3fa"
 dependencies = [
  "cfg-if",
  "fastrand",
  "redox_syscall",
  "rustix",
- "windows-sys 0.48.0",
+ "windows-sys 0.52.0",
 ]
 
 [[package]]
 name = "text-generation-benchmark"
-version = "1.3.4"
+version = "1.4.0"
 dependencies = [
  "average",
  "clap",
@@ -2777,7 +2795,7 @@ dependencies = [
 
 [[package]]
 name = "text-generation-client"
-version = "1.3.4"
+version = "1.4.0"
 dependencies = [
  "futures",
  "grpc-metadata",
@@ -2793,7 +2811,7 @@ dependencies = [
 
 [[package]]
 name = "text-generation-launcher"
-version = "1.3.4"
+version = "1.4.0"
 dependencies = [
  "clap",
  "ctrlc",
@@ -2809,7 +2827,7 @@ dependencies = [
 
 [[package]]
 name = "text-generation-router"
-version = "1.3.4"
+version = "1.4.0"
 dependencies = [
  "async-stream",
  "axum",
@@ -2846,22 +2864,22 @@ dependencies = [
 
 [[package]]
 name = "thiserror"
-version = "1.0.51"
+version = "1.0.56"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f11c217e1416d6f036b870f14e0413d480dbf28edbee1f877abaf0206af43bb7"
+checksum = "d54378c645627613241d077a3a79db965db602882668f9136ac42af9ecb730ad"
 dependencies = [
  "thiserror-impl",
 ]
 
 [[package]]
 name = "thiserror-impl"
-version = "1.0.51"
+version = "1.0.56"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "01742297787513b79cf8e29d1056ede1313e2420b7b3b15d0a768b4921f549df"
+checksum = "fa0faa943b50f3db30a20aa7e265dbc66076993efed8463e8de414e5d06d3471"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.42",
+ "syn 2.0.48",
 ]
 
 [[package]]
@@ -2991,7 +3009,7 @@ checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.42",
+ "syn 2.0.48",
 ]
 
 [[package]]
@@ -3049,7 +3067,7 @@ checksum = "3082666a3a6433f7f511c7192923fa1fe07c69332d3c6a2e6bb040b569199d5a"
 dependencies = [
  "async-trait",
  "axum",
- "base64 0.21.5",
+ "base64 0.21.7",
  "bytes",
  "futures-core",
  "futures-util",
@@ -3078,7 +3096,7 @@ dependencies = [
  "async-stream",
  "async-trait",
  "axum",
- "base64 0.21.5",
+ "base64 0.21.7",
  "bytes",
  "h2",
  "http",
@@ -3106,7 +3124,7 @@ dependencies = [
  "proc-macro2",
  "prost-build",
  "quote",
- "syn 2.0.42",
+ "syn 2.0.48",
 ]
 
 [[package]]
@@ -3135,7 +3153,7 @@ version = "0.4.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "61c5bb1d698276a2443e5ecfabc1008bf15a36c12e6a7176e7bf089ea9131140"
 dependencies = [
- "bitflags 2.4.1",
+ "bitflags 2.4.2",
  "bytes",
  "futures-core",
  "futures-util",
@@ -3179,7 +3197,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.42",
+ "syn 2.0.48",
 ]
 
 [[package]]
@@ -3297,9 +3315,9 @@ dependencies = [
 
 [[package]]
 name = "unicode-bidi"
-version = "0.3.14"
+version = "0.3.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6f2528f27a9eb2b21e69c95319b30bd0efd85d09c379741b0f78ea1d86be2416"
+checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75"
 
 [[package]]
 name = "unicode-ident"
@@ -3361,7 +3379,7 @@ version = "2.9.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f8cdd25c339e200129fe4de81451814e5228c9b771d57378817d6117cc2b3f97"
 dependencies = [
- "base64 0.21.5",
+ "base64 0.21.7",
  "flate2",
  "log",
  "native-tls",
@@ -3419,7 +3437,7 @@ dependencies = [
  "proc-macro2",
  "quote",
  "regex",
- "syn 2.0.42",
+ "syn 2.0.48",
 ]
 
 [[package]]
@@ -3452,11 +3470,14 @@ checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
 
 [[package]]
 name = "vergen"
-version = "8.2.6"
+version = "8.3.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1290fd64cc4e7d3c9b07d7f333ce0ce0007253e32870e632624835cc80b83939"
+checksum = "e27d6bdd219887a9eadd19e1c34f32e47fa332301184935c6d9bca26f3cca525"
 dependencies = [
  "anyhow",
+ "cargo_metadata",
+ "cfg-if",
+ "regex",
  "rustc_version",
  "rustversion",
  "sysinfo",
@@ -3496,9 +3517,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
 
 [[package]]
 name = "wasm-bindgen"
-version = "0.2.89"
+version = "0.2.90"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0ed0d4f68a3015cc185aff4db9506a015f4b96f95303897bfa23f846db54064e"
+checksum = "b1223296a201415c7fad14792dbefaace9bd52b62d33453ade1c5b5f07555406"
 dependencies = [
  "cfg-if",
  "wasm-bindgen-macro",
@@ -3506,24 +3527,24 @@ dependencies = [
 
 [[package]]
 name = "wasm-bindgen-backend"
-version = "0.2.89"
+version = "0.2.90"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1b56f625e64f3a1084ded111c4d5f477df9f8c92df113852fa5a374dbda78826"
+checksum = "fcdc935b63408d58a32f8cc9738a0bffd8f05cc7c002086c6ef20b7312ad9dcd"
 dependencies = [
  "bumpalo",
  "log",
  "once_cell",
  "proc-macro2",
  "quote",
- "syn 2.0.42",
+ "syn 2.0.48",
  "wasm-bindgen-shared",
 ]
 
 [[package]]
 name = "wasm-bindgen-futures"
-version = "0.4.39"
+version = "0.4.40"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ac36a15a220124ac510204aec1c3e5db8a22ab06fd6706d881dc6149f8ed9a12"
+checksum = "bde2032aeb86bdfaecc8b261eef3cba735cc426c1f3a3416d1e0791be95fc461"
 dependencies = [
  "cfg-if",
  "js-sys",
@@ -3533,9 +3554,9 @@ dependencies = [
 
 [[package]]
 name = "wasm-bindgen-macro"
-version = "0.2.89"
+version = "0.2.90"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0162dbf37223cd2afce98f3d0785506dcb8d266223983e4b5b525859e6e182b2"
+checksum = "3e4c238561b2d428924c49815533a8b9121c664599558a5d9ec51f8a1740a999"
 dependencies = [
  "quote",
  "wasm-bindgen-macro-support",
@@ -3543,28 +3564,28 @@ dependencies = [
 
 [[package]]
 name = "wasm-bindgen-macro-support"
-version = "0.2.89"
+version = "0.2.90"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f0eb82fcb7930ae6219a7ecfd55b217f5f0893484b7a13022ebb2b2bf20b5283"
+checksum = "bae1abb6806dc1ad9e560ed242107c0f6c84335f1749dd4e8ddb012ebd5e25a7"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.42",
+ "syn 2.0.48",
  "wasm-bindgen-backend",
  "wasm-bindgen-shared",
 ]
 
 [[package]]
 name = "wasm-bindgen-shared"
-version = "0.2.89"
+version = "0.2.90"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7ab9b36309365056cd639da3134bf87fa8f3d86008abf99e612384a6eecd459f"
+checksum = "4d91413b1c31d7539ba5ef2451af3f0b833a005eb27a631cec32bc0635a8602b"
 
 [[package]]
 name = "web-sys"
-version = "0.3.66"
+version = "0.3.67"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "50c24a44ec86bb68fbecd1b3efed7e85ea5621b39b35ef2766b66cd984f8010f"
+checksum = "58cd2333b6e0be7a39605f0e255892fd7418a682d8da8fe042fe25128794d2ed"
 dependencies = [
  "js-sys",
  "wasm-bindgen",
@@ -3629,6 +3650,25 @@ version = "0.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
 
+[[package]]
+name = "windows"
+version = "0.52.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e48a53791691ab099e5e2ad123536d0fff50652600abaf43bbf952894110d0be"
+dependencies = [
+ "windows-core",
+ "windows-targets 0.52.0",
+]
+
+[[package]]
+name = "windows-core"
+version = "0.52.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9"
+dependencies = [
+ "windows-targets 0.52.0",
+]
+
 [[package]]
 name = "windows-sys"
 version = "0.45.0"
@@ -3854,7 +3894,7 @@ checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.42",
+ "syn 2.0.48",
 ]
 
 [[package]]
diff --git a/Cargo.toml b/Cargo.toml
index 80e6e145..a328a368 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -9,7 +9,7 @@ members = [
 resolver = "2"
 
 [workspace.package]
-version = "1.3.4"
+version = "1.4.0"
 edition = "2021"
 authors = ["Olivier Dehaene"]
 homepage = "https://github.com/huggingface/text-generation-inference"
diff --git a/README.md b/README.md
index 73356f28..5fdb9f14 100644
--- a/README.md
+++ b/README.md
@@ -62,7 +62,7 @@ For a detailed starting guide, please see the [Quick Tour](https://huggingface.c
 model=HuggingFaceH4/zephyr-7b-beta
 volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run
 
-docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.3 --model-id $model
+docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.4 --model-id $model
 ```
 
 And then you can make requests like
@@ -76,7 +76,7 @@ curl 127.0.0.1:8080/generate \
 
 **Note:** To use NVIDIA GPUs, you need to install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html). We also recommend using NVIDIA drivers with CUDA version 12.2 or higher. For running the Docker container on a machine with no GPUs or CUDA support, it is enough to remove the `--gpus all` flag and add `--disable-custom-kernels`, please note CPU is not the intended platform for this project, so performance might be subpar.
 
-**Note:** TGI supports AMD Instinct MI210 and MI250 GPUs. Details can be found in the [Supported Hardware documentation](https://huggingface.co/docs/text-generation-inference/supported_models#supported-hardware). To use AMD GPUs, please use `docker run --device /dev/kfd --device /dev/dri --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.3-rocm --model-id $model` instead of the command above.
+**Note:** TGI supports AMD Instinct MI210 and MI250 GPUs. Details can be found in the [Supported Hardware documentation](https://huggingface.co/docs/text-generation-inference/supported_models#supported-hardware). To use AMD GPUs, please use `docker run --device /dev/kfd --device /dev/dri --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.4-rocm --model-id $model` instead of the command above.
 
 To see all options to serve your models (in the [code](https://github.com/huggingface/text-generation-inference/blob/main/launcher/src/main.rs) or in the cli):
 ```
@@ -106,7 +106,7 @@ model=meta-llama/Llama-2-7b-chat-hf
 volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run
 token=<your cli READ token>
 
-docker run --gpus all --shm-size 1g -e HUGGING_FACE_HUB_TOKEN=$token -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.3 --model-id $model
+docker run --gpus all --shm-size 1g -e HUGGING_FACE_HUB_TOKEN=$token -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.4 --model-id $model
 ```
 
 ### A note on Shared Memory (shm)
diff --git a/docs/openapi.json b/docs/openapi.json
index 9a9ed116..da3969df 100644
--- a/docs/openapi.json
+++ b/docs/openapi.json
@@ -1 +1,1293 @@
-{"openapi":"3.0.3","info":{"title":"Text Generation Inference","description":"Text Generation Webserver","contact":{"name":"Olivier Dehaene"},"license":{"name":"Apache 2.0","url":"https://www.apache.org/licenses/LICENSE-2.0"},"version":"1.3.4"},"paths":{"/":{"post":{"tags":["Text Generation Inference"],"summary":"Generate tokens if `stream == false` or a stream of token if `stream == true`","description":"Generate tokens if `stream == false` or a stream of token if `stream == true`","operationId":"compat_generate","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CompatGenerateRequest"}}},"required":true},"responses":{"200":{"description":"Generated Text","content":{"application/json":{"schema":{"$ref":"#/components/schemas/GenerateResponse"}},"text/event-stream":{"schema":{"$ref":"#/components/schemas/StreamResponse"}}}},"422":{"description":"Input validation error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Input validation error"}}}},"424":{"description":"Generation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Request failed during generation"}}}},"429":{"description":"Model is overloaded","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Model is overloaded"}}}},"500":{"description":"Incomplete generation","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Incomplete generation"}}}}}}},"/generate":{"post":{"tags":["Text Generation Inference"],"summary":"Generate tokens","description":"Generate tokens","operationId":"generate","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/GenerateRequest"}}},"required":true},"responses":{"200":{"description":"Generated 
Text","content":{"application/json":{"schema":{"$ref":"#/components/schemas/GenerateResponse"}}}},"422":{"description":"Input validation error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Input validation error"}}}},"424":{"description":"Generation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Request failed during generation"}}}},"429":{"description":"Model is overloaded","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Model is overloaded"}}}},"500":{"description":"Incomplete generation","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Incomplete generation"}}}}}}},"/generate_stream":{"post":{"tags":["Text Generation Inference"],"summary":"Generate a stream of token using Server-Sent Events","description":"Generate a stream of token using Server-Sent Events","operationId":"generate_stream","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/GenerateRequest"}}},"required":true},"responses":{"200":{"description":"Generated Text","content":{"text/event-stream":{"schema":{"$ref":"#/components/schemas/StreamResponse"}}}},"422":{"description":"Input validation error","content":{"text/event-stream":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Input validation error"}}}},"424":{"description":"Generation Error","content":{"text/event-stream":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Request failed during generation"}}}},"429":{"description":"Model is overloaded","content":{"text/event-stream":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Model is overloaded"}}}},"500":{"description":"Incomplete 
generation","content":{"text/event-stream":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Incomplete generation"}}}}}}},"/health":{"get":{"tags":["Text Generation Inference"],"summary":"Health check method","description":"Health check method","operationId":"health","responses":{"200":{"description":"Everything is working fine"},"503":{"description":"Text generation inference is down","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"unhealthy","error_type":"healthcheck"}}}}}}},"/info":{"get":{"tags":["Text Generation Inference"],"summary":"Text Generation Inference endpoint info","description":"Text Generation Inference endpoint info","operationId":"get_model_info","responses":{"200":{"description":"Served model info","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Info"}}}}}}},"/metrics":{"get":{"tags":["Text Generation Inference"],"summary":"Prometheus metrics scrape endpoint","description":"Prometheus metrics scrape endpoint","operationId":"metrics","responses":{"200":{"description":"Prometheus Metrics","content":{"text/plain":{"schema":{"type":"string"}}}}}}},"/tokenize":{"post":{"tags":["Text Generation Inference"],"summary":"Tokenize inputs","description":"Tokenize inputs","operationId":"tokenize","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/GenerateRequest"}}},"required":true},"responses":{"200":{"description":"Tokenized ids","content":{"application/json":{"schema":{"$ref":"#/components/schemas/TokenizeResponse"}}}},"404":{"description":"No tokenizer found","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"No fast tokenizer available"}}}}}}},"/v1/chat/completions":{"post":{"tags":["Text Generation Inference"],"summary":"Generate tokens","description":"Generate 
tokens","operationId":"chat_completions","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ChatRequest"}}},"required":true},"responses":{"200":{"description":"Generated Text","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ChatCompletionChunk"}}}},"422":{"description":"Input validation error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Input validation error"}}}},"424":{"description":"Generation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Request failed during generation"}}}},"429":{"description":"Model is overloaded","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Model is overloaded"}}}},"500":{"description":"Incomplete generation","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"example":{"error":"Incomplete 
generation"}}}}}}}},"components":{"schemas":{"BestOfSequence":{"type":"object","required":["generated_text","finish_reason","generated_tokens","prefill","tokens"],"properties":{"finish_reason":{"$ref":"#/components/schemas/FinishReason"},"generated_text":{"type":"string","example":"test"},"generated_tokens":{"type":"integer","format":"int32","example":1,"minimum":0},"prefill":{"type":"array","items":{"$ref":"#/components/schemas/PrefillToken"}},"seed":{"type":"integer","format":"int64","example":42,"nullable":true,"minimum":0},"tokens":{"type":"array","items":{"$ref":"#/components/schemas/Token"}},"top_tokens":{"type":"array","items":{"type":"array","items":{"$ref":"#/components/schemas/Token"}}}}},"ChatCompletion":{"type":"object","required":["id","object","created","model","system_fingerprint","choices","usage"],"properties":{"choices":{"type":"array","items":{"$ref":"#/components/schemas/ChatCompletionComplete"}},"created":{"type":"integer","format":"int64","example":"1706270835","minimum":0},"id":{"type":"string"},"model":{"type":"string","example":"mistralai/Mistral-7B-Instruct-v0.2"},"object":{"type":"string"},"system_fingerprint":{"type":"string"},"usage":{"$ref":"#/components/schemas/Usage"}}},"ChatCompletionChoice":{"type":"object","required":["index","delta"],"properties":{"delta":{"$ref":"#/components/schemas/ChatCompletionDelta"},"finish_reason":{"type":"string","nullable":true},"index":{"type":"integer","format":"int32","minimum":0},"logprobs":{"type":"number","format":"float","nullable":true}}},"ChatCompletionChunk":{"type":"object","required":["id","object","created","model","system_fingerprint","choices"],"properties":{"choices":{"type":"array","items":{"$ref":"#/components/schemas/ChatCompletionChoice"}},"created":{"type":"integer","format":"int64","example":"1706270978","minimum":0},"id":{"type":"string"},"model":{"type":"string","example":"mistralai/Mistral-7B-Instruct-v0.2"},"object":{"type":"string"},"system_fingerprint":{"type":"string"}}},"Cha
tCompletionDelta":{"type":"object","required":["role","content"],"properties":{"content":{"type":"string","example":"What is Deep Learning?"},"role":{"type":"string","example":"user"}}},"ChatRequest":{"type":"object","required":["model"],"properties":{"frequency_penalty":{"type":"number","format":"float","description":"Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far,\ndecreasing the model's likelihood to repeat the same line verbatim.","example":"1.0","nullable":true},"logit_bias":{"type":"array","items":{"type":"number","format":"float"},"description":"UNUSED\nModify the likelihood of specified tokens appearing in the completion. Accepts a JSON object that maps tokens\n(specified by their token ID in the tokenizer) to an associated bias value from -100 to 100. Mathematically,\nthe bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model,\nbut values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should\nresult in a ban or exclusive selection of the relevant token.","nullable":true},"logprobs":{"type":"boolean","description":"Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each\noutput token returned in the content of message.","example":"false","nullable":true},"max_tokens":{"type":"integer","format":"int32","description":"The maximum number of tokens that can be generated in the chat completion.","example":"32","nullable":true,"minimum":0},"messages":{"type":"array","items":{"$ref":"#/components/schemas/Message"},"description":"A list of messages comprising the conversation so far."},"model":{"type":"string","description":"UNUSED\nID of the model to use. 
See the model endpoint compatibility table for details on which models work with the Chat API.","example":"mistralai/Mistral-7B-Instruct-v0.2"},"n":{"type":"integer","format":"int32","description":"UNUSED\nHow many chat completion choices to generate for each input message. Note that you will be charged based on the\nnumber of generated tokens across all of the choices. Keep n as 1 to minimize costs.","example":"2","nullable":true,"minimum":0},"presence_penalty":{"type":"number","format":"float","description":"UNUSED\nNumber between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far,\nincreasing the model's likelihood to talk about new topics","example":0.1,"nullable":true},"seed":{"type":"integer","format":"int64","example":42,"nullable":true,"minimum":0},"stream":{"type":"boolean"},"temperature":{"type":"number","format":"float","description":"What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while\nlower values like 0.2 will make it more focused and deterministic.\n\nWe generally recommend altering this or `top_p` but not both.","example":1.0,"nullable":true},"top_logprobs":{"type":"integer","format":"int32","description":"UNUSED\nAn integer between 0 and 5 specifying the number of most likely tokens to return at each token position, each with\nan associated log probability. logprobs must be set to true if this parameter is used.","example":"5","nullable":true,"minimum":0},"top_p":{"type":"number","format":"float","description":"An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the\ntokens with top_p probability mass. 
So 0.1 means only the tokens comprising the top 10% probability mass are considered.","example":0.95,"nullable":true}}},"CompatGenerateRequest":{"type":"object","required":["inputs"],"properties":{"inputs":{"type":"string","example":"My name is Olivier and I"},"parameters":{"$ref":"#/components/schemas/GenerateParameters"},"stream":{"type":"boolean","default":"false"}}},"Details":{"type":"object","required":["finish_reason","generated_tokens","prefill","tokens"],"properties":{"best_of_sequences":{"type":"array","items":{"$ref":"#/components/schemas/BestOfSequence"},"nullable":true},"finish_reason":{"$ref":"#/components/schemas/FinishReason"},"generated_tokens":{"type":"integer","format":"int32","example":1,"minimum":0},"prefill":{"type":"array","items":{"$ref":"#/components/schemas/PrefillToken"}},"seed":{"type":"integer","format":"int64","example":42,"nullable":true,"minimum":0},"tokens":{"type":"array","items":{"$ref":"#/components/schemas/Token"}},"top_tokens":{"type":"array","items":{"type":"array","items":{"$ref":"#/components/schemas/Token"}}}}},"ErrorResponse":{"type":"object","required":["error","error_type"],"properties":{"error":{"type":"string"},"error_type":{"type":"string"}}},"FinishReason":{"type":"string","enum":["length","eos_token","stop_sequence"],"example":"Length"},"GenerateParameters":{"type":"object","properties":{"best_of":{"type":"integer","default":"null","example":1,"nullable":true,"minimum":0,"exclusiveMinimum":0},"decoder_input_details":{"type":"boolean","default":"true"},"details":{"type":"boolean","default":"true"},"do_sample":{"type":"boolean","default":"false","example":true},"max_new_tokens":{"type":"integer","format":"int32","default":"100","example":"20","nullable":true,"minimum":0},"repetition_penalty":{"type":"number","format":"float","default":"null","example":1.03,"nullable":true,"exclusiveMinimum":0},"return_full_text":{"type":"boolean","default":"null","example":false,"nullable":true},"seed":{"type":"integer","format":"int64"
,"default":"null","example":"null","nullable":true,"minimum":0,"exclusiveMinimum":0},"stop":{"type":"array","items":{"type":"string"},"example":["photographer"],"maxItems":4},"temperature":{"type":"number","format":"float","default":"null","example":0.5,"nullable":true,"exclusiveMinimum":0},"top_k":{"type":"integer","format":"int32","default":"null","example":10,"nullable":true,"exclusiveMinimum":0},"top_n_tokens":{"type":"integer","format":"int32","default":"null","example":5,"nullable":true,"minimum":0,"exclusiveMinimum":0},"top_p":{"type":"number","format":"float","default":"null","example":0.95,"nullable":true,"maximum":1,"exclusiveMinimum":0},"truncate":{"type":"integer","default":"null","example":"null","nullable":true,"minimum":0},"typical_p":{"type":"number","format":"float","default":"null","example":0.95,"nullable":true,"maximum":1,"exclusiveMinimum":0},"watermark":{"type":"boolean","default":"false","example":true}}},"GenerateRequest":{"type":"object","required":["inputs"],"properties":{"inputs":{"type":"string","example":"My name is Olivier and I"},"parameters":{"$ref":"#/components/schemas/GenerateParameters"}}},"GenerateResponse":{"type":"object","required":["generated_text"],"properties":{"details":{"allOf":[{"$ref":"#/components/schemas/Details"}],"nullable":true},"generated_text":{"type":"string","example":"test"}}},"Info":{"type":"object","required":["model_id","model_dtype","model_device_type","max_concurrent_requests","max_best_of","max_stop_sequences","max_input_length","max_total_tokens","waiting_served_ratio","max_batch_total_tokens","max_waiting_tokens","validation_workers","version"],"properties":{"docker_label":{"type":"string","example":"null","nullable":true},"max_batch_total_tokens":{"type":"integer","format":"int32","example":"32000","minimum":0},"max_best_of":{"type":"integer","example":"2","minimum":0},"max_concurrent_requests":{"type":"integer","description":"Router 
Parameters","example":"128","minimum":0},"max_input_length":{"type":"integer","example":"1024","minimum":0},"max_stop_sequences":{"type":"integer","example":"4","minimum":0},"max_total_tokens":{"type":"integer","example":"2048","minimum":0},"max_waiting_tokens":{"type":"integer","example":"20","minimum":0},"model_device_type":{"type":"string","example":"cuda"},"model_dtype":{"type":"string","example":"torch.float16"},"model_id":{"type":"string","description":"Model info","example":"bigscience/blomm-560m"},"model_pipeline_tag":{"type":"string","example":"text-generation","nullable":true},"model_sha":{"type":"string","example":"e985a63cdc139290c5f700ff1929f0b5942cced2","nullable":true},"sha":{"type":"string","example":"null","nullable":true},"validation_workers":{"type":"integer","example":"2","minimum":0},"version":{"type":"string","description":"Router Info","example":"0.5.0"},"waiting_served_ratio":{"type":"number","format":"float","example":"1.2"}}},"Message":{"type":"object","required":["role","content"],"properties":{"content":{"type":"string","example":"My name is David and 
I"},"role":{"type":"string","example":"user"}}},"PrefillToken":{"type":"object","required":["id","text","logprob"],"properties":{"id":{"type":"integer","format":"int32","example":0,"minimum":0},"logprob":{"type":"number","format":"float","example":-0.34,"nullable":true},"text":{"type":"string","example":"test"}}},"SimpleToken":{"type":"object","required":["id","text","start","stop"],"properties":{"id":{"type":"integer","format":"int32","example":0,"minimum":0},"start":{"type":"integer","example":0,"minimum":0},"stop":{"type":"integer","example":2,"minimum":0},"text":{"type":"string","example":"test"}}},"StreamDetails":{"type":"object","required":["finish_reason","generated_tokens"],"properties":{"finish_reason":{"$ref":"#/components/schemas/FinishReason"},"generated_tokens":{"type":"integer","format":"int32","example":1,"minimum":0},"seed":{"type":"integer","format":"int64","example":42,"nullable":true,"minimum":0}}},"StreamResponse":{"type":"object","required":["index","token"],"properties":{"details":{"allOf":[{"$ref":"#/components/schemas/StreamDetails"}],"default":"null","nullable":true},"generated_text":{"type":"string","default":"null","example":"test","nullable":true},"index":{"type":"integer","format":"int32","minimum":0},"token":{"$ref":"#/components/schemas/Token"},"top_tokens":{"type":"array","items":{"$ref":"#/components/schemas/Token"}}}},"Token":{"type":"object","required":["id","text","logprob","special"],"properties":{"id":{"type":"integer","format":"int32","example":0,"minimum":0},"logprob":{"type":"number","format":"float","example":-0.34,"nullable":true},"special":{"type":"boolean","example":"false"},"text":{"type":"string","example":"test"}}},"TokenizeResponse":{"type":"array","items":{"$ref":"#/components/schemas/SimpleToken"}}}},"tags":[{"name":"Text Generation Inference","description":"Hugging Face Text Generation Inference API"}]}
\ No newline at end of file
+{
+  "openapi": "3.0.3",
+  "info": {
+    "title": "Text Generation Inference",
+    "description": "Text Generation Webserver",
+    "contact": {
+      "name": "Olivier Dehaene"
+    },
+    "license": {
+      "name": "Apache 2.0",
+      "url": "https://www.apache.org/licenses/LICENSE-2.0"
+    },
+    "version": "1.4.0"
+  },
+  "paths": {
+    "/": {
+      "post": {
+        "tags": [
+          "Text Generation Inference"
+        ],
+        "summary": "Generate tokens if `stream == false` or a stream of token if `stream == true`",
+        "description": "Generate tokens if `stream == false` or a stream of token if `stream == true`",
+        "operationId": "compat_generate",
+        "requestBody": {
+          "content": {
+            "application/json": {
+              "schema": {
+                "$ref": "#/components/schemas/CompatGenerateRequest"
+              }
+            }
+          },
+          "required": true
+        },
+        "responses": {
+          "200": {
+            "description": "Generated Text",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/GenerateResponse"
+                }
+              },
+              "text/event-stream": {
+                "schema": {
+                  "$ref": "#/components/schemas/StreamResponse"
+                }
+              }
+            }
+          },
+          "422": {
+            "description": "Input validation error",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorResponse"
+                },
+                "example": {
+                  "error": "Input validation error"
+                }
+              }
+            }
+          },
+          "424": {
+            "description": "Generation Error",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorResponse"
+                },
+                "example": {
+                  "error": "Request failed during generation"
+                }
+              }
+            }
+          },
+          "429": {
+            "description": "Model is overloaded",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorResponse"
+                },
+                "example": {
+                  "error": "Model is overloaded"
+                }
+              }
+            }
+          },
+          "500": {
+            "description": "Incomplete generation",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorResponse"
+                },
+                "example": {
+                  "error": "Incomplete generation"
+                }
+              }
+            }
+          }
+        }
+      }
+    },
+    "/generate": {
+      "post": {
+        "tags": [
+          "Text Generation Inference"
+        ],
+        "summary": "Generate tokens",
+        "description": "Generate tokens",
+        "operationId": "generate",
+        "requestBody": {
+          "content": {
+            "application/json": {
+              "schema": {
+                "$ref": "#/components/schemas/GenerateRequest"
+              }
+            }
+          },
+          "required": true
+        },
+        "responses": {
+          "200": {
+            "description": "Generated Text",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/GenerateResponse"
+                }
+              }
+            }
+          },
+          "422": {
+            "description": "Input validation error",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorResponse"
+                },
+                "example": {
+                  "error": "Input validation error"
+                }
+              }
+            }
+          },
+          "424": {
+            "description": "Generation Error",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorResponse"
+                },
+                "example": {
+                  "error": "Request failed during generation"
+                }
+              }
+            }
+          },
+          "429": {
+            "description": "Model is overloaded",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorResponse"
+                },
+                "example": {
+                  "error": "Model is overloaded"
+                }
+              }
+            }
+          },
+          "500": {
+            "description": "Incomplete generation",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorResponse"
+                },
+                "example": {
+                  "error": "Incomplete generation"
+                }
+              }
+            }
+          }
+        }
+      }
+    },
+    "/generate_stream": {
+      "post": {
+        "tags": [
+          "Text Generation Inference"
+        ],
+        "summary": "Generate a stream of token using Server-Sent Events",
+        "description": "Generate a stream of token using Server-Sent Events",
+        "operationId": "generate_stream",
+        "requestBody": {
+          "content": {
+            "application/json": {
+              "schema": {
+                "$ref": "#/components/schemas/GenerateRequest"
+              }
+            }
+          },
+          "required": true
+        },
+        "responses": {
+          "200": {
+            "description": "Generated Text",
+            "content": {
+              "text/event-stream": {
+                "schema": {
+                  "$ref": "#/components/schemas/StreamResponse"
+                }
+              }
+            }
+          },
+          "422": {
+            "description": "Input validation error",
+            "content": {
+              "text/event-stream": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorResponse"
+                },
+                "example": {
+                  "error": "Input validation error"
+                }
+              }
+            }
+          },
+          "424": {
+            "description": "Generation Error",
+            "content": {
+              "text/event-stream": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorResponse"
+                },
+                "example": {
+                  "error": "Request failed during generation"
+                }
+              }
+            }
+          },
+          "429": {
+            "description": "Model is overloaded",
+            "content": {
+              "text/event-stream": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorResponse"
+                },
+                "example": {
+                  "error": "Model is overloaded"
+                }
+              }
+            }
+          },
+          "500": {
+            "description": "Incomplete generation",
+            "content": {
+              "text/event-stream": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorResponse"
+                },
+                "example": {
+                  "error": "Incomplete generation"
+                }
+              }
+            }
+          }
+        }
+      }
+    },
+    "/health": {
+      "get": {
+        "tags": [
+          "Text Generation Inference"
+        ],
+        "summary": "Health check method",
+        "description": "Health check method",
+        "operationId": "health",
+        "responses": {
+          "200": {
+            "description": "Everything is working fine"
+          },
+          "503": {
+            "description": "Text generation inference is down",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorResponse"
+                },
+                "example": {
+                  "error": "unhealthy",
+                  "error_type": "healthcheck"
+                }
+              }
+            }
+          }
+        }
+      }
+    },
+    "/info": {
+      "get": {
+        "tags": [
+          "Text Generation Inference"
+        ],
+        "summary": "Text Generation Inference endpoint info",
+        "description": "Text Generation Inference endpoint info",
+        "operationId": "get_model_info",
+        "responses": {
+          "200": {
+            "description": "Served model info",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/Info"
+                }
+              }
+            }
+          }
+        }
+      }
+    },
+    "/metrics": {
+      "get": {
+        "tags": [
+          "Text Generation Inference"
+        ],
+        "summary": "Prometheus metrics scrape endpoint",
+        "description": "Prometheus metrics scrape endpoint",
+        "operationId": "metrics",
+        "responses": {
+          "200": {
+            "description": "Prometheus Metrics",
+            "content": {
+              "text/plain": {
+                "schema": {
+                  "type": "string"
+                }
+              }
+            }
+          }
+        }
+      }
+    },
+    "/tokenize": {
+      "post": {
+        "tags": [
+          "Text Generation Inference"
+        ],
+        "summary": "Tokenize inputs",
+        "description": "Tokenize inputs",
+        "operationId": "tokenize",
+        "requestBody": {
+          "content": {
+            "application/json": {
+              "schema": {
+                "$ref": "#/components/schemas/GenerateRequest"
+              }
+            }
+          },
+          "required": true
+        },
+        "responses": {
+          "200": {
+            "description": "Tokenized ids",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/TokenizeResponse"
+                }
+              }
+            }
+          },
+          "404": {
+            "description": "No tokenizer found",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorResponse"
+                },
+                "example": {
+                  "error": "No fast tokenizer available"
+                }
+              }
+            }
+          }
+        }
+      }
+    },
+    "/v1/chat/completions": {
+      "post": {
+        "tags": [
+          "Text Generation Inference"
+        ],
+        "summary": "Generate tokens",
+        "description": "Generate tokens",
+        "operationId": "chat_completions",
+        "requestBody": {
+          "content": {
+            "application/json": {
+              "schema": {
+                "$ref": "#/components/schemas/ChatRequest"
+              }
+            }
+          },
+          "required": true
+        },
+        "responses": {
+          "200": {
+            "description": "Generated Text",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ChatCompletionChunk"
+                }
+              }
+            }
+          },
+          "422": {
+            "description": "Input validation error",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorResponse"
+                },
+                "example": {
+                  "error": "Input validation error"
+                }
+              }
+            }
+          },
+          "424": {
+            "description": "Generation Error",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorResponse"
+                },
+                "example": {
+                  "error": "Request failed during generation"
+                }
+              }
+            }
+          },
+          "429": {
+            "description": "Model is overloaded",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorResponse"
+                },
+                "example": {
+                  "error": "Model is overloaded"
+                }
+              }
+            }
+          },
+          "500": {
+            "description": "Incomplete generation",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorResponse"
+                },
+                "example": {
+                  "error": "Incomplete generation"
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  },
+  "components": {
+    "schemas": {
+      "BestOfSequence": {
+        "type": "object",
+        "required": [
+          "generated_text",
+          "finish_reason",
+          "generated_tokens",
+          "prefill",
+          "tokens"
+        ],
+        "properties": {
+          "finish_reason": {
+            "$ref": "#/components/schemas/FinishReason"
+          },
+          "generated_text": {
+            "type": "string",
+            "example": "test"
+          },
+          "generated_tokens": {
+            "type": "integer",
+            "format": "int32",
+            "example": 1,
+            "minimum": 0
+          },
+          "prefill": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/PrefillToken"
+            }
+          },
+          "seed": {
+            "type": "integer",
+            "format": "int64",
+            "example": 42,
+            "nullable": true,
+            "minimum": 0
+          },
+          "tokens": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/Token"
+            }
+          },
+          "top_tokens": {
+            "type": "array",
+            "items": {
+              "type": "array",
+              "items": {
+                "$ref": "#/components/schemas/Token"
+              }
+            }
+          }
+        }
+      },
+      "ChatCompletion": {
+        "type": "object",
+        "required": [
+          "id",
+          "object",
+          "created",
+          "model",
+          "system_fingerprint",
+          "choices",
+          "usage"
+        ],
+        "properties": {
+          "choices": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/ChatCompletionComplete"
+            }
+          },
+          "created": {
+            "type": "integer",
+            "format": "int64",
+            "example": "1706270835",
+            "minimum": 0
+          },
+          "id": {
+            "type": "string"
+          },
+          "model": {
+            "type": "string",
+            "example": "mistralai/Mistral-7B-Instruct-v0.2"
+          },
+          "object": {
+            "type": "string"
+          },
+          "system_fingerprint": {
+            "type": "string"
+          },
+          "usage": {
+            "$ref": "#/components/schemas/Usage"
+          }
+        }
+      },
+      "ChatCompletionChoice": {
+        "type": "object",
+        "required": [
+          "index",
+          "delta"
+        ],
+        "properties": {
+          "delta": {
+            "$ref": "#/components/schemas/ChatCompletionDelta"
+          },
+          "finish_reason": {
+            "type": "string",
+            "nullable": true
+          },
+          "index": {
+            "type": "integer",
+            "format": "int32",
+            "minimum": 0
+          },
+          "logprobs": {
+            "type": "number",
+            "format": "float",
+            "nullable": true
+          }
+        }
+      },
+      "ChatCompletionChunk": {
+        "type": "object",
+        "required": [
+          "id",
+          "object",
+          "created",
+          "model",
+          "system_fingerprint",
+          "choices"
+        ],
+        "properties": {
+          "choices": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/ChatCompletionChoice"
+            }
+          },
+          "created": {
+            "type": "integer",
+            "format": "int64",
+            "example": "1706270978",
+            "minimum": 0
+          },
+          "id": {
+            "type": "string"
+          },
+          "model": {
+            "type": "string",
+            "example": "mistralai/Mistral-7B-Instruct-v0.2"
+          },
+          "object": {
+            "type": "string"
+          },
+          "system_fingerprint": {
+            "type": "string"
+          }
+        }
+      },
+      "ChatCompletionDelta": {
+        "type": "object",
+        "required": [
+          "role",
+          "content"
+        ],
+        "properties": {
+          "content": {
+            "type": "string",
+            "example": "What is Deep Learning?"
+          },
+          "role": {
+            "type": "string",
+            "example": "user"
+          }
+        }
+      },
+      "ChatRequest": {
+        "type": "object",
+        "required": [
+          "model"
+        ],
+        "properties": {
+          "frequency_penalty": {
+            "type": "number",
+            "format": "float",
+            "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far,\ndecreasing the model's likelihood to repeat the same line verbatim.",
+            "example": "1.0",
+            "nullable": true
+          },
+          "logit_bias": {
+            "type": "array",
+            "items": {
+              "type": "number",
+              "format": "float"
+            },
+            "description": "UNUSED\nModify the likelihood of specified tokens appearing in the completion. Accepts a JSON object that maps tokens\n(specified by their token ID in the tokenizer) to an associated bias value from -100 to 100. Mathematically,\nthe bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model,\nbut values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should\nresult in a ban or exclusive selection of the relevant token.",
+            "nullable": true
+          },
+          "logprobs": {
+            "type": "boolean",
+            "description": "Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each\noutput token returned in the content of message.",
+            "example": "false",
+            "nullable": true
+          },
+          "max_tokens": {
+            "type": "integer",
+            "format": "int32",
+            "description": "The maximum number of tokens that can be generated in the chat completion.",
+            "example": "32",
+            "nullable": true,
+            "minimum": 0
+          },
+          "messages": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/Message"
+            },
+            "description": "A list of messages comprising the conversation so far."
+          },
+          "model": {
+            "type": "string",
+            "description": "UNUSED\nID of the model to use. See the model endpoint compatibility table for details on which models work with the Chat API.",
+            "example": "mistralai/Mistral-7B-Instruct-v0.2"
+          },
+          "n": {
+            "type": "integer",
+            "format": "int32",
+            "description": "UNUSED\nHow many chat completion choices to generate for each input message. Note that you will be charged based on the\nnumber of generated tokens across all of the choices. Keep n as 1 to minimize costs.",
+            "example": "2",
+            "nullable": true,
+            "minimum": 0
+          },
+          "presence_penalty": {
+            "type": "number",
+            "format": "float",
+            "description": "UNUSED\nNumber between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far,\nincreasing the model's likelihood to talk about new topics",
+            "example": 0.1,
+            "nullable": true
+          },
+          "seed": {
+            "type": "integer",
+            "format": "int64",
+            "example": 42,
+            "nullable": true,
+            "minimum": 0
+          },
+          "stream": {
+            "type": "boolean"
+          },
+          "temperature": {
+            "type": "number",
+            "format": "float",
+            "description": "What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while\nlower values like 0.2 will make it more focused and deterministic.\n\nWe generally recommend altering this or `top_p` but not both.",
+            "example": 1.0,
+            "nullable": true
+          },
+          "top_logprobs": {
+            "type": "integer",
+            "format": "int32",
+            "description": "UNUSED\nAn integer between 0 and 5 specifying the number of most likely tokens to return at each token position, each with\nan associated log probability. logprobs must be set to true if this parameter is used.",
+            "example": "5",
+            "nullable": true,
+            "minimum": 0
+          },
+          "top_p": {
+            "type": "number",
+            "format": "float",
+            "description": "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the\ntokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.",
+            "example": 0.95,
+            "nullable": true
+          }
+        }
+      },
+      "CompatGenerateRequest": {
+        "type": "object",
+        "required": [
+          "inputs"
+        ],
+        "properties": {
+          "inputs": {
+            "type": "string",
+            "example": "My name is Olivier and I"
+          },
+          "parameters": {
+            "$ref": "#/components/schemas/GenerateParameters"
+          },
+          "stream": {
+            "type": "boolean",
+            "default": "false"
+          }
+        }
+      },
+      "Details": {
+        "type": "object",
+        "required": [
+          "finish_reason",
+          "generated_tokens",
+          "prefill",
+          "tokens"
+        ],
+        "properties": {
+          "best_of_sequences": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/BestOfSequence"
+            },
+            "nullable": true
+          },
+          "finish_reason": {
+            "$ref": "#/components/schemas/FinishReason"
+          },
+          "generated_tokens": {
+            "type": "integer",
+            "format": "int32",
+            "example": 1,
+            "minimum": 0
+          },
+          "prefill": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/PrefillToken"
+            }
+          },
+          "seed": {
+            "type": "integer",
+            "format": "int64",
+            "example": 42,
+            "nullable": true,
+            "minimum": 0
+          },
+          "tokens": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/Token"
+            }
+          },
+          "top_tokens": {
+            "type": "array",
+            "items": {
+              "type": "array",
+              "items": {
+                "$ref": "#/components/schemas/Token"
+              }
+            }
+          }
+        }
+      },
+      "ErrorResponse": {
+        "type": "object",
+        "required": [
+          "error",
+          "error_type"
+        ],
+        "properties": {
+          "error": {
+            "type": "string"
+          },
+          "error_type": {
+            "type": "string"
+          }
+        }
+      },
+      "FinishReason": {
+        "type": "string",
+        "enum": [
+          "length",
+          "eos_token",
+          "stop_sequence"
+        ],
+        "example": "length"
+      },
+      "GenerateParameters": {
+        "type": "object",
+        "properties": {
+          "best_of": {
+            "type": "integer",
+            "default": "null",
+            "example": 1,
+            "nullable": true,
+            "minimum": 0,
+            "exclusiveMinimum": 0
+          },
+          "decoder_input_details": {
+            "type": "boolean",
+            "default": "true"
+          },
+          "details": {
+            "type": "boolean",
+            "default": "true"
+          },
+          "do_sample": {
+            "type": "boolean",
+            "default": "false",
+            "example": true
+          },
+          "max_new_tokens": {
+            "type": "integer",
+            "format": "int32",
+            "default": "100",
+            "example": "20",
+            "nullable": true,
+            "minimum": 0
+          },
+          "repetition_penalty": {
+            "type": "number",
+            "format": "float",
+            "default": "null",
+            "example": 1.03,
+            "nullable": true,
+            "exclusiveMinimum": 0
+          },
+          "return_full_text": {
+            "type": "boolean",
+            "default": "null",
+            "example": false,
+            "nullable": true
+          },
+          "seed": {
+            "type": "integer",
+            "format": "int64",
+            "default": "null",
+            "example": "null",
+            "nullable": true,
+            "minimum": 0,
+            "exclusiveMinimum": 0
+          },
+          "stop": {
+            "type": "array",
+            "items": {
+              "type": "string"
+            },
+            "example": [
+              "photographer"
+            ],
+            "maxItems": 4
+          },
+          "temperature": {
+            "type": "number",
+            "format": "float",
+            "default": "null",
+            "example": 0.5,
+            "nullable": true,
+            "exclusiveMinimum": 0
+          },
+          "top_k": {
+            "type": "integer",
+            "format": "int32",
+            "default": "null",
+            "example": 10,
+            "nullable": true,
+            "exclusiveMinimum": 0
+          },
+          "top_n_tokens": {
+            "type": "integer",
+            "format": "int32",
+            "default": "null",
+            "example": 5,
+            "nullable": true,
+            "minimum": 0,
+            "exclusiveMinimum": 0
+          },
+          "top_p": {
+            "type": "number",
+            "format": "float",
+            "default": "null",
+            "example": 0.95,
+            "nullable": true,
+            "maximum": 1,
+            "exclusiveMinimum": 0
+          },
+          "truncate": {
+            "type": "integer",
+            "default": "null",
+            "example": "null",
+            "nullable": true,
+            "minimum": 0
+          },
+          "typical_p": {
+            "type": "number",
+            "format": "float",
+            "default": "null",
+            "example": 0.95,
+            "nullable": true,
+            "maximum": 1,
+            "exclusiveMinimum": 0
+          },
+          "watermark": {
+            "type": "boolean",
+            "default": "false",
+            "example": true
+          }
+        }
+      },
+      "GenerateRequest": {
+        "type": "object",
+        "required": [
+          "inputs"
+        ],
+        "properties": {
+          "inputs": {
+            "type": "string",
+            "example": "My name is Olivier and I"
+          },
+          "parameters": {
+            "$ref": "#/components/schemas/GenerateParameters"
+          }
+        }
+      },
+      "GenerateResponse": {
+        "type": "object",
+        "required": [
+          "generated_text"
+        ],
+        "properties": {
+          "details": {
+            "allOf": [
+              {
+                "$ref": "#/components/schemas/Details"
+              }
+            ],
+            "nullable": true
+          },
+          "generated_text": {
+            "type": "string",
+            "example": "test"
+          }
+        }
+      },
+      "Info": {
+        "type": "object",
+        "required": [
+          "model_id",
+          "model_dtype",
+          "model_device_type",
+          "max_concurrent_requests",
+          "max_best_of",
+          "max_stop_sequences",
+          "max_input_length",
+          "max_total_tokens",
+          "waiting_served_ratio",
+          "max_batch_total_tokens",
+          "max_waiting_tokens",
+          "validation_workers",
+          "version"
+        ],
+        "properties": {
+          "docker_label": {
+            "type": "string",
+            "example": "null",
+            "nullable": true
+          },
+          "max_batch_total_tokens": {
+            "type": "integer",
+            "format": "int32",
+            "example": "32000",
+            "minimum": 0
+          },
+          "max_best_of": {
+            "type": "integer",
+            "example": "2",
+            "minimum": 0
+          },
+          "max_concurrent_requests": {
+            "type": "integer",
+            "description": "Router Parameters",
+            "example": "128",
+            "minimum": 0
+          },
+          "max_input_length": {
+            "type": "integer",
+            "example": "1024",
+            "minimum": 0
+          },
+          "max_stop_sequences": {
+            "type": "integer",
+            "example": "4",
+            "minimum": 0
+          },
+          "max_total_tokens": {
+            "type": "integer",
+            "example": "2048",
+            "minimum": 0
+          },
+          "max_waiting_tokens": {
+            "type": "integer",
+            "example": "20",
+            "minimum": 0
+          },
+          "model_device_type": {
+            "type": "string",
+            "example": "cuda"
+          },
+          "model_dtype": {
+            "type": "string",
+            "example": "torch.float16"
+          },
+          "model_id": {
+            "type": "string",
+            "description": "Model info",
+            "example": "bigscience/bloom-560m"
+          },
+          "model_pipeline_tag": {
+            "type": "string",
+            "example": "text-generation",
+            "nullable": true
+          },
+          "model_sha": {
+            "type": "string",
+            "example": "e985a63cdc139290c5f700ff1929f0b5942cced2",
+            "nullable": true
+          },
+          "sha": {
+            "type": "string",
+            "example": "null",
+            "nullable": true
+          },
+          "validation_workers": {
+            "type": "integer",
+            "example": "2",
+            "minimum": 0
+          },
+          "version": {
+            "type": "string",
+            "description": "Router Info",
+            "example": "0.5.0"
+          },
+          "waiting_served_ratio": {
+            "type": "number",
+            "format": "float",
+            "example": "1.2"
+          }
+        }
+      },
+      "Message": {
+        "type": "object",
+        "required": [
+          "role",
+          "content"
+        ],
+        "properties": {
+          "content": {
+            "type": "string",
+            "example": "My name is David and I"
+          },
+          "role": {
+            "type": "string",
+            "example": "user"
+          }
+        }
+      },
+      "PrefillToken": {
+        "type": "object",
+        "required": [
+          "id",
+          "text",
+          "logprob"
+        ],
+        "properties": {
+          "id": {
+            "type": "integer",
+            "format": "int32",
+            "example": 0,
+            "minimum": 0
+          },
+          "logprob": {
+            "type": "number",
+            "format": "float",
+            "example": -0.34,
+            "nullable": true
+          },
+          "text": {
+            "type": "string",
+            "example": "test"
+          }
+        }
+      },
+      "SimpleToken": {
+        "type": "object",
+        "required": [
+          "id",
+          "text",
+          "start",
+          "stop"
+        ],
+        "properties": {
+          "id": {
+            "type": "integer",
+            "format": "int32",
+            "example": 0,
+            "minimum": 0
+          },
+          "start": {
+            "type": "integer",
+            "example": 0,
+            "minimum": 0
+          },
+          "stop": {
+            "type": "integer",
+            "example": 2,
+            "minimum": 0
+          },
+          "text": {
+            "type": "string",
+            "example": "test"
+          }
+        }
+      },
+      "StreamDetails": {
+        "type": "object",
+        "required": [
+          "finish_reason",
+          "generated_tokens"
+        ],
+        "properties": {
+          "finish_reason": {
+            "$ref": "#/components/schemas/FinishReason"
+          },
+          "generated_tokens": {
+            "type": "integer",
+            "format": "int32",
+            "example": 1,
+            "minimum": 0
+          },
+          "seed": {
+            "type": "integer",
+            "format": "int64",
+            "example": 42,
+            "nullable": true,
+            "minimum": 0
+          }
+        }
+      },
+      "StreamResponse": {
+        "type": "object",
+        "required": [
+          "index",
+          "token"
+        ],
+        "properties": {
+          "details": {
+            "allOf": [
+              {
+                "$ref": "#/components/schemas/StreamDetails"
+              }
+            ],
+            "default": "null",
+            "nullable": true
+          },
+          "generated_text": {
+            "type": "string",
+            "default": "null",
+            "example": "test",
+            "nullable": true
+          },
+          "index": {
+            "type": "integer",
+            "format": "int32",
+            "minimum": 0
+          },
+          "token": {
+            "$ref": "#/components/schemas/Token"
+          },
+          "top_tokens": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/Token"
+            }
+          }
+        }
+      },
+      "Token": {
+        "type": "object",
+        "required": [
+          "id",
+          "text",
+          "logprob",
+          "special"
+        ],
+        "properties": {
+          "id": {
+            "type": "integer",
+            "format": "int32",
+            "example": 0,
+            "minimum": 0
+          },
+          "logprob": {
+            "type": "number",
+            "format": "float",
+            "example": -0.34,
+            "nullable": true
+          },
+          "special": {
+            "type": "boolean",
+            "example": "false"
+          },
+          "text": {
+            "type": "string",
+            "example": "test"
+          }
+        }
+      },
+      "TokenizeResponse": {
+        "type": "array",
+        "items": {
+          "$ref": "#/components/schemas/SimpleToken"
+        }
+      }
+    }
+  },
+  "tags": [
+    {
+      "name": "Text Generation Inference",
+      "description": "Hugging Face Text Generation Inference API"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/docs/source/basic_tutorials/gated_model_access.md b/docs/source/basic_tutorials/gated_model_access.md
index 1437717f..060d177d 100644
--- a/docs/source/basic_tutorials/gated_model_access.md
+++ b/docs/source/basic_tutorials/gated_model_access.md
@@ -19,6 +19,6 @@ docker run --gpus all \
     --shm-size 1g \
     -e HUGGING_FACE_HUB_TOKEN=$token \
     -p 8080:80 \
-    -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.3 \
+    -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.4 \
     --model-id $model
 ```
diff --git a/docs/source/quicktour.md b/docs/source/quicktour.md
index e9a33f04..78ebb8e2 100644
--- a/docs/source/quicktour.md
+++ b/docs/source/quicktour.md
@@ -8,7 +8,7 @@ Let's say you want to deploy [Falcon-7B Instruct](https://huggingface.co/tiiuae/
 model=tiiuae/falcon-7b-instruct
 volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run
 
-docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.3 --model-id $model
+docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.4 --model-id $model
 ```
 
 <Tip warning={true}>
@@ -20,7 +20,7 @@ To use NVIDIA GPUs, you need to install the [NVIDIA Container Toolkit](https://d
 TGI also supports ROCm-enabled AMD GPUs (only MI210 and MI250 are tested), details are available in the [Supported Hardware section](./supported_models#supported-hardware) and [AMD documentation](https://rocm.docs.amd.com/en/latest/deploy/docker.html). To launch TGI on ROCm GPUs, please use instead:
 
 ```bash
-docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --device=/dev/kfd --device=/dev/dri --group-add video --ipc=host --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.3-rocm --model-id $model
+docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --device=/dev/kfd --device=/dev/dri --group-add video --ipc=host --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.4-rocm --model-id $model
 ```
 
 Once TGI is running, you can use the `generate` endpoint by doing requests. To learn more about how to query the endpoints, check the [Consuming TGI](./basic_tutorials/consuming_tgi) section, where we show examples with utility libraries and UIs. Below you can see a simple snippet to query the endpoint.
@@ -91,7 +91,7 @@ curl 127.0.0.1:8080/generate \
 To see all possible deploy flags and options, you can use the `--help` flag. It's possible to configure the number of shards, quantization, generation parameters, and more.
 
 ```bash
-docker run ghcr.io/huggingface/text-generation-inference:1.3 --help
+docker run ghcr.io/huggingface/text-generation-inference:1.4 --help
 ```
 
 </Tip>
diff --git a/integration-tests/models/test_flash_phi.py b/integration-tests/models/test_flash_phi.py
index 6391f2a1..0987b3a1 100644
--- a/integration-tests/models/test_flash_phi.py
+++ b/integration-tests/models/test_flash_phi.py
@@ -21,7 +21,7 @@ async def test_flash_phi(flash_phi, response_snapshot):
     )
 
     assert response.details.generated_tokens == 10
-    assert response.generated_text == ": {request}\")\n        response = self"
+    assert response.generated_text == ': {request}")\n        response = self'
     assert response == response_snapshot
 
 
@@ -52,14 +52,12 @@ async def test_flash_phi_all_params(flash_phi, response_snapshot):
 @pytest.mark.asyncio
 @pytest.mark.private
 async def test_flash_phi_load(flash_phi, generate_load, response_snapshot):
-    responses = await generate_load(
-        flash_phi, "Test request", max_new_tokens=10, n=4
-    )
+    responses = await generate_load(flash_phi, "Test request", max_new_tokens=10, n=4)
 
     assert len(responses) == 4
     assert all(
         [r.generated_text == responses[0].generated_text for r in responses]
     ), f"{[r.generated_text  for r in responses]}"
-    assert responses[0].generated_text == ": {request}\")\n        response = self"
+    assert responses[0].generated_text == ': {request}")\n        response = self'
 
     assert responses == response_snapshot
diff --git a/integration-tests/pyproject.toml b/integration-tests/pyproject.toml
index f6929587..f0c5add9 100644
--- a/integration-tests/pyproject.toml
+++ b/integration-tests/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "text-generation-integration-tests"
-version = "1.3.4"
+version = "1.4.0"
 description = "Text Generation Inference integration tests"
 authors = ["Nicolas Patry <nicolas@huggingface.co>"]
 
diff --git a/server/poetry.lock b/server/poetry.lock
index 3326d0aa..64b1b74f 100644
--- a/server/poetry.lock
+++ b/server/poetry.lock
@@ -153,21 +153,22 @@ files = [
 
 [[package]]
 name = "attrs"
-version = "23.1.0"
+version = "23.2.0"
 description = "Classes Without Boilerplate"
 optional = true
 python-versions = ">=3.7"
 files = [
-    {file = "attrs-23.1.0-py3-none-any.whl", hash = "sha256:1f28b4522cdc2fb4256ac1a020c78acf9cba2c6b461ccd2c126f3aa8e8335d04"},
-    {file = "attrs-23.1.0.tar.gz", hash = "sha256:6279836d581513a26f1bf235f9acd333bc9115683f14f7e8fae46c98fc50e015"},
+    {file = "attrs-23.2.0-py3-none-any.whl", hash = "sha256:99b87a485a5820b23b879f04c2305b44b951b502fd64be915879d77a7e8fc6f1"},
+    {file = "attrs-23.2.0.tar.gz", hash = "sha256:935dc3b529c262f6cf76e50877d35a4bd3c1de194fd41f47a2b7ae8f19971f30"},
 ]
 
 [package.extras]
 cov = ["attrs[tests]", "coverage[toml] (>=5.3)"]
-dev = ["attrs[docs,tests]", "pre-commit"]
+dev = ["attrs[tests]", "pre-commit"]
 docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope-interface"]
 tests = ["attrs[tests-no-zope]", "zope-interface"]
-tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=1.1.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"]
+tests-mypy = ["mypy (>=1.6)", "pytest-mypy-plugins"]
+tests-no-zope = ["attrs[tests-mypy]", "cloudpickle", "hypothesis", "pympler", "pytest (>=4.3.0)", "pytest-xdist[psutil]"]
 
 [[package]]
 name = "backoff"
@@ -328,20 +329,21 @@ files = [
 
 [[package]]
 name = "datasets"
-version = "2.15.0"
+version = "2.16.1"
 description = "HuggingFace community-driven open-source library of datasets"
 optional = true
 python-versions = ">=3.8.0"
 files = [
-    {file = "datasets-2.15.0-py3-none-any.whl", hash = "sha256:6d658d23811393dfc982d026082e1650bdaaae28f6a86e651966cb072229a228"},
-    {file = "datasets-2.15.0.tar.gz", hash = "sha256:a26d059370bd7503bd60e9337977199a13117a83f72fb61eda7e66f0c4d50b2b"},
+    {file = "datasets-2.16.1-py3-none-any.whl", hash = "sha256:fafa300c78ff92d521473a3d47d60c2d3e0d6046212cc03ceb6caf6550737257"},
+    {file = "datasets-2.16.1.tar.gz", hash = "sha256:ad3215e9b1984d1de4fda2123bc7319ccbdf1e17d0c3d5590d13debff308a080"},
 ]
 
 [package.dependencies]
 aiohttp = "*"
 dill = ">=0.3.0,<0.3.8"
+filelock = "*"
 fsspec = {version = ">=2023.1.0,<=2023.10.0", extras = ["http"]}
-huggingface-hub = ">=0.18.0"
+huggingface-hub = ">=0.19.4"
 multiprocess = "*"
 numpy = ">=1.17"
 packaging = "*"
@@ -357,15 +359,15 @@ xxhash = "*"
 apache-beam = ["apache-beam (>=2.26.0,<2.44.0)"]
 audio = ["librosa", "soundfile (>=0.12.1)"]
 benchmarks = ["tensorflow (==2.12.0)", "torch (==2.0.1)", "transformers (==4.30.1)"]
-dev = ["Pillow (>=6.2.1)", "absl-py", "apache-beam (>=2.26.0,<2.44.0)", "black (>=23.1,<24.0)", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "pyyaml (>=5.3.1)", "rarfile (>=4.0)", "ruff (>=0.0.241)", "s3fs", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy (<2.0.0)", "tensorflow (>=2.2.0,!=2.6.0,!=2.6.1)", "tensorflow (>=2.3,!=2.6.0,!=2.6.1)", "tensorflow-macos", "tiktoken", "torch", "transformers", "typing-extensions (>=4.6.1)", "zstandard"]
+dev = ["Pillow (>=6.2.1)", "absl-py", "apache-beam (>=2.26.0,<2.44.0)", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "ruff (>=0.1.5)", "s3fs", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy (<2.0.0)", "tensorflow (>=2.2.0,!=2.6.0,!=2.6.1)", "tensorflow (>=2.3,!=2.6.0,!=2.6.1)", "tensorflow-macos", "tiktoken", "torch", "torch (>=2.0.0)", "transformers", "typing-extensions (>=4.6.1)", "zstandard"]
 docs = ["s3fs", "tensorflow (>=2.2.0,!=2.6.0,!=2.6.1)", "tensorflow-macos", "torch", "transformers"]
 jax = ["jax (>=0.3.14)", "jaxlib (>=0.3.14)"]
 metrics-tests = ["Werkzeug (>=1.0.1)", "accelerate", "bert-score (>=0.3.6)", "jiwer", "langdetect", "mauve-text", "nltk", "requests-file (>=1.5.1)", "rouge-score", "sacrebleu", "sacremoses", "scikit-learn", "scipy", "sentencepiece", "seqeval", "six (>=1.15.0,<1.16.0)", "spacy (>=3.0.0)", "texttable (>=1.6.3)", "tldextract", "tldextract (>=3.1.0)", "toml (>=0.10.1)", "typer (<0.5.0)"]
-quality = ["black (>=23.1,<24.0)", "pyyaml (>=5.3.1)", "ruff (>=0.0.241)"]
+quality = ["ruff (>=0.1.5)"]
 s3 = ["s3fs"]
 tensorflow = ["tensorflow (>=2.2.0,!=2.6.0,!=2.6.1)", "tensorflow-macos"]
 tensorflow-gpu = ["tensorflow-gpu (>=2.2.0,!=2.6.0,!=2.6.1)"]
-tests = ["Pillow (>=6.2.1)", "absl-py", "apache-beam (>=2.26.0,<2.44.0)", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy (<2.0.0)", "tensorflow (>=2.3,!=2.6.0,!=2.6.1)", "tensorflow-macos", "tiktoken", "torch", "transformers", "typing-extensions (>=4.6.1)", "zstandard"]
+tests = ["Pillow (>=6.2.1)", "absl-py", "apache-beam (>=2.26.0,<2.44.0)", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy (<2.0.0)", "tensorflow (>=2.3,!=2.6.0,!=2.6.1)", "tensorflow-macos", "tiktoken", "torch (>=2.0.0)", "transformers", "typing-extensions (>=4.6.1)", "zstandard"]
 torch = ["torch"]
 vision = ["Pillow (>=6.2.1)"]
 
@@ -767,34 +769,94 @@ setuptools = "*"
 
 [[package]]
 name = "hf-transfer"
-version = "0.1.4"
+version = "0.1.5"
 description = ""
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "hf_transfer-0.1.4-cp310-cp310-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:6ff5fbde30a5bed35ef8f0d4ba78bde9f6d60a233dbff78a0e4035d6e6f71e4c"},
-    {file = "hf_transfer-0.1.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d1c5c20f76e7f3451cff476b85c55dcb8566ebc94a596cb9eb39c0bb75db8675"},
-    {file = "hf_transfer-0.1.4-cp310-none-win_amd64.whl", hash = "sha256:84c3ce20c68863a7d998711b98726ba9ae8f2e3fc0d685bc2c9ac9833c0f4048"},
-    {file = "hf_transfer-0.1.4-cp311-cp311-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:dab1cf4e2e6fcb963fe0e48e6b5e3a95cf65ee376c7b6618a05dbb2ef0dde183"},
-    {file = "hf_transfer-0.1.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:63c9c7aef90facf45391c86131ed00e74333637735cfec52da4f5170004d0b3f"},
-    {file = "hf_transfer-0.1.4-cp311-none-win_amd64.whl", hash = "sha256:eca1fe6ae145e88455d0a174248080498cea52ad45cee50702070b47dffa421f"},
-    {file = "hf_transfer-0.1.4-cp312-cp312-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:d07c0d26b5c01ad50d22ddcff7d30c4e8cbb823565b7f61e0ddb35f7faeae415"},
-    {file = "hf_transfer-0.1.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9b9cf169c3c64883b07f7ded5e3f14ae1d437eb77448738b88c923fc5597c47"},
-    {file = "hf_transfer-0.1.4-cp312-none-win_amd64.whl", hash = "sha256:6b8518b9ebb85b0238745be81f7b88383c7ea216dd8407d46444bcc7806dc0ef"},
-    {file = "hf_transfer-0.1.4-cp37-cp37m-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:ea32e9f91de3f2dad3567577c293f2e81a9309e680def4712ec0c4ea49be6833"},
-    {file = "hf_transfer-0.1.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e81a10dbf2ac534083da06c200456b5d10ba7a1e8c4c5c48f7ea1ca4cf6af474"},
-    {file = "hf_transfer-0.1.4-cp37-none-win_amd64.whl", hash = "sha256:97555bbff69a0459712e5d25d659c0dc74cb8f9726562ca66241f1e1b081f6a9"},
-    {file = "hf_transfer-0.1.4-cp38-cp38-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:38bce7a511952e1b804168e956cd3a3b1ff7e38828259c3cdae27614060b90c5"},
-    {file = "hf_transfer-0.1.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d1977e94e8c8fc8a0e9ce74a651d4694629e526da246a492855fcfb710aa489"},
-    {file = "hf_transfer-0.1.4-cp38-none-win_amd64.whl", hash = "sha256:6ca2d2c40e5e94c5de7e502037ad23ac1d803a2a12760b15b3e3f88c616202bd"},
-    {file = "hf_transfer-0.1.4-cp39-cp39-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:c04a93acb58e50b8da1e2258185e54f6bf48ba24bf95e470310178b7047c1017"},
-    {file = "hf_transfer-0.1.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3028a807363e0b2c64985c44732ba4ab187a569f013367d2115a6e09ae95031"},
-    {file = "hf_transfer-0.1.4-cp39-none-win_amd64.whl", hash = "sha256:dc9c7c1d0d79fc06baf86d41620623bb6bb2736755329ea6b1ec5faf71e3e36b"},
-    {file = "hf_transfer-0.1.4-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1a466ae2b11d72df9e0005eb8ff7f537d5460c98b64fb6e49f3076ee14040dcf"},
-    {file = "hf_transfer-0.1.4-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fb34a023276936d4716112e17daea4ff98afc35b6113dd0f0383710dc208c058"},
-    {file = "hf_transfer-0.1.4-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba0647b84d7ff0eee1de6479179a5d43d0695001733f17eecc00153f0f8ab1ac"},
-    {file = "hf_transfer-0.1.4-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:27d0bc1f8b79a6d65751efbce7eb02d2c1bd7e4de1a46aac18995461590ce4dd"},
-    {file = "hf_transfer-0.1.4.tar.gz", hash = "sha256:687e090639cd52a48dedbfaa9e455a2c99c5169ece3d911f95983b1d4d4c84ed"},
+    {file = "hf_transfer-0.1.5-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:c2953fe35e8a9074507ef77d3c29ec511eead8030d25f5a228a3c03dcda723df"},
+    {file = "hf_transfer-0.1.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:bc48a561a7e42a5ebfb638fb9c154c4c10fa39e878ce8fb9f9db12f98f24665d"},
+    {file = "hf_transfer-0.1.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1ccb2b43e1441273aedc83ef9b2419e02028686c9ffcdf0a2bd195657917e24a"},
+    {file = "hf_transfer-0.1.5-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:606bba2d616123b89b749fef1e138118cdf3d84380a6a4fcfe91e1890731ea44"},
+    {file = "hf_transfer-0.1.5-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c5b870e1c2d6a87d1e5db890747a2d69712f1cbbc91e64f144e066a9fda16b38"},
+    {file = "hf_transfer-0.1.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7c82a9e058d77ac31cbc2d0f9be8011c8e0a2de787c1752225687c54eec00226"},
+    {file = "hf_transfer-0.1.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d3eb58ac583ccf03954bef830ea70a4e02271195f24884723b499a6577ffaf64"},
+    {file = "hf_transfer-0.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b9fc4b5634a3a61635a8f308eba5df1336bf996b5adc12dc74283743b5bf8fcc"},
+    {file = "hf_transfer-0.1.5-cp310-none-win32.whl", hash = "sha256:9153d589ced01668d7bd9a6a5ead3306d91ded5ebef5cd97185dcd51884d23a2"},
+    {file = "hf_transfer-0.1.5-cp310-none-win_amd64.whl", hash = "sha256:f5a48f0a606e5278117c130bc85849008f00d50a8efcc5a5e9c9b106a96341f5"},
+    {file = "hf_transfer-0.1.5-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:130966ca4f1266bfb9e13a4b6c40170115a2b450255b7c08ef0de85f04f778ef"},
+    {file = "hf_transfer-0.1.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5c5efdbefcc35425933d320b5188485b3db080508c290748ca1fa5864da1347f"},
+    {file = "hf_transfer-0.1.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c2cbec42ed94b02d6c21f5fe78c6a65f82703d375dae9448a5efda5c386d2330"},
+    {file = "hf_transfer-0.1.5-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:72719931d9f02c1aba13526f431f69cd62a4fc0f7634126c2d1e64d8734760aa"},
+    {file = "hf_transfer-0.1.5-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bc404b4dc9b5a873bd29d2e95774d17f3b6ff38d5a19bfe34b549c3c99819cec"},
+    {file = "hf_transfer-0.1.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1e9988b785fd9bc8cd77819675a2744eb3b6a02edfb820c6b5012b861e4c20da"},
+    {file = "hf_transfer-0.1.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9e7524c4137646ed3471d5b6fdf7e1c6b7d3d43394eeeb595018e32f233019ed"},
+    {file = "hf_transfer-0.1.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d96f414a0fd7b7fb57e359a79ad70a5ba2357bb91375dccc1a285edcc296d35d"},
+    {file = "hf_transfer-0.1.5-cp311-none-win32.whl", hash = "sha256:980319ef96fda5abbb7c03ec3803a251f95ed3a9b50f84db25d948994ff6dc34"},
+    {file = "hf_transfer-0.1.5-cp311-none-win_amd64.whl", hash = "sha256:885d89c59dd54b687c74a88dad76006c62be4ad11ee1fecea25582d854434b6e"},
+    {file = "hf_transfer-0.1.5-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:3a7d71529550eeba0525cec2155f12e04aab9d57eb3e15015768d222ac80743f"},
+    {file = "hf_transfer-0.1.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9688be93d9aab0951cedde7ae374e2e77483d13d2f259512a1f94f30723e5060"},
+    {file = "hf_transfer-0.1.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2e8584fdd0435d75f8e340605ef3c3d6a5f18f09b75da9bd22fcf0106942b178"},
+    {file = "hf_transfer-0.1.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8054e534ec7668fe7d6f9ca0764f1f92e16a40fdd9dd54f4154c5ee6200a00ec"},
+    {file = "hf_transfer-0.1.5-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6a29736f0d2e843db59898ce1e26e22b477a3f5f60a508e897daf0cfc49fe307"},
+    {file = "hf_transfer-0.1.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c15a8fec25e93284d4ffb73000c47f909bb261eb0f8d32886db5f1e5ab5f07de"},
+    {file = "hf_transfer-0.1.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ad913daff9f602e0ae13cfa79ba265b1db01255e5784c2469619c70231f49088"},
+    {file = "hf_transfer-0.1.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9bce65b7cd31ef883d67c8ab733c303231bd8b4c4d3370524405f6b416a9bc2a"},
+    {file = "hf_transfer-0.1.5-cp312-none-win32.whl", hash = "sha256:40842f3b35ceaa99bb6029ab3d1c2cc4b948a19d0b5a2884f8516b744f52a934"},
+    {file = "hf_transfer-0.1.5-cp312-none-win_amd64.whl", hash = "sha256:08491bcbd5eefbc0f33cf958671e24fb5d5b72e6e054448cac3b01dfc373dc76"},
+    {file = "hf_transfer-0.1.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6d721c3354092797531056757cdbe94e24ec898f5c935dd8f3a50b47083e6ea6"},
+    {file = "hf_transfer-0.1.5-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b0192ff102c5b74eef7eb11e425fe0e4a3445dcb82b5ab1fab5b8d619c7caa45"},
+    {file = "hf_transfer-0.1.5-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:95c094afee2cde827081d1b543879e64bb5066a12aba0288d8c479102cfa7a7f"},
+    {file = "hf_transfer-0.1.5-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5758faa7be41636ac78199fda37b0b4cbd2d9a1dc79c777de3524429fc573f65"},
+    {file = "hf_transfer-0.1.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f3318bf2436afb24b50a58f668eaaacbee89e23fc00f19e9d2714a9155160098"},
+    {file = "hf_transfer-0.1.5-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:72fddff6f76f51145635adde4ba59a3c9e4fe97479f750712770286689acece4"},
+    {file = "hf_transfer-0.1.5-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bf2f873d0d386a15f79e9730e27a5dbf7e3a52b9de120a7166a254d983eeb4da"},
+    {file = "hf_transfer-0.1.5-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:95244c6c57360b9750cf6603522a8e1b4c42c8348e962efa62aa11d83e4aa6a6"},
+    {file = "hf_transfer-0.1.5-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e58926e22651924510109aa9b37baeaf0a6ae2014774746bc43e7d92e0aaf3f0"},
+    {file = "hf_transfer-0.1.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8182241523493dbc6b108e265c5551b98d8f75c7e3a5bd84f5bf9c1db9729cbf"},
+    {file = "hf_transfer-0.1.5-cp37-none-win32.whl", hash = "sha256:a236db066bd017d9a2a543b7414dbcc3fc0df064c3aafd4831ab6b8dcbf1cec2"},
+    {file = "hf_transfer-0.1.5-cp37-none-win_amd64.whl", hash = "sha256:af0d23d84fe2326d309b94d7c9ee5a6987fc8005839dd4efff2e4468df1a9c28"},
+    {file = "hf_transfer-0.1.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffc4ea668aa8f35895d1373fc4b1f9544723aa6470b7c21619ed4011d51dc462"},
+    {file = "hf_transfer-0.1.5-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c03105c8854302aa0b29f6ae5c180ce07f63e6895c46efde7eea2aeb4429229d"},
+    {file = "hf_transfer-0.1.5-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cac2abda46b1aac20e75142b84c170af1f8f387ed35ce53a3856148d362c1a26"},
+    {file = "hf_transfer-0.1.5-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b2203860a8affb3bcbcbbb009f592f8591543cf3de8b85b5dccf3e65749d8724"},
+    {file = "hf_transfer-0.1.5-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:34b0b043baf351d087b39ceae299fdc25aa2c5945be843b1878ec6f563a99a51"},
+    {file = "hf_transfer-0.1.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf9e6c5282cf45847c1594d6736b3dfe0c42ec97fc624a70f8c2404c050e0a00"},
+    {file = "hf_transfer-0.1.5-cp38-none-win32.whl", hash = "sha256:ea5f975016cca9bf36a76de273f0e477633b0d77dcbbb793260e7b625fb3dc86"},
+    {file = "hf_transfer-0.1.5-cp38-none-win_amd64.whl", hash = "sha256:14c9094353e9f9ed4b834b0f581bd531875fccfac5fd951e49b2ab16f1a267c0"},
+    {file = "hf_transfer-0.1.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7e4d25e66695f39d731c4129ce030b24456727df4ddd34febcef559109e4907b"},
+    {file = "hf_transfer-0.1.5-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0ffe2399e3ac162c91d408fcb8be5b38893539ddaaecc334faebfd54a98cdd63"},
+    {file = "hf_transfer-0.1.5-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:59888b7bf7a56b94af1755f47a4415c3e32df1c88f55261ff62df0657bd6483a"},
+    {file = "hf_transfer-0.1.5-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2bd88ebe18d87aaf7acf1641127efffb6d082e566d4f21f0bcbe67e4192c2964"},
+    {file = "hf_transfer-0.1.5-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b2536d5420d1e7e7751aed592c6e59af59c4ceccb8d5e36f2f7a5707f7218efc"},
+    {file = "hf_transfer-0.1.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2c39584bed7a880d2d5abd0396615d9a3f382009a047d6f70646c57feb27209"},
+    {file = "hf_transfer-0.1.5-cp39-none-win32.whl", hash = "sha256:08ce2e01f4057f8e23c2665f1cfb90c3d1f4c93097e99b35907cb1ddadbe4184"},
+    {file = "hf_transfer-0.1.5-cp39-none-win_amd64.whl", hash = "sha256:e10812129996981ee100f943c74963d801187c6048269a81879532baf1b32931"},
+    {file = "hf_transfer-0.1.5-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8747d2b7ae6e8dcf44070ab44494d9d0f4d6a71d10888dce0a72e62a029e65eb"},
+    {file = "hf_transfer-0.1.5-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6492086933e8c4d62e4910af952423fb4fff86c18afff8ece81f228c063f9556"},
+    {file = "hf_transfer-0.1.5-pp310-pypy310_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1d173dc9fbbff38a3e67e3a21568f67d05427c4208ce77106e1822574a68ee27"},
+    {file = "hf_transfer-0.1.5-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a2aebcee180cf2731404bdf497da3a4683f5cac5f0b71aced8af5936c7d8283c"},
+    {file = "hf_transfer-0.1.5-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0e724c4e74763380143526e98b323aeb489fd0b076e70e994454519f35b502b1"},
+    {file = "hf_transfer-0.1.5-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f5533bc5d061595b3f5ce66509b34da3ba51aa0014b02356ca28fecc1251c2f"},
+    {file = "hf_transfer-0.1.5-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3332cbae4128639f2985be2197125e5f7e9577bf82c7fdad773e5740bb17b296"},
+    {file = "hf_transfer-0.1.5-pp37-pypy37_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fb8a7b55da901a246a2607ccda7dd056e2e594e05e0dde91206f5abae0a4ce3b"},
+    {file = "hf_transfer-0.1.5-pp37-pypy37_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b79ad597b1571b162938bbc41d0d01a8788f087f848283723bf42532ac44163f"},
+    {file = "hf_transfer-0.1.5-pp37-pypy37_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:069026e38fc8786a91dac0de5e48a1acd6ac8bb59b9a02049fa73ce88303d468"},
+    {file = "hf_transfer-0.1.5-pp37-pypy37_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4825a78fd9775b51e089f2d071abf7f3e6877be10d1fc2a0c245862bdc94f89a"},
+    {file = "hf_transfer-0.1.5-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:930ca177ce417190283a07738449d08c413a9991338115e8619a1597b813d981"},
+    {file = "hf_transfer-0.1.5-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d89d9bae3f5124519efea5b66b33bca68312d7e0a001313b703d710bddc3b317"},
+    {file = "hf_transfer-0.1.5-pp38-pypy38_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:961f2936f21ea8513b89d3025a83349317284a37748dccc0beca62be2e94472c"},
+    {file = "hf_transfer-0.1.5-pp38-pypy38_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0f3ef2163503d51d7b0199c5ae95934ebfbae00bc7e1ca2e5fef0230f13ab90d"},
+    {file = "hf_transfer-0.1.5-pp38-pypy38_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:572655ece4259d5d46102bf56276fa02a0df5912dedbd13e816e4f3f104db733"},
+    {file = "hf_transfer-0.1.5-pp38-pypy38_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b883ea8289e1d734d55d873f0e34c8d5304a4f24f18a5cc1b4d3d9b6df608b58"},
+    {file = "hf_transfer-0.1.5-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4a9b86d0d432c9a1c76e29d5a5f233f248ddf9912e1219a3c3b2bc43809980db"},
+    {file = "hf_transfer-0.1.5-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:770e58593d15573c5ff47e7dff22ccf7b33ca6580e1358af83dab43078b835bc"},
+    {file = "hf_transfer-0.1.5-pp39-pypy39_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1088fcca432486e50142dec893c5ddcc5a32ef7e71b53c5d25b321744b4cd6a4"},
+    {file = "hf_transfer-0.1.5-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1d7f850c6f5937402c91c0b999792dad66e476165478031e450797063a8cce5c"},
+    {file = "hf_transfer-0.1.5-pp39-pypy39_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:761559f1cb743da773ef831a8c19fc3485e1ceb3cbc9a41135a14d0f4ec53a6d"},
+    {file = "hf_transfer-0.1.5-pp39-pypy39_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bc60ef0efff59b9a65365bc356f5c34a497d0b84df5887c2348855a15911a12d"},
+    {file = "hf_transfer-0.1.5-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bdf4149980b75cfb6c129eef142df6247d49c6820b088661985944c617abc1ff"},
+    {file = "hf_transfer-0.1.5.tar.gz", hash = "sha256:762202a02627bf9eac438e1f0e12ab13b46af06ba88c2c317042597b4fbbbf73"},
 ]
 
 [[package]]
@@ -854,13 +916,13 @@ files = [
 
 [[package]]
 name = "jinja2"
-version = "3.1.2"
+version = "3.1.3"
 description = "A very fast and expressive template engine."
 optional = true
 python-versions = ">=3.7"
 files = [
-    {file = "Jinja2-3.1.2-py3-none-any.whl", hash = "sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61"},
-    {file = "Jinja2-3.1.2.tar.gz", hash = "sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852"},
+    {file = "Jinja2-3.1.3-py3-none-any.whl", hash = "sha256:7d6d50dd97d52cbc355597bd845fabfbac3f551e1f99619e39a35ce8c370b5fa"},
+    {file = "Jinja2-3.1.3.tar.gz", hash = "sha256:ac8bd6544d4bb2c9792bf3a159e80bba8fda7f07e81bc3aed565432d5925ba90"},
 ]
 
 [package.dependencies]
@@ -889,71 +951,71 @@ dev = ["Sphinx (>=4.1.1)", "black (>=19.10b0)", "colorama (>=0.3.4)", "docutils
 
 [[package]]
 name = "markupsafe"
-version = "2.1.3"
+version = "2.1.4"
 description = "Safely add untrusted strings to HTML/XML markup."
 optional = true
 python-versions = ">=3.7"
 files = [
-    {file = "MarkupSafe-2.1.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cd0f502fe016460680cd20aaa5a76d241d6f35a1c3350c474bac1273803893fa"},
-    {file = "MarkupSafe-2.1.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e09031c87a1e51556fdcb46e5bd4f59dfb743061cf93c4d6831bf894f125eb57"},
-    {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:68e78619a61ecf91e76aa3e6e8e33fc4894a2bebe93410754bd28fce0a8a4f9f"},
-    {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:65c1a9bcdadc6c28eecee2c119465aebff8f7a584dd719facdd9e825ec61ab52"},
-    {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:525808b8019e36eb524b8c68acdd63a37e75714eac50e988180b169d64480a00"},
-    {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:962f82a3086483f5e5f64dbad880d31038b698494799b097bc59c2edf392fce6"},
-    {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:aa7bd130efab1c280bed0f45501b7c8795f9fdbeb02e965371bbef3523627779"},
-    {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c9c804664ebe8f83a211cace637506669e7890fec1b4195b505c214e50dd4eb7"},
-    {file = "MarkupSafe-2.1.3-cp310-cp310-win32.whl", hash = "sha256:10bbfe99883db80bdbaff2dcf681dfc6533a614f700da1287707e8a5d78a8431"},
-    {file = "MarkupSafe-2.1.3-cp310-cp310-win_amd64.whl", hash = "sha256:1577735524cdad32f9f694208aa75e422adba74f1baee7551620e43a3141f559"},
-    {file = "MarkupSafe-2.1.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ad9e82fb8f09ade1c3e1b996a6337afac2b8b9e365f926f5a61aacc71adc5b3c"},
-    {file = "MarkupSafe-2.1.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3c0fae6c3be832a0a0473ac912810b2877c8cb9d76ca48de1ed31e1c68386575"},
-    {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b076b6226fb84157e3f7c971a47ff3a679d837cf338547532ab866c57930dbee"},
-    {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bfce63a9e7834b12b87c64d6b155fdd9b3b96191b6bd334bf37db7ff1fe457f2"},
-    {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:338ae27d6b8745585f87218a3f23f1512dbf52c26c28e322dbe54bcede54ccb9"},
-    {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e4dd52d80b8c83fdce44e12478ad2e85c64ea965e75d66dbeafb0a3e77308fcc"},
-    {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:df0be2b576a7abbf737b1575f048c23fb1d769f267ec4358296f31c2479db8f9"},
-    {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"},
-    {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"},
-    {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"},
-    {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"},
-    {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"},
-    {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"},
-    {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"},
-    {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"},
-    {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"},
-    {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"},
-    {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"},
-    {file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"},
-    {file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"},
-    {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"},
-    {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"},
-    {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"},
-    {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ca379055a47383d02a5400cb0d110cef0a776fc644cda797db0c5696cfd7e18e"},
-    {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:b7ff0f54cb4ff66dd38bebd335a38e2c22c41a8ee45aa608efc890ac3e3931bc"},
-    {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:c011a4149cfbcf9f03994ec2edffcb8b1dc2d2aede7ca243746df97a5d41ce48"},
-    {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:56d9f2ecac662ca1611d183feb03a3fa4406469dafe241673d521dd5ae92a155"},
-    {file = "MarkupSafe-2.1.3-cp37-cp37m-win32.whl", hash = "sha256:8758846a7e80910096950b67071243da3e5a20ed2546e6392603c096778d48e0"},
-    {file = "MarkupSafe-2.1.3-cp37-cp37m-win_amd64.whl", hash = "sha256:787003c0ddb00500e49a10f2844fac87aa6ce977b90b0feaaf9de23c22508b24"},
-    {file = "MarkupSafe-2.1.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:2ef12179d3a291be237280175b542c07a36e7f60718296278d8593d21ca937d4"},
-    {file = "MarkupSafe-2.1.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2c1b19b3aaacc6e57b7e25710ff571c24d6c3613a45e905b1fde04d691b98ee0"},
-    {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8afafd99945ead6e075b973fefa56379c5b5c53fd8937dad92c662da5d8fd5ee"},
-    {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8c41976a29d078bb235fea9b2ecd3da465df42a562910f9022f1a03107bd02be"},
-    {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d080e0a5eb2529460b30190fcfcc4199bd7f827663f858a226a81bc27beaa97e"},
-    {file = "MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:69c0f17e9f5a7afdf2cc9fb2d1ce6aabdb3bafb7f38017c0b77862bcec2bbad8"},
-    {file = "MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:504b320cd4b7eff6f968eddf81127112db685e81f7e36e75f9f84f0df46041c3"},
-    {file = "MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:42de32b22b6b804f42c5d98be4f7e5e977ecdd9ee9b660fda1a3edf03b11792d"},
-    {file = "MarkupSafe-2.1.3-cp38-cp38-win32.whl", hash = "sha256:ceb01949af7121f9fc39f7d27f91be8546f3fb112c608bc4029aef0bab86a2a5"},
-    {file = "MarkupSafe-2.1.3-cp38-cp38-win_amd64.whl", hash = "sha256:1b40069d487e7edb2676d3fbdb2b0829ffa2cd63a2ec26c4938b2d34391b4ecc"},
-    {file = "MarkupSafe-2.1.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:8023faf4e01efadfa183e863fefde0046de576c6f14659e8782065bcece22198"},
-    {file = "MarkupSafe-2.1.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6b2b56950d93e41f33b4223ead100ea0fe11f8e6ee5f641eb753ce4b77a7042b"},
-    {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9dcdfd0eaf283af041973bff14a2e143b8bd64e069f4c383416ecd79a81aab58"},
-    {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:05fb21170423db021895e1ea1e1f3ab3adb85d1c2333cbc2310f2a26bc77272e"},
-    {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:282c2cb35b5b673bbcadb33a585408104df04f14b2d9b01d4c345a3b92861c2c"},
-    {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ab4a0df41e7c16a1392727727e7998a467472d0ad65f3ad5e6e765015df08636"},
-    {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:7ef3cb2ebbf91e330e3bb937efada0edd9003683db6b57bb108c4001f37a02ea"},
-    {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:0a4e4a1aff6c7ac4cd55792abf96c915634c2b97e3cc1c7129578aa68ebd754e"},
-    {file = "MarkupSafe-2.1.3-cp39-cp39-win32.whl", hash = "sha256:fec21693218efe39aa7f8599346e90c705afa52c5b31ae019b2e57e8f6542bb2"},
-    {file = "MarkupSafe-2.1.3-cp39-cp39-win_amd64.whl", hash = "sha256:3fd4abcb888d15a94f32b75d8fd18ee162ca0c064f35b11134be77050296d6ba"},
-    {file = "MarkupSafe-2.1.3.tar.gz", hash = "sha256:af598ed32d6ae86f1b747b82783958b1a4ab8f617b06fe68795c7f026abbdcad"},
+    {file = "MarkupSafe-2.1.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:de8153a7aae3835484ac168a9a9bdaa0c5eee4e0bc595503c95d53b942879c84"},
+    {file = "MarkupSafe-2.1.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e888ff76ceb39601c59e219f281466c6d7e66bd375b4ec1ce83bcdc68306796b"},
+    {file = "MarkupSafe-2.1.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0b838c37ba596fcbfca71651a104a611543077156cb0a26fe0c475e1f152ee8"},
+    {file = "MarkupSafe-2.1.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dac1ebf6983148b45b5fa48593950f90ed6d1d26300604f321c74a9ca1609f8e"},
+    {file = "MarkupSafe-2.1.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0fbad3d346df8f9d72622ac71b69565e621ada2ce6572f37c2eae8dacd60385d"},
+    {file = "MarkupSafe-2.1.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d5291d98cd3ad9a562883468c690a2a238c4a6388ab3bd155b0c75dd55ece858"},
+    {file = "MarkupSafe-2.1.4-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:a7cc49ef48a3c7a0005a949f3c04f8baa5409d3f663a1b36f0eba9bfe2a0396e"},
+    {file = "MarkupSafe-2.1.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b83041cda633871572f0d3c41dddd5582ad7d22f65a72eacd8d3d6d00291df26"},
+    {file = "MarkupSafe-2.1.4-cp310-cp310-win32.whl", hash = "sha256:0c26f67b3fe27302d3a412b85ef696792c4a2386293c53ba683a89562f9399b0"},
+    {file = "MarkupSafe-2.1.4-cp310-cp310-win_amd64.whl", hash = "sha256:a76055d5cb1c23485d7ddae533229039b850db711c554a12ea64a0fd8a0129e2"},
+    {file = "MarkupSafe-2.1.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:9e9e3c4020aa2dc62d5dd6743a69e399ce3de58320522948af6140ac959ab863"},
+    {file = "MarkupSafe-2.1.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0042d6a9880b38e1dd9ff83146cc3c9c18a059b9360ceae207805567aacccc69"},
+    {file = "MarkupSafe-2.1.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:55d03fea4c4e9fd0ad75dc2e7e2b6757b80c152c032ea1d1de487461d8140efc"},
+    {file = "MarkupSafe-2.1.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ab3a886a237f6e9c9f4f7d272067e712cdb4efa774bef494dccad08f39d8ae6"},
+    {file = "MarkupSafe-2.1.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:abf5ebbec056817057bfafc0445916bb688a255a5146f900445d081db08cbabb"},
+    {file = "MarkupSafe-2.1.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e1a0d1924a5013d4f294087e00024ad25668234569289650929ab871231668e7"},
+    {file = "MarkupSafe-2.1.4-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:e7902211afd0af05fbadcc9a312e4cf10f27b779cf1323e78d52377ae4b72bea"},
+    {file = "MarkupSafe-2.1.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:c669391319973e49a7c6230c218a1e3044710bc1ce4c8e6eb71f7e6d43a2c131"},
+    {file = "MarkupSafe-2.1.4-cp311-cp311-win32.whl", hash = "sha256:31f57d64c336b8ccb1966d156932f3daa4fee74176b0fdc48ef580be774aae74"},
+    {file = "MarkupSafe-2.1.4-cp311-cp311-win_amd64.whl", hash = "sha256:54a7e1380dfece8847c71bf7e33da5d084e9b889c75eca19100ef98027bd9f56"},
+    {file = "MarkupSafe-2.1.4-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:a76cd37d229fc385738bd1ce4cba2a121cf26b53864c1772694ad0ad348e509e"},
+    {file = "MarkupSafe-2.1.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:987d13fe1d23e12a66ca2073b8d2e2a75cec2ecb8eab43ff5624ba0ad42764bc"},
+    {file = "MarkupSafe-2.1.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5244324676254697fe5c181fc762284e2c5fceeb1c4e3e7f6aca2b6f107e60dc"},
+    {file = "MarkupSafe-2.1.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:78bc995e004681246e85e28e068111a4c3f35f34e6c62da1471e844ee1446250"},
+    {file = "MarkupSafe-2.1.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a4d176cfdfde84f732c4a53109b293d05883e952bbba68b857ae446fa3119b4f"},
+    {file = "MarkupSafe-2.1.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:f9917691f410a2e0897d1ef99619fd3f7dd503647c8ff2475bf90c3cf222ad74"},
+    {file = "MarkupSafe-2.1.4-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:f06e5a9e99b7df44640767842f414ed5d7bedaaa78cd817ce04bbd6fd86e2dd6"},
+    {file = "MarkupSafe-2.1.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:396549cea79e8ca4ba65525470d534e8a41070e6b3500ce2414921099cb73e8d"},
+    {file = "MarkupSafe-2.1.4-cp312-cp312-win32.whl", hash = "sha256:f6be2d708a9d0e9b0054856f07ac7070fbe1754be40ca8525d5adccdbda8f475"},
+    {file = "MarkupSafe-2.1.4-cp312-cp312-win_amd64.whl", hash = "sha256:5045e892cfdaecc5b4c01822f353cf2c8feb88a6ec1c0adef2a2e705eef0f656"},
+    {file = "MarkupSafe-2.1.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:7a07f40ef8f0fbc5ef1000d0c78771f4d5ca03b4953fc162749772916b298fc4"},
+    {file = "MarkupSafe-2.1.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d18b66fe626ac412d96c2ab536306c736c66cf2a31c243a45025156cc190dc8a"},
+    {file = "MarkupSafe-2.1.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:698e84142f3f884114ea8cf83e7a67ca8f4ace8454e78fe960646c6c91c63bfa"},
+    {file = "MarkupSafe-2.1.4-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:49a3b78a5af63ec10d8604180380c13dcd870aba7928c1fe04e881d5c792dc4e"},
+    {file = "MarkupSafe-2.1.4-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:15866d7f2dc60cfdde12ebb4e75e41be862348b4728300c36cdf405e258415ec"},
+    {file = "MarkupSafe-2.1.4-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:6aa5e2e7fc9bc042ae82d8b79d795b9a62bd8f15ba1e7594e3db243f158b5565"},
+    {file = "MarkupSafe-2.1.4-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:54635102ba3cf5da26eb6f96c4b8c53af8a9c0d97b64bdcb592596a6255d8518"},
+    {file = "MarkupSafe-2.1.4-cp37-cp37m-win32.whl", hash = "sha256:3583a3a3ab7958e354dc1d25be74aee6228938312ee875a22330c4dc2e41beb0"},
+    {file = "MarkupSafe-2.1.4-cp37-cp37m-win_amd64.whl", hash = "sha256:d6e427c7378c7f1b2bef6a344c925b8b63623d3321c09a237b7cc0e77dd98ceb"},
+    {file = "MarkupSafe-2.1.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:bf1196dcc239e608605b716e7b166eb5faf4bc192f8a44b81e85251e62584bd2"},
+    {file = "MarkupSafe-2.1.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:4df98d4a9cd6a88d6a585852f56f2155c9cdb6aec78361a19f938810aa020954"},
+    {file = "MarkupSafe-2.1.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b835aba863195269ea358cecc21b400276747cc977492319fd7682b8cd2c253d"},
+    {file = "MarkupSafe-2.1.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:23984d1bdae01bee794267424af55eef4dfc038dc5d1272860669b2aa025c9e3"},
+    {file = "MarkupSafe-2.1.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1c98c33ffe20e9a489145d97070a435ea0679fddaabcafe19982fe9c971987d5"},
+    {file = "MarkupSafe-2.1.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:9896fca4a8eb246defc8b2a7ac77ef7553b638e04fbf170bff78a40fa8a91474"},
+    {file = "MarkupSafe-2.1.4-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:b0fe73bac2fed83839dbdbe6da84ae2a31c11cfc1c777a40dbd8ac8a6ed1560f"},
+    {file = "MarkupSafe-2.1.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:c7556bafeaa0a50e2fe7dc86e0382dea349ebcad8f010d5a7dc6ba568eaaa789"},
+    {file = "MarkupSafe-2.1.4-cp38-cp38-win32.whl", hash = "sha256:fc1a75aa8f11b87910ffd98de62b29d6520b6d6e8a3de69a70ca34dea85d2a8a"},
+    {file = "MarkupSafe-2.1.4-cp38-cp38-win_amd64.whl", hash = "sha256:3a66c36a3864df95e4f62f9167c734b3b1192cb0851b43d7cc08040c074c6279"},
+    {file = "MarkupSafe-2.1.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:765f036a3d00395a326df2835d8f86b637dbaf9832f90f5d196c3b8a7a5080cb"},
+    {file = "MarkupSafe-2.1.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:21e7af8091007bf4bebf4521184f4880a6acab8df0df52ef9e513d8e5db23411"},
+    {file = "MarkupSafe-2.1.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d5c31fe855c77cad679b302aabc42d724ed87c043b1432d457f4976add1c2c3e"},
+    {file = "MarkupSafe-2.1.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7653fa39578957bc42e5ebc15cf4361d9e0ee4b702d7d5ec96cdac860953c5b4"},
+    {file = "MarkupSafe-2.1.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:47bb5f0142b8b64ed1399b6b60f700a580335c8e1c57f2f15587bd072012decc"},
+    {file = "MarkupSafe-2.1.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:fe8512ed897d5daf089e5bd010c3dc03bb1bdae00b35588c49b98268d4a01e00"},
+    {file = "MarkupSafe-2.1.4-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:36d7626a8cca4d34216875aee5a1d3d654bb3dac201c1c003d182283e3205949"},
+    {file = "MarkupSafe-2.1.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:b6f14a9cd50c3cb100eb94b3273131c80d102e19bb20253ac7bd7336118a673a"},
+    {file = "MarkupSafe-2.1.4-cp39-cp39-win32.whl", hash = "sha256:c8f253a84dbd2c63c19590fa86a032ef3d8cc18923b8049d91bcdeeb2581fbf6"},
+    {file = "MarkupSafe-2.1.4-cp39-cp39-win_amd64.whl", hash = "sha256:8b570a1537367b52396e53325769608f2a687ec9a4363647af1cded8928af959"},
+    {file = "MarkupSafe-2.1.4.tar.gz", hash = "sha256:3aae9af4cac263007fd6309c64c6ab4506dd2b79382d9d19a1994f9240b8db4f"},
 ]
 
 [[package]]
@@ -1104,47 +1166,47 @@ test = ["pytest (>=7.2)", "pytest-cov (>=4.0)"]
 
 [[package]]
 name = "numpy"
-version = "1.26.2"
+version = "1.26.3"
 description = "Fundamental package for array computing in Python"
 optional = false
 python-versions = ">=3.9"
 files = [
-    {file = "numpy-1.26.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:3703fc9258a4a122d17043e57b35e5ef1c5a5837c3db8be396c82e04c1cf9b0f"},
-    {file = "numpy-1.26.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cc392fdcbd21d4be6ae1bb4475a03ce3b025cd49a9be5345d76d7585aea69440"},
-    {file = "numpy-1.26.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:36340109af8da8805d8851ef1d74761b3b88e81a9bd80b290bbfed61bd2b4f75"},
-    {file = "numpy-1.26.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bcc008217145b3d77abd3e4d5ef586e3bdfba8fe17940769f8aa09b99e856c00"},
-    {file = "numpy-1.26.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:3ced40d4e9e18242f70dd02d739e44698df3dcb010d31f495ff00a31ef6014fe"},
-    {file = "numpy-1.26.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b272d4cecc32c9e19911891446b72e986157e6a1809b7b56518b4f3755267523"},
-    {file = "numpy-1.26.2-cp310-cp310-win32.whl", hash = "sha256:22f8fc02fdbc829e7a8c578dd8d2e15a9074b630d4da29cda483337e300e3ee9"},
-    {file = "numpy-1.26.2-cp310-cp310-win_amd64.whl", hash = "sha256:26c9d33f8e8b846d5a65dd068c14e04018d05533b348d9eaeef6c1bd787f9919"},
-    {file = "numpy-1.26.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b96e7b9c624ef3ae2ae0e04fa9b460f6b9f17ad8b4bec6d7756510f1f6c0c841"},
-    {file = "numpy-1.26.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:aa18428111fb9a591d7a9cc1b48150097ba6a7e8299fb56bdf574df650e7d1f1"},
-    {file = "numpy-1.26.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:06fa1ed84aa60ea6ef9f91ba57b5ed963c3729534e6e54055fc151fad0423f0a"},
-    {file = "numpy-1.26.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:96ca5482c3dbdd051bcd1fce8034603d6ebfc125a7bd59f55b40d8f5d246832b"},
-    {file = "numpy-1.26.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:854ab91a2906ef29dc3925a064fcd365c7b4da743f84b123002f6139bcb3f8a7"},
-    {file = "numpy-1.26.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:f43740ab089277d403aa07567be138fc2a89d4d9892d113b76153e0e412409f8"},
-    {file = "numpy-1.26.2-cp311-cp311-win32.whl", hash = "sha256:a2bbc29fcb1771cd7b7425f98b05307776a6baf43035d3b80c4b0f29e9545186"},
-    {file = "numpy-1.26.2-cp311-cp311-win_amd64.whl", hash = "sha256:2b3fca8a5b00184828d12b073af4d0fc5fdd94b1632c2477526f6bd7842d700d"},
-    {file = "numpy-1.26.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a4cd6ed4a339c21f1d1b0fdf13426cb3b284555c27ac2f156dfdaaa7e16bfab0"},
-    {file = "numpy-1.26.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5d5244aabd6ed7f312268b9247be47343a654ebea52a60f002dc70c769048e75"},
-    {file = "numpy-1.26.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6a3cdb4d9c70e6b8c0814239ead47da00934666f668426fc6e94cce869e13fd7"},
-    {file = "numpy-1.26.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa317b2325f7aa0a9471663e6093c210cb2ae9c0ad824732b307d2c51983d5b6"},
-    {file = "numpy-1.26.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:174a8880739c16c925799c018f3f55b8130c1f7c8e75ab0a6fa9d41cab092fd6"},
-    {file = "numpy-1.26.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:f79b231bf5c16b1f39c7f4875e1ded36abee1591e98742b05d8a0fb55d8a3eec"},
-    {file = "numpy-1.26.2-cp312-cp312-win32.whl", hash = "sha256:4a06263321dfd3598cacb252f51e521a8cb4b6df471bb12a7ee5cbab20ea9167"},
-    {file = "numpy-1.26.2-cp312-cp312-win_amd64.whl", hash = "sha256:b04f5dc6b3efdaab541f7857351aac359e6ae3c126e2edb376929bd3b7f92d7e"},
-    {file = "numpy-1.26.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4eb8df4bf8d3d90d091e0146f6c28492b0be84da3e409ebef54349f71ed271ef"},
-    {file = "numpy-1.26.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1a13860fdcd95de7cf58bd6f8bc5a5ef81c0b0625eb2c9a783948847abbef2c2"},
-    {file = "numpy-1.26.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:64308ebc366a8ed63fd0bf426b6a9468060962f1a4339ab1074c228fa6ade8e3"},
-    {file = "numpy-1.26.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baf8aab04a2c0e859da118f0b38617e5ee65d75b83795055fb66c0d5e9e9b818"},
-    {file = "numpy-1.26.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d73a3abcac238250091b11caef9ad12413dab01669511779bc9b29261dd50210"},
-    {file = "numpy-1.26.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:b361d369fc7e5e1714cf827b731ca32bff8d411212fccd29ad98ad622449cc36"},
-    {file = "numpy-1.26.2-cp39-cp39-win32.whl", hash = "sha256:bd3f0091e845164a20bd5a326860c840fe2af79fa12e0469a12768a3ec578d80"},
-    {file = "numpy-1.26.2-cp39-cp39-win_amd64.whl", hash = "sha256:2beef57fb031dcc0dc8fa4fe297a742027b954949cabb52a2a376c144e5e6060"},
-    {file = "numpy-1.26.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:1cc3d5029a30fb5f06704ad6b23b35e11309491c999838c31f124fee32107c79"},
-    {file = "numpy-1.26.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:94cc3c222bb9fb5a12e334d0479b97bb2df446fbe622b470928f5284ffca3f8d"},
-    {file = "numpy-1.26.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:fe6b44fb8fcdf7eda4ef4461b97b3f63c466b27ab151bec2366db8b197387841"},
-    {file = "numpy-1.26.2.tar.gz", hash = "sha256:f65738447676ab5777f11e6bbbdb8ce11b785e105f690bc45966574816b6d3ea"},
+    {file = "numpy-1.26.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:806dd64230dbbfaca8a27faa64e2f414bf1c6622ab78cc4264f7f5f028fee3bf"},
+    {file = "numpy-1.26.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:02f98011ba4ab17f46f80f7f8f1c291ee7d855fcef0a5a98db80767a468c85cd"},
+    {file = "numpy-1.26.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6d45b3ec2faed4baca41c76617fcdcfa4f684ff7a151ce6fc78ad3b6e85af0a6"},
+    {file = "numpy-1.26.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bdd2b45bf079d9ad90377048e2747a0c82351989a2165821f0c96831b4a2a54b"},
+    {file = "numpy-1.26.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:211ddd1e94817ed2d175b60b6374120244a4dd2287f4ece45d49228b4d529178"},
+    {file = "numpy-1.26.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b1240f767f69d7c4c8a29adde2310b871153df9b26b5cb2b54a561ac85146485"},
+    {file = "numpy-1.26.3-cp310-cp310-win32.whl", hash = "sha256:21a9484e75ad018974a2fdaa216524d64ed4212e418e0a551a2d83403b0531d3"},
+    {file = "numpy-1.26.3-cp310-cp310-win_amd64.whl", hash = "sha256:9e1591f6ae98bcfac2a4bbf9221c0b92ab49762228f38287f6eeb5f3f55905ce"},
+    {file = "numpy-1.26.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b831295e5472954104ecb46cd98c08b98b49c69fdb7040483aff799a755a7374"},
+    {file = "numpy-1.26.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9e87562b91f68dd8b1c39149d0323b42e0082db7ddb8e934ab4c292094d575d6"},
+    {file = "numpy-1.26.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c66d6fec467e8c0f975818c1796d25c53521124b7cfb760114be0abad53a0a2"},
+    {file = "numpy-1.26.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f25e2811a9c932e43943a2615e65fc487a0b6b49218899e62e426e7f0a57eeda"},
+    {file = "numpy-1.26.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:af36e0aa45e25c9f57bf684b1175e59ea05d9a7d3e8e87b7ae1a1da246f2767e"},
+    {file = "numpy-1.26.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:51c7f1b344f302067b02e0f5b5d2daa9ed4a721cf49f070280ac202738ea7f00"},
+    {file = "numpy-1.26.3-cp311-cp311-win32.whl", hash = "sha256:7ca4f24341df071877849eb2034948459ce3a07915c2734f1abb4018d9c49d7b"},
+    {file = "numpy-1.26.3-cp311-cp311-win_amd64.whl", hash = "sha256:39763aee6dfdd4878032361b30b2b12593fb445ddb66bbac802e2113eb8a6ac4"},
+    {file = "numpy-1.26.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a7081fd19a6d573e1a05e600c82a1c421011db7935ed0d5c483e9dd96b99cf13"},
+    {file = "numpy-1.26.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:12c70ac274b32bc00c7f61b515126c9205323703abb99cd41836e8125ea0043e"},
+    {file = "numpy-1.26.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7f784e13e598e9594750b2ef6729bcd5a47f6cfe4a12cca13def35e06d8163e3"},
+    {file = "numpy-1.26.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5f24750ef94d56ce6e33e4019a8a4d68cfdb1ef661a52cdaee628a56d2437419"},
+    {file = "numpy-1.26.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:77810ef29e0fb1d289d225cabb9ee6cf4d11978a00bb99f7f8ec2132a84e0166"},
+    {file = "numpy-1.26.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8ed07a90f5450d99dad60d3799f9c03c6566709bd53b497eb9ccad9a55867f36"},
+    {file = "numpy-1.26.3-cp312-cp312-win32.whl", hash = "sha256:f73497e8c38295aaa4741bdfa4fda1a5aedda5473074369eca10626835445511"},
+    {file = "numpy-1.26.3-cp312-cp312-win_amd64.whl", hash = "sha256:da4b0c6c699a0ad73c810736303f7fbae483bcb012e38d7eb06a5e3b432c981b"},
+    {file = "numpy-1.26.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1666f634cb3c80ccbd77ec97bc17337718f56d6658acf5d3b906ca03e90ce87f"},
+    {file = "numpy-1.26.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:18c3319a7d39b2c6a9e3bb75aab2304ab79a811ac0168a671a62e6346c29b03f"},
+    {file = "numpy-1.26.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0b7e807d6888da0db6e7e75838444d62495e2b588b99e90dd80c3459594e857b"},
+    {file = "numpy-1.26.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b4d362e17bcb0011738c2d83e0a65ea8ce627057b2fdda37678f4374a382a137"},
+    {file = "numpy-1.26.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b8c275f0ae90069496068c714387b4a0eba5d531aace269559ff2b43655edd58"},
+    {file = "numpy-1.26.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:cc0743f0302b94f397a4a65a660d4cd24267439eb16493fb3caad2e4389bccbb"},
+    {file = "numpy-1.26.3-cp39-cp39-win32.whl", hash = "sha256:9bc6d1a7f8cedd519c4b7b1156d98e051b726bf160715b769106661d567b3f03"},
+    {file = "numpy-1.26.3-cp39-cp39-win_amd64.whl", hash = "sha256:867e3644e208c8922a3be26fc6bbf112a035f50f0a86497f98f228c50c607bb2"},
+    {file = "numpy-1.26.3-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:3c67423b3703f8fbd90f5adaa37f85b5794d3366948efe9a5190a5f3a83fc34e"},
+    {file = "numpy-1.26.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46f47ee566d98849323f01b349d58f2557f02167ee301e5e28809a8c0e27a2d0"},
+    {file = "numpy-1.26.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:a8474703bffc65ca15853d5fd4d06b18138ae90c17c8d12169968e998e448bb5"},
+    {file = "numpy-1.26.3.tar.gz", hash = "sha256:697df43e2b6310ecc9d95f05d5ef20eacc09c7c4ecc9da3f235d39e71b7da1e4"},
 ]
 
 [[package]]
@@ -1455,36 +1517,40 @@ files = [
 
 [[package]]
 name = "pandas"
-version = "2.1.4"
+version = "2.2.0"
 description = "Powerful data structures for data analysis, time series, and statistics"
 optional = true
 python-versions = ">=3.9"
 files = [
-    {file = "pandas-2.1.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bdec823dc6ec53f7a6339a0e34c68b144a7a1fd28d80c260534c39c62c5bf8c9"},
-    {file = "pandas-2.1.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:294d96cfaf28d688f30c918a765ea2ae2e0e71d3536754f4b6de0ea4a496d034"},
-    {file = "pandas-2.1.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b728fb8deba8905b319f96447a27033969f3ea1fea09d07d296c9030ab2ed1d"},
-    {file = "pandas-2.1.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:00028e6737c594feac3c2df15636d73ace46b8314d236100b57ed7e4b9ebe8d9"},
-    {file = "pandas-2.1.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:426dc0f1b187523c4db06f96fb5c8d1a845e259c99bda74f7de97bd8a3bb3139"},
-    {file = "pandas-2.1.4-cp310-cp310-win_amd64.whl", hash = "sha256:f237e6ca6421265643608813ce9793610ad09b40154a3344a088159590469e46"},
-    {file = "pandas-2.1.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b7d852d16c270e4331f6f59b3e9aa23f935f5c4b0ed2d0bc77637a8890a5d092"},
-    {file = "pandas-2.1.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bd7d5f2f54f78164b3d7a40f33bf79a74cdee72c31affec86bfcabe7e0789821"},
-    {file = "pandas-2.1.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0aa6e92e639da0d6e2017d9ccff563222f4eb31e4b2c3cf32a2a392fc3103c0d"},
-    {file = "pandas-2.1.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d797591b6846b9db79e65dc2d0d48e61f7db8d10b2a9480b4e3faaddc421a171"},
-    {file = "pandas-2.1.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d2d3e7b00f703aea3945995ee63375c61b2e6aa5aa7871c5d622870e5e137623"},
-    {file = "pandas-2.1.4-cp311-cp311-win_amd64.whl", hash = "sha256:dc9bf7ade01143cddc0074aa6995edd05323974e6e40d9dbde081021ded8510e"},
-    {file = "pandas-2.1.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:482d5076e1791777e1571f2e2d789e940dedd927325cc3cb6d0800c6304082f6"},
-    {file = "pandas-2.1.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8a706cfe7955c4ca59af8c7a0517370eafbd98593155b48f10f9811da440248b"},
-    {file = "pandas-2.1.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b0513a132a15977b4a5b89aabd304647919bc2169eac4c8536afb29c07c23540"},
-    {file = "pandas-2.1.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e9f17f2b6fc076b2a0078862547595d66244db0f41bf79fc5f64a5c4d635bead"},
-    {file = "pandas-2.1.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:45d63d2a9b1b37fa6c84a68ba2422dc9ed018bdaa668c7f47566a01188ceeec1"},
-    {file = "pandas-2.1.4-cp312-cp312-win_amd64.whl", hash = "sha256:f69b0c9bb174a2342818d3e2778584e18c740d56857fc5cdb944ec8bbe4082cf"},
-    {file = "pandas-2.1.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3f06bda01a143020bad20f7a85dd5f4a1600112145f126bc9e3e42077c24ef34"},
-    {file = "pandas-2.1.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ab5796839eb1fd62a39eec2916d3e979ec3130509930fea17fe6f81e18108f6a"},
-    {file = "pandas-2.1.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:edbaf9e8d3a63a9276d707b4d25930a262341bca9874fcb22eff5e3da5394732"},
-    {file = "pandas-2.1.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ebfd771110b50055712b3b711b51bee5d50135429364d0498e1213a7adc2be8"},
-    {file = "pandas-2.1.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8ea107e0be2aba1da619cc6ba3f999b2bfc9669a83554b1904ce3dd9507f0860"},
-    {file = "pandas-2.1.4-cp39-cp39-win_amd64.whl", hash = "sha256:d65148b14788b3758daf57bf42725caa536575da2b64df9964c563b015230984"},
-    {file = "pandas-2.1.4.tar.gz", hash = "sha256:fcb68203c833cc735321512e13861358079a96c174a61f5116a1de89c58c0ef7"},
+    {file = "pandas-2.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8108ee1712bb4fa2c16981fba7e68b3f6ea330277f5ca34fa8d557e986a11670"},
+    {file = "pandas-2.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:736da9ad4033aeab51d067fc3bd69a0ba36f5a60f66a527b3d72e2030e63280a"},
+    {file = "pandas-2.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:38e0b4fc3ddceb56ec8a287313bc22abe17ab0eb184069f08fc6a9352a769b18"},
+    {file = "pandas-2.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:20404d2adefe92aed3b38da41d0847a143a09be982a31b85bc7dd565bdba0f4e"},
+    {file = "pandas-2.2.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:7ea3ee3f125032bfcade3a4cf85131ed064b4f8dd23e5ce6fa16473e48ebcaf5"},
+    {file = "pandas-2.2.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f9670b3ac00a387620489dfc1bca66db47a787f4e55911f1293063a78b108df1"},
+    {file = "pandas-2.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:5a946f210383c7e6d16312d30b238fd508d80d927014f3b33fb5b15c2f895430"},
+    {file = "pandas-2.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a1b438fa26b208005c997e78672f1aa8138f67002e833312e6230f3e57fa87d5"},
+    {file = "pandas-2.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8ce2fbc8d9bf303ce54a476116165220a1fedf15985b09656b4b4275300e920b"},
+    {file = "pandas-2.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2707514a7bec41a4ab81f2ccce8b382961a29fbe9492eab1305bb075b2b1ff4f"},
+    {file = "pandas-2.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:85793cbdc2d5bc32620dc8ffa715423f0c680dacacf55056ba13454a5be5de88"},
+    {file = "pandas-2.2.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:cfd6c2491dc821b10c716ad6776e7ab311f7df5d16038d0b7458bc0b67dc10f3"},
+    {file = "pandas-2.2.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:a146b9dcacc3123aa2b399df1a284de5f46287a4ab4fbfc237eac98a92ebcb71"},
+    {file = "pandas-2.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:fbc1b53c0e1fdf16388c33c3cca160f798d38aea2978004dd3f4d3dec56454c9"},
+    {file = "pandas-2.2.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a41d06f308a024981dcaa6c41f2f2be46a6b186b902c94c2674e8cb5c42985bc"},
+    {file = "pandas-2.2.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:159205c99d7a5ce89ecfc37cb08ed179de7783737cea403b295b5eda8e9c56d1"},
+    {file = "pandas-2.2.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eb1e1f3861ea9132b32f2133788f3b14911b68102d562715d71bd0013bc45440"},
+    {file = "pandas-2.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:761cb99b42a69005dec2b08854fb1d4888fdf7b05db23a8c5a099e4b886a2106"},
+    {file = "pandas-2.2.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:a20628faaf444da122b2a64b1e5360cde100ee6283ae8effa0d8745153809a2e"},
+    {file = "pandas-2.2.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:f5be5d03ea2073627e7111f61b9f1f0d9625dc3c4d8dda72cc827b0c58a1d042"},
+    {file = "pandas-2.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:a626795722d893ed6aacb64d2401d017ddc8a2341b49e0384ab9bf7112bdec30"},
+    {file = "pandas-2.2.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9f66419d4a41132eb7e9a73dcec9486cf5019f52d90dd35547af11bc58f8637d"},
+    {file = "pandas-2.2.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:57abcaeda83fb80d447f28ab0cc7b32b13978f6f733875ebd1ed14f8fbc0f4ab"},
+    {file = "pandas-2.2.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e60f1f7dba3c2d5ca159e18c46a34e7ca7247a73b5dd1a22b6d59707ed6b899a"},
+    {file = "pandas-2.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eb61dc8567b798b969bcc1fc964788f5a68214d333cade8319c7ab33e2b5d88a"},
+    {file = "pandas-2.2.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:52826b5f4ed658fa2b729264d63f6732b8b29949c7fd234510d57c61dbeadfcd"},
+    {file = "pandas-2.2.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:bde2bc699dbd80d7bc7f9cab1e23a95c4375de615860ca089f34e7c64f4a8de7"},
+    {file = "pandas-2.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:3de918a754bbf2da2381e8a3dcc45eede8cd7775b047b923f9006d5f876802ae"},
+    {file = "pandas-2.2.0.tar.gz", hash = "sha256:30b83f7c3eb217fb4d1b494a57a2fda5444f17834f5df2de6b2ffff68dc3c8e2"},
 ]
 
 [package.dependencies]
@@ -1495,31 +1561,31 @@ numpy = [
 ]
 python-dateutil = ">=2.8.2"
 pytz = ">=2020.1"
-tzdata = ">=2022.1"
+tzdata = ">=2022.7"
 
 [package.extras]
-all = ["PyQt5 (>=5.15.6)", "SQLAlchemy (>=1.4.36)", "beautifulsoup4 (>=4.11.1)", "bottleneck (>=1.3.4)", "dataframe-api-compat (>=0.1.7)", "fastparquet (>=0.8.1)", "fsspec (>=2022.05.0)", "gcsfs (>=2022.05.0)", "html5lib (>=1.1)", "hypothesis (>=6.46.1)", "jinja2 (>=3.1.2)", "lxml (>=4.8.0)", "matplotlib (>=3.6.1)", "numba (>=0.55.2)", "numexpr (>=2.8.0)", "odfpy (>=1.4.1)", "openpyxl (>=3.0.10)", "pandas-gbq (>=0.17.5)", "psycopg2 (>=2.9.3)", "pyarrow (>=7.0.0)", "pymysql (>=1.0.2)", "pyreadstat (>=1.1.5)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)", "pyxlsb (>=1.0.9)", "qtpy (>=2.2.0)", "s3fs (>=2022.05.0)", "scipy (>=1.8.1)", "tables (>=3.7.0)", "tabulate (>=0.8.10)", "xarray (>=2022.03.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.3)", "zstandard (>=0.17.0)"]
-aws = ["s3fs (>=2022.05.0)"]
-clipboard = ["PyQt5 (>=5.15.6)", "qtpy (>=2.2.0)"]
-compression = ["zstandard (>=0.17.0)"]
-computation = ["scipy (>=1.8.1)", "xarray (>=2022.03.0)"]
+all = ["PyQt5 (>=5.15.9)", "SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)", "beautifulsoup4 (>=4.11.2)", "bottleneck (>=1.3.6)", "dataframe-api-compat (>=0.1.7)", "fastparquet (>=2022.12.0)", "fsspec (>=2022.11.0)", "gcsfs (>=2022.11.0)", "html5lib (>=1.1)", "hypothesis (>=6.46.1)", "jinja2 (>=3.1.2)", "lxml (>=4.9.2)", "matplotlib (>=3.6.3)", "numba (>=0.56.4)", "numexpr (>=2.8.4)", "odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", "pandas-gbq (>=0.19.0)", "psycopg2 (>=2.9.6)", "pyarrow (>=10.0.1)", "pymysql (>=1.0.2)", "pyreadstat (>=1.2.0)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)", "python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "qtpy (>=2.3.0)", "s3fs (>=2022.11.0)", "scipy (>=1.10.0)", "tables (>=3.8.0)", "tabulate (>=0.9.0)", "xarray (>=2022.12.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)", "zstandard (>=0.19.0)"]
+aws = ["s3fs (>=2022.11.0)"]
+clipboard = ["PyQt5 (>=5.15.9)", "qtpy (>=2.3.0)"]
+compression = ["zstandard (>=0.19.0)"]
+computation = ["scipy (>=1.10.0)", "xarray (>=2022.12.0)"]
 consortium-standard = ["dataframe-api-compat (>=0.1.7)"]
-excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.0.10)", "pyxlsb (>=1.0.9)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.3)"]
-feather = ["pyarrow (>=7.0.0)"]
-fss = ["fsspec (>=2022.05.0)"]
-gcp = ["gcsfs (>=2022.05.0)", "pandas-gbq (>=0.17.5)"]
-hdf5 = ["tables (>=3.7.0)"]
-html = ["beautifulsoup4 (>=4.11.1)", "html5lib (>=1.1)", "lxml (>=4.8.0)"]
-mysql = ["SQLAlchemy (>=1.4.36)", "pymysql (>=1.0.2)"]
-output-formatting = ["jinja2 (>=3.1.2)", "tabulate (>=0.8.10)"]
-parquet = ["pyarrow (>=7.0.0)"]
-performance = ["bottleneck (>=1.3.4)", "numba (>=0.55.2)", "numexpr (>=2.8.0)"]
-plot = ["matplotlib (>=3.6.1)"]
-postgresql = ["SQLAlchemy (>=1.4.36)", "psycopg2 (>=2.9.3)"]
-spss = ["pyreadstat (>=1.1.5)"]
-sql-other = ["SQLAlchemy (>=1.4.36)"]
+excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", "python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)"]
+feather = ["pyarrow (>=10.0.1)"]
+fss = ["fsspec (>=2022.11.0)"]
+gcp = ["gcsfs (>=2022.11.0)", "pandas-gbq (>=0.19.0)"]
+hdf5 = ["tables (>=3.8.0)"]
+html = ["beautifulsoup4 (>=4.11.2)", "html5lib (>=1.1)", "lxml (>=4.9.2)"]
+mysql = ["SQLAlchemy (>=2.0.0)", "pymysql (>=1.0.2)"]
+output-formatting = ["jinja2 (>=3.1.2)", "tabulate (>=0.9.0)"]
+parquet = ["pyarrow (>=10.0.1)"]
+performance = ["bottleneck (>=1.3.6)", "numba (>=0.56.4)", "numexpr (>=2.8.4)"]
+plot = ["matplotlib (>=3.6.3)"]
+postgresql = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "psycopg2 (>=2.9.6)"]
+spss = ["pyreadstat (>=1.2.0)"]
+sql-other = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)"]
 test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"]
-xml = ["lxml (>=4.8.0)"]
+xml = ["lxml (>=4.9.2)"]
 
 [[package]]
 name = "peft"
@@ -1550,80 +1616,98 @@ test = ["black (>=22.0,<23.0)", "datasets", "diffusers", "hf-doc-builder", "para
 
 [[package]]
 name = "pillow"
-version = "10.1.0"
+version = "10.2.0"
 description = "Python Imaging Library (Fork)"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "Pillow-10.1.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:1ab05f3db77e98f93964697c8efc49c7954b08dd61cff526b7f2531a22410106"},
-    {file = "Pillow-10.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6932a7652464746fcb484f7fc3618e6503d2066d853f68a4bd97193a3996e273"},
-    {file = "Pillow-10.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5f63b5a68daedc54c7c3464508d8c12075e56dcfbd42f8c1bf40169061ae666"},
-    {file = "Pillow-10.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0949b55eb607898e28eaccb525ab104b2d86542a85c74baf3a6dc24002edec2"},
-    {file = "Pillow-10.1.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:ae88931f93214777c7a3aa0a8f92a683f83ecde27f65a45f95f22d289a69e593"},
-    {file = "Pillow-10.1.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:b0eb01ca85b2361b09480784a7931fc648ed8b7836f01fb9241141b968feb1db"},
-    {file = "Pillow-10.1.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d27b5997bdd2eb9fb199982bb7eb6164db0426904020dc38c10203187ae2ff2f"},
-    {file = "Pillow-10.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:7df5608bc38bd37ef585ae9c38c9cd46d7c81498f086915b0f97255ea60c2818"},
-    {file = "Pillow-10.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:41f67248d92a5e0a2076d3517d8d4b1e41a97e2df10eb8f93106c89107f38b57"},
-    {file = "Pillow-10.1.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:1fb29c07478e6c06a46b867e43b0bcdb241b44cc52be9bc25ce5944eed4648e7"},
-    {file = "Pillow-10.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2cdc65a46e74514ce742c2013cd4a2d12e8553e3a2563c64879f7c7e4d28bce7"},
-    {file = "Pillow-10.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50d08cd0a2ecd2a8657bd3d82c71efd5a58edb04d9308185d66c3a5a5bed9610"},
-    {file = "Pillow-10.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:062a1610e3bc258bff2328ec43f34244fcec972ee0717200cb1425214fe5b839"},
-    {file = "Pillow-10.1.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:61f1a9d247317fa08a308daaa8ee7b3f760ab1809ca2da14ecc88ae4257d6172"},
-    {file = "Pillow-10.1.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:a646e48de237d860c36e0db37ecaecaa3619e6f3e9d5319e527ccbc8151df061"},
-    {file = "Pillow-10.1.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:47e5bf85b80abc03be7455c95b6d6e4896a62f6541c1f2ce77a7d2bb832af262"},
-    {file = "Pillow-10.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:a92386125e9ee90381c3369f57a2a50fa9e6aa8b1cf1d9c4b200d41a7dd8e992"},
-    {file = "Pillow-10.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:0f7c276c05a9767e877a0b4c5050c8bee6a6d960d7f0c11ebda6b99746068c2a"},
-    {file = "Pillow-10.1.0-cp312-cp312-macosx_10_10_x86_64.whl", hash = "sha256:a89b8312d51715b510a4fe9fc13686283f376cfd5abca8cd1c65e4c76e21081b"},
-    {file = "Pillow-10.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:00f438bb841382b15d7deb9a05cc946ee0f2c352653c7aa659e75e592f6fa17d"},
-    {file = "Pillow-10.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3d929a19f5469b3f4df33a3df2983db070ebb2088a1e145e18facbc28cae5b27"},
-    {file = "Pillow-10.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a92109192b360634a4489c0c756364c0c3a2992906752165ecb50544c251312"},
-    {file = "Pillow-10.1.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:0248f86b3ea061e67817c47ecbe82c23f9dd5d5226200eb9090b3873d3ca32de"},
-    {file = "Pillow-10.1.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:9882a7451c680c12f232a422730f986a1fcd808da0fd428f08b671237237d651"},
-    {file = "Pillow-10.1.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:1c3ac5423c8c1da5928aa12c6e258921956757d976405e9467c5f39d1d577a4b"},
-    {file = "Pillow-10.1.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:806abdd8249ba3953c33742506fe414880bad78ac25cc9a9b1c6ae97bedd573f"},
-    {file = "Pillow-10.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:eaed6977fa73408b7b8a24e8b14e59e1668cfc0f4c40193ea7ced8e210adf996"},
-    {file = "Pillow-10.1.0-cp38-cp38-macosx_10_10_x86_64.whl", hash = "sha256:fe1e26e1ffc38be097f0ba1d0d07fcade2bcfd1d023cda5b29935ae8052bd793"},
-    {file = "Pillow-10.1.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7a7e3daa202beb61821c06d2517428e8e7c1aab08943e92ec9e5755c2fc9ba5e"},
-    {file = "Pillow-10.1.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:24fadc71218ad2b8ffe437b54876c9382b4a29e030a05a9879f615091f42ffc2"},
-    {file = "Pillow-10.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fa1d323703cfdac2036af05191b969b910d8f115cf53093125e4058f62012c9a"},
-    {file = "Pillow-10.1.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:912e3812a1dbbc834da2b32299b124b5ddcb664ed354916fd1ed6f193f0e2d01"},
-    {file = "Pillow-10.1.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:7dbaa3c7de82ef37e7708521be41db5565004258ca76945ad74a8e998c30af8d"},
-    {file = "Pillow-10.1.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:9d7bc666bd8c5a4225e7ac71f2f9d12466ec555e89092728ea0f5c0c2422ea80"},
-    {file = "Pillow-10.1.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:baada14941c83079bf84c037e2d8b7506ce201e92e3d2fa0d1303507a8538212"},
-    {file = "Pillow-10.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:2ef6721c97894a7aa77723740a09547197533146fba8355e86d6d9a4a1056b14"},
-    {file = "Pillow-10.1.0-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:0a026c188be3b443916179f5d04548092e253beb0c3e2ee0a4e2cdad72f66099"},
-    {file = "Pillow-10.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:04f6f6149f266a100374ca3cc368b67fb27c4af9f1cc8cb6306d849dcdf12616"},
-    {file = "Pillow-10.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb40c011447712d2e19cc261c82655f75f32cb724788df315ed992a4d65696bb"},
-    {file = "Pillow-10.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1a8413794b4ad9719346cd9306118450b7b00d9a15846451549314a58ac42219"},
-    {file = "Pillow-10.1.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:c9aeea7b63edb7884b031a35305629a7593272b54f429a9869a4f63a1bf04c34"},
-    {file = "Pillow-10.1.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:b4005fee46ed9be0b8fb42be0c20e79411533d1fd58edabebc0dd24626882cfd"},
-    {file = "Pillow-10.1.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:4d0152565c6aa6ebbfb1e5d8624140a440f2b99bf7afaafbdbf6430426497f28"},
-    {file = "Pillow-10.1.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d921bc90b1defa55c9917ca6b6b71430e4286fc9e44c55ead78ca1a9f9eba5f2"},
-    {file = "Pillow-10.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:cfe96560c6ce2f4c07d6647af2d0f3c54cc33289894ebd88cfbb3bcd5391e256"},
-    {file = "Pillow-10.1.0-pp310-pypy310_pp73-macosx_10_10_x86_64.whl", hash = "sha256:937bdc5a7f5343d1c97dc98149a0be7eb9704e937fe3dc7140e229ae4fc572a7"},
-    {file = "Pillow-10.1.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1c25762197144e211efb5f4e8ad656f36c8d214d390585d1d21281f46d556ba"},
-    {file = "Pillow-10.1.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:afc8eef765d948543a4775f00b7b8c079b3321d6b675dde0d02afa2ee23000b4"},
-    {file = "Pillow-10.1.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:883f216eac8712b83a63f41b76ddfb7b2afab1b74abbb413c5df6680f071a6b9"},
-    {file = "Pillow-10.1.0-pp39-pypy39_pp73-macosx_10_10_x86_64.whl", hash = "sha256:b920e4d028f6442bea9a75b7491c063f0b9a3972520731ed26c83e254302eb1e"},
-    {file = "Pillow-10.1.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1c41d960babf951e01a49c9746f92c5a7e0d939d1652d7ba30f6b3090f27e412"},
-    {file = "Pillow-10.1.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:1fafabe50a6977ac70dfe829b2d5735fd54e190ab55259ec8aea4aaea412fa0b"},
-    {file = "Pillow-10.1.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:3b834f4b16173e5b92ab6566f0473bfb09f939ba14b23b8da1f54fa63e4b623f"},
-    {file = "Pillow-10.1.0.tar.gz", hash = "sha256:e6bf8de6c36ed96c86ea3b6e1d5273c53f46ef518a062464cd7ef5dd2cf92e38"},
+    {file = "pillow-10.2.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:7823bdd049099efa16e4246bdf15e5a13dbb18a51b68fa06d6c1d4d8b99a796e"},
+    {file = "pillow-10.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:83b2021f2ade7d1ed556bc50a399127d7fb245e725aa0113ebd05cfe88aaf588"},
+    {file = "pillow-10.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6fad5ff2f13d69b7e74ce5b4ecd12cc0ec530fcee76356cac6742785ff71c452"},
+    {file = "pillow-10.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da2b52b37dad6d9ec64e653637a096905b258d2fc2b984c41ae7d08b938a67e4"},
+    {file = "pillow-10.2.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:47c0995fc4e7f79b5cfcab1fc437ff2890b770440f7696a3ba065ee0fd496563"},
+    {file = "pillow-10.2.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:322bdf3c9b556e9ffb18f93462e5f749d3444ce081290352c6070d014c93feb2"},
+    {file = "pillow-10.2.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:51f1a1bffc50e2e9492e87d8e09a17c5eea8409cda8d3f277eb6edc82813c17c"},
+    {file = "pillow-10.2.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:69ffdd6120a4737710a9eee73e1d2e37db89b620f702754b8f6e62594471dee0"},
+    {file = "pillow-10.2.0-cp310-cp310-win32.whl", hash = "sha256:c6dafac9e0f2b3c78df97e79af707cdc5ef8e88208d686a4847bab8266870023"},
+    {file = "pillow-10.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:aebb6044806f2e16ecc07b2a2637ee1ef67a11840a66752751714a0d924adf72"},
+    {file = "pillow-10.2.0-cp310-cp310-win_arm64.whl", hash = "sha256:7049e301399273a0136ff39b84c3678e314f2158f50f517bc50285fb5ec847ad"},
+    {file = "pillow-10.2.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:35bb52c37f256f662abdfa49d2dfa6ce5d93281d323a9af377a120e89a9eafb5"},
+    {file = "pillow-10.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9c23f307202661071d94b5e384e1e1dc7dfb972a28a2310e4ee16103e66ddb67"},
+    {file = "pillow-10.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:773efe0603db30c281521a7c0214cad7836c03b8ccff897beae9b47c0b657d61"},
+    {file = "pillow-10.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:11fa2e5984b949b0dd6d7a94d967743d87c577ff0b83392f17cb3990d0d2fd6e"},
+    {file = "pillow-10.2.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:716d30ed977be8b37d3ef185fecb9e5a1d62d110dfbdcd1e2a122ab46fddb03f"},
+    {file = "pillow-10.2.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:a086c2af425c5f62a65e12fbf385f7c9fcb8f107d0849dba5839461a129cf311"},
+    {file = "pillow-10.2.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:c8de2789052ed501dd829e9cae8d3dcce7acb4777ea4a479c14521c942d395b1"},
+    {file = "pillow-10.2.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:609448742444d9290fd687940ac0b57fb35e6fd92bdb65386e08e99af60bf757"},
+    {file = "pillow-10.2.0-cp311-cp311-win32.whl", hash = "sha256:823ef7a27cf86df6597fa0671066c1b596f69eba53efa3d1e1cb8b30f3533068"},
+    {file = "pillow-10.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:1da3b2703afd040cf65ec97efea81cfba59cdbed9c11d8efc5ab09df9509fc56"},
+    {file = "pillow-10.2.0-cp311-cp311-win_arm64.whl", hash = "sha256:edca80cbfb2b68d7b56930b84a0e45ae1694aeba0541f798e908a49d66b837f1"},
+    {file = "pillow-10.2.0-cp312-cp312-macosx_10_10_x86_64.whl", hash = "sha256:1b5e1b74d1bd1b78bc3477528919414874748dd363e6272efd5abf7654e68bef"},
+    {file = "pillow-10.2.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0eae2073305f451d8ecacb5474997c08569fb4eb4ac231ffa4ad7d342fdc25ac"},
+    {file = "pillow-10.2.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b7c2286c23cd350b80d2fc9d424fc797575fb16f854b831d16fd47ceec078f2c"},
+    {file = "pillow-10.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1e23412b5c41e58cec602f1135c57dfcf15482013ce6e5f093a86db69646a5aa"},
+    {file = "pillow-10.2.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:52a50aa3fb3acb9cf7213573ef55d31d6eca37f5709c69e6858fe3bc04a5c2a2"},
+    {file = "pillow-10.2.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:127cee571038f252a552760076407f9cff79761c3d436a12af6000cd182a9d04"},
+    {file = "pillow-10.2.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:8d12251f02d69d8310b046e82572ed486685c38f02176bd08baf216746eb947f"},
+    {file = "pillow-10.2.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:54f1852cd531aa981bc0965b7d609f5f6cc8ce8c41b1139f6ed6b3c54ab82bfb"},
+    {file = "pillow-10.2.0-cp312-cp312-win32.whl", hash = "sha256:257d8788df5ca62c980314053197f4d46eefedf4e6175bc9412f14412ec4ea2f"},
+    {file = "pillow-10.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:154e939c5f0053a383de4fd3d3da48d9427a7e985f58af8e94d0b3c9fcfcf4f9"},
+    {file = "pillow-10.2.0-cp312-cp312-win_arm64.whl", hash = "sha256:f379abd2f1e3dddb2b61bc67977a6b5a0a3f7485538bcc6f39ec76163891ee48"},
+    {file = "pillow-10.2.0-cp38-cp38-macosx_10_10_x86_64.whl", hash = "sha256:8373c6c251f7ef8bda6675dd6d2b3a0fcc31edf1201266b5cf608b62a37407f9"},
+    {file = "pillow-10.2.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:870ea1ada0899fd0b79643990809323b389d4d1d46c192f97342eeb6ee0b8483"},
+    {file = "pillow-10.2.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b4b6b1e20608493548b1f32bce8cca185bf0480983890403d3b8753e44077129"},
+    {file = "pillow-10.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3031709084b6e7852d00479fd1d310b07d0ba82765f973b543c8af5061cf990e"},
+    {file = "pillow-10.2.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:3ff074fc97dd4e80543a3e91f69d58889baf2002b6be64347ea8cf5533188213"},
+    {file = "pillow-10.2.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:cb4c38abeef13c61d6916f264d4845fab99d7b711be96c326b84df9e3e0ff62d"},
+    {file = "pillow-10.2.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:b1b3020d90c2d8e1dae29cf3ce54f8094f7938460fb5ce8bc5c01450b01fbaf6"},
+    {file = "pillow-10.2.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:170aeb00224ab3dc54230c797f8404507240dd868cf52066f66a41b33169bdbe"},
+    {file = "pillow-10.2.0-cp38-cp38-win32.whl", hash = "sha256:c4225f5220f46b2fde568c74fca27ae9771536c2e29d7c04f4fb62c83275ac4e"},
+    {file = "pillow-10.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:0689b5a8c5288bc0504d9fcee48f61a6a586b9b98514d7d29b840143d6734f39"},
+    {file = "pillow-10.2.0-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:b792a349405fbc0163190fde0dc7b3fef3c9268292586cf5645598b48e63dc67"},
+    {file = "pillow-10.2.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c570f24be1e468e3f0ce7ef56a89a60f0e05b30a3669a459e419c6eac2c35364"},
+    {file = "pillow-10.2.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8ecd059fdaf60c1963c58ceb8997b32e9dc1b911f5da5307aab614f1ce5c2fb"},
+    {file = "pillow-10.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c365fd1703040de1ec284b176d6af5abe21b427cb3a5ff68e0759e1e313a5e7e"},
+    {file = "pillow-10.2.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:70c61d4c475835a19b3a5aa42492409878bbca7438554a1f89d20d58a7c75c01"},
+    {file = "pillow-10.2.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:b6f491cdf80ae540738859d9766783e3b3c8e5bd37f5dfa0b76abdecc5081f13"},
+    {file = "pillow-10.2.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:9d189550615b4948f45252d7f005e53c2040cea1af5b60d6f79491a6e147eef7"},
+    {file = "pillow-10.2.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:49d9ba1ed0ef3e061088cd1e7538a0759aab559e2e0a80a36f9fd9d8c0c21591"},
+    {file = "pillow-10.2.0-cp39-cp39-win32.whl", hash = "sha256:babf5acfede515f176833ed6028754cbcd0d206f7f614ea3447d67c33be12516"},
+    {file = "pillow-10.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:0304004f8067386b477d20a518b50f3fa658a28d44e4116970abfcd94fac34a8"},
+    {file = "pillow-10.2.0-cp39-cp39-win_arm64.whl", hash = "sha256:0fb3e7fc88a14eacd303e90481ad983fd5b69c761e9e6ef94c983f91025da869"},
+    {file = "pillow-10.2.0-pp310-pypy310_pp73-macosx_10_10_x86_64.whl", hash = "sha256:322209c642aabdd6207517e9739c704dc9f9db943015535783239022002f054a"},
+    {file = "pillow-10.2.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3eedd52442c0a5ff4f887fab0c1c0bb164d8635b32c894bc1faf4c618dd89df2"},
+    {file = "pillow-10.2.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cb28c753fd5eb3dd859b4ee95de66cc62af91bcff5db5f2571d32a520baf1f04"},
+    {file = "pillow-10.2.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:33870dc4653c5017bf4c8873e5488d8f8d5f8935e2f1fb9a2208c47cdd66efd2"},
+    {file = "pillow-10.2.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:3c31822339516fb3c82d03f30e22b1d038da87ef27b6a78c9549888f8ceda39a"},
+    {file = "pillow-10.2.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:a2b56ba36e05f973d450582fb015594aaa78834fefe8dfb8fcd79b93e64ba4c6"},
+    {file = "pillow-10.2.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:d8e6aeb9201e655354b3ad049cb77d19813ad4ece0df1249d3c793de3774f8c7"},
+    {file = "pillow-10.2.0-pp39-pypy39_pp73-macosx_10_10_x86_64.whl", hash = "sha256:2247178effb34a77c11c0e8ac355c7a741ceca0a732b27bf11e747bbc950722f"},
+    {file = "pillow-10.2.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:15587643b9e5eb26c48e49a7b33659790d28f190fc514a322d55da2fb5c2950e"},
+    {file = "pillow-10.2.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:753cd8f2086b2b80180d9b3010dd4ed147efc167c90d3bf593fe2af21265e5a5"},
+    {file = "pillow-10.2.0-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:7c8f97e8e7a9009bcacbe3766a36175056c12f9a44e6e6f2d5caad06dcfbf03b"},
+    {file = "pillow-10.2.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:d1b35bcd6c5543b9cb547dee3150c93008f8dd0f1fef78fc0cd2b141c5baf58a"},
+    {file = "pillow-10.2.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:fe4c15f6c9285dc54ce6553a3ce908ed37c8f3825b5a51a15c91442bb955b868"},
+    {file = "pillow-10.2.0.tar.gz", hash = "sha256:e87f0b2c78157e12d7686b27d63c070fd65d994e8ddae6f328e0dcf4a0cd007e"},
 ]
 
 [package.extras]
 docs = ["furo", "olefile", "sphinx (>=2.4)", "sphinx-copybutton", "sphinx-inline-tabs", "sphinx-removed-in", "sphinxext-opengraph"]
+fpx = ["olefile"]
+mic = ["olefile"]
 tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout"]
+typing = ["typing-extensions"]
+xmp = ["defusedxml"]
 
 [[package]]
 name = "pluggy"
-version = "1.3.0"
+version = "1.4.0"
 description = "plugin and hook calling mechanisms for python"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "pluggy-1.3.0-py3-none-any.whl", hash = "sha256:d89c696a773f8bd377d18e5ecda92b7a3793cbe66c87060a6fb58c7b6e1061f7"},
-    {file = "pluggy-1.3.0.tar.gz", hash = "sha256:cf61ae8f126ac6f7c451172cf30e3e43d3ca77615509771b3a984a0730651e12"},
+    {file = "pluggy-1.4.0-py3-none-any.whl", hash = "sha256:7db9f7b503d67d1c5b95f59773ebb58a8c1c288129a88665838012cfb07b8981"},
+    {file = "pluggy-1.4.0.tar.gz", hash = "sha256:8c85c2876142a764e5b7548e7d9a0e0ddb46f5185161049a79b7e974454223be"},
 ]
 
 [package.extras]
@@ -1632,47 +1716,47 @@ testing = ["pytest", "pytest-benchmark"]
 
 [[package]]
 name = "protobuf"
-version = "4.25.1"
+version = "4.25.2"
 description = ""
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "protobuf-4.25.1-cp310-abi3-win32.whl", hash = "sha256:193f50a6ab78a970c9b4f148e7c750cfde64f59815e86f686c22e26b4fe01ce7"},
-    {file = "protobuf-4.25.1-cp310-abi3-win_amd64.whl", hash = "sha256:3497c1af9f2526962f09329fd61a36566305e6c72da2590ae0d7d1322818843b"},
-    {file = "protobuf-4.25.1-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:0bf384e75b92c42830c0a679b0cd4d6e2b36ae0cf3dbb1e1dfdda48a244f4bcd"},
-    {file = "protobuf-4.25.1-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:0f881b589ff449bf0b931a711926e9ddaad3b35089cc039ce1af50b21a4ae8cb"},
-    {file = "protobuf-4.25.1-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:ca37bf6a6d0046272c152eea90d2e4ef34593aaa32e8873fc14c16440f22d4b7"},
-    {file = "protobuf-4.25.1-cp38-cp38-win32.whl", hash = "sha256:abc0525ae2689a8000837729eef7883b9391cd6aa7950249dcf5a4ede230d5dd"},
-    {file = "protobuf-4.25.1-cp38-cp38-win_amd64.whl", hash = "sha256:1484f9e692091450e7edf418c939e15bfc8fc68856e36ce399aed6889dae8bb0"},
-    {file = "protobuf-4.25.1-cp39-cp39-win32.whl", hash = "sha256:8bdbeaddaac52d15c6dce38c71b03038ef7772b977847eb6d374fc86636fa510"},
-    {file = "protobuf-4.25.1-cp39-cp39-win_amd64.whl", hash = "sha256:becc576b7e6b553d22cbdf418686ee4daa443d7217999125c045ad56322dda10"},
-    {file = "protobuf-4.25.1-py3-none-any.whl", hash = "sha256:a19731d5e83ae4737bb2a089605e636077ac001d18781b3cf489b9546c7c80d6"},
-    {file = "protobuf-4.25.1.tar.gz", hash = "sha256:57d65074b4f5baa4ab5da1605c02be90ac20c8b40fb137d6a8df9f416b0d0ce2"},
+    {file = "protobuf-4.25.2-cp310-abi3-win32.whl", hash = "sha256:b50c949608682b12efb0b2717f53256f03636af5f60ac0c1d900df6213910fd6"},
+    {file = "protobuf-4.25.2-cp310-abi3-win_amd64.whl", hash = "sha256:8f62574857ee1de9f770baf04dde4165e30b15ad97ba03ceac65f760ff018ac9"},
+    {file = "protobuf-4.25.2-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:2db9f8fa64fbdcdc93767d3cf81e0f2aef176284071507e3ede160811502fd3d"},
+    {file = "protobuf-4.25.2-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:10894a2885b7175d3984f2be8d9850712c57d5e7587a2410720af8be56cdaf62"},
+    {file = "protobuf-4.25.2-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:fc381d1dd0516343f1440019cedf08a7405f791cd49eef4ae1ea06520bc1c020"},
+    {file = "protobuf-4.25.2-cp38-cp38-win32.whl", hash = "sha256:33a1aeef4b1927431d1be780e87b641e322b88d654203a9e9d93f218ee359e61"},
+    {file = "protobuf-4.25.2-cp38-cp38-win_amd64.whl", hash = "sha256:47f3de503fe7c1245f6f03bea7e8d3ec11c6c4a2ea9ef910e3221c8a15516d62"},
+    {file = "protobuf-4.25.2-cp39-cp39-win32.whl", hash = "sha256:5e5c933b4c30a988b52e0b7c02641760a5ba046edc5e43d3b94a74c9fc57c1b3"},
+    {file = "protobuf-4.25.2-cp39-cp39-win_amd64.whl", hash = "sha256:d66a769b8d687df9024f2985d5137a337f957a0916cf5464d1513eee96a63ff0"},
+    {file = "protobuf-4.25.2-py3-none-any.whl", hash = "sha256:a8b7a98d4ce823303145bf3c1a8bdb0f2f4642a414b196f04ad9853ed0c8f830"},
+    {file = "protobuf-4.25.2.tar.gz", hash = "sha256:fe599e175cb347efc8ee524bcd4b902d11f7262c0e569ececcb89995c15f0a5e"},
 ]
 
 [[package]]
 name = "psutil"
-version = "5.9.6"
+version = "5.9.8"
 description = "Cross-platform lib for process and system monitoring in Python."
 optional = true
 python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*"
 files = [
-    {file = "psutil-5.9.6-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:fb8a697f11b0f5994550555fcfe3e69799e5b060c8ecf9e2f75c69302cc35c0d"},
-    {file = "psutil-5.9.6-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:91ecd2d9c00db9817a4b4192107cf6954addb5d9d67a969a4f436dbc9200f88c"},
-    {file = "psutil-5.9.6-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:10e8c17b4f898d64b121149afb136c53ea8b68c7531155147867b7b1ac9e7e28"},
-    {file = "psutil-5.9.6-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:18cd22c5db486f33998f37e2bb054cc62fd06646995285e02a51b1e08da97017"},
-    {file = "psutil-5.9.6-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:ca2780f5e038379e520281e4c032dddd086906ddff9ef0d1b9dcf00710e5071c"},
-    {file = "psutil-5.9.6-cp27-none-win32.whl", hash = "sha256:70cb3beb98bc3fd5ac9ac617a327af7e7f826373ee64c80efd4eb2856e5051e9"},
-    {file = "psutil-5.9.6-cp27-none-win_amd64.whl", hash = "sha256:51dc3d54607c73148f63732c727856f5febec1c7c336f8f41fcbd6315cce76ac"},
-    {file = "psutil-5.9.6-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:c69596f9fc2f8acd574a12d5f8b7b1ba3765a641ea5d60fb4736bf3c08a8214a"},
-    {file = "psutil-5.9.6-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:92e0cc43c524834af53e9d3369245e6cc3b130e78e26100d1f63cdb0abeb3d3c"},
-    {file = "psutil-5.9.6-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:748c9dd2583ed86347ed65d0035f45fa8c851e8d90354c122ab72319b5f366f4"},
-    {file = "psutil-5.9.6-cp36-cp36m-win32.whl", hash = "sha256:3ebf2158c16cc69db777e3c7decb3c0f43a7af94a60d72e87b2823aebac3d602"},
-    {file = "psutil-5.9.6-cp36-cp36m-win_amd64.whl", hash = "sha256:ff18b8d1a784b810df0b0fff3bcb50ab941c3b8e2c8de5726f9c71c601c611aa"},
-    {file = "psutil-5.9.6-cp37-abi3-win32.whl", hash = "sha256:a6f01f03bf1843280f4ad16f4bde26b817847b4c1a0db59bf6419807bc5ce05c"},
-    {file = "psutil-5.9.6-cp37-abi3-win_amd64.whl", hash = "sha256:6e5fb8dc711a514da83098bc5234264e551ad980cec5f85dabf4d38ed6f15e9a"},
-    {file = "psutil-5.9.6-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:daecbcbd29b289aac14ece28eca6a3e60aa361754cf6da3dfb20d4d32b6c7f57"},
-    {file = "psutil-5.9.6.tar.gz", hash = "sha256:e4b92ddcd7dd4cdd3f900180ea1e104932c7bce234fb88976e2a3b296441225a"},
+    {file = "psutil-5.9.8-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:26bd09967ae00920df88e0352a91cff1a78f8d69b3ecabbfe733610c0af486c8"},
+    {file = "psutil-5.9.8-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:05806de88103b25903dff19bb6692bd2e714ccf9e668d050d144012055cbca73"},
+    {file = "psutil-5.9.8-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:611052c4bc70432ec770d5d54f64206aa7203a101ec273a0cd82418c86503bb7"},
+    {file = "psutil-5.9.8-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:50187900d73c1381ba1454cf40308c2bf6f34268518b3f36a9b663ca87e65e36"},
+    {file = "psutil-5.9.8-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:02615ed8c5ea222323408ceba16c60e99c3f91639b07da6373fb7e6539abc56d"},
+    {file = "psutil-5.9.8-cp27-none-win32.whl", hash = "sha256:36f435891adb138ed3c9e58c6af3e2e6ca9ac2f365efe1f9cfef2794e6c93b4e"},
+    {file = "psutil-5.9.8-cp27-none-win_amd64.whl", hash = "sha256:bd1184ceb3f87651a67b2708d4c3338e9b10c5df903f2e3776b62303b26cb631"},
+    {file = "psutil-5.9.8-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:aee678c8720623dc456fa20659af736241f575d79429a0e5e9cf88ae0605cc81"},
+    {file = "psutil-5.9.8-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8cb6403ce6d8e047495a701dc7c5bd788add903f8986d523e3e20b98b733e421"},
+    {file = "psutil-5.9.8-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d06016f7f8625a1825ba3732081d77c94589dca78b7a3fc072194851e88461a4"},
+    {file = "psutil-5.9.8-cp36-cp36m-win32.whl", hash = "sha256:7d79560ad97af658a0f6adfef8b834b53f64746d45b403f225b85c5c2c140eee"},
+    {file = "psutil-5.9.8-cp36-cp36m-win_amd64.whl", hash = "sha256:27cc40c3493bb10de1be4b3f07cae4c010ce715290a5be22b98493509c6299e2"},
+    {file = "psutil-5.9.8-cp37-abi3-win32.whl", hash = "sha256:bc56c2a1b0d15aa3eaa5a60c9f3f8e3e565303b465dbf57a1b730e7a2b9844e0"},
+    {file = "psutil-5.9.8-cp37-abi3-win_amd64.whl", hash = "sha256:8db4c1b57507eef143a15a6884ca10f7c73876cdf5d51e713151c1236a0e68cf"},
+    {file = "psutil-5.9.8-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:d16bbddf0693323b8c6123dd804100241da461e41d6e332fb0ba6058f630f8c8"},
+    {file = "psutil-5.9.8.tar.gz", hash = "sha256:6be126e3225486dff286a8fb9a06246a5253f4c7c53b475ea5f5ac934e64194c"},
 ]
 
 [package.extras]
@@ -1680,51 +1764,51 @@ test = ["enum34", "ipaddress", "mock", "pywin32", "wmi"]
 
 [[package]]
 name = "pyarrow"
-version = "14.0.1"
+version = "15.0.0"
 description = "Python library for Apache Arrow"
 optional = true
 python-versions = ">=3.8"
 files = [
-    {file = "pyarrow-14.0.1-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:96d64e5ba7dceb519a955e5eeb5c9adcfd63f73a56aea4722e2cc81364fc567a"},
-    {file = "pyarrow-14.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1a8ae88c0038d1bc362a682320112ee6774f006134cd5afc291591ee4bc06505"},
-    {file = "pyarrow-14.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0f6f053cb66dc24091f5511e5920e45c83107f954a21032feadc7b9e3a8e7851"},
-    {file = "pyarrow-14.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:906b0dc25f2be12e95975722f1e60e162437023f490dbd80d0deb7375baf3171"},
-    {file = "pyarrow-14.0.1-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:78d4a77a46a7de9388b653af1c4ce539350726cd9af62e0831e4f2bd0c95a2f4"},
-    {file = "pyarrow-14.0.1-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:06ca79080ef89d6529bb8e5074d4b4f6086143b2520494fcb7cf8a99079cde93"},
-    {file = "pyarrow-14.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:32542164d905002c42dff896efdac79b3bdd7291b1b74aa292fac8450d0e4dcd"},
-    {file = "pyarrow-14.0.1-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:c7331b4ed3401b7ee56f22c980608cf273f0380f77d0f73dd3c185f78f5a6220"},
-    {file = "pyarrow-14.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:922e8b49b88da8633d6cac0e1b5a690311b6758d6f5d7c2be71acb0f1e14cd61"},
-    {file = "pyarrow-14.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:58c889851ca33f992ea916b48b8540735055201b177cb0dcf0596a495a667b00"},
-    {file = "pyarrow-14.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:30d8494870d9916bb53b2a4384948491444741cb9a38253c590e21f836b01222"},
-    {file = "pyarrow-14.0.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:be28e1a07f20391bb0b15ea03dcac3aade29fc773c5eb4bee2838e9b2cdde0cb"},
-    {file = "pyarrow-14.0.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:981670b4ce0110d8dcb3246410a4aabf5714db5d8ea63b15686bce1c914b1f83"},
-    {file = "pyarrow-14.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:4756a2b373a28f6166c42711240643fb8bd6322467e9aacabd26b488fa41ec23"},
-    {file = "pyarrow-14.0.1-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:cf87e2cec65dd5cf1aa4aba918d523ef56ef95597b545bbaad01e6433851aa10"},
-    {file = "pyarrow-14.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:470ae0194fbfdfbf4a6b65b4f9e0f6e1fa0ea5b90c1ee6b65b38aecee53508c8"},
-    {file = "pyarrow-14.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6263cffd0c3721c1e348062997babdf0151301f7353010c9c9a8ed47448f82ab"},
-    {file = "pyarrow-14.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a8089d7e77d1455d529dbd7cff08898bbb2666ee48bc4085203af1d826a33cc"},
-    {file = "pyarrow-14.0.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:fada8396bc739d958d0b81d291cfd201126ed5e7913cb73de6bc606befc30226"},
-    {file = "pyarrow-14.0.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:2a145dab9ed7849fc1101bf03bcdc69913547f10513fdf70fc3ab6c0a50c7eee"},
-    {file = "pyarrow-14.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:05fe7994745b634c5fb16ce5717e39a1ac1fac3e2b0795232841660aa76647cd"},
-    {file = "pyarrow-14.0.1-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:a8eeef015ae69d104c4c3117a6011e7e3ecd1abec79dc87fd2fac6e442f666ee"},
-    {file = "pyarrow-14.0.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3c76807540989fe8fcd02285dd15e4f2a3da0b09d27781abec3adc265ddbeba1"},
-    {file = "pyarrow-14.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:450e4605e3c20e558485f9161a79280a61c55efe585d51513c014de9ae8d393f"},
-    {file = "pyarrow-14.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:323cbe60210173ffd7db78bfd50b80bdd792c4c9daca8843ef3cd70b186649db"},
-    {file = "pyarrow-14.0.1-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:0140c7e2b740e08c5a459439d87acd26b747fc408bde0a8806096ee0baaa0c15"},
-    {file = "pyarrow-14.0.1-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:e592e482edd9f1ab32f18cd6a716c45b2c0f2403dc2af782f4e9674952e6dd27"},
-    {file = "pyarrow-14.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:d264ad13605b61959f2ae7c1d25b1a5b8505b112715c961418c8396433f213ad"},
-    {file = "pyarrow-14.0.1-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:01e44de9749cddc486169cb632f3c99962318e9dacac7778315a110f4bf8a450"},
-    {file = "pyarrow-14.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d0351fecf0e26e152542bc164c22ea2a8e8c682726fce160ce4d459ea802d69c"},
-    {file = "pyarrow-14.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:33c1f6110c386464fd2e5e4ea3624466055bbe681ff185fd6c9daa98f30a3f9a"},
-    {file = "pyarrow-14.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:11e045dfa09855b6d3e7705a37c42e2dc2c71d608fab34d3c23df2e02df9aec3"},
-    {file = "pyarrow-14.0.1-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:097828b55321897db0e1dbfc606e3ff8101ae5725673498cbfa7754ee0da80e4"},
-    {file = "pyarrow-14.0.1-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:1daab52050a1c48506c029e6fa0944a7b2436334d7e44221c16f6f1b2cc9c510"},
-    {file = "pyarrow-14.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:3f6d5faf4f1b0d5a7f97be987cf9e9f8cd39902611e818fe134588ee99bf0283"},
-    {file = "pyarrow-14.0.1.tar.gz", hash = "sha256:b8b3f4fe8d4ec15e1ef9b599b94683c5216adaed78d5cb4c606180546d1e2ee1"},
+    {file = "pyarrow-15.0.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:0a524532fd6dd482edaa563b686d754c70417c2f72742a8c990b322d4c03a15d"},
+    {file = "pyarrow-15.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:60a6bdb314affa9c2e0d5dddf3d9cbb9ef4a8dddaa68669975287d47ece67642"},
+    {file = "pyarrow-15.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:66958fd1771a4d4b754cd385835e66a3ef6b12611e001d4e5edfcef5f30391e2"},
+    {file = "pyarrow-15.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f500956a49aadd907eaa21d4fff75f73954605eaa41f61cb94fb008cf2e00c6"},
+    {file = "pyarrow-15.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:6f87d9c4f09e049c2cade559643424da84c43a35068f2a1c4653dc5b1408a929"},
+    {file = "pyarrow-15.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:85239b9f93278e130d86c0e6bb455dcb66fc3fd891398b9d45ace8799a871a1e"},
+    {file = "pyarrow-15.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:5b8d43e31ca16aa6e12402fcb1e14352d0d809de70edd185c7650fe80e0769e3"},
+    {file = "pyarrow-15.0.0-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:fa7cd198280dbd0c988df525e50e35b5d16873e2cdae2aaaa6363cdb64e3eec5"},
+    {file = "pyarrow-15.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8780b1a29d3c8b21ba6b191305a2a607de2e30dab399776ff0aa09131e266340"},
+    {file = "pyarrow-15.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fe0ec198ccc680f6c92723fadcb97b74f07c45ff3fdec9dd765deb04955ccf19"},
+    {file = "pyarrow-15.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:036a7209c235588c2f07477fe75c07e6caced9b7b61bb897c8d4e52c4b5f9555"},
+    {file = "pyarrow-15.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:2bd8a0e5296797faf9a3294e9fa2dc67aa7f10ae2207920dbebb785c77e9dbe5"},
+    {file = "pyarrow-15.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:e8ebed6053dbe76883a822d4e8da36860f479d55a762bd9e70d8494aed87113e"},
+    {file = "pyarrow-15.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:17d53a9d1b2b5bd7d5e4cd84d018e2a45bc9baaa68f7e6e3ebed45649900ba99"},
+    {file = "pyarrow-15.0.0-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:9950a9c9df24090d3d558b43b97753b8f5867fb8e521f29876aa021c52fda351"},
+    {file = "pyarrow-15.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:003d680b5e422d0204e7287bb3fa775b332b3fce2996aa69e9adea23f5c8f970"},
+    {file = "pyarrow-15.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f75fce89dad10c95f4bf590b765e3ae98bcc5ba9f6ce75adb828a334e26a3d40"},
+    {file = "pyarrow-15.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ca9cb0039923bec49b4fe23803807e4ef39576a2bec59c32b11296464623dc2"},
+    {file = "pyarrow-15.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:9ed5a78ed29d171d0acc26a305a4b7f83c122d54ff5270810ac23c75813585e4"},
+    {file = "pyarrow-15.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:6eda9e117f0402dfcd3cd6ec9bfee89ac5071c48fc83a84f3075b60efa96747f"},
+    {file = "pyarrow-15.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:9a3a6180c0e8f2727e6f1b1c87c72d3254cac909e609f35f22532e4115461177"},
+    {file = "pyarrow-15.0.0-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:19a8918045993349b207de72d4576af0191beef03ea655d8bdb13762f0cd6eac"},
+    {file = "pyarrow-15.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:d0ec076b32bacb6666e8813a22e6e5a7ef1314c8069d4ff345efa6246bc38593"},
+    {file = "pyarrow-15.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5db1769e5d0a77eb92344c7382d6543bea1164cca3704f84aa44e26c67e320fb"},
+    {file = "pyarrow-15.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e2617e3bf9df2a00020dd1c1c6dce5cc343d979efe10bc401c0632b0eef6ef5b"},
+    {file = "pyarrow-15.0.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:d31c1d45060180131caf10f0f698e3a782db333a422038bf7fe01dace18b3a31"},
+    {file = "pyarrow-15.0.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:c8c287d1d479de8269398b34282e206844abb3208224dbdd7166d580804674b7"},
+    {file = "pyarrow-15.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:07eb7f07dc9ecbb8dace0f58f009d3a29ee58682fcdc91337dfeb51ea618a75b"},
+    {file = "pyarrow-15.0.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:47af7036f64fce990bb8a5948c04722e4e3ea3e13b1007ef52dfe0aa8f23cf7f"},
+    {file = "pyarrow-15.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:93768ccfff85cf044c418bfeeafce9a8bb0cee091bd8fd19011aff91e58de540"},
+    {file = "pyarrow-15.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f6ee87fd6892700960d90abb7b17a72a5abb3b64ee0fe8db6c782bcc2d0dc0b4"},
+    {file = "pyarrow-15.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:001fca027738c5f6be0b7a3159cc7ba16a5c52486db18160909a0831b063c4e4"},
+    {file = "pyarrow-15.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:d1c48648f64aec09accf44140dccb92f4f94394b8d79976c426a5b79b11d4fa7"},
+    {file = "pyarrow-15.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:972a0141be402bb18e3201448c8ae62958c9c7923dfaa3b3d4530c835ac81aed"},
+    {file = "pyarrow-15.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:f01fc5cf49081426429127aa2d427d9d98e1cb94a32cb961d583a70b7c4504e6"},
+    {file = "pyarrow-15.0.0.tar.gz", hash = "sha256:876858f549d540898f927eba4ef77cd549ad8d24baa3207cf1b72e5788b50e83"},
 ]
 
 [package.dependencies]
-numpy = ">=1.16.6"
+numpy = ">=1.16.6,<2"
 
 [[package]]
 name = "pyarrow-hotfix"
@@ -1739,13 +1823,13 @@ files = [
 
 [[package]]
 name = "pytest"
-version = "7.4.3"
+version = "7.4.4"
 description = "pytest: simple powerful testing with Python"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "pytest-7.4.3-py3-none-any.whl", hash = "sha256:0d009c083ea859a71b76adf7c1d502e4bc170b80a8ef002da5806527b9591fac"},
-    {file = "pytest-7.4.3.tar.gz", hash = "sha256:d989d136982de4e3b29dabcc838ad581c64e8ed52c11fbe86ddebd9da0818cd5"},
+    {file = "pytest-7.4.4-py3-none-any.whl", hash = "sha256:b090cdf5ed60bf4c45261be03239c2c1c22df034fbffe691abe93cd80cea01d8"},
+    {file = "pytest-7.4.4.tar.gz", hash = "sha256:2cf0005922c6ace4a3e2ec8b4080eb0d9753fdc93107415332f50ce9e7994280"},
 ]
 
 [package.dependencies]
@@ -1845,99 +1929,104 @@ files = [
 
 [[package]]
 name = "regex"
-version = "2023.10.3"
+version = "2023.12.25"
 description = "Alternative regular expression module, to replace re."
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "regex-2023.10.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4c34d4f73ea738223a094d8e0ffd6d2c1a1b4c175da34d6b0de3d8d69bee6bcc"},
-    {file = "regex-2023.10.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a8f4e49fc3ce020f65411432183e6775f24e02dff617281094ba6ab079ef0915"},
-    {file = "regex-2023.10.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4cd1bccf99d3ef1ab6ba835308ad85be040e6a11b0977ef7ea8c8005f01a3c29"},
-    {file = "regex-2023.10.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:81dce2ddc9f6e8f543d94b05d56e70d03a0774d32f6cca53e978dc01e4fc75b8"},
-    {file = "regex-2023.10.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9c6b4d23c04831e3ab61717a707a5d763b300213db49ca680edf8bf13ab5d91b"},
-    {file = "regex-2023.10.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c15ad0aee158a15e17e0495e1e18741573d04eb6da06d8b84af726cfc1ed02ee"},
-    {file = "regex-2023.10.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6239d4e2e0b52c8bd38c51b760cd870069f0bdf99700a62cd509d7a031749a55"},
-    {file = "regex-2023.10.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:4a8bf76e3182797c6b1afa5b822d1d5802ff30284abe4599e1247be4fd6b03be"},
-    {file = "regex-2023.10.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d9c727bbcf0065cbb20f39d2b4f932f8fa1631c3e01fcedc979bd4f51fe051c5"},
-    {file = "regex-2023.10.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:3ccf2716add72f80714b9a63899b67fa711b654be3fcdd34fa391d2d274ce767"},
-    {file = "regex-2023.10.3-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:107ac60d1bfdc3edb53be75e2a52aff7481b92817cfdddd9b4519ccf0e54a6ff"},
-    {file = "regex-2023.10.3-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:00ba3c9818e33f1fa974693fb55d24cdc8ebafcb2e4207680669d8f8d7cca79a"},
-    {file = "regex-2023.10.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f0a47efb1dbef13af9c9a54a94a0b814902e547b7f21acb29434504d18f36e3a"},
-    {file = "regex-2023.10.3-cp310-cp310-win32.whl", hash = "sha256:36362386b813fa6c9146da6149a001b7bd063dabc4d49522a1f7aa65b725c7ec"},
-    {file = "regex-2023.10.3-cp310-cp310-win_amd64.whl", hash = "sha256:c65a3b5330b54103e7d21cac3f6bf3900d46f6d50138d73343d9e5b2900b2353"},
-    {file = "regex-2023.10.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:90a79bce019c442604662d17bf69df99090e24cdc6ad95b18b6725c2988a490e"},
-    {file = "regex-2023.10.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c7964c2183c3e6cce3f497e3a9f49d182e969f2dc3aeeadfa18945ff7bdd7051"},
-    {file = "regex-2023.10.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ef80829117a8061f974b2fda8ec799717242353bff55f8a29411794d635d964"},
-    {file = "regex-2023.10.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5addc9d0209a9afca5fc070f93b726bf7003bd63a427f65ef797a931782e7edc"},
-    {file = "regex-2023.10.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c148bec483cc4b421562b4bcedb8e28a3b84fcc8f0aa4418e10898f3c2c0eb9b"},
-    {file = "regex-2023.10.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d1f21af4c1539051049796a0f50aa342f9a27cde57318f2fc41ed50b0dbc4ac"},
-    {file = "regex-2023.10.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0b9ac09853b2a3e0d0082104036579809679e7715671cfbf89d83c1cb2a30f58"},
-    {file = "regex-2023.10.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ebedc192abbc7fd13c5ee800e83a6df252bec691eb2c4bedc9f8b2e2903f5e2a"},
-    {file = "regex-2023.10.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:d8a993c0a0ffd5f2d3bda23d0cd75e7086736f8f8268de8a82fbc4bd0ac6791e"},
-    {file = "regex-2023.10.3-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:be6b7b8d42d3090b6c80793524fa66c57ad7ee3fe9722b258aec6d0672543fd0"},
-    {file = "regex-2023.10.3-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4023e2efc35a30e66e938de5aef42b520c20e7eda7bb5fb12c35e5d09a4c43f6"},
-    {file = "regex-2023.10.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:0d47840dc05e0ba04fe2e26f15126de7c755496d5a8aae4a08bda4dd8d646c54"},
-    {file = "regex-2023.10.3-cp311-cp311-win32.whl", hash = "sha256:9145f092b5d1977ec8c0ab46e7b3381b2fd069957b9862a43bd383e5c01d18c2"},
-    {file = "regex-2023.10.3-cp311-cp311-win_amd64.whl", hash = "sha256:b6104f9a46bd8743e4f738afef69b153c4b8b592d35ae46db07fc28ae3d5fb7c"},
-    {file = "regex-2023.10.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:bff507ae210371d4b1fe316d03433ac099f184d570a1a611e541923f78f05037"},
-    {file = "regex-2023.10.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:be5e22bbb67924dea15039c3282fa4cc6cdfbe0cbbd1c0515f9223186fc2ec5f"},
-    {file = "regex-2023.10.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4a992f702c9be9c72fa46f01ca6e18d131906a7180950958f766c2aa294d4b41"},
-    {file = "regex-2023.10.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7434a61b158be563c1362d9071358f8ab91b8d928728cd2882af060481244c9e"},
-    {file = "regex-2023.10.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c2169b2dcabf4e608416f7f9468737583ce5f0a6e8677c4efbf795ce81109d7c"},
-    {file = "regex-2023.10.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9e908ef5889cda4de038892b9accc36d33d72fb3e12c747e2799a0e806ec841"},
-    {file = "regex-2023.10.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:12bd4bc2c632742c7ce20db48e0d99afdc05e03f0b4c1af90542e05b809a03d9"},
-    {file = "regex-2023.10.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:bc72c231f5449d86d6c7d9cc7cd819b6eb30134bb770b8cfdc0765e48ef9c420"},
-    {file = "regex-2023.10.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:bce8814b076f0ce5766dc87d5a056b0e9437b8e0cd351b9a6c4e1134a7dfbda9"},
-    {file = "regex-2023.10.3-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:ba7cd6dc4d585ea544c1412019921570ebd8a597fabf475acc4528210d7c4a6f"},
-    {file = "regex-2023.10.3-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b0c7d2f698e83f15228ba41c135501cfe7d5740181d5903e250e47f617eb4292"},
-    {file = "regex-2023.10.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:5a8f91c64f390ecee09ff793319f30a0f32492e99f5dc1c72bc361f23ccd0a9a"},
-    {file = "regex-2023.10.3-cp312-cp312-win32.whl", hash = "sha256:ad08a69728ff3c79866d729b095872afe1e0557251da4abb2c5faff15a91d19a"},
-    {file = "regex-2023.10.3-cp312-cp312-win_amd64.whl", hash = "sha256:39cdf8d141d6d44e8d5a12a8569d5a227f645c87df4f92179bd06e2e2705e76b"},
-    {file = "regex-2023.10.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:4a3ee019a9befe84fa3e917a2dd378807e423d013377a884c1970a3c2792d293"},
-    {file = "regex-2023.10.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76066d7ff61ba6bf3cb5efe2428fc82aac91802844c022d849a1f0f53820502d"},
-    {file = "regex-2023.10.3-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bfe50b61bab1b1ec260fa7cd91106fa9fece57e6beba05630afe27c71259c59b"},
-    {file = "regex-2023.10.3-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9fd88f373cb71e6b59b7fa597e47e518282455c2734fd4306a05ca219a1991b0"},
-    {file = "regex-2023.10.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b3ab05a182c7937fb374f7e946f04fb23a0c0699c0450e9fb02ef567412d2fa3"},
-    {file = "regex-2023.10.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dac37cf08fcf2094159922edc7a2784cfcc5c70f8354469f79ed085f0328ebdf"},
-    {file = "regex-2023.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:e54ddd0bb8fb626aa1f9ba7b36629564544954fff9669b15da3610c22b9a0991"},
-    {file = "regex-2023.10.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:3367007ad1951fde612bf65b0dffc8fd681a4ab98ac86957d16491400d661302"},
-    {file = "regex-2023.10.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:16f8740eb6dbacc7113e3097b0a36065a02e37b47c936b551805d40340fb9971"},
-    {file = "regex-2023.10.3-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:f4f2ca6df64cbdd27f27b34f35adb640b5d2d77264228554e68deda54456eb11"},
-    {file = "regex-2023.10.3-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:39807cbcbe406efca2a233884e169d056c35aa7e9f343d4e78665246a332f597"},
-    {file = "regex-2023.10.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:7eece6fbd3eae4a92d7c748ae825cbc1ee41a89bb1c3db05b5578ed3cfcfd7cb"},
-    {file = "regex-2023.10.3-cp37-cp37m-win32.whl", hash = "sha256:ce615c92d90df8373d9e13acddd154152645c0dc060871abf6bd43809673d20a"},
-    {file = "regex-2023.10.3-cp37-cp37m-win_amd64.whl", hash = "sha256:0f649fa32fe734c4abdfd4edbb8381c74abf5f34bc0b3271ce687b23729299ed"},
-    {file = "regex-2023.10.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9b98b7681a9437262947f41c7fac567c7e1f6eddd94b0483596d320092004533"},
-    {file = "regex-2023.10.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:91dc1d531f80c862441d7b66c4505cd6ea9d312f01fb2f4654f40c6fdf5cc37a"},
-    {file = "regex-2023.10.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:82fcc1f1cc3ff1ab8a57ba619b149b907072e750815c5ba63e7aa2e1163384a4"},
-    {file = "regex-2023.10.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7979b834ec7a33aafae34a90aad9f914c41fd6eaa8474e66953f3f6f7cbd4368"},
-    {file = "regex-2023.10.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ef71561f82a89af6cfcbee47f0fabfdb6e63788a9258e913955d89fdd96902ab"},
-    {file = "regex-2023.10.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd829712de97753367153ed84f2de752b86cd1f7a88b55a3a775eb52eafe8a94"},
-    {file = "regex-2023.10.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:00e871d83a45eee2f8688d7e6849609c2ca2a04a6d48fba3dff4deef35d14f07"},
-    {file = "regex-2023.10.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:706e7b739fdd17cb89e1fbf712d9dc21311fc2333f6d435eac2d4ee81985098c"},
-    {file = "regex-2023.10.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:cc3f1c053b73f20c7ad88b0d1d23be7e7b3901229ce89f5000a8399746a6e039"},
-    {file = "regex-2023.10.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:6f85739e80d13644b981a88f529d79c5bdf646b460ba190bffcaf6d57b2a9863"},
-    {file = "regex-2023.10.3-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:741ba2f511cc9626b7561a440f87d658aabb3d6b744a86a3c025f866b4d19e7f"},
-    {file = "regex-2023.10.3-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:e77c90ab5997e85901da85131fd36acd0ed2221368199b65f0d11bca44549711"},
-    {file = "regex-2023.10.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:979c24cbefaf2420c4e377ecd1f165ea08cc3d1fbb44bdc51bccbbf7c66a2cb4"},
-    {file = "regex-2023.10.3-cp38-cp38-win32.whl", hash = "sha256:58837f9d221744d4c92d2cf7201c6acd19623b50c643b56992cbd2b745485d3d"},
-    {file = "regex-2023.10.3-cp38-cp38-win_amd64.whl", hash = "sha256:c55853684fe08d4897c37dfc5faeff70607a5f1806c8be148f1695be4a63414b"},
-    {file = "regex-2023.10.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2c54e23836650bdf2c18222c87f6f840d4943944146ca479858404fedeb9f9af"},
-    {file = "regex-2023.10.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:69c0771ca5653c7d4b65203cbfc5e66db9375f1078689459fe196fe08b7b4930"},
-    {file = "regex-2023.10.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ac965a998e1388e6ff2e9781f499ad1eaa41e962a40d11c7823c9952c77123e"},
-    {file = "regex-2023.10.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1c0e8fae5b27caa34177bdfa5a960c46ff2f78ee2d45c6db15ae3f64ecadde14"},
-    {file = "regex-2023.10.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6c56c3d47da04f921b73ff9415fbaa939f684d47293f071aa9cbb13c94afc17d"},
-    {file = "regex-2023.10.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ef1e014eed78ab650bef9a6a9cbe50b052c0aebe553fb2881e0453717573f52"},
-    {file = "regex-2023.10.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d29338556a59423d9ff7b6eb0cb89ead2b0875e08fe522f3e068b955c3e7b59b"},
-    {file = "regex-2023.10.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:9c6d0ced3c06d0f183b73d3c5920727268d2201aa0fe6d55c60d68c792ff3588"},
-    {file = "regex-2023.10.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:994645a46c6a740ee8ce8df7911d4aee458d9b1bc5639bc968226763d07f00fa"},
-    {file = "regex-2023.10.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:66e2fe786ef28da2b28e222c89502b2af984858091675044d93cb50e6f46d7af"},
-    {file = "regex-2023.10.3-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:11175910f62b2b8c055f2b089e0fedd694fe2be3941b3e2633653bc51064c528"},
-    {file = "regex-2023.10.3-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:06e9abc0e4c9ab4779c74ad99c3fc10d3967d03114449acc2c2762ad4472b8ca"},
-    {file = "regex-2023.10.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:fb02e4257376ae25c6dd95a5aec377f9b18c09be6ebdefa7ad209b9137b73d48"},
-    {file = "regex-2023.10.3-cp39-cp39-win32.whl", hash = "sha256:3b2c3502603fab52d7619b882c25a6850b766ebd1b18de3df23b2f939360e1bd"},
-    {file = "regex-2023.10.3-cp39-cp39-win_amd64.whl", hash = "sha256:adbccd17dcaff65704c856bd29951c58a1bd4b2b0f8ad6b826dbd543fe740988"},
-    {file = "regex-2023.10.3.tar.gz", hash = "sha256:3fef4f844d2290ee0ba57addcec17eec9e3df73f10a2748485dfd6a3a188cc0f"},
+    {file = "regex-2023.12.25-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0694219a1d54336fd0445ea382d49d36882415c0134ee1e8332afd1529f0baa5"},
+    {file = "regex-2023.12.25-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b014333bd0217ad3d54c143de9d4b9a3ca1c5a29a6d0d554952ea071cff0f1f8"},
+    {file = "regex-2023.12.25-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d865984b3f71f6d0af64d0d88f5733521698f6c16f445bb09ce746c92c97c586"},
+    {file = "regex-2023.12.25-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1e0eabac536b4cc7f57a5f3d095bfa557860ab912f25965e08fe1545e2ed8b4c"},
+    {file = "regex-2023.12.25-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c25a8ad70e716f96e13a637802813f65d8a6760ef48672aa3502f4c24ea8b400"},
+    {file = "regex-2023.12.25-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a9b6d73353f777630626f403b0652055ebfe8ff142a44ec2cf18ae470395766e"},
+    {file = "regex-2023.12.25-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9cc99d6946d750eb75827cb53c4371b8b0fe89c733a94b1573c9dd16ea6c9e4"},
+    {file = "regex-2023.12.25-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:88d1f7bef20c721359d8675f7d9f8e414ec5003d8f642fdfd8087777ff7f94b5"},
+    {file = "regex-2023.12.25-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:cb3fe77aec8f1995611f966d0c656fdce398317f850d0e6e7aebdfe61f40e1cd"},
+    {file = "regex-2023.12.25-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:7aa47c2e9ea33a4a2a05f40fcd3ea36d73853a2aae7b4feab6fc85f8bf2c9704"},
+    {file = "regex-2023.12.25-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:df26481f0c7a3f8739fecb3e81bc9da3fcfae34d6c094563b9d4670b047312e1"},
+    {file = "regex-2023.12.25-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:c40281f7d70baf6e0db0c2f7472b31609f5bc2748fe7275ea65a0b4601d9b392"},
+    {file = "regex-2023.12.25-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:d94a1db462d5690ebf6ae86d11c5e420042b9898af5dcf278bd97d6bda065423"},
+    {file = "regex-2023.12.25-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ba1b30765a55acf15dce3f364e4928b80858fa8f979ad41f862358939bdd1f2f"},
+    {file = "regex-2023.12.25-cp310-cp310-win32.whl", hash = "sha256:150c39f5b964e4d7dba46a7962a088fbc91f06e606f023ce57bb347a3b2d4630"},
+    {file = "regex-2023.12.25-cp310-cp310-win_amd64.whl", hash = "sha256:09da66917262d9481c719599116c7dc0c321ffcec4b1f510c4f8a066f8768105"},
+    {file = "regex-2023.12.25-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:1b9d811f72210fa9306aeb88385b8f8bcef0dfbf3873410413c00aa94c56c2b6"},
+    {file = "regex-2023.12.25-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d902a43085a308cef32c0d3aea962524b725403fd9373dea18110904003bac97"},
+    {file = "regex-2023.12.25-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d166eafc19f4718df38887b2bbe1467a4f74a9830e8605089ea7a30dd4da8887"},
+    {file = "regex-2023.12.25-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c7ad32824b7f02bb3c9f80306d405a1d9b7bb89362d68b3c5a9be53836caebdb"},
+    {file = "regex-2023.12.25-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:636ba0a77de609d6510235b7f0e77ec494d2657108f777e8765efc060094c98c"},
+    {file = "regex-2023.12.25-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0fda75704357805eb953a3ee15a2b240694a9a514548cd49b3c5124b4e2ad01b"},
+    {file = "regex-2023.12.25-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f72cbae7f6b01591f90814250e636065850c5926751af02bb48da94dfced7baa"},
+    {file = "regex-2023.12.25-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:db2a0b1857f18b11e3b0e54ddfefc96af46b0896fb678c85f63fb8c37518b3e7"},
+    {file = "regex-2023.12.25-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:7502534e55c7c36c0978c91ba6f61703faf7ce733715ca48f499d3dbbd7657e0"},
+    {file = "regex-2023.12.25-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:e8c7e08bb566de4faaf11984af13f6bcf6a08f327b13631d41d62592681d24fe"},
+    {file = "regex-2023.12.25-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:283fc8eed679758de38fe493b7d7d84a198b558942b03f017b1f94dda8efae80"},
+    {file = "regex-2023.12.25-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:f44dd4d68697559d007462b0a3a1d9acd61d97072b71f6d1968daef26bc744bd"},
+    {file = "regex-2023.12.25-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:67d3ccfc590e5e7197750fcb3a2915b416a53e2de847a728cfa60141054123d4"},
+    {file = "regex-2023.12.25-cp311-cp311-win32.whl", hash = "sha256:68191f80a9bad283432385961d9efe09d783bcd36ed35a60fb1ff3f1ec2efe87"},
+    {file = "regex-2023.12.25-cp311-cp311-win_amd64.whl", hash = "sha256:7d2af3f6b8419661a0c421584cfe8aaec1c0e435ce7e47ee2a97e344b98f794f"},
+    {file = "regex-2023.12.25-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:8a0ccf52bb37d1a700375a6b395bff5dd15c50acb745f7db30415bae3c2b0715"},
+    {file = "regex-2023.12.25-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c3c4a78615b7762740531c27cf46e2f388d8d727d0c0c739e72048beb26c8a9d"},
+    {file = "regex-2023.12.25-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ad83e7545b4ab69216cef4cc47e344d19622e28aabec61574b20257c65466d6a"},
+    {file = "regex-2023.12.25-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b7a635871143661feccce3979e1727c4e094f2bdfd3ec4b90dfd4f16f571a87a"},
+    {file = "regex-2023.12.25-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d498eea3f581fbe1b34b59c697512a8baef88212f92e4c7830fcc1499f5b45a5"},
+    {file = "regex-2023.12.25-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:43f7cd5754d02a56ae4ebb91b33461dc67be8e3e0153f593c509e21d219c5060"},
+    {file = "regex-2023.12.25-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:51f4b32f793812714fd5307222a7f77e739b9bc566dc94a18126aba3b92b98a3"},
+    {file = "regex-2023.12.25-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ba99d8077424501b9616b43a2d208095746fb1284fc5ba490139651f971d39d9"},
+    {file = "regex-2023.12.25-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:4bfc2b16e3ba8850e0e262467275dd4d62f0d045e0e9eda2bc65078c0110a11f"},
+    {file = "regex-2023.12.25-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8c2c19dae8a3eb0ea45a8448356ed561be843b13cbc34b840922ddf565498c1c"},
+    {file = "regex-2023.12.25-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:60080bb3d8617d96f0fb7e19796384cc2467447ef1c491694850ebd3670bc457"},
+    {file = "regex-2023.12.25-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b77e27b79448e34c2c51c09836033056a0547aa360c45eeeb67803da7b0eedaf"},
+    {file = "regex-2023.12.25-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:518440c991f514331f4850a63560321f833979d145d7d81186dbe2f19e27ae3d"},
+    {file = "regex-2023.12.25-cp312-cp312-win32.whl", hash = "sha256:e2610e9406d3b0073636a3a2e80db05a02f0c3169b5632022b4e81c0364bcda5"},
+    {file = "regex-2023.12.25-cp312-cp312-win_amd64.whl", hash = "sha256:cc37b9aeebab425f11f27e5e9e6cf580be7206c6582a64467a14dda211abc232"},
+    {file = "regex-2023.12.25-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:da695d75ac97cb1cd725adac136d25ca687da4536154cdc2815f576e4da11c69"},
+    {file = "regex-2023.12.25-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d126361607b33c4eb7b36debc173bf25d7805847346dd4d99b5499e1fef52bc7"},
+    {file = "regex-2023.12.25-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4719bb05094d7d8563a450cf8738d2e1061420f79cfcc1fa7f0a44744c4d8f73"},
+    {file = "regex-2023.12.25-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5dd58946bce44b53b06d94aa95560d0b243eb2fe64227cba50017a8d8b3cd3e2"},
+    {file = "regex-2023.12.25-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22a86d9fff2009302c440b9d799ef2fe322416d2d58fc124b926aa89365ec482"},
+    {file = "regex-2023.12.25-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2aae8101919e8aa05ecfe6322b278f41ce2994c4a430303c4cd163fef746e04f"},
+    {file = "regex-2023.12.25-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:e692296c4cc2873967771345a876bcfc1c547e8dd695c6b89342488b0ea55cd8"},
+    {file = "regex-2023.12.25-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:263ef5cc10979837f243950637fffb06e8daed7f1ac1e39d5910fd29929e489a"},
+    {file = "regex-2023.12.25-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:d6f7e255e5fa94642a0724e35406e6cb7001c09d476ab5fce002f652b36d0c39"},
+    {file = "regex-2023.12.25-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:88ad44e220e22b63b0f8f81f007e8abbb92874d8ced66f32571ef8beb0643b2b"},
+    {file = "regex-2023.12.25-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:3a17d3ede18f9cedcbe23d2daa8a2cd6f59fe2bf082c567e43083bba3fb00347"},
+    {file = "regex-2023.12.25-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:d15b274f9e15b1a0b7a45d2ac86d1f634d983ca40d6b886721626c47a400bf39"},
+    {file = "regex-2023.12.25-cp37-cp37m-win32.whl", hash = "sha256:ed19b3a05ae0c97dd8f75a5d8f21f7723a8c33bbc555da6bbe1f96c470139d3c"},
+    {file = "regex-2023.12.25-cp37-cp37m-win_amd64.whl", hash = "sha256:a6d1047952c0b8104a1d371f88f4ab62e6275567d4458c1e26e9627ad489b445"},
+    {file = "regex-2023.12.25-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:b43523d7bc2abd757119dbfb38af91b5735eea45537ec6ec3a5ec3f9562a1c53"},
+    {file = "regex-2023.12.25-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:efb2d82f33b2212898f1659fb1c2e9ac30493ac41e4d53123da374c3b5541e64"},
+    {file = "regex-2023.12.25-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b7fca9205b59c1a3d5031f7e64ed627a1074730a51c2a80e97653e3e9fa0d415"},
+    {file = "regex-2023.12.25-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:086dd15e9435b393ae06f96ab69ab2d333f5d65cbe65ca5a3ef0ec9564dfe770"},
+    {file = "regex-2023.12.25-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e81469f7d01efed9b53740aedd26085f20d49da65f9c1f41e822a33992cb1590"},
+    {file = "regex-2023.12.25-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:34e4af5b27232f68042aa40a91c3b9bb4da0eeb31b7632e0091afc4310afe6cb"},
+    {file = "regex-2023.12.25-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9852b76ab558e45b20bf1893b59af64a28bd3820b0c2efc80e0a70a4a3ea51c1"},
+    {file = "regex-2023.12.25-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ff100b203092af77d1a5a7abe085b3506b7eaaf9abf65b73b7d6905b6cb76988"},
+    {file = "regex-2023.12.25-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:cc038b2d8b1470364b1888a98fd22d616fba2b6309c5b5f181ad4483e0017861"},
+    {file = "regex-2023.12.25-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:094ba386bb5c01e54e14434d4caabf6583334090865b23ef58e0424a6286d3dc"},
+    {file = "regex-2023.12.25-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:5cd05d0f57846d8ba4b71d9c00f6f37d6b97d5e5ef8b3c3840426a475c8f70f4"},
+    {file = "regex-2023.12.25-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:9aa1a67bbf0f957bbe096375887b2505f5d8ae16bf04488e8b0f334c36e31360"},
+    {file = "regex-2023.12.25-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:98a2636994f943b871786c9e82bfe7883ecdaba2ef5df54e1450fa9869d1f756"},
+    {file = "regex-2023.12.25-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:37f8e93a81fc5e5bd8db7e10e62dc64261bcd88f8d7e6640aaebe9bc180d9ce2"},
+    {file = "regex-2023.12.25-cp38-cp38-win32.whl", hash = "sha256:d78bd484930c1da2b9679290a41cdb25cc127d783768a0369d6b449e72f88beb"},
+    {file = "regex-2023.12.25-cp38-cp38-win_amd64.whl", hash = "sha256:b521dcecebc5b978b447f0f69b5b7f3840eac454862270406a39837ffae4e697"},
+    {file = "regex-2023.12.25-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:f7bc09bc9c29ebead055bcba136a67378f03d66bf359e87d0f7c759d6d4ffa31"},
+    {file = "regex-2023.12.25-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:e14b73607d6231f3cc4622809c196b540a6a44e903bcfad940779c80dffa7be7"},
+    {file = "regex-2023.12.25-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9eda5f7a50141291beda3edd00abc2d4a5b16c29c92daf8d5bd76934150f3edc"},
+    {file = "regex-2023.12.25-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cc6bb9aa69aacf0f6032c307da718f61a40cf970849e471254e0e91c56ffca95"},
+    {file = "regex-2023.12.25-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:298dc6354d414bc921581be85695d18912bea163a8b23cac9a2562bbcd5088b1"},
+    {file = "regex-2023.12.25-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2f4e475a80ecbd15896a976aa0b386c5525d0ed34d5c600b6d3ebac0a67c7ddf"},
+    {file = "regex-2023.12.25-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:531ac6cf22b53e0696f8e1d56ce2396311254eb806111ddd3922c9d937151dae"},
+    {file = "regex-2023.12.25-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:22f3470f7524b6da61e2020672df2f3063676aff444db1daa283c2ea4ed259d6"},
+    {file = "regex-2023.12.25-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:89723d2112697feaa320c9d351e5f5e7b841e83f8b143dba8e2d2b5f04e10923"},
+    {file = "regex-2023.12.25-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0ecf44ddf9171cd7566ef1768047f6e66975788258b1c6c6ca78098b95cf9a3d"},
+    {file = "regex-2023.12.25-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:905466ad1702ed4acfd67a902af50b8db1feeb9781436372261808df7a2a7bca"},
+    {file = "regex-2023.12.25-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:4558410b7a5607a645e9804a3e9dd509af12fb72b9825b13791a37cd417d73a5"},
+    {file = "regex-2023.12.25-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:7e316026cc1095f2a3e8cc012822c99f413b702eaa2ca5408a513609488cb62f"},
+    {file = "regex-2023.12.25-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:3b1de218d5375cd6ac4b5493e0b9f3df2be331e86520f23382f216c137913d20"},
+    {file = "regex-2023.12.25-cp39-cp39-win32.whl", hash = "sha256:11a963f8e25ab5c61348d090bf1b07f1953929c13bd2309a0662e9ff680763c9"},
+    {file = "regex-2023.12.25-cp39-cp39-win_amd64.whl", hash = "sha256:e693e233ac92ba83a87024e1d32b5f9ab15ca55ddd916d878146f4e3406b5c91"},
+    {file = "regex-2023.12.25.tar.gz", hash = "sha256:29171aa128da69afdf4bde412d5bedc335f2ca8fcfe4489038577d05f16181e5"},
 ]
 
 [[package]]
@@ -2042,45 +2131,45 @@ torch = ["numpy (>=1.21.6)", "torch (>=1.10)"]
 
 [[package]]
 name = "scipy"
-version = "1.11.4"
+version = "1.12.0"
 description = "Fundamental algorithms for scientific computing in Python"
 optional = false
 python-versions = ">=3.9"
 files = [
-    {file = "scipy-1.11.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bc9a714581f561af0848e6b69947fda0614915f072dfd14142ed1bfe1b806710"},
-    {file = "scipy-1.11.4-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:cf00bd2b1b0211888d4dc75656c0412213a8b25e80d73898083f402b50f47e41"},
-    {file = "scipy-1.11.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9999c008ccf00e8fbcce1236f85ade5c569d13144f77a1946bef8863e8f6eb4"},
-    {file = "scipy-1.11.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:933baf588daa8dc9a92c20a0be32f56d43faf3d1a60ab11b3f08c356430f6e56"},
-    {file = "scipy-1.11.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8fce70f39076a5aa62e92e69a7f62349f9574d8405c0a5de6ed3ef72de07f446"},
-    {file = "scipy-1.11.4-cp310-cp310-win_amd64.whl", hash = "sha256:6550466fbeec7453d7465e74d4f4b19f905642c89a7525571ee91dd7adabb5a3"},
-    {file = "scipy-1.11.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f313b39a7e94f296025e3cffc2c567618174c0b1dde173960cf23808f9fae4be"},
-    {file = "scipy-1.11.4-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:1b7c3dca977f30a739e0409fb001056484661cb2541a01aba0bb0029f7b68db8"},
-    {file = "scipy-1.11.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:00150c5eae7b610c32589dda259eacc7c4f1665aedf25d921907f4d08a951b1c"},
-    {file = "scipy-1.11.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:530f9ad26440e85766509dbf78edcfe13ffd0ab7fec2560ee5c36ff74d6269ff"},
-    {file = "scipy-1.11.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5e347b14fe01003d3b78e196e84bd3f48ffe4c8a7b8a1afbcb8f5505cb710993"},
-    {file = "scipy-1.11.4-cp311-cp311-win_amd64.whl", hash = "sha256:acf8ed278cc03f5aff035e69cb511741e0418681d25fbbb86ca65429c4f4d9cd"},
-    {file = "scipy-1.11.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:028eccd22e654b3ea01ee63705681ee79933652b2d8f873e7949898dda6d11b6"},
-    {file = "scipy-1.11.4-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:2c6ff6ef9cc27f9b3db93a6f8b38f97387e6e0591600369a297a50a8e96e835d"},
-    {file = "scipy-1.11.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b030c6674b9230d37c5c60ab456e2cf12f6784596d15ce8da9365e70896effc4"},
-    {file = "scipy-1.11.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ad669df80528aeca5f557712102538f4f37e503f0c5b9541655016dd0932ca79"},
-    {file = "scipy-1.11.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ce7fff2e23ab2cc81ff452a9444c215c28e6305f396b2ba88343a567feec9660"},
-    {file = "scipy-1.11.4-cp312-cp312-win_amd64.whl", hash = "sha256:36750b7733d960d7994888f0d148d31ea3017ac15eef664194b4ef68d36a4a97"},
-    {file = "scipy-1.11.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6e619aba2df228a9b34718efb023966da781e89dd3d21637b27f2e54db0410d7"},
-    {file = "scipy-1.11.4-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:f3cd9e7b3c2c1ec26364856f9fbe78695fe631150f94cd1c22228456404cf1ec"},
-    {file = "scipy-1.11.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d10e45a6c50211fe256da61a11c34927c68f277e03138777bdebedd933712fea"},
-    {file = "scipy-1.11.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:91af76a68eeae0064887a48e25c4e616fa519fa0d38602eda7e0f97d65d57937"},
-    {file = "scipy-1.11.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:6df1468153a31cf55ed5ed39647279beb9cfb5d3f84369453b49e4b8502394fd"},
-    {file = "scipy-1.11.4-cp39-cp39-win_amd64.whl", hash = "sha256:ee410e6de8f88fd5cf6eadd73c135020bfbbbdfcd0f6162c36a7638a1ea8cc65"},
-    {file = "scipy-1.11.4.tar.gz", hash = "sha256:90a2b78e7f5733b9de748f589f09225013685f9b218275257f8a8168ededaeaa"},
+    {file = "scipy-1.12.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:78e4402e140879387187f7f25d91cc592b3501a2e51dfb320f48dfb73565f10b"},
+    {file = "scipy-1.12.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:f5f00ebaf8de24d14b8449981a2842d404152774c1a1d880c901bf454cb8e2a1"},
+    {file = "scipy-1.12.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e53958531a7c695ff66c2e7bb7b79560ffdc562e2051644c5576c39ff8efb563"},
+    {file = "scipy-1.12.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5e32847e08da8d895ce09d108a494d9eb78974cf6de23063f93306a3e419960c"},
+    {file = "scipy-1.12.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4c1020cad92772bf44b8e4cdabc1df5d87376cb219742549ef69fc9fd86282dd"},
+    {file = "scipy-1.12.0-cp310-cp310-win_amd64.whl", hash = "sha256:75ea2a144096b5e39402e2ff53a36fecfd3b960d786b7efd3c180e29c39e53f2"},
+    {file = "scipy-1.12.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:408c68423f9de16cb9e602528be4ce0d6312b05001f3de61fe9ec8b1263cad08"},
+    {file = "scipy-1.12.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:5adfad5dbf0163397beb4aca679187d24aec085343755fcdbdeb32b3679f254c"},
+    {file = "scipy-1.12.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c3003652496f6e7c387b1cf63f4bb720951cfa18907e998ea551e6de51a04467"},
+    {file = "scipy-1.12.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b8066bce124ee5531d12a74b617d9ac0ea59245246410e19bca549656d9a40a"},
+    {file = "scipy-1.12.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:8bee4993817e204d761dba10dbab0774ba5a8612e57e81319ea04d84945375ba"},
+    {file = "scipy-1.12.0-cp311-cp311-win_amd64.whl", hash = "sha256:a24024d45ce9a675c1fb8494e8e5244efea1c7a09c60beb1eeb80373d0fecc70"},
+    {file = "scipy-1.12.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:e7e76cc48638228212c747ada851ef355c2bb5e7f939e10952bc504c11f4e372"},
+    {file = "scipy-1.12.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:f7ce148dffcd64ade37b2df9315541f9adad6efcaa86866ee7dd5db0c8f041c3"},
+    {file = "scipy-1.12.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9c39f92041f490422924dfdb782527a4abddf4707616e07b021de33467f917bc"},
+    {file = "scipy-1.12.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a7ebda398f86e56178c2fa94cad15bf457a218a54a35c2a7b4490b9f9cb2676c"},
+    {file = "scipy-1.12.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:95e5c750d55cf518c398a8240571b0e0782c2d5a703250872f36eaf737751338"},
+    {file = "scipy-1.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:e646d8571804a304e1da01040d21577685ce8e2db08ac58e543eaca063453e1c"},
+    {file = "scipy-1.12.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:913d6e7956c3a671de3b05ccb66b11bc293f56bfdef040583a7221d9e22a2e35"},
+    {file = "scipy-1.12.0-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:bba1b0c7256ad75401c73e4b3cf09d1f176e9bd4248f0d3112170fb2ec4db067"},
+    {file = "scipy-1.12.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:730badef9b827b368f351eacae2e82da414e13cf8bd5051b4bdfd720271a5371"},
+    {file = "scipy-1.12.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6546dc2c11a9df6926afcbdd8a3edec28566e4e785b915e849348c6dd9f3f490"},
+    {file = "scipy-1.12.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:196ebad3a4882081f62a5bf4aeb7326aa34b110e533aab23e4374fcccb0890dc"},
+    {file = "scipy-1.12.0-cp39-cp39-win_amd64.whl", hash = "sha256:b360f1b6b2f742781299514e99ff560d1fe9bd1bff2712894b52abe528d1fd1e"},
+    {file = "scipy-1.12.0.tar.gz", hash = "sha256:4bf5abab8a36d20193c698b0f1fc282c1d083c94723902c447e5d2f1780936a3"},
 ]
 
 [package.dependencies]
-numpy = ">=1.21.6,<1.28.0"
+numpy = ">=1.22.4,<1.29.0"
 
 [package.extras]
 dev = ["click", "cython-lint (>=0.12.2)", "doit (>=0.36.0)", "mypy", "pycodestyle", "pydevtool", "rich-click", "ruff", "types-psutil", "typing_extensions"]
 doc = ["jupytext", "matplotlib (>2)", "myst-nb", "numpydoc", "pooch", "pydata-sphinx-theme (==0.9.0)", "sphinx (!=4.1.0)", "sphinx-design (>=0.2.0)"]
-test = ["asv", "gmpy2", "mpmath", "pooch", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"]
+test = ["asv", "gmpy2", "hypothesis", "mpmath", "pooch", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"]
 
 [[package]]
 name = "sentencepiece"
@@ -2138,13 +2227,13 @@ files = [
 
 [[package]]
 name = "setuptools"
-version = "69.0.2"
+version = "69.0.3"
 description = "Easily download, build, install, upgrade, and uninstall Python packages"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "setuptools-69.0.2-py3-none-any.whl", hash = "sha256:1e8fdff6797d3865f37397be788a4e3cba233608e9b509382a2777d25ebde7f2"},
-    {file = "setuptools-69.0.2.tar.gz", hash = "sha256:735896e78a4742605974de002ac60562d286fa8051a7e2299445e8e8fbb01aa6"},
+    {file = "setuptools-69.0.3-py3-none-any.whl", hash = "sha256:385eb4edd9c9d5c17540511303e39a147ce2fc04bc55289c322b9e5904fe2c05"},
+    {file = "setuptools-69.0.3.tar.gz", hash = "sha256:be1af57fc409f93647f2e8e4573a142ed38724b8cdd389706a867bb4efcf1e78"},
 ]
 
 [package.extras]
@@ -2190,109 +2279,121 @@ files = [
 
 [[package]]
 name = "tokenizers"
-version = "0.15.0"
+version = "0.15.1"
 description = ""
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "tokenizers-0.15.0-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:cd3cd0299aaa312cd2988957598f80becd04d5a07338741eca076057a2b37d6e"},
-    {file = "tokenizers-0.15.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8a922c492c721744ee175f15b91704be2d305569d25f0547c77cd6c9f210f9dc"},
-    {file = "tokenizers-0.15.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:331dd786d02fc38698f835fff61c99480f98b73ce75a4c65bd110c9af5e4609a"},
-    {file = "tokenizers-0.15.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:88dd0961c437d413ab027f8b115350c121d49902cfbadf08bb8f634b15fa1814"},
-    {file = "tokenizers-0.15.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6fdcc55339df7761cd52e1fbe8185d3b3963bc9e3f3545faa6c84f9e8818259a"},
-    {file = "tokenizers-0.15.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f1480b0051d8ab5408e8e4db2dc832f7082ea24aa0722c427bde2418c6f3bd07"},
-    {file = "tokenizers-0.15.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9855e6c258918f9cf62792d4f6ddfa6c56dccd8c8118640f867f6393ecaf8bd7"},
-    {file = "tokenizers-0.15.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:de9529fe75efcd54ba8d516aa725e1851df9199f0669b665c55e90df08f5af86"},
-    {file = "tokenizers-0.15.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:8edcc90a36eab0705fe9121d6c77c6e42eeef25c7399864fd57dfb27173060bf"},
-    {file = "tokenizers-0.15.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ae17884aafb3e94f34fb7cfedc29054f5f54e142475ebf8a265a4e388fee3f8b"},
-    {file = "tokenizers-0.15.0-cp310-none-win32.whl", hash = "sha256:9a3241acdc9b44cff6e95c4a55b9be943ef3658f8edb3686034d353734adba05"},
-    {file = "tokenizers-0.15.0-cp310-none-win_amd64.whl", hash = "sha256:4b31807cb393d6ea31926b307911c89a1209d5e27629aa79553d1599c8ffdefe"},
-    {file = "tokenizers-0.15.0-cp311-cp311-macosx_10_7_x86_64.whl", hash = "sha256:af7e9be8c05d30bb137b9fd20f9d99354816599e5fd3d58a4b1e28ba3b36171f"},
-    {file = "tokenizers-0.15.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c3d7343fa562ea29661783344a2d83662db0d3d17a6fa6a403cac8e512d2d9fd"},
-    {file = "tokenizers-0.15.0-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:32371008788aeeb0309a9244809a23e4c0259625e6b74a103700f6421373f395"},
-    {file = "tokenizers-0.15.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ca9db64c7c9954fbae698884c5bb089764edc549731e5f9b7fa1dd4e4d78d77f"},
-    {file = "tokenizers-0.15.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:dbed5944c31195514669cf6381a0d8d47f164943000d10f93d6d02f0d45c25e0"},
-    {file = "tokenizers-0.15.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aab16c4a26d351d63e965b0c792f5da7227a37b69a6dc6d922ff70aa595b1b0c"},
-    {file = "tokenizers-0.15.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3c2b60b12fdd310bf85ce5d7d3f823456b9b65eed30f5438dd7761879c495983"},
-    {file = "tokenizers-0.15.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0344d6602740e44054a9e5bbe9775a5e149c4dddaff15959bb07dcce95a5a859"},
-    {file = "tokenizers-0.15.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:4525f6997d81d9b6d9140088f4f5131f6627e4c960c2c87d0695ae7304233fc3"},
-    {file = "tokenizers-0.15.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:65975094fef8cc68919644936764efd2ce98cf1bacbe8db2687155d2b0625bee"},
-    {file = "tokenizers-0.15.0-cp311-none-win32.whl", hash = "sha256:ff5d2159c5d93015f5a4542aac6c315506df31853123aa39042672031768c301"},
-    {file = "tokenizers-0.15.0-cp311-none-win_amd64.whl", hash = "sha256:2dd681b53cf615e60a31a115a3fda3980e543d25ca183797f797a6c3600788a3"},
-    {file = "tokenizers-0.15.0-cp312-cp312-macosx_10_7_x86_64.whl", hash = "sha256:c9cce6ee149a3d703f86877bc2a6d997e34874b2d5a2d7839e36b2273f31d3d9"},
-    {file = "tokenizers-0.15.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4a0a94bc3370e6f1cc8a07a8ae867ce13b7c1b4291432a773931a61f256d44ea"},
-    {file = "tokenizers-0.15.0-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:309cfcccfc7e502cb1f1de2c9c1c94680082a65bfd3a912d5a5b2c90c677eb60"},
-    {file = "tokenizers-0.15.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8413e994dd7d875ab13009127fc85633916c71213917daf64962bafd488f15dc"},
-    {file = "tokenizers-0.15.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d0ebf9430f901dbdc3dcb06b493ff24a3644c9f88c08e6a1d6d0ae2228b9b818"},
-    {file = "tokenizers-0.15.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:10361e9c7864b22dd791ec5126327f6c9292fb1d23481d4895780688d5e298ac"},
-    {file = "tokenizers-0.15.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:babe42635b8a604c594bdc56d205755f73414fce17ba8479d142a963a6c25cbc"},
-    {file = "tokenizers-0.15.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3768829861e964c7a4556f5f23307fce6a23872c2ebf030eb9822dbbbf7e9b2a"},
-    {file = "tokenizers-0.15.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9c91588a630adc88065e1c03ac6831e3e2112558869b9ebcb2b8afd8a14c944d"},
-    {file = "tokenizers-0.15.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:77606994e793ca54ecf3a3619adc8a906a28ca223d9354b38df41cb8766a0ed6"},
-    {file = "tokenizers-0.15.0-cp37-cp37m-macosx_10_7_x86_64.whl", hash = "sha256:6fe143939f3b596681922b2df12a591a5b010e7dcfbee2202482cd0c1c2f2459"},
-    {file = "tokenizers-0.15.0-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:b7bee0f1795e3e3561e9a557061b1539e5255b8221e3f928f58100282407e090"},
-    {file = "tokenizers-0.15.0-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:5d37e7f4439b4c46192ab4f2ff38ab815e4420f153caa13dec9272ef14403d34"},
-    {file = "tokenizers-0.15.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:caadf255cf7f951b38d10097836d1f3bcff4aeaaffadfdf748bab780bf5bff95"},
-    {file = "tokenizers-0.15.0-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:05accb9162bf711a941b1460b743d62fec61c160daf25e53c5eea52c74d77814"},
-    {file = "tokenizers-0.15.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:26a2ef890740127cb115ee5260878f4a677e36a12831795fd7e85887c53b430b"},
-    {file = "tokenizers-0.15.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e54c5f26df14913620046b33e822cb3bcd091a332a55230c0e63cc77135e2169"},
-    {file = "tokenizers-0.15.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:669b8ed653a578bcff919566631156f5da3aab84c66f3c0b11a6281e8b4731c7"},
-    {file = "tokenizers-0.15.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:0ea480d943297df26f06f508dab6e012b07f42bf3dffdd36e70799368a5f5229"},
-    {file = "tokenizers-0.15.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:bc80a0a565ebfc7cd89de7dd581da8c2b3238addfca6280572d27d763f135f2f"},
-    {file = "tokenizers-0.15.0-cp37-none-win32.whl", hash = "sha256:cdd945e678bbdf4517d5d8de66578a5030aeefecdb46f5320b034de9cad8d4dd"},
-    {file = "tokenizers-0.15.0-cp37-none-win_amd64.whl", hash = "sha256:1ab96ab7dc706e002c32b2ea211a94c1c04b4f4de48354728c3a6e22401af322"},
-    {file = "tokenizers-0.15.0-cp38-cp38-macosx_10_7_x86_64.whl", hash = "sha256:f21c9eb71c9a671e2a42f18b456a3d118e50c7f0fc4dd9fa8f4eb727fea529bf"},
-    {file = "tokenizers-0.15.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:2a5f4543a35889679fc3052086e69e81880b2a5a28ff2a52c5a604be94b77a3f"},
-    {file = "tokenizers-0.15.0-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:f8aa81afec893e952bd39692b2d9ef60575ed8c86fce1fd876a06d2e73e82dca"},
-    {file = "tokenizers-0.15.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1574a5a4af22c3def93fe8fe4adcc90a39bf5797ed01686a4c46d1c3bc677d2f"},
-    {file = "tokenizers-0.15.0-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7c7982fd0ec9e9122d03b209dac48cebfea3de0479335100ef379a9a959b9a5a"},
-    {file = "tokenizers-0.15.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f8d16b647032df2ce2c1f9097236e046ea9fedd969b25637b9d5d734d78aa53b"},
-    {file = "tokenizers-0.15.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b3cdf29e6f9653da330515dc8fa414be5a93aae79e57f8acc50d4028dd843edf"},
-    {file = "tokenizers-0.15.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7286f3df10de840867372e3e64b99ef58c677210e3ceb653cd0e740a5c53fe78"},
-    {file = "tokenizers-0.15.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:aabc83028baa5a36ce7a94e7659250f0309c47fa4a639e5c2c38e6d5ea0de564"},
-    {file = "tokenizers-0.15.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:72f78b0e0e276b1fc14a672fa73f3acca034ba8db4e782124a2996734a9ba9cf"},
-    {file = "tokenizers-0.15.0-cp38-none-win32.whl", hash = "sha256:9680b0ecc26e7e42f16680c1aa62e924d58d1c2dd992707081cc10a374896ea2"},
-    {file = "tokenizers-0.15.0-cp38-none-win_amd64.whl", hash = "sha256:f17cbd88dab695911cbdd385a5a7e3709cc61dff982351f5d1b5939f074a2466"},
-    {file = "tokenizers-0.15.0-cp39-cp39-macosx_10_7_x86_64.whl", hash = "sha256:3661862df7382c5eb23ac4fbf7c75e69b02dc4f5784e4c5a734db406b5b24596"},
-    {file = "tokenizers-0.15.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c3045d191dad49647f5a5039738ecf1c77087945c7a295f7bcf051c37067e883"},
-    {file = "tokenizers-0.15.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:a9fcaad9ab0801f14457d7c820d9f246b5ab590c407fc6b073819b1573097aa7"},
-    {file = "tokenizers-0.15.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a79f17027f24fe9485701c8dbb269b9c713954ec3bdc1e7075a66086c0c0cd3c"},
-    {file = "tokenizers-0.15.0-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:01a3aa332abc4bee7640563949fcfedca4de8f52691b3b70f2fc6ca71bfc0f4e"},
-    {file = "tokenizers-0.15.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:05b83896a893cdfedad8785250daa3ba9f0504848323471524d4783d7291661e"},
-    {file = "tokenizers-0.15.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cbbf2489fcf25d809731ba2744ff278dd07d9eb3f8b7482726bd6cae607073a4"},
-    {file = "tokenizers-0.15.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ab806ad521a5e9de38078b7add97589c313915f6f5fec6b2f9f289d14d607bd6"},
-    {file = "tokenizers-0.15.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:4a522612d5c88a41563e3463226af64e2fa00629f65cdcc501d1995dd25d23f5"},
-    {file = "tokenizers-0.15.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:e58a38c4e6075810bdfb861d9c005236a72a152ebc7005941cc90d1bbf16aca9"},
-    {file = "tokenizers-0.15.0-cp39-none-win32.whl", hash = "sha256:b8034f1041fd2bd2b84ff9f4dc4ae2e1c3b71606820a9cd5c562ebd291a396d1"},
-    {file = "tokenizers-0.15.0-cp39-none-win_amd64.whl", hash = "sha256:edde9aa964145d528d0e0dbf14f244b8a85ebf276fb76869bc02e2530fa37a96"},
-    {file = "tokenizers-0.15.0-pp310-pypy310_pp73-macosx_10_7_x86_64.whl", hash = "sha256:309445d10d442b7521b98083dc9f0b5df14eca69dbbfebeb98d781ee2cef5d30"},
-    {file = "tokenizers-0.15.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:d3125a6499226d4d48efc54f7498886b94c418e93a205b673bc59364eecf0804"},
-    {file = "tokenizers-0.15.0-pp310-pypy310_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:ed56ddf0d54877bb9c6d885177db79b41576e61b5ef6defeb579dcb803c04ad5"},
-    {file = "tokenizers-0.15.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3b22cd714706cc5b18992a232b023f736e539495f5cc61d2d28d176e55046f6c"},
-    {file = "tokenizers-0.15.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fac2719b1e9bc8e8e7f6599b99d0a8e24f33d023eb8ef644c0366a596f0aa926"},
-    {file = "tokenizers-0.15.0-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:85ddae17570ec7e5bfaf51ffa78d044f444a8693e1316e1087ee6150596897ee"},
-    {file = "tokenizers-0.15.0-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:76f1bed992e396bf6f83e3df97b64ff47885e45e8365f8983afed8556a0bc51f"},
-    {file = "tokenizers-0.15.0-pp37-pypy37_pp73-macosx_10_7_x86_64.whl", hash = "sha256:3bb0f4df6dce41a1c7482087b60d18c372ef4463cb99aa8195100fcd41e0fd64"},
-    {file = "tokenizers-0.15.0-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:22c27672c27a059a5f39ff4e49feed8c7f2e1525577c8a7e3978bd428eb5869d"},
-    {file = "tokenizers-0.15.0-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:78104f5d035c9991f92831fc0efe9e64a05d4032194f2a69f67aaa05a4d75bbb"},
-    {file = "tokenizers-0.15.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a40b73dc19d82c3e3ffb40abdaacca8fbc95eeb26c66b7f9f860aebc07a73998"},
-    {file = "tokenizers-0.15.0-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:d801d1368188c74552cd779b1286e67cb9fd96f4c57a9f9a2a09b6def9e1ab37"},
-    {file = "tokenizers-0.15.0-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:82641ffb13a4da1293fcc9f437d457647e60ed0385a9216cd135953778b3f0a1"},
-    {file = "tokenizers-0.15.0-pp38-pypy38_pp73-macosx_10_7_x86_64.whl", hash = "sha256:160f9d1810f2c18fffa94aa98bf17632f6bd2dabc67fcb01a698ca80c37d52ee"},
-    {file = "tokenizers-0.15.0-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:8d7d6eea831ed435fdeeb9bcd26476226401d7309d115a710c65da4088841948"},
-    {file = "tokenizers-0.15.0-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:f6456bec6c557d63d8ec0023758c32f589e1889ed03c055702e84ce275488bed"},
-    {file = "tokenizers-0.15.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1eef39a502fad3bf104b9e1906b4fb0cee20e44e755e51df9a98f8922c3bf6d4"},
-    {file = "tokenizers-0.15.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c1e4664c5b797e093c19b794bbecc19d2367e782b4a577d8b7c1821db5dc150d"},
-    {file = "tokenizers-0.15.0-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:ca003fb5f3995ff5cf676db6681b8ea5d54d3b30bea36af1120e78ee1a4a4cdf"},
-    {file = "tokenizers-0.15.0-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:7f17363141eb0c53752c89e10650b85ef059a52765d0802ba9613dbd2d21d425"},
-    {file = "tokenizers-0.15.0-pp39-pypy39_pp73-macosx_10_7_x86_64.whl", hash = "sha256:8a765db05581c7d7e1280170f2888cda351760d196cc059c37ea96f121125799"},
-    {file = "tokenizers-0.15.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:2a0dd641a72604486cd7302dd8f87a12c8a9b45e1755e47d2682733f097c1af5"},
-    {file = "tokenizers-0.15.0-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:0a1a3c973e4dc97797fc19e9f11546c95278ffc55c4492acb742f69e035490bc"},
-    {file = "tokenizers-0.15.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d4fab75642aae4e604e729d6f78e0addb9d7e7d49e28c8f4d16b24da278e5263"},
-    {file = "tokenizers-0.15.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:65f80be77f6327a86d8fd35a4467adcfe6174c159b4ab52a1a8dd4c6f2d7d9e1"},
-    {file = "tokenizers-0.15.0-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:a8da7533dbe66b88afd430c56a2f2ce1fd82e2681868f857da38eeb3191d7498"},
-    {file = "tokenizers-0.15.0-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:fa8eb4584fc6cbe6a84d7a7864be3ed28e23e9fd2146aa8ef1814d579df91958"},
-    {file = "tokenizers-0.15.0.tar.gz", hash = "sha256:10c7e6e7b4cabd757da59e93f5f8d1126291d16f8b54f28510825ef56a3e5d0e"},
+    {file = "tokenizers-0.15.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:32c9491dd1bcb33172c26b454dbd607276af959b9e78fa766e2694cafab3103c"},
+    {file = "tokenizers-0.15.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29a1b784b870a097e7768f8c20c2dd851e2c75dad3efdae69a79d3e7f1d614d5"},
+    {file = "tokenizers-0.15.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:0049fbe648af04148b08cb211994ce8365ee628ce49724b56aaefd09a3007a78"},
+    {file = "tokenizers-0.15.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e84b3c235219e75e24de6b71e6073cd2c8d740b14d88e4c6d131b90134e3a338"},
+    {file = "tokenizers-0.15.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8cc575769ea11d074308c6d71cb10b036cdaec941562c07fc7431d956c502f0e"},
+    {file = "tokenizers-0.15.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:22bf28f299c4158e6d0b5eaebddfd500c4973d947ffeaca8bcbe2e8c137dff0b"},
+    {file = "tokenizers-0.15.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:506555f98361db9c74e1323a862d77dcd7d64c2058829a368bf4159d986e339f"},
+    {file = "tokenizers-0.15.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7061b0a28ade15906f5b2ec8c48d3bdd6e24eca6b427979af34954fbe31d5cef"},
+    {file = "tokenizers-0.15.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:7ed5e35507b7a0e2aac3285c4f5e37d4ec5cfc0e5825b862b68a0aaf2757af52"},
+    {file = "tokenizers-0.15.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:1c9df9247df0de6509dd751b1c086e5f124b220133b5c883bb691cb6fb3d786f"},
+    {file = "tokenizers-0.15.1-cp310-none-win32.whl", hash = "sha256:dd999af1b4848bef1b11d289f04edaf189c269d5e6afa7a95fa1058644c3f021"},
+    {file = "tokenizers-0.15.1-cp310-none-win_amd64.whl", hash = "sha256:39d06a57f7c06940d602fad98702cf7024c4eee7f6b9fe76b9f2197d5a4cc7e2"},
+    {file = "tokenizers-0.15.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:8ad034eb48bf728af06915e9294871f72fcc5254911eddec81d6df8dba1ce055"},
+    {file = "tokenizers-0.15.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ea9ede7c42f8fa90f31bfc40376fd91a7d83a4aa6ad38e6076de961d48585b26"},
+    {file = "tokenizers-0.15.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:b85d6fe1a20d903877aa0ef32ef6b96e81e0e48b71c206d6046ce16094de6970"},
+    {file = "tokenizers-0.15.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6a7d44f656320137c7d643b9c7dcc1814763385de737fb98fd2643880910f597"},
+    {file = "tokenizers-0.15.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bd244bd0793cdacf27ee65ec3db88c21f5815460e8872bbeb32b040469d6774e"},
+    {file = "tokenizers-0.15.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0f3f4a36e371b3cb1123adac8aeeeeab207ad32f15ed686d9d71686a093bb140"},
+    {file = "tokenizers-0.15.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c2921a53966afb29444da98d56a6ccbef23feb3b0c0f294b4e502370a0a64f25"},
+    {file = "tokenizers-0.15.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f49068cf51f49c231067f1a8c9fc075ff960573f6b2a956e8e1b0154fb638ea5"},
+    {file = "tokenizers-0.15.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0ab1a22f20eaaab832ab3b00a0709ca44a0eb04721e580277579411b622c741c"},
+    {file = "tokenizers-0.15.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:671268f24b607c4adc6fa2b5b580fd4211b9f84b16bd7f46d62f8e5be0aa7ba4"},
+    {file = "tokenizers-0.15.1-cp311-none-win32.whl", hash = "sha256:a4f03e33d2bf7df39c8894032aba599bf90f6f6378e683a19d28871f09bb07fc"},
+    {file = "tokenizers-0.15.1-cp311-none-win_amd64.whl", hash = "sha256:30f689537bcc7576d8bd4daeeaa2cb8f36446ba2f13f421b173e88f2d8289c4e"},
+    {file = "tokenizers-0.15.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:0f3a379dd0898a82ea3125e8f9c481373f73bffce6430d4315f0b6cd5547e409"},
+    {file = "tokenizers-0.15.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7d870ae58bba347d38ac3fc8b1f662f51e9c95272d776dd89f30035c83ee0a4f"},
+    {file = "tokenizers-0.15.1-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:d6d28e0143ec2e253a8a39e94bf1d24776dbe73804fa748675dbffff4a5cd6d8"},
+    {file = "tokenizers-0.15.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:61ae9ac9f44e2da128ee35db69489883b522f7abe033733fa54eb2de30dac23d"},
+    {file = "tokenizers-0.15.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d8e322a47e29128300b3f7749a03c0ec2bce0a3dc8539ebff738d3f59e233542"},
+    {file = "tokenizers-0.15.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:760334f475443bc13907b1a8e1cb0aeaf88aae489062546f9704dce6c498bfe2"},
+    {file = "tokenizers-0.15.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1b173753d4aca1e7d0d4cb52b5e3ffecfb0ca014e070e40391b6bb4c1d6af3f2"},
+    {file = "tokenizers-0.15.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:82c1f13d457c8f0ab17e32e787d03470067fe8a3b4d012e7cc57cb3264529f4a"},
+    {file = "tokenizers-0.15.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:425b46ceff4505f20191df54b50ac818055d9d55023d58ae32a5d895b6f15bb0"},
+    {file = "tokenizers-0.15.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:681ac6ba3b4fdaf868ead8971221a061f580961c386e9732ea54d46c7b72f286"},
+    {file = "tokenizers-0.15.1-cp312-none-win32.whl", hash = "sha256:f2272656063ccfba2044df2115095223960d80525d208e7a32f6c01c351a6f4a"},
+    {file = "tokenizers-0.15.1-cp312-none-win_amd64.whl", hash = "sha256:9abe103203b1c6a2435d248d5ff4cceebcf46771bfbc4957a98a74da6ed37674"},
+    {file = "tokenizers-0.15.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:2ce9ed5c8ef26b026a66110e3c7b73d93ec2d26a0b1d0ea55ddce61c0e5f446f"},
+    {file = "tokenizers-0.15.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:89b24d366137986c3647baac29ef902d2d5445003d11c30df52f1bd304689aeb"},
+    {file = "tokenizers-0.15.1-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:0faebedd01b413ab777ca0ee85914ed8b031ea5762ab0ea60b707ce8b9be6842"},
+    {file = "tokenizers-0.15.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cdbd9dfcdad4f3b95d801f768e143165165055c18e44ca79a8a26de889cd8e85"},
+    {file = "tokenizers-0.15.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:97194324c12565b07e9993ca9aa813b939541185682e859fb45bb8d7d99b3193"},
+    {file = "tokenizers-0.15.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:485e43e2cc159580e0d83fc919ec3a45ae279097f634b1ffe371869ffda5802c"},
+    {file = "tokenizers-0.15.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:191d084d60e3589d6420caeb3f9966168269315f8ec7fbc3883122dc9d99759d"},
+    {file = "tokenizers-0.15.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:01c28cc8d7220634a75b14c53f4fc9d1b485f99a5a29306a999c115921de2897"},
+    {file = "tokenizers-0.15.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:325212027745d3f8d5d5006bb9e5409d674eb80a184f19873f4f83494e1fdd26"},
+    {file = "tokenizers-0.15.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:3c5573603c36ce12dbe318bcfb490a94cad2d250f34deb2f06cb6937957bbb71"},
+    {file = "tokenizers-0.15.1-cp37-cp37m-macosx_10_12_x86_64.whl", hash = "sha256:1441161adb6d71a15a630d5c1d8659d5ebe41b6b209586fbeea64738e58fcbb2"},
+    {file = "tokenizers-0.15.1-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:382a8d0c31afcfb86571afbfefa37186df90865ce3f5b731842dab4460e53a38"},
+    {file = "tokenizers-0.15.1-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:e76959783e3f4ec73b3f3d24d4eec5aa9225f0bee565c48e77f806ed1e048f12"},
+    {file = "tokenizers-0.15.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:401df223e5eb927c5961a0fc6b171818a2bba01fb36ef18c3e1b69b8cd80e591"},
+    {file = "tokenizers-0.15.1-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c52606c233c759561a16e81b2290a7738c3affac7a0b1f0a16fe58dc22e04c7d"},
+    {file = "tokenizers-0.15.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b72c658bbe5a05ed8bc2ac5ad782385bfd743ffa4bc87d9b5026341e709c6f44"},
+    {file = "tokenizers-0.15.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:25f5643a2f005c42f0737a326c6c6bdfedfdc9a994b10a1923d9c3e792e4d6a6"},
+    {file = "tokenizers-0.15.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8c5b6f633999d6b42466bbfe21be2e26ad1760b6f106967a591a41d8cbca980e"},
+    {file = "tokenizers-0.15.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:ceb5c9ad11a015150b545c1a11210966a45b8c3d68a942e57cf8938c578a77ca"},
+    {file = "tokenizers-0.15.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:bedd4ce0c4872db193444c395b11c7697260ce86a635ab6d48102d76be07d324"},
+    {file = "tokenizers-0.15.1-cp37-none-win32.whl", hash = "sha256:cd6caef6c14f5ed6d35f0ddb78eab8ca6306d0cd9870330bccff72ad014a6f42"},
+    {file = "tokenizers-0.15.1-cp37-none-win_amd64.whl", hash = "sha256:d2bd7af78f58d75a55e5df61efae164ab9200c04b76025f9cc6eeb7aff3219c2"},
+    {file = "tokenizers-0.15.1-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:59b3ca6c02e0bd5704caee274978bd055de2dff2e2f39dadf536c21032dfd432"},
+    {file = "tokenizers-0.15.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:48fe21b67c22583bed71933a025fd66b1f5cfae1baefa423c3d40379b5a6e74e"},
+    {file = "tokenizers-0.15.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:3d190254c66a20fb1efbdf035e6333c5e1f1c73b1f7bfad88f9c31908ac2c2c4"},
+    {file = "tokenizers-0.15.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fef90c8f5abf17d48d6635f5fd92ad258acd1d0c2d920935c8bf261782cfe7c8"},
+    {file = "tokenizers-0.15.1-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fac011ef7da3357aa7eb19efeecf3d201ede9618f37ddedddc5eb809ea0963ca"},
+    {file = "tokenizers-0.15.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:574ec5b3e71d1feda6b0ecac0e0445875729b4899806efbe2b329909ec75cb50"},
+    {file = "tokenizers-0.15.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:aca16c3c0637c051a59ea99c4253f16fbb43034fac849076a7e7913b2b9afd2d"},
+    {file = "tokenizers-0.15.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8a6f238fc2bbfd3e12e8529980ec1624c7e5b69d4e959edb3d902f36974f725a"},
+    {file = "tokenizers-0.15.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:587e11a26835b73c31867a728f32ca8a93c9ded4a6cd746516e68b9d51418431"},
+    {file = "tokenizers-0.15.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6456e7ad397352775e2efdf68a9ec5d6524bbc4543e926eef428d36de627aed4"},
+    {file = "tokenizers-0.15.1-cp38-none-win32.whl", hash = "sha256:614f0da7dd73293214bd143e6221cafd3f7790d06b799f33a987e29d057ca658"},
+    {file = "tokenizers-0.15.1-cp38-none-win_amd64.whl", hash = "sha256:a4fa0a20d9f69cc2bf1cfce41aa40588598e77ec1d6f56bf0eb99769969d1ede"},
+    {file = "tokenizers-0.15.1-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:8d3f18a45e0cf03ce193d5900460dc2430eec4e14c786e5d79bddba7ea19034f"},
+    {file = "tokenizers-0.15.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:38dbd6c38f88ad7d5dc5d70c764415d38fe3bcd99dc81638b572d093abc54170"},
+    {file = "tokenizers-0.15.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:777286b1f7e52de92aa4af49fe31046cfd32885d1bbaae918fab3bba52794c33"},
+    {file = "tokenizers-0.15.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:58d4d550a3862a47dd249892d03a025e32286eb73cbd6bc887fb8fb64bc97165"},
+    {file = "tokenizers-0.15.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4eda68ce0344f35042ae89220b40a0007f721776b727806b5c95497b35714bb7"},
+    {file = "tokenizers-0.15.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0cd33d15f7a3a784c3b665cfe807b8de3c6779e060349bd5005bb4ae5bdcb437"},
+    {file = "tokenizers-0.15.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0a1aa370f978ac0bfb50374c3a40daa93fd56d47c0c70f0c79607fdac2ccbb42"},
+    {file = "tokenizers-0.15.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:241482b940340fff26a2708cb9ba383a5bb8a2996d67a0ff2c4367bf4b86cc3a"},
+    {file = "tokenizers-0.15.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:68f30b05f46a4d9aba88489eadd021904afe90e10a7950e28370d6e71b9db021"},
+    {file = "tokenizers-0.15.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5a3c5d8025529670462b881b7b2527aacb6257398c9ec8e170070432c3ae3a82"},
+    {file = "tokenizers-0.15.1-cp39-none-win32.whl", hash = "sha256:74d1827830f60a9d78da8f6d49a1fbea5422ce0eea42e2617877d23380a7efbc"},
+    {file = "tokenizers-0.15.1-cp39-none-win_amd64.whl", hash = "sha256:9ff499923e4d6876d6b6a63ea84a56805eb35e91dd89b933a7aee0c56a3838c6"},
+    {file = "tokenizers-0.15.1-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:b3aa007a0f4408f62a8471bdaa3faccad644cbf2622639f2906b4f9b5339e8b8"},
+    {file = "tokenizers-0.15.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:f3d4176fa93d8b2070db8f3c70dc21106ae6624fcaaa334be6bdd3a0251e729e"},
+    {file = "tokenizers-0.15.1-pp310-pypy310_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:1d0e463655ef8b2064df07bd4a445ed7f76f6da3b286b4590812587d42f80e89"},
+    {file = "tokenizers-0.15.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:089138fd0351b62215c462a501bd68b8df0e213edcf99ab9efd5dba7b4cb733e"},
+    {file = "tokenizers-0.15.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1e563ac628f5175ed08e950430e2580e544b3e4b606a0995bb6b52b3a3165728"},
+    {file = "tokenizers-0.15.1-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:244dcc28c5fde221cb4373961b20da30097669005b122384d7f9f22752487a46"},
+    {file = "tokenizers-0.15.1-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:d82951d46052dddae1369e68ff799a0e6e29befa9a0b46e387ae710fd4daefb0"},
+    {file = "tokenizers-0.15.1-pp37-pypy37_pp73-macosx_10_12_x86_64.whl", hash = "sha256:7b14296bc9059849246ceb256ffbe97f8806a9b5d707e0095c22db312f4fc014"},
+    {file = "tokenizers-0.15.1-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:0309357bb9b6c8d86cdf456053479d7112074b470651a997a058cd7ad1c4ea57"},
+    {file = "tokenizers-0.15.1-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:083f06e9d8d01b70b67bcbcb7751b38b6005512cce95808be6bf34803534a7e7"},
+    {file = "tokenizers-0.15.1-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:85288aea86ada579789447f0dcec108ebef8da4b450037eb4813d83e4da9371e"},
+    {file = "tokenizers-0.15.1-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:385e6fcb01e8de90c1d157ae2a5338b23368d0b1c4cc25088cdca90147e35d17"},
+    {file = "tokenizers-0.15.1-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:60067edfcbf7d6cd448ac47af41ec6e84377efbef7be0c06f15a7c1dd069e044"},
+    {file = "tokenizers-0.15.1-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:5f7e37f89acfe237d4eaf93c3b69b0f01f407a7a5d0b5a8f06ba91943ea3cf10"},
+    {file = "tokenizers-0.15.1-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:6a63a15b523d42ebc1f4028e5a568013388c2aefa4053a263e511cb10aaa02f1"},
+    {file = "tokenizers-0.15.1-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:2417d9e4958a6c2fbecc34c27269e74561c55d8823bf914b422e261a11fdd5fd"},
+    {file = "tokenizers-0.15.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8550974bace6210e41ab04231e06408cf99ea4279e0862c02b8d47e7c2b2828"},
+    {file = "tokenizers-0.15.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:194ba82129b171bcd29235a969e5859a93e491e9b0f8b2581f500f200c85cfdd"},
+    {file = "tokenizers-0.15.1-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:1bfd95eef8b01e6c0805dbccc8eaf41d8c5a84f0cce72c0ab149fe76aae0bce6"},
+    {file = "tokenizers-0.15.1-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:b87a15dd72f8216b03c151e3dace00c75c3fe7b0ee9643c25943f31e582f1a34"},
+    {file = "tokenizers-0.15.1-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:6ac22f358a0c2a6c685be49136ce7ea7054108986ad444f567712cf274b34cd8"},
+    {file = "tokenizers-0.15.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:1e9d1f046a9b9d9a95faa103f07db5921d2c1c50f0329ebba4359350ee02b18b"},
+    {file = "tokenizers-0.15.1-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:2a0fd30a4b74485f6a7af89fffb5fb84d6d5f649b3e74f8d37f624cc9e9e97cf"},
+    {file = "tokenizers-0.15.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:80e45dc206b9447fa48795a1247c69a1732d890b53e2cc51ba42bc2fefa22407"},
+    {file = "tokenizers-0.15.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4eaff56ef3e218017fa1d72007184401f04cb3a289990d2b6a0a76ce71c95f96"},
+    {file = "tokenizers-0.15.1-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:b41dc107e4a4e9c95934e79b025228bbdda37d9b153d8b084160e88d5e48ad6f"},
+    {file = "tokenizers-0.15.1-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:1922b8582d0c33488764bcf32e80ef6054f515369e70092729c928aae2284bc2"},
+    {file = "tokenizers-0.15.1.tar.gz", hash = "sha256:c0a331d6d5a3d6e97b7f99f562cee8d56797180797bc55f12070e495e717c980"},
 ]
 
 [package.dependencies]
@@ -2389,13 +2490,13 @@ telegram = ["requests"]
 
 [[package]]
 name = "transformers"
-version = "4.36.1"
+version = "4.37.1"
 description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow"
 optional = false
 python-versions = ">=3.8.0"
 files = [
-    {file = "transformers-4.36.1-py3-none-any.whl", hash = "sha256:0e309d03634885f02d46801ec4f2c3fc1d614a5b9ebde608181f3e842bac53b8"},
-    {file = "transformers-4.36.1.tar.gz", hash = "sha256:28e55952d9bed68f06cf45a3d29cc480679b528afe944e68f8cf6c799e428759"},
+    {file = "transformers-4.37.1-py3-none-any.whl", hash = "sha256:05e4c4bf94f74addeb716bc83517f49d55df1e9022db3d5b027c801e9a410ebf"},
+    {file = "transformers-4.37.1.tar.gz", hash = "sha256:9843368d97fd7ac30126664743adc65e8e5be930da7d66342172e97bd1243e2d"},
 ]
 
 [package.dependencies]
@@ -2412,16 +2513,16 @@ tqdm = ">=4.27"
 
 [package.extras]
 accelerate = ["accelerate (>=0.21.0)"]
-agents = ["Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.21.0)", "datasets (!=2.5.0)", "diffusers", "opencv-python", "sentencepiece (>=0.1.91,!=0.1.92)", "torch (>=1.10,!=1.12.0)"]
-all = ["Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.21.0)", "av (==9.2.0)", "codecarbon (==1.2.0)", "decord (==0.6.0)", "flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune] (>=2.7.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>=2.6,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timm", "tokenizers (>=0.14,<0.19)", "torch (>=1.10,!=1.12.0)", "torchaudio", "torchvision"]
+agents = ["Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.21.0)", "datasets (!=2.5.0)", "diffusers", "opencv-python", "sentencepiece (>=0.1.91,!=0.1.92)", "torch (>=1.11,!=1.12.0)"]
+all = ["Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.21.0)", "av (==9.2.0)", "codecarbon (==1.2.0)", "decord (==0.6.0)", "flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune] (>=2.7.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>=2.6,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timm", "tokenizers (>=0.14,<0.19)", "torch (>=1.11,!=1.12.0)", "torchaudio", "torchvision"]
 audio = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"]
 codecarbon = ["codecarbon (==1.2.0)"]
 deepspeed = ["accelerate (>=0.21.0)", "deepspeed (>=0.9.3)"]
 deepspeed-testing = ["GitPython (<3.1.19)", "accelerate (>=0.21.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "deepspeed (>=0.9.3)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "hf-doc-builder (>=0.3.0)", "nltk", "optuna", "parameterized", "protobuf", "psutil", "pydantic (<2)", "pytest (>=7.2.0)", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.1.5)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "timeout-decorator"]
-dev = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.21.0)", "av (==9.2.0)", "beautifulsoup4", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "decord (==0.6.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "flax (>=0.4.1,<=0.7.0)", "fugashi (>=1.0)", "hf-doc-builder", "hf-doc-builder (>=0.3.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "nltk", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic (<2)", "pytest (>=7.2.0)", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.1.5)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "tensorflow (>=2.6,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timeout-decorator", "timm", "tokenizers (>=0.14,<0.19)", "torch (>=1.10,!=1.12.0)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"]
+dev = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.21.0)", "av (==9.2.0)", "beautifulsoup4", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "decord (==0.6.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "flax (>=0.4.1,<=0.7.0)", "fugashi (>=1.0)", "hf-doc-builder", "hf-doc-builder (>=0.3.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "nltk", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic (<2)", "pytest (>=7.2.0)", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.1.5)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "tensorflow (>=2.6,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timeout-decorator", "timm", "tokenizers (>=0.14,<0.19)", "torch (>=1.11,!=1.12.0)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"]
 dev-tensorflow = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "hf-doc-builder", "hf-doc-builder (>=0.3.0)", "isort (>=5.5.4)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "nltk", "onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic (<2)", "pytest (>=7.2.0)", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.1.5)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "tensorflow (>=2.6,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timeout-decorator", "tokenizers (>=0.14,<0.19)", "urllib3 (<2.0.0)"]
-dev-torch = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.21.0)", "beautifulsoup4", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "fugashi (>=1.0)", "hf-doc-builder", "hf-doc-builder (>=0.3.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "kenlm", "librosa", "nltk", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic (<2)", "pytest (>=7.2.0)", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.1.5)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "timeout-decorator", "timm", "tokenizers (>=0.14,<0.19)", "torch (>=1.10,!=1.12.0)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"]
-docs = ["Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.21.0)", "av (==9.2.0)", "codecarbon (==1.2.0)", "decord (==0.6.0)", "flax (>=0.4.1,<=0.7.0)", "hf-doc-builder", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune] (>=2.7.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>=2.6,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timm", "tokenizers (>=0.14,<0.19)", "torch (>=1.10,!=1.12.0)", "torchaudio", "torchvision"]
+dev-torch = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.21.0)", "beautifulsoup4", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "fugashi (>=1.0)", "hf-doc-builder", "hf-doc-builder (>=0.3.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "kenlm", "librosa", "nltk", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic (<2)", "pytest (>=7.2.0)", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.1.5)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "timeout-decorator", "timm", "tokenizers (>=0.14,<0.19)", "torch (>=1.11,!=1.12.0)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"]
+docs = ["Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.21.0)", "av (==9.2.0)", "codecarbon (==1.2.0)", "decord (==0.6.0)", "flax (>=0.4.1,<=0.7.0)", "hf-doc-builder", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune] (>=2.7.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>=2.6,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timm", "tokenizers (>=0.14,<0.19)", "torch (>=1.11,!=1.12.0)", "torchaudio", "torchvision"]
 docs-specific = ["hf-doc-builder"]
 flax = ["flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "optax (>=0.0.8,<=0.1.4)"]
 flax-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"]
@@ -2429,7 +2530,7 @@ ftfy = ["ftfy"]
 integrations = ["optuna", "ray[tune] (>=2.7.0)", "sigopt"]
 ja = ["fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "rhoknp (>=1.1.0,<1.3.1)", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)"]
 modelcreation = ["cookiecutter (==1.7.3)"]
-natten = ["natten (>=0.14.6)"]
+natten = ["natten (>=0.14.6,<0.15.0)"]
 onnx = ["onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "tf2onnx"]
 onnxruntime = ["onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)"]
 optuna = ["optuna"]
@@ -2448,10 +2549,10 @@ tf-cpu = ["keras-nlp (>=0.3.1)", "onnxconverter-common", "tensorflow-cpu (>=2.6,
 tf-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"]
 timm = ["timm"]
 tokenizers = ["tokenizers (>=0.14,<0.19)"]
-torch = ["accelerate (>=0.21.0)", "torch (>=1.10,!=1.12.0)"]
+torch = ["accelerate (>=0.21.0)", "torch (>=1.11,!=1.12.0)"]
 torch-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"]
 torch-vision = ["Pillow (>=10.0.1,<=15.0)", "torchvision"]
-torchhub = ["filelock", "huggingface-hub (>=0.19.3,<1.0)", "importlib-metadata", "numpy (>=1.17)", "packaging (>=20.0)", "protobuf", "regex (!=2019.12.17)", "requests", "sentencepiece (>=0.1.91,!=0.1.92)", "tokenizers (>=0.14,<0.19)", "torch (>=1.10,!=1.12.0)", "tqdm (>=4.27)"]
+torchhub = ["filelock", "huggingface-hub (>=0.19.3,<1.0)", "importlib-metadata", "numpy (>=1.17)", "packaging (>=20.0)", "protobuf", "regex (!=2019.12.17)", "requests", "sentencepiece (>=0.1.91,!=0.1.92)", "tokenizers (>=0.14,<0.19)", "torch (>=1.11,!=1.12.0)", "tqdm (>=4.27)"]
 video = ["av (==9.2.0)", "decord (==0.6.0)"]
 vision = ["Pillow (>=10.0.1,<=15.0)"]
 
@@ -2513,13 +2614,13 @@ files = [
 
 [[package]]
 name = "tzdata"
-version = "2023.3"
+version = "2023.4"
 description = "Provider of IANA time zone data"
 optional = true
 python-versions = ">=2"
 files = [
-    {file = "tzdata-2023.3-py2.py3-none-any.whl", hash = "sha256:7e65763eef3120314099b6939b5546db7adce1e7d6f2e179e3df563c70511eda"},
-    {file = "tzdata-2023.3.tar.gz", hash = "sha256:11ef1e08e54acb0d4f95bdb1be05da659673de4acbd21bf9c69e94cc5e907a3a"},
+    {file = "tzdata-2023.4-py2.py3-none-any.whl", hash = "sha256:aa3ace4329eeacda5b7beb7ea08ece826c28d761cda36e747cfbf97996d39bf3"},
+    {file = "tzdata-2023.4.tar.gz", hash = "sha256:dd54c94f294765522c77399649b4fefd95522479a664a0cec87f41bebc6148c9"},
 ]
 
 [[package]]
@@ -2861,4 +2962,4 @@ torch = ["torch"]
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.9,<3.13"
-content-hash = "d314f7dc9ea4d4e7581552f340d9171f24f709bf98de8b8c01c449c23026e7a3"
+content-hash = "33d533d21d14c258678a8c4bb28e2a15e8ebe5ca35d8589cbfe4a7b7d2e79a90"
diff --git a/server/pyproject.toml b/server/pyproject.toml
index d1452678..72a7afb0 100644
--- a/server/pyproject.toml
+++ b/server/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "text-generation-server"
-version = "1.3.4"
+version = "1.4.0"
 description = "Text Generation Inference Python gRPC Server"
 authors = ["Olivier Dehaene <olivier@huggingface.co>"]
 
diff --git a/server/requirements_cuda.txt b/server/requirements_cuda.txt
index 694242e1..e9267512 100644
--- a/server/requirements_cuda.txt
+++ b/server/requirements_cuda.txt
@@ -13,11 +13,11 @@ grpc-interceptor==0.15.4 ; python_version >= "3.9" and python_version < "3.13"
 grpcio-reflection==1.60.0 ; python_version >= "3.9" and python_version < "3.13"
 grpcio-status==1.60.0 ; python_version >= "3.9" and python_version < "3.13"
 grpcio==1.60.0 ; python_version >= "3.9" and python_version < "3.13"
-hf-transfer==0.1.4 ; python_version >= "3.9" and python_version < "3.13"
+hf-transfer==0.1.5 ; python_version >= "3.9" and python_version < "3.13"
 huggingface-hub==0.19.4 ; python_version >= "3.9" and python_version < "3.13"
 idna==3.6 ; python_version >= "3.9" and python_version < "3.13"
 loguru==0.6.0 ; python_version >= "3.9" and python_version < "3.13"
-numpy==1.26.2 ; python_version >= "3.9" and python_version < "3.13"
+numpy==1.26.3 ; python_version >= "3.9" and python_version < "3.13"
 opentelemetry-api==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
 opentelemetry-exporter-otlp-proto-grpc==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
 opentelemetry-exporter-otlp-proto-http==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
@@ -28,18 +28,18 @@ opentelemetry-proto==1.15.0 ; python_version >= "3.9" and python_version < "3.13
 opentelemetry-sdk==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
 opentelemetry-semantic-conventions==0.36b0 ; python_version >= "3.9" and python_version < "3.13"
 packaging==23.2 ; python_version >= "3.9" and python_version < "3.13"
-pillow==10.1.0 ; python_version >= "3.9" and python_version < "3.13"
-protobuf==4.25.1 ; python_version >= "3.9" and python_version < "3.13"
+pillow==10.2.0 ; python_version >= "3.9" and python_version < "3.13"
+protobuf==4.25.2 ; python_version >= "3.9" and python_version < "3.13"
 pyyaml==6.0.1 ; python_version >= "3.9" and python_version < "3.13"
-regex==2023.10.3 ; python_version >= "3.9" and python_version < "3.13"
+regex==2023.12.25 ; python_version >= "3.9" and python_version < "3.13"
 requests==2.31.0 ; python_version >= "3.9" and python_version < "3.13"
 safetensors==0.3.3 ; python_version >= "3.9" and python_version < "3.13"
-scipy==1.11.4 ; python_version >= "3.9" and python_version < "3.13"
+scipy==1.12.0 ; python_version >= "3.9" and python_version < "3.13"
 sentencepiece==0.1.99 ; python_version >= "3.9" and python_version < "3.13"
-setuptools==69.0.2 ; python_version >= "3.9" and python_version < "3.13"
-tokenizers==0.15.0 ; python_version >= "3.9" and python_version < "3.13"
+setuptools==69.0.3 ; python_version >= "3.9" and python_version < "3.13"
+tokenizers==0.15.1 ; python_version >= "3.9" and python_version < "3.13"
 tqdm==4.66.1 ; python_version >= "3.9" and python_version < "3.13"
-transformers==4.36.1 ; python_version >= "3.9" and python_version < "3.13"
+transformers==4.37.1 ; python_version >= "3.9" and python_version < "3.13"
 typer==0.6.1 ; python_version >= "3.9" and python_version < "3.13"
 typing-extensions==4.9.0 ; python_version >= "3.9" and python_version < "3.13"
 urllib3==2.1.0 ; python_version >= "3.9" and python_version < "3.13"
diff --git a/server/requirements_rocm.txt b/server/requirements_rocm.txt
index e0495fde..053429c9 100644
--- a/server/requirements_rocm.txt
+++ b/server/requirements_rocm.txt
@@ -12,11 +12,11 @@ grpc-interceptor==0.15.4 ; python_version >= "3.9" and python_version < "3.13"
 grpcio-reflection==1.60.0 ; python_version >= "3.9" and python_version < "3.13"
 grpcio-status==1.60.0 ; python_version >= "3.9" and python_version < "3.13"
 grpcio==1.60.0 ; python_version >= "3.9" and python_version < "3.13"
-hf-transfer==0.1.4 ; python_version >= "3.9" and python_version < "3.13"
+hf-transfer==0.1.5 ; python_version >= "3.9" and python_version < "3.13"
 huggingface-hub==0.19.4 ; python_version >= "3.9" and python_version < "3.13"
 idna==3.6 ; python_version >= "3.9" and python_version < "3.13"
 loguru==0.6.0 ; python_version >= "3.9" and python_version < "3.13"
-numpy==1.26.2 ; python_version >= "3.9" and python_version < "3.13"
+numpy==1.26.3 ; python_version >= "3.9" and python_version < "3.13"
 opentelemetry-api==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
 opentelemetry-exporter-otlp-proto-grpc==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
 opentelemetry-exporter-otlp-proto-http==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
@@ -27,18 +27,18 @@ opentelemetry-proto==1.15.0 ; python_version >= "3.9" and python_version < "3.13
 opentelemetry-sdk==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
 opentelemetry-semantic-conventions==0.36b0 ; python_version >= "3.9" and python_version < "3.13"
 packaging==23.2 ; python_version >= "3.9" and python_version < "3.13"
-pillow==10.1.0 ; python_version >= "3.9" and python_version < "3.13"
-protobuf==4.25.1 ; python_version >= "3.9" and python_version < "3.13"
+pillow==10.2.0 ; python_version >= "3.9" and python_version < "3.13"
+protobuf==4.25.2 ; python_version >= "3.9" and python_version < "3.13"
 pyyaml==6.0.1 ; python_version >= "3.9" and python_version < "3.13"
-regex==2023.10.3 ; python_version >= "3.9" and python_version < "3.13"
+regex==2023.12.25 ; python_version >= "3.9" and python_version < "3.13"
 requests==2.31.0 ; python_version >= "3.9" and python_version < "3.13"
 safetensors==0.3.3 ; python_version >= "3.9" and python_version < "3.13"
-scipy==1.11.4 ; python_version >= "3.9" and python_version < "3.13"
+scipy==1.12.0 ; python_version >= "3.9" and python_version < "3.13"
 sentencepiece==0.1.99 ; python_version >= "3.9" and python_version < "3.13"
-setuptools==69.0.2 ; python_version >= "3.9" and python_version < "3.13"
-tokenizers==0.15.0 ; python_version >= "3.9" and python_version < "3.13"
+setuptools==69.0.3 ; python_version >= "3.9" and python_version < "3.13"
+tokenizers==0.15.1 ; python_version >= "3.9" and python_version < "3.13"
 tqdm==4.66.1 ; python_version >= "3.9" and python_version < "3.13"
-transformers==4.36.1 ; python_version >= "3.9" and python_version < "3.13"
+transformers==4.37.1 ; python_version >= "3.9" and python_version < "3.13"
 typer==0.6.1 ; python_version >= "3.9" and python_version < "3.13"
 typing-extensions==4.9.0 ; python_version >= "3.9" and python_version < "3.13"
 urllib3==2.1.0 ; python_version >= "3.9" and python_version < "3.13"
diff --git a/server/tests/utils/test_layers.py b/server/tests/utils/test_layers.py
index 0a9fecd1..93a0e982 100644
--- a/server/tests/utils/test_layers.py
+++ b/server/tests/utils/test_layers.py
@@ -3,24 +3,27 @@ from text_generation_server.utils.layers import (
     TensorParallelEmbedding,
 )
 
+
 class ProcessGroup:
     def __init__(self, rank: int, world_size: int):
         self._rank = rank
         self.world_size = world_size
 
-    def size(self)->int:
+    def size(self) -> int:
         return self.world_size
 
-    def rank(self)->int:
+    def rank(self) -> int:
         return self._rank
 
+
 class Weights:
     def __init__(self, rank: int, world_size: int, vocab_size: int, hidden_dim: int):
-        self.weight = torch.arange(vocab_size*hidden_dim).float().view(vocab_size, hidden_dim)
+        self.weight = (
+            torch.arange(vocab_size * hidden_dim).float().view(vocab_size, hidden_dim)
+        )
         self.process_group = ProcessGroup(rank, world_size)
 
-
-    def get_partial_sharded(self, name:str, dim: int):
+    def get_partial_sharded(self, name: str, dim: int):
         assert dim == 0
 
         rank = self.process_group.rank()
@@ -35,10 +38,11 @@ class Weights:
     def get_shape(self, name: str):
         return self.weight.shape
 
+
 def test_weight_hub_files_offline_error():
 
-    vocab_size= 17
-    weights = Weights(rank=0, world_size=1, vocab_size = vocab_size,hidden_dim = 256)
+    vocab_size = 17
+    weights = Weights(rank=0, world_size=1, vocab_size=vocab_size, hidden_dim=256)
     embeddings = TensorParallelEmbedding("", weights)
 
     input_ids = torch.arange(vocab_size)
@@ -47,18 +51,27 @@ def test_weight_hub_files_offline_error():
     assert embeddings.max_id == 17
     torch.testing.assert_close(output, torch.arange(256 * 17).float().view(17, 256))
 
-    weights_0_2 = Weights(rank=0, world_size=2, vocab_size = vocab_size,hidden_dim = 256)
-    weights_1_2 = Weights(rank=1, world_size=2, vocab_size = vocab_size,hidden_dim = 256)
+    weights_0_2 = Weights(rank=0, world_size=2, vocab_size=vocab_size, hidden_dim=256)
+    weights_1_2 = Weights(rank=1, world_size=2, vocab_size=vocab_size, hidden_dim=256)
     embeddings_0_2 = TensorParallelEmbedding("", weights_0_2, reduce=False)
     assert embeddings_0_2.min_id == 0
     assert embeddings_0_2.max_id == 9
-    torch.testing.assert_close(embeddings_0_2.weight , torch.cat([torch.arange(9 * 256), torch.zeros(256)], dim=0).view(10, 256).float())
+    torch.testing.assert_close(
+        embeddings_0_2.weight,
+        torch.cat([torch.arange(9 * 256), torch.zeros(256)], dim=0)
+        .view(10, 256)
+        .float(),
+    )
     embeddings_1_2 = TensorParallelEmbedding("", weights_1_2, reduce=False)
     assert embeddings_1_2.min_id == 9
     assert embeddings_1_2.max_id == 17
-    torch.testing.assert_close(embeddings_1_2.weight , torch.cat([torch.arange(8 * 256) + 9 * 256, torch.zeros(256)], dim=0).view(9, 256).float())
+    torch.testing.assert_close(
+        embeddings_1_2.weight,
+        torch.cat([torch.arange(8 * 256) + 9 * 256, torch.zeros(256)], dim=0)
+        .view(9, 256)
+        .float(),
+    )
     output_tp_0 = embeddings_0_2.forward(input_ids)
     output_tp_1 = embeddings_1_2.forward(input_ids)
 
     torch.testing.assert_close(output, output_tp_0 + output_tp_1)
-
diff --git a/server/text_generation_server/cli.py b/server/text_generation_server/cli.py
index 99be6c7e..b74fbe36 100644
--- a/server/text_generation_server/cli.py
+++ b/server/text_generation_server/cli.py
@@ -226,7 +226,7 @@ def download_weights(
                 pass
         except (utils.LocalEntryNotFoundError, utils.EntryNotFoundError):
             pass
-            
+
     elif (Path(model_id) / "adapter_config.json").exists():
         # Try to load as a local PEFT model
         try:
diff --git a/server/text_generation_server/models/__init__.py b/server/text_generation_server/models/__init__.py
index 679e1e2f..68096709 100644
--- a/server/text_generation_server/models/__init__.py
+++ b/server/text_generation_server/models/__init__.py
@@ -230,7 +230,7 @@ def get_model(
                 dtype=dtype,
                 trust_remote_code=trust_remote_code,
             )
-        
+
     elif model_type == "phi":
         if FLASH_ATTENTION:
             return FlashPhi(
@@ -252,7 +252,9 @@ def get_model(
 
     elif model_type == "phi-msft":
         if FLASH_ATTENTION:
-            raise NotImplementedError("Legacy phi-msft is not supported with Flash Attention")
+            raise NotImplementedError(
+                "Legacy phi-msft is not supported with Flash Attention"
+            )
         else:
             return Phi(
                 model_id,
diff --git a/server/text_generation_server/models/custom_modeling/flash_phi_modeling.py b/server/text_generation_server/models/custom_modeling/flash_phi_modeling.py
index d103973f..96701794 100644
--- a/server/text_generation_server/models/custom_modeling/flash_phi_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_phi_modeling.py
@@ -17,6 +17,7 @@ from text_generation_server.utils.layers import (
     FastLayerNorm,
 )
 
+
 class PhiConfig(PretrainedConfig):
     def __init__(
         self,
@@ -25,15 +26,15 @@ class PhiConfig(PretrainedConfig):
         num_hidden_layers=32,
         num_attention_heads=32,
         num_key_value_heads=32,
-        hidden_act="gelu_fast",    # llama uses silu
-        layer_norm_eps=1e-05,      # rms in llama,
+        hidden_act="gelu_fast",  # llama uses silu
+        layer_norm_eps=1e-05,  # rms in llama,
         pad_token_id=0,
         bos_token_id=1,
         eos_token_id=2,
         tie_word_embeddings=False,
         rope_theta=10000.0,
-        resid_pdrop=0.1,           # llama doesn't have this
-        partial_rotary_factor=0.5, # important difference between llama and phi
+        resid_pdrop=0.1,  # llama doesn't have this
+        partial_rotary_factor=0.5,  # important difference between llama and phi
         **kwargs,
     ):
         self.vocab_size = vocab_size
@@ -55,6 +56,7 @@ class PhiConfig(PretrainedConfig):
             **kwargs,
         )
 
+
 # this is the same as llama except for Phi uses bias=True
 def load_attention(config, prefix, weights):
     if config.num_attention_heads != config.num_key_value_heads:
@@ -68,6 +70,7 @@ def load_attention(config, prefix, weights):
             bias=True,
         )
 
+
 def _load_gqa(config, prefix: str, weights):
     assert config.hidden_size % config.num_attention_heads == 0
     assert config.num_attention_heads % weights.process_group.size() == 0
@@ -94,6 +97,7 @@ def _load_gqa(config, prefix: str, weights):
         get_linear(weight, bias=True, quantize=config.quantize)
     )
 
+
 class FlashPhiAttention(torch.nn.Module):
     def __init__(
         self,
@@ -173,8 +177,7 @@ class FlashPhiAttention(torch.nn.Module):
         #
         # Apply partial positional embeddings in place
         self.rotary_emb(
-            query[:, :, :self.rotary_dim], kv[:, 0, :, :self.rotary_dim],
-            cos, sin
+            query[:, :, : self.rotary_dim], kv[:, 0, :, : self.rotary_dim], cos, sin
         )
 
         # Reshape key and value and cache
@@ -210,7 +213,8 @@ class FlashPhiAttention(torch.nn.Module):
                 max_s,
             )
 
-        return self.dense(attn_output.view(-1, self.num_heads*self.head_size))
+        return self.dense(attn_output.view(-1, self.num_heads * self.head_size))
+
 
 class PhiMLP(nn.Module):
     def __init__(self, prefix, config, weights):
@@ -256,7 +260,9 @@ class FlashPhiLayer(nn.Module):
         )
         self.mlp = PhiMLP(prefix=f"{prefix}.mlp", config=config, weights=weights)
         self.input_layernorm = FastLayerNorm.load(
-            prefix=f"{prefix}.input_layernorm", weights=weights, eps=config.layer_norm_eps
+            prefix=f"{prefix}.input_layernorm",
+            weights=weights,
+            eps=config.layer_norm_eps,
         )
         self.resid_dropout = torch.nn.Dropout(config.resid_pdrop)
 
@@ -287,10 +293,13 @@ class FlashPhiLayer(nn.Module):
             max_s,
         )
 
-        hidden_states = self.resid_dropout(attn_output).add(self.resid_dropout(self.mlp(hidden_states)))
+        hidden_states = self.resid_dropout(attn_output).add(
+            self.resid_dropout(self.mlp(hidden_states))
+        )
 
         return hidden_states, res
 
+
 class FlashPhiModel(torch.nn.Module):
     def __init__(self, config, weights):
         super().__init__()
@@ -361,6 +370,7 @@ class FlashPhiModel(torch.nn.Module):
 
         return hidden_states
 
+
 class FlashPhiForCausalLM(torch.nn.Module):
     def __init__(self, config, weights):
         super().__init__()
@@ -380,7 +390,7 @@ class FlashPhiForCausalLM(torch.nn.Module):
         kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
         block_tables: torch.Tensor,
         slots: torch.Tensor,
-        input_lengths: torch.Tensor, 
+        input_lengths: torch.Tensor,
         max_s: int,
         lm_head_indices: Optional[torch.Tensor] = None,
     ) -> torch.Tensor:
diff --git a/server/text_generation_server/models/custom_modeling/mpt_modeling.py b/server/text_generation_server/models/custom_modeling/mpt_modeling.py
index 1a9aef74..2c2fec48 100644
--- a/server/text_generation_server/models/custom_modeling/mpt_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/mpt_modeling.py
@@ -54,9 +54,19 @@ def load_col(config, prefix, weights, bias):
         bias_h = bias_h[0]
         bias_block_size = bias_h // bias_size
 
-        bias_q_part = bias_slice_[bias_rank * bias_block_size : (bias_rank + 1) * bias_block_size]
-        bias_k_part = bias_slice_[bias_h + bias_rank * bias_block_size : bias_h + (bias_rank + 1) * bias_block_size]
-        bias_v_part = bias_slice_[2 * bias_h + bias_rank * bias_block_size : 2 * bias_h + (bias_rank + 1) * bias_block_size]
+        bias_q_part = bias_slice_[
+            bias_rank * bias_block_size : (bias_rank + 1) * bias_block_size
+        ]
+        bias_k_part = bias_slice_[
+            bias_h
+            + bias_rank * bias_block_size : bias_h
+            + (bias_rank + 1) * bias_block_size
+        ]
+        bias_v_part = bias_slice_[
+            2 * bias_h
+            + bias_rank * bias_block_size : 2 * bias_h
+            + (bias_rank + 1) * bias_block_size
+        ]
 
         bias = torch.cat([bias_q_part, bias_k_part, bias_v_part], dim=0)
         if bias.dtype != torch.int32:
@@ -352,8 +362,12 @@ class MultiheadAttention(nn.Module):
             hidden_size = config.d_model
             head_dim = hidden_size // self.n_heads
 
-            self.q_ln = LPLayerNorm(d_model, bias=bias, prefix=f"{prefix}.q_ln", weights=weights)
-            self.k_ln = LPLayerNorm(self.n_heads * head_dim, prefix=f"{prefix}.k_ln", weights=weights)
+            self.q_ln = LPLayerNorm(
+                d_model, bias=bias, prefix=f"{prefix}.q_ln", weights=weights
+            )
+            self.k_ln = LPLayerNorm(
+                self.n_heads * head_dim, prefix=f"{prefix}.k_ln", weights=weights
+            )
         if self.attn_impl == "flash":
             self.attn_fn = flash_attn_fn
         elif self.attn_impl == "triton":
@@ -684,7 +698,6 @@ class LPLayerNorm(torch.nn.LayerNorm):
                 self.bias = nn.Parameter(weights.get_sharded(f"{prefix}.bias", dim=0))
             self.normalized_shape = self.weight.shape
 
-
     def forward(self, x):
         module_device = x.device
         downcast_x = _cast_if_autocast_enabled(x)
@@ -798,7 +811,7 @@ class MPTModel(MPTPreTrainedModel):
         self.wte = TensorParallelEmbedding("transformer.wte", weights)
 
         if not self.alibi:
-           self.wpe = TensorParallelEmbedding("transformer.wpe", weights)
+            self.wpe = TensorParallelEmbedding("transformer.wpe", weights)
         self.blocks = nn.ModuleList(
             [
                 MPTBlock(config, prefix=f"transformer.blocks.{i}", weights=weights)
diff --git a/server/text_generation_server/models/custom_modeling/phi_modeling.py b/server/text_generation_server/models/custom_modeling/phi_modeling.py
index f9999537..e5c09728 100644
--- a/server/text_generation_server/models/custom_modeling/phi_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/phi_modeling.py
@@ -62,14 +62,12 @@ class PhiConfig(PretrainedConfig):
             **kwargs,
         )
 
+
 # RotaryEmbedding is a class that implements the rotary embedding.
 class RotaryEmbedding(nn.Module):
     def __init__(self, dim, max_seq_len):
         super().__init__()
-        inv_freq = [
-            1.0 / 10000.0 ** (i / dim)
-            for i in range(0, dim, 2)
-        ]
+        inv_freq = [1.0 / 10000.0 ** (i / dim) for i in range(0, dim, 2)]
         inv_freq_len = len(inv_freq)
         inv_freq = torch.tensor(inv_freq).view(1, inv_freq_len)
         t = torch.arange(0, max_seq_len, dtype=torch.float).view(max_seq_len, 1)
@@ -131,6 +129,7 @@ class PhiCausalLMHead(nn.Module):
         hidden_states = self.linear(hidden_states)
         return hidden_states
 
+
 # PhiMHA is a multi-head attention layer. This layer uses an attention mask to prevent tokens from attending to subsequent tokens.
 class PhiMHA(nn.Module):
     def __init__(self, prefix, config, weights):
@@ -172,19 +171,27 @@ class PhiMHA(nn.Module):
             v = torch.cat([prev_v, v], dim=1)
 
         past_kv_cache = [k, v]
-        attn_weights = torch.einsum('bthd,bshd->bhts', q, k * self.softmax_scale)
+        attn_weights = torch.einsum("bthd,bshd->bhts", q, k * self.softmax_scale)
 
         if attention_mask is not None:
             seqlen_k = k.shape[1]
             seqlen_q = q.shape[1]
-            causal_mask = torch.triu(torch.full((seqlen_q, seqlen_k), -10000.0, device=attn_weights.device), 1)
+            causal_mask = torch.triu(
+                torch.full((seqlen_q, seqlen_k), -10000.0, device=attn_weights.device),
+                1,
+            )
             attn_weights = attn_weights + causal_mask.to(dtype=attn_weights.dtype)
-  
+
         attn_weights = torch.nn.functional.softmax(attn_weights, dim=-1)
         attn_output = attn_weights.matmul(v.transpose(1, 2)).squeeze(0)
-        attn_output = attn_output.view((b_size, self.num_heads, seq_len, self.head_dim)).transpose(1, 2).flatten(-2)
+        attn_output = (
+            attn_output.view((b_size, self.num_heads, seq_len, self.head_dim))
+            .transpose(1, 2)
+            .flatten(-2)
+        )
         return self.out_proj(attn_output), past_kv_cache
 
+
 # PhiMLP is a multi-layer perceptron. It contains two linear layers with a gelu activation function.
 class PhiMLP(nn.Module):
     def __init__(self, prefix, config, weights):
@@ -204,19 +211,22 @@ class PhiMLP(nn.Module):
             bias=False,
         )
         self.activation = torch.nn.functional.gelu
-            
+
     def forward(self, hidden_states):
         hidden_states = self.fc1(hidden_states)
         hidden_states = self.activation(hidden_states)
         hidden_states = self.fc2(hidden_states)
         return hidden_states
 
+
 # PhiBlock is a single transformer block. It contains a layer norm, a multi-head attention layer and an multi-layer perceptron.
 class PhiBlock(nn.Module):
     def __init__(self, layer_id, config, weights):
         super().__init__()
         self.layer_id = layer_id
-        self.layer_norm = nn.LayerNorm.load(prefix=f"{layer_id}.ln", weights=weights, eps=config.layer_norm_epsilon)
+        self.layer_norm = nn.LayerNorm.load(
+            prefix=f"{layer_id}.ln", weights=weights, eps=config.layer_norm_epsilon
+        )
         self.mixer = PhiMHA(prefix=f"{layer_id}.mixer", config=config, weights=weights)
         self.mlp = PhiMLP(prefix=f"{layer_id}.mlp", config=config, weights=weights)
 
@@ -228,11 +238,14 @@ class PhiBlock(nn.Module):
     ):
         residual = hidden_states
         hidden_states = self.layer_norm(hidden_states)
-        attn_outputs, past_kv_cache = self.mixer(hidden_states, kv_cache, attention_mask)
+        attn_outputs, past_kv_cache = self.mixer(
+            hidden_states, kv_cache, attention_mask
+        )
         feed_forward_hidden_states = self.mlp(hidden_states)
         out = attn_outputs + feed_forward_hidden_states + residual
         return out, past_kv_cache
 
+
 # PhiModel implements the embedding layer and the transformer blocks.
 class PhiModel(nn.Module):
     def __init__(self, config, weights):
@@ -241,9 +254,12 @@ class PhiModel(nn.Module):
         self.tp_world_size = weights.process_group.size()
         self.embed_tokens = TensorParallelEmbedding(
             prefix="transformer.embd.wte", weights=weights
-        )        
+        )
         self.blocks = nn.ModuleList(
-            [PhiBlock(f"transformer.h.{layer_id}", config, weights) for layer_id in range(config.n_layer)]
+            [
+                PhiBlock(f"transformer.h.{layer_id}", config, weights)
+                for layer_id in range(config.n_layer)
+            ]
         )
 
     def forward(
@@ -258,14 +274,19 @@ class PhiModel(nn.Module):
         seq_len = hidden_states.shape[1]
         mask = None if seq_len <= 1 else attention_mask
 
-        past_key_values = [None] * len(self.blocks) if past_key_values is None else past_key_values
+        past_key_values = (
+            [None] * len(self.blocks) if past_key_values is None else past_key_values
+        )
 
         for index, block in enumerate(self.blocks):
-            hidden_states, new_key_values = block(hidden_states, past_key_values[index], mask)
+            hidden_states, new_key_values = block(
+                hidden_states, past_key_values[index], mask
+            )
             past_key_values[index] = new_key_values
 
         return hidden_states, past_key_values
 
+
 # PhiForCausalLM wraps the PhiModel and PhiCausalLMHead together and returns a CausalLMOutputWithPast object.
 class PhiForCausalLM(torch.nn.Module):
     def __init__(self, config, weights):
@@ -290,12 +311,15 @@ class PhiForCausalLM(torch.nn.Module):
         loss = None
         if labels is not None:
             loss = nn.CrossEntropyLoss()(
-                logits[:, :-1].view(-1, logits.size(-1)),
-                labels[:, 1:].view(-1)
+                logits[:, :-1].view(-1, logits.size(-1)), labels[:, 1:].view(-1)
             )
 
         if not return_dict:
-            return ((loss,) + (logits,) + model_output[1:]) if loss is not None else (logits,) + model_output[1:]
+            return (
+                ((loss,) + (logits,) + model_output[1:])
+                if loss is not None
+                else (logits,) + model_output[1:]
+            )
 
         return CausalLMOutputWithPast(
             loss=loss,
@@ -304,5 +328,3 @@ class PhiForCausalLM(torch.nn.Module):
             hidden_states=None,
             attentions=None,
         )
-
-        
diff --git a/server/text_generation_server/models/flash_llama.py b/server/text_generation_server/models/flash_llama.py
index 7be61906..94bd58f4 100644
--- a/server/text_generation_server/models/flash_llama.py
+++ b/server/text_generation_server/models/flash_llama.py
@@ -73,11 +73,11 @@ class FlashLlama(FlashCausalLM):
             import json
             import os
             from pathlib import Path
-            
-            is_local_model = (Path(use_medusa).exists() and Path(use_medusa).is_dir()) or os.getenv(
-                "WEIGHTS_CACHE_OVERRIDE", None
-            ) is not None
-            
+
+            is_local_model = (
+                Path(use_medusa).exists() and Path(use_medusa).is_dir()
+            ) or os.getenv("WEIGHTS_CACHE_OVERRIDE", None) is not None
+
             if not is_local_model:
                 medusa_config = hf_hub_download(
                     use_medusa, revision=revision, filename="config.json"
@@ -88,7 +88,7 @@ class FlashLlama(FlashCausalLM):
             else:
                 medusa_config = str(Path(use_medusa) / "config.json")
                 medusa_head = str(Path(use_medusa) / "medusa_lm_head.pt")
-                
+
             with open(medusa_config, "r") as f:
                 config = json.load(f)
             medusa_sf = medusa_head[: -len(".pt")] + ".safetensors"
diff --git a/server/text_generation_server/models/flash_phi.py b/server/text_generation_server/models/flash_phi.py
index 1c49f2a9..061b9740 100644
--- a/server/text_generation_server/models/flash_phi.py
+++ b/server/text_generation_server/models/flash_phi.py
@@ -63,11 +63,11 @@ class FlashPhi(FlashCausalLM):
             import json
             import os
             from pathlib import Path
-            
-            is_local_model = (Path(use_medusa).exists() and Path(use_medusa).is_dir()) or os.getenv(
-                "WEIGHTS_CACHE_OVERRIDE", None
-            ) is not None
-            
+
+            is_local_model = (
+                Path(use_medusa).exists() and Path(use_medusa).is_dir()
+            ) or os.getenv("WEIGHTS_CACHE_OVERRIDE", None) is not None
+
             if not is_local_model:
                 medusa_config = hf_hub_download(
                     use_medusa, revision=revision, filename="config.json"
@@ -78,7 +78,7 @@ class FlashPhi(FlashCausalLM):
             else:
                 medusa_config = str(Path(use_medusa) / "config.json")
                 medusa_head = str(Path(use_medusa) / "medusa_lm_head.pt")
-                
+
             with open(medusa_config, "r") as f:
                 config = json.load(f)
             medusa_sf = medusa_head[: -len(".pt")] + ".safetensors"
diff --git a/server/text_generation_server/models/phi.py b/server/text_generation_server/models/phi.py
index d477478a..79aa3fb9 100644
--- a/server/text_generation_server/models/phi.py
+++ b/server/text_generation_server/models/phi.py
@@ -5,13 +5,17 @@ from transformers import AutoConfig, AutoTokenizer
 from typing import Optional, List, Tuple
 
 from text_generation_server.models import CausalLM
-from text_generation_server.models.custom_modeling.phi_modeling import PhiConfig, PhiForCausalLM
+from text_generation_server.models.custom_modeling.phi_modeling import (
+    PhiConfig,
+    PhiForCausalLM,
+)
 from text_generation_server.utils import (
     initialize_torch_distributed,
     weight_files,
     Weights,
 )
 
+
 class Phi(CausalLM):
     def __init__(
         self,
@@ -60,4 +64,3 @@ class Phi(CausalLM):
             dtype=dtype,
             device=device,
         )
-
diff --git a/server/text_generation_server/utils/layers.py b/server/text_generation_server/utils/layers.py
index 6ddfd6f4..010d6143 100644
--- a/server/text_generation_server/utils/layers.py
+++ b/server/text_generation_server/utils/layers.py
@@ -510,7 +510,9 @@ class TensorParallelEmbedding(nn.Module):
         block_size = (num_embeddings + world_size - 1) // world_size
         self.min_id = rank * block_size
         self.max_id = min(num_embeddings, (rank + 1) * block_size)
-        self.null_idx = weight.shape[0]  # Usually block_size, might be less in non even vocab_size.
+        self.null_idx = weight.shape[
+            0
+        ]  # Usually block_size, might be less in non even vocab_size.
         self.process_group = weights.process_group
         self.reduce = reduce
 

From 069895b9859b776cab8145f3fa6f6d16ac40af47 Mon Sep 17 00:00:00 2001
From: Nicolas Patry <patry.nicolas@protonmail.com>
Date: Fri, 26 Jan 2024 20:13:47 +0100
Subject: [PATCH 18/31] Fixing top_n_tokens. (#1497)

# What does this PR do?

Supersedes #1459

The fix works as follows.
We updated next_token_chooser to return all logprobs; batch_top_tokens
now also gets accepted_ids + speculated_length (so it knows how to
interpret the flat logprobs).

We then update the code to return the lists of `Tokens` that it expects.
<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
---
 server/tests/utils/test_tokens.py             | 42 ++++++++----
 .../models/causal_lm.py                       | 35 ++++++----
 .../models/flash_causal_lm.py                 | 39 ++++++-----
 .../models/seq2seq_lm.py                      | 35 ++++++----
 server/text_generation_server/models/types.py |  2 +-
 server/text_generation_server/utils/tokens.py | 65 +++++++++++++------
 6 files changed, 142 insertions(+), 76 deletions(-)

diff --git a/server/tests/utils/test_tokens.py b/server/tests/utils/test_tokens.py
index 0585f1fb..d3f2d766 100644
--- a/server/tests/utils/test_tokens.py
+++ b/server/tests/utils/test_tokens.py
@@ -50,19 +50,39 @@ def test_batch_top_tokens():
     top_n_tokens = [0, 2, 3, 4, 5]
     top_n_tokens_tensor = torch.tensor(top_n_tokens)
     inp_logprobs = torch.tensor([[-1.0, -3.0, -4.0, -2.0, -3.0]] * 5)
+    accepted_ids = torch.ones_like(top_n_tokens_tensor)
 
     topn_tok_ids, topn_tok_logprobs = batch_top_tokens(
-        top_n_tokens, top_n_tokens_tensor, inp_logprobs
+        top_n_tokens, top_n_tokens_tensor, inp_logprobs, accepted_ids
     )
 
-    assert topn_tok_ids[0] == []
-    assert topn_tok_ids[1] == [0, 3]
-    assert topn_tok_ids[2] == [0, 3, 1, 4]
-    assert topn_tok_ids[3] == [0, 3, 1, 4]
-    assert topn_tok_ids[4] == [0, 3, 1, 4, 2]
+    assert topn_tok_ids[0] == [[]]
+    assert topn_tok_ids[1] == [[0, 3]]
+    assert topn_tok_ids[2] == [[0, 3, 1, 4]]
+    assert topn_tok_ids[3] == [[0, 3, 1, 4]]
+    assert topn_tok_ids[4] == [[0, 3, 1, 4, 2]]
 
-    assert topn_tok_logprobs[0] == []
-    assert topn_tok_logprobs[1] == [-1, -2]
-    assert topn_tok_logprobs[2] == [-1, -2, -3, -3]
-    assert topn_tok_logprobs[3] == [-1, -2, -3, -3]
-    assert topn_tok_logprobs[4] == [-1, -2, -3, -3, -4]
+    assert topn_tok_logprobs[0] == [[]]
+    assert topn_tok_logprobs[1] == [[-1, -2]]
+    assert topn_tok_logprobs[2] == [[-1, -2, -3, -3]]
+    assert topn_tok_logprobs[3] == [[-1, -2, -3, -3]]
+    assert topn_tok_logprobs[4] == [[-1, -2, -3, -3, -4]]
+
+    # Now let's make second member of the batch be speculated
+    inp_logprobs = torch.tensor([[-1.0, -3.0, -4.0, -2.0, -3.0]] * 5 * 2)
+    accepted_ids[1]  = 2
+    topn_tok_ids, topn_tok_logprobs = batch_top_tokens(
+        top_n_tokens, top_n_tokens_tensor, inp_logprobs, accepted_ids
+    )
+
+    assert topn_tok_ids[0] == [[]]
+    assert topn_tok_ids[1] == [[0, 3], [0, 3]]
+    assert topn_tok_ids[2] == [[0, 3, 1, 4]]
+    assert topn_tok_ids[3] == [[0, 3, 1, 4]]
+    assert topn_tok_ids[4] == [[0, 3, 1, 4, 2]]
+
+    assert topn_tok_logprobs[0] == [[]]
+    assert topn_tok_logprobs[1] == [[-1, -2], [-1, -2]]
+    assert topn_tok_logprobs[2] == [[-1, -2, -3, -3]]
+    assert topn_tok_logprobs[3] == [[-1, -2, -3, -3]]
+    assert topn_tok_logprobs[4] == [[-1, -2, -3, -3, -4]]
diff --git a/server/text_generation_server/models/causal_lm.py b/server/text_generation_server/models/causal_lm.py
index 7b10256c..29e9f8b1 100644
--- a/server/text_generation_server/models/causal_lm.py
+++ b/server/text_generation_server/models/causal_lm.py
@@ -580,10 +580,13 @@ class CausalLM(Model):
         generations: List[Generation] = []
         stopped = True
 
+        # Speculation is not active for causal
+        accepted_ids = torch.ones_like(batch.input_ids)[:, 0]
         batch_top_token_ids, batch_top_token_logprobs = batch_top_tokens(
             batch.top_n_tokens,
             batch.top_n_tokens_tensor,
             torch.log_softmax(logits[:, -1], -1),
+            accepted_ids,
         )
 
         start_decode = time.time_ns()
@@ -692,20 +695,24 @@ class CausalLM(Model):
                     prefill_tokens = None
 
                 if top_n_tokens > 0:
-                    toptoken_texts = self.tokenizer.batch_decode(
-                        top_token_ids,
-                        clean_up_tokenization_spaces=False,
-                        skip_special_tokens=False,
-                    )
-                    special_toptokens = [
-                        token_id in self.all_special_ids for token_id in top_token_ids
-                    ]
-                    top_tokens = Tokens(
-                        top_token_ids,
-                        top_token_logprobs,
-                        toptoken_texts,
-                        special_toptokens,
-                    )
+                    all_top_tokens = []
+                    for (top_token_ids, top_token_logprobs) in zip(top_token_ids, top_token_logprobs):
+                        toptoken_texts = self.tokenizer.batch_decode(
+                            top_token_ids,
+                            clean_up_tokenization_spaces=False,
+                            skip_special_tokens=False,
+                        )
+                        special_toptokens = [
+                            token_id in self.all_special_ids for token_id in top_token_ids
+                        ]
+                        top_tokens = Tokens(
+                            top_token_ids,
+                            top_token_logprobs,
+                            toptoken_texts,
+                            special_toptokens,
+                        )
+                        all_top_tokens.append(top_tokens)
+                    top_tokens = all_top_tokens
                 else:
                     top_tokens = None
 
diff --git a/server/text_generation_server/models/flash_causal_lm.py b/server/text_generation_server/models/flash_causal_lm.py
index 930082cd..53a3d582 100644
--- a/server/text_generation_server/models/flash_causal_lm.py
+++ b/server/text_generation_server/models/flash_causal_lm.py
@@ -842,6 +842,8 @@ class FlashCausalLM(Model):
         else:
             next_token_logits = out
 
+
+        speculate = get_speculate()
         (
             next_input_ids,
             next_token_logprobs,
@@ -851,16 +853,15 @@ class FlashCausalLM(Model):
         ) = batch.next_token_chooser(
             batch.all_input_ids_tensor[:, : batch.max_seqlen],
             next_token_logits,
-            get_speculate(),
+            speculate,
             batch.speculative_ids,
             speculative_logits,
         )
 
         batch_top_token_ids, batch_top_token_logprobs = batch_top_tokens(
-            batch.top_n_tokens, batch.top_n_tokens_tensor, logprobs
+            batch.top_n_tokens, batch.top_n_tokens_tensor, logprobs, accepted_ids
         )
 
-        speculative_length = 0 if speculative_ids is None else speculative_ids.shape[1]
         if prefill:
             if len(batch) > 1 and prefill_logprobs:
                 # We create the prefill_tokens_indices tensor that will be used to gather prefill logprobs
@@ -1062,20 +1063,24 @@ class FlashCausalLM(Model):
                     prefill_tokens = None
 
                 if top_n_tokens > 0:
-                    toptoken_texts = self.tokenizer.batch_decode(
-                        top_token_ids,
-                        clean_up_tokenization_spaces=False,
-                        skip_special_tokens=False,
-                    )
-                    special_toptokens = [
-                        token_id in self.all_special_ids for token_id in top_token_ids
-                    ]
-                    top_tokens = Tokens(
-                        top_token_ids,
-                        top_token_logprobs,
-                        toptoken_texts,
-                        special_toptokens,
-                    )
+                    all_top_tokens = []
+                    for (top_token_ids, top_token_logprobs) in zip(top_token_ids, top_token_logprobs):
+                        toptoken_texts = self.tokenizer.batch_decode(
+                            top_token_ids,
+                            clean_up_tokenization_spaces=False,
+                            skip_special_tokens=False,
+                        )
+                        special_toptokens = [
+                            token_id in self.all_special_ids for token_id in top_token_ids
+                        ]
+                        top_tokens = Tokens(
+                            top_token_ids,
+                            top_token_logprobs,
+                            toptoken_texts,
+                            special_toptokens,
+                        )
+                        all_top_tokens.append(top_tokens)
+                    top_tokens = all_top_tokens
                 else:
                     top_tokens = None
 
diff --git a/server/text_generation_server/models/seq2seq_lm.py b/server/text_generation_server/models/seq2seq_lm.py
index f2e4cec6..8b93aecd 100644
--- a/server/text_generation_server/models/seq2seq_lm.py
+++ b/server/text_generation_server/models/seq2seq_lm.py
@@ -640,10 +640,13 @@ class Seq2SeqLM(Model):
             batch.past_key_values,
         )
 
+        # Speculation is not active for seq2seq
+        accepted_ids = torch.ones_like(batch.decoder_input_ids)[:, 0]
         batch_top_token_ids, batch_top_token_logprobs = batch_top_tokens(
             batch.top_n_tokens,
             batch.top_n_tokens_tensor,
             torch.log_softmax(logits[:, -1], -1),
+            accepted_ids,
         )
 
         start_decode = time.time_ns()
@@ -746,20 +749,24 @@ class Seq2SeqLM(Model):
                     prefill_tokens = None
 
                 if top_n_tokens > 0:
-                    toptoken_texts = self.tokenizer.batch_decode(
-                        top_token_ids,
-                        clean_up_tokenization_spaces=False,
-                        skip_special_tokens=False,
-                    )
-                    special_toptokens = [
-                        token_id in self.all_special_ids for token_id in top_token_ids
-                    ]
-                    top_tokens = Tokens(
-                        top_token_ids,
-                        top_token_logprobs,
-                        toptoken_texts,
-                        special_toptokens,
-                    )
+                    all_top_tokens = []
+                    for (top_token_ids, top_token_logprobs) in zip(top_token_ids, top_token_logprobs):
+                        toptoken_texts = self.tokenizer.batch_decode(
+                            top_token_ids,
+                            clean_up_tokenization_spaces=False,
+                            skip_special_tokens=False,
+                        )
+                        special_toptokens = [
+                            token_id in self.all_special_ids for token_id in top_token_ids
+                        ]
+                        top_tokens = Tokens(
+                            top_token_ids,
+                            top_token_logprobs,
+                            toptoken_texts,
+                            special_toptokens,
+                        )
+                        all_top_tokens.append(top_tokens)
+                    top_tokens = all_top_tokens
                 else:
                     top_tokens = None
 
diff --git a/server/text_generation_server/models/types.py b/server/text_generation_server/models/types.py
index f85f27e5..bc68812e 100644
--- a/server/text_generation_server/models/types.py
+++ b/server/text_generation_server/models/types.py
@@ -95,5 +95,5 @@ class Generation:
             generated_text=self.generated_text.to_pb()
             if self.generated_text is not None
             else None,
-            top_tokens=self.top_tokens.to_pb() if self.top_tokens is not None else None,
+            top_tokens=[top_tokens.to_pb() for top_tokens in self.top_tokens] if self.top_tokens is not None else None,
         )
diff --git a/server/text_generation_server/utils/tokens.py b/server/text_generation_server/utils/tokens.py
index 04cc8d97..270a6990 100644
--- a/server/text_generation_server/utils/tokens.py
+++ b/server/text_generation_server/utils/tokens.py
@@ -277,7 +277,8 @@ class HeterogeneousNextTokenChooser:
             scores[:, j] = _scores
             next_ids[:, j] = _next_ids
         next_ids = next_ids.view(B * S)
-        scores = scores.view(B * S, -1)
+        allscores = scores.view(B * S, -1)
+        alllogprobs = torch.log_softmax(allscores, -1)
 
         if speculated_ids is not None:
             accepted_ids = []
@@ -305,16 +306,17 @@ class HeterogeneousNextTokenChooser:
                 accepted_ids, device=input_ids.device, dtype=input_ids.dtype
             )
             next_ids = next_ids[indices]
-            scores = scores[indices]
+            logprobs = alllogprobs[indices]
             indices = torch.arange(B, device=input_ids.device) * S
             if speculative_scores is not None:
                 speculative_scores = speculative_scores[indices + accepted_ids - 1]
         else:
             accepted_ids = torch.ones_like(next_ids)
+            logprobs = alllogprobs
 
-        logprobs = torch.log_softmax(scores, -1)
         next_logprobs = torch.gather(logprobs, 1, next_ids.view(-1, 1)).view(-1)
 
+
         if speculate > 0:
             if speculative_scores is not None:
                 # Medusa provided some scores
@@ -327,7 +329,7 @@ class HeterogeneousNextTokenChooser:
         else:
             speculative_ids = None
 
-        return next_ids, next_logprobs, logprobs, accepted_ids, speculative_ids
+        return next_ids, next_logprobs, alllogprobs, accepted_ids, speculative_ids
 
     def filter(self, indices):
         if self.watermark_processor is not None:
@@ -436,8 +438,8 @@ class HeterogeneousSampling:
 
 
 def batch_top_tokens(
-    top_n_tokens: List[int], top_n_tokens_tensor: torch.Tensor, logprobs: torch.Tensor
-) -> Tuple[List[List[int]], List[List[float]]]:
+    top_n_tokens: List[int], top_n_tokens_tensor: torch.Tensor, logprobs: torch.Tensor, accepted_ids: torch.Tensor
+) -> Tuple[List[List[List[int]]], List[List[List[float]]]]:
     """Find the top n most likely tokens for a batch of generations.
 
     When multiple tokens have equal probabilities and they don't all fit, the
@@ -446,14 +448,19 @@ def batch_top_tokens(
     max_top_n = max(top_n_tokens)
     # Early exit when top_n_tokens is not used
     if max_top_n == 0:
-        return [[]] * len(top_n_tokens), [[]] * len(top_n_tokens)
+        return [[[]]] * len(top_n_tokens), [[[]]] * len(top_n_tokens)
 
+
+    batch_size = accepted_ids.shape[0]
+    speculate_size = logprobs.shape[0] // batch_size
+    top_n_tokens_tensor = top_n_tokens_tensor.repeat_interleave(speculate_size)
     # Ensure top_n doesn't exceed vocab size
-    top_n_tokens = [min(tok, logprobs.size(-1)) for tok in top_n_tokens]
+    top_n_tokens = [min(tok, logprobs.size(-1)) for tok in top_n_tokens for _ in range(speculate_size)]
 
     # Parallel kthvalue adapted from https://discuss.pytorch.org/t/how-to-efficiently-get-the-k-th-largest-values-in-parallel/160529/2
     # Sorted topk is faster than torch.sort() since we only need a small subset
-    sorted_top_k = torch.topk(logprobs, k=max_top_n, dim=1, sorted=True).values
+    sorted_top_k = torch.topk(logprobs, k=max_top_n, dim=-1, sorted=True).values
+
     nth_highest = torch.gather(
         sorted_top_k, 1, (top_n_tokens_tensor - 1).clip(min=0).unsqueeze(1)
     )
@@ -471,13 +478,33 @@ def batch_top_tokens(
     top_indices = top_k.indices.tolist()
     top_values = top_k.values.tolist()
 
-    return (
-        [
-            idxs[:n] if req_n > 0 else []
-            for idxs, n, req_n in zip(top_indices, top_n_ishes, top_n_tokens)
-        ],
-        [
-            vals[:n] if req_n > 0 else []
-            for vals, n, req_n in zip(top_values, top_n_ishes, top_n_tokens)
-        ],
-    )
+    batch_top_token_ids = []
+    batch_top_token_logprobs = []
+    accepted_ids_list = accepted_ids.tolist()
+    for i, n_accepted_ids in enumerate(accepted_ids_list):
+        start = speculate_size * i
+        stop = speculate_size * (i + 1)
+        _top_indices = top_indices[start: stop]
+        _top_values = top_values[start: stop]
+        _top_n_ishes = top_n_ishes[start: stop]
+        _top_n_tokens = top_n_tokens[start: stop]
+
+        _top_indices = _top_indices[:n_accepted_ids]
+        _top_values = _top_values[:n_accepted_ids]
+        _top_n_ishes = _top_n_ishes[:n_accepted_ids]
+        _top_n_tokens = _top_n_tokens[:n_accepted_ids]
+
+        row_top_token_ids = []
+        row_top_token_logprobs = []
+
+        for idxs, vals, n, req_n in zip(_top_indices, _top_values, _top_n_ishes, _top_n_tokens):
+            indices = idxs[:n] if req_n > 0 else []
+            values = vals[:n] if req_n > 0 else []
+
+            row_top_token_ids.append(indices)
+            row_top_token_logprobs.append(values)
+
+        batch_top_token_ids.append(row_top_token_ids)
+        batch_top_token_logprobs.append(row_top_token_logprobs)
+
+    return batch_top_token_ids, batch_top_token_logprobs

From 0424dabb0179a0b6b76186244d716cf43e034cfd Mon Sep 17 00:00:00 2001
From: Nicolas Patry <patry.nicolas@protonmail.com>
Date: Mon, 29 Jan 2024 11:20:08 +0100
Subject: [PATCH 19/31] Sending compute type from the environment instead of
 hardcoded string (#1504)

# What does this PR do?

Send the compute type from the environment instead of a hardcoded string.

Reading the environment is slow, so the value is read once at startup and
kept in shared state instead.

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
---
 router/src/server.rs | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/router/src/server.rs b/router/src/server.rs
index 998d6265..39d1de38 100644
--- a/router/src/server.rs
+++ b/router/src/server.rs
@@ -57,6 +57,7 @@ example = json ! ({"error": "Incomplete generation"})),
 async fn compat_generate(
     Extension(default_return_full_text): Extension<bool>,
     infer: Extension<Infer>,
+    compute_type: Extension<ComputeType>,
     Json(mut req): Json<CompatGenerateRequest>,
 ) -> Result<Response, (StatusCode, Json<ErrorResponse>)> {
     // default return_full_text given the pipeline_tag
@@ -66,11 +67,11 @@ async fn compat_generate(
 
     // switch on stream
     if req.stream {
-        Ok(generate_stream(infer, Json(req.into()))
+        Ok(generate_stream(infer,compute_type,  Json(req.into()))
             .await
             .into_response())
     } else {
-        let (headers, Json(generation)) = generate(infer, Json(req.into())).await?;
+        let (headers, Json(generation)) = generate(infer, compute_type, Json(req.into())).await?;
         // wrap generation inside a Vec to match api-inference
         Ok((headers, Json(vec![generation])).into_response())
     }
@@ -145,6 +146,7 @@ seed,
 )]
 async fn generate(
     infer: Extension<Infer>,
+    Extension(ComputeType(compute_type)): Extension<ComputeType>,
     Json(req): Json<GenerateRequest>,
 ) -> Result<(HeaderMap, Json<GenerateResponse>), (StatusCode, Json<ErrorResponse>)> {
     let span = tracing::Span::current();
@@ -230,7 +232,7 @@ async fn generate(
 
     // Headers
     let mut headers = HeaderMap::new();
-    headers.insert("x-compute-type", "gpu+optimized".parse().unwrap());
+    headers.insert("x-compute-type", compute_type.parse().unwrap());
     headers.insert(
         "x-compute-time",
         total_time.as_millis().to_string().parse().unwrap(),
@@ -339,6 +341,7 @@ seed,
 )]
 async fn generate_stream(
     Extension(infer): Extension<Infer>,
+    Extension(compute_type): Extension<ComputeType>,
     Json(req): Json<GenerateRequest>,
 ) -> (
     HeaderMap,
@@ -349,13 +352,14 @@ async fn generate_stream(
         event.json_data(stream_token).unwrap()
     };
     let (headers, response_stream) =
-        generate_stream_internal(infer, Json(req), on_message_callback).await;
+        generate_stream_internal(infer, compute_type, Json(req), on_message_callback).await;
     let sse = Sse::new(response_stream).keep_alive(KeepAlive::default());
     (headers, sse)
 }
 
 async fn generate_stream_internal(
     infer: Infer,
+    ComputeType(compute_type): ComputeType,
     Json(req): Json<GenerateRequest>,
     on_message_callback: impl Fn(StreamResponse) -> Event,
 ) -> (HeaderMap, impl Stream<Item = Result<Event, Infallible>>) {
@@ -368,7 +372,7 @@ async fn generate_stream_internal(
     let compute_characters = req.inputs.chars().count();
 
     let mut headers = HeaderMap::new();
-    headers.insert("x-compute-type", "gpu+optimized".parse().unwrap());
+    headers.insert("x-compute-type",compute_type.parse().unwrap());
     headers.insert(
         "x-compute-characters",
         compute_characters.to_string().parse().unwrap(),
@@ -557,6 +561,7 @@ async fn generate_stream_internal(
     )]
 async fn chat_completions(
     Extension(infer): Extension<Infer>,
+    Extension(compute_type): Extension<ComputeType>,
     Extension(info): Extension<Info>,
     Json(req): Json<ChatRequest>,
 ) -> Result<Response, (StatusCode, Json<ErrorResponse>)> {
@@ -645,12 +650,12 @@ async fn chat_completions(
         };
 
         let (headers, response_stream) =
-            generate_stream_internal(infer, Json(generate_request), on_message_callback).await;
+            generate_stream_internal(infer, compute_type, Json(generate_request), on_message_callback).await;
         let sse = Sse::new(response_stream).keep_alive(KeepAlive::default());
         Ok((headers, sse).into_response())
     } else {
         let (headers, Json(generation)) =
-            generate(Extension(infer), Json(generate_request)).await?;
+            generate(Extension(infer), Extension(compute_type), Json(generate_request)).await?;
 
         let current_time = std::time::SystemTime::now()
             .duration_since(std::time::UNIX_EPOCH)
@@ -729,6 +734,9 @@ async fn metrics(prom_handle: Extension<PrometheusHandle>) -> String {
     prom_handle.render()
 }
 
+#[derive(Clone, Debug)]
+pub(crate) struct ComputeType(String);
+
 /// Serving method
 #[allow(clippy::too_many_arguments)]
 pub async fn run(
@@ -935,6 +943,8 @@ pub async fn run(
         Router::new().route("/invocations", post(compat_generate)) // Use 'compat_generate' otherwise
     };
 
+    let compute_type = ComputeType(std::env::var("COMPUTE_TYPE").unwrap_or("gpu+optimized".to_string()));
+
     // Combine routes and layers
     let app = Router::new()
         .merge(swagger_ui)
@@ -944,6 +954,7 @@ pub async fn run(
         .layer(Extension(health_ext.clone()))
         .layer(Extension(compat_return_full_text))
         .layer(Extension(infer))
+        .layer(Extension(compute_type))
         .layer(Extension(prom_handle.clone()))
         .layer(OtelAxumLayer::default())
         .layer(cors_layer);

From a9ea60684b6445b2507e147c6aeed0edb0b25eb7 Mon Sep 17 00:00:00 2001
From: Nicolas Patry <patry.nicolas@protonmail.com>
Date: Mon, 29 Jan 2024 12:30:50 +0100
Subject: [PATCH 20/31] Create the compute type at launch time (if not provided
 in the env). (#1505)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
---
 launcher/src/main.rs | 24 ++++++++++++++++++++++--
 router/src/server.rs | 24 +++++++++++++++++-------
 2 files changed, 39 insertions(+), 9 deletions(-)

diff --git a/launcher/src/main.rs b/launcher/src/main.rs
index f0e45141..054e546c 100644
--- a/launcher/src/main.rs
+++ b/launcher/src/main.rs
@@ -982,7 +982,20 @@ fn spawn_shards(
     Ok(())
 }
 
+fn compute_type(num_shard: usize) -> Option<String> {
+    let output = Command::new("nvidia-smi")
+        .args(["--query-gpu=gpu_name", "--format=csv"])
+        .output()
+        .ok()?;
+    let output = String::from_utf8(output.stdout).ok()?;
+    let fullname = output.split('\n').nth(1)?;
+    let cardname = fullname.replace(' ', "-").to_lowercase();
+    let compute_type = format!("{num_shard}-{cardname}");
+    Some(compute_type)
+}
+
 fn spawn_webserver(
+    num_shard: usize,
     args: Args,
     shutdown: Arc<AtomicBool>,
     shutdown_receiver: &mpsc::Receiver<()>,
@@ -1072,6 +1085,13 @@ fn spawn_webserver(
         envs.push(("HUGGING_FACE_HUB_TOKEN".into(), api_token.into()))
     };
 
+    // Parse Compute type
+    if let Ok(compute_type) = env::var("COMPUTE_TYPE") {
+        envs.push(("COMPUTE_TYPE".into(), compute_type.into()))
+    } else if let Some(compute_type) = compute_type(num_shard) {
+        envs.push(("COMPUTE_TYPE".into(), compute_type.into()))
+    }
+
     let mut webserver = match Command::new("text-generation-router")
         .args(router_args)
         .envs(envs)
@@ -1265,8 +1285,8 @@ fn main() -> Result<(), LauncherError> {
         return Ok(());
     }
 
-    let mut webserver =
-        spawn_webserver(args, shutdown.clone(), &shutdown_receiver).map_err(|err| {
+    let mut webserver = spawn_webserver(num_shard, args, shutdown.clone(), &shutdown_receiver)
+        .map_err(|err| {
             shutdown_shards(shutdown.clone(), &shutdown_receiver);
             err
         })?;
diff --git a/router/src/server.rs b/router/src/server.rs
index 39d1de38..52ed03df 100644
--- a/router/src/server.rs
+++ b/router/src/server.rs
@@ -67,7 +67,7 @@ async fn compat_generate(
 
     // switch on stream
     if req.stream {
-        Ok(generate_stream(infer,compute_type,  Json(req.into()))
+        Ok(generate_stream(infer, compute_type, Json(req.into()))
             .await
             .into_response())
     } else {
@@ -372,7 +372,7 @@ async fn generate_stream_internal(
     let compute_characters = req.inputs.chars().count();
 
     let mut headers = HeaderMap::new();
-    headers.insert("x-compute-type",compute_type.parse().unwrap());
+    headers.insert("x-compute-type", compute_type.parse().unwrap());
     headers.insert(
         "x-compute-characters",
         compute_characters.to_string().parse().unwrap(),
@@ -649,13 +649,22 @@ async fn chat_completions(
                 )
         };
 
-        let (headers, response_stream) =
-            generate_stream_internal(infer, compute_type, Json(generate_request), on_message_callback).await;
+        let (headers, response_stream) = generate_stream_internal(
+            infer,
+            compute_type,
+            Json(generate_request),
+            on_message_callback,
+        )
+        .await;
         let sse = Sse::new(response_stream).keep_alive(KeepAlive::default());
         Ok((headers, sse).into_response())
     } else {
-        let (headers, Json(generation)) =
-            generate(Extension(infer), Extension(compute_type), Json(generate_request)).await?;
+        let (headers, Json(generation)) = generate(
+            Extension(infer),
+            Extension(compute_type),
+            Json(generate_request),
+        )
+        .await?;
 
         let current_time = std::time::SystemTime::now()
             .duration_since(std::time::UNIX_EPOCH)
@@ -943,7 +952,8 @@ pub async fn run(
         Router::new().route("/invocations", post(compat_generate)) // Use 'compat_generate' otherwise
     };
 
-    let compute_type = ComputeType(std::env::var("COMPUTE_TYPE").unwrap_or("gpu+optimized".to_string()));
+    let compute_type =
+        ComputeType(std::env::var("COMPUTE_TYPE").unwrap_or("gpu+optimized".to_string()));
 
     // Combine routes and layers
     let app = Router::new()

From 2d56f106a60c7b698705494e7539f8a7e4c85dd9 Mon Sep 17 00:00:00 2001
From: freitng <153592523+freitng@users.noreply.github.com>
Date: Mon, 29 Jan 2024 17:02:57 +0100
Subject: [PATCH 21/31] Modify default for max_new_tokens in python client
 (#1336)

# What does this PR do?
Since
[#1097](https://github.com/huggingface/text-generation-inference/pull/1097),
clients no longer need to specify a max_length. However, the
Python client in this repo had not yet been adapted to this change.
This PR makes it possible to use the Python client without providing
max_new_tokens.

<!-- Remove if not applicable -->


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [x] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [x] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.
---
 clients/python/tests/test_client.py      | 16 ++++++++++++++++
 clients/python/text_generation/client.py |  8 ++++----
 clients/python/text_generation/types.py  |  2 +-
 3 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/clients/python/tests/test_client.py b/clients/python/tests/test_client.py
index 1e25e1b1..775e7a6c 100644
--- a/clients/python/tests/test_client.py
+++ b/clients/python/tests/test_client.py
@@ -21,6 +21,22 @@ def test_generate(flan_t5_xxl_url, hf_headers):
     assert not response.details.tokens[0].special
 
 
+def test_generate_max_new_tokens_not_set(flan_t5_xxl_url, hf_headers):
+    client = Client(flan_t5_xxl_url, hf_headers)
+    response = client.generate("test", decoder_input_details=True)
+
+    assert response.generated_text != ""
+    assert response.details.finish_reason == FinishReason.EndOfSequenceToken
+    assert response.details.generated_tokens > 1
+    assert response.details.seed is None
+    assert len(response.details.prefill) == 1
+    assert response.details.prefill[0] == InputToken(id=0, text="<pad>", logprob=None)
+    assert len(response.details.tokens) > 1
+    assert response.details.tokens[0].id == 3
+    assert response.details.tokens[0].text == " "
+    assert not response.details.tokens[0].special
+
+
 def test_generate_best_of(flan_t5_xxl_url, hf_headers):
     client = Client(flan_t5_xxl_url, hf_headers)
     response = client.generate(
diff --git a/clients/python/text_generation/client.py b/clients/python/text_generation/client.py
index 0bf80f8c..63b5258d 100644
--- a/clients/python/text_generation/client.py
+++ b/clients/python/text_generation/client.py
@@ -62,7 +62,7 @@ class Client:
         self,
         prompt: str,
         do_sample: bool = False,
-        max_new_tokens: int = 20,
+        max_new_tokens: Optional[int] = None,
         best_of: Optional[int] = None,
         repetition_penalty: Optional[float] = None,
         return_full_text: bool = False,
@@ -157,7 +157,7 @@ class Client:
         self,
         prompt: str,
         do_sample: bool = False,
-        max_new_tokens: int = 20,
+        max_new_tokens: Optional[int] = None,
         repetition_penalty: Optional[float] = None,
         return_full_text: bool = False,
         seed: Optional[int] = None,
@@ -312,7 +312,7 @@ class AsyncClient:
         self,
         prompt: str,
         do_sample: bool = False,
-        max_new_tokens: int = 20,
+        max_new_tokens: Optional[int] = None,
         best_of: Optional[int] = None,
         repetition_penalty: Optional[float] = None,
         return_full_text: bool = False,
@@ -405,7 +405,7 @@ class AsyncClient:
         self,
         prompt: str,
         do_sample: bool = False,
-        max_new_tokens: int = 20,
+        max_new_tokens: Optional[int] = None,
         repetition_penalty: Optional[float] = None,
         return_full_text: bool = False,
         seed: Optional[int] = None,
diff --git a/clients/python/text_generation/types.py b/clients/python/text_generation/types.py
index aa02d8d8..7fa8033e 100644
--- a/clients/python/text_generation/types.py
+++ b/clients/python/text_generation/types.py
@@ -9,7 +9,7 @@ class Parameters(BaseModel):
     # Activate logits sampling
     do_sample: bool = False
     # Maximum number of generated tokens
-    max_new_tokens: int = 20
+    max_new_tokens: Optional[int] = None
     # The parameter for repetition penalty. 1.0 means no penalty.
     # See [this paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
     repetition_penalty: Optional[float] = None

From 0595bf3e9a15807d038e41f09c0f7e1c595d2417 Mon Sep 17 00:00:00 2001
From: dtlzhuangz <139844877+dtlzhuangz@users.noreply.github.com>
Date: Wed, 31 Jan 2024 19:05:49 +0800
Subject: [PATCH 22/31] feat: eetq gemv optimization when batch_size <= 4
 (#1502)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Add TensorRT-LLM weight-only GEMV kernel support. We extract the GEMV kernel
from
[TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM/tree/main/cpp/tensorrt_llm/kernels/weightOnlyBatchedGemv)
to accelerate the decoding speed of EETQ when batch_size is less than or
equal to 4.

- Features

1. There is almost no loss of quantization accuracy.
2. Decoding is 13% - 27% faster than the original EETQ, which
uses the GEMM kernel.

- Test
Below is our test on 3090. Environment: torch=2.0.1, cuda=11.8, nvidia
driver: 525.78.01
prompt=1024, max_new_tokens=50

![image](https://github.com/huggingface/text-generation-inference/assets/139844877/98e63b23-23cd-452f-91bd-55ccdc9b7021)


![image](https://github.com/huggingface/text-generation-inference/assets/139844877/5c3132ff-fc1c-4b20-a83f-59b3d5f586b7)



## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
---
 server/Makefile-eetq | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/server/Makefile-eetq b/server/Makefile-eetq
index 5e8e9830..8c060987 100644
--- a/server/Makefile-eetq
+++ b/server/Makefile-eetq
@@ -1,4 +1,4 @@
-eetq_commit := 323827dd471458a84e9c840f614e4592b157a4b1
+eetq_commit := 71adb5e191bb8290069a580abff0355d7b2dd5c9
 
 eetq:
     # Clone eetq
@@ -6,7 +6,7 @@ eetq:
 	git clone https://github.com/NetEase-FuXi/EETQ.git eetq
 
 build-eetq: eetq
-	cd eetq && git fetch && git checkout $(eetq_commit)
+	cd eetq && git fetch && git checkout $(eetq_commit) && git submodule update --init --recursive
 	cd eetq && python setup.py build
 
 install-eetq: build-eetq

From 2ae36a97fd9439de883bb5600db26634ee34d494 Mon Sep 17 00:00:00 2001
From: drbh <david.richard.holtz@gmail.com>
Date: Wed, 31 Jan 2024 11:26:22 -0500
Subject: [PATCH 23/31] fix: improve messages api docs content and formatting
 (#1506)

This PR updates the Messages API docs to address content changes
and make the formatting consistent.
---
 docs/source/messages_api.md | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/docs/source/messages_api.md b/docs/source/messages_api.md
index 899de865..1e342686 100644
--- a/docs/source/messages_api.md
+++ b/docs/source/messages_api.md
@@ -1,8 +1,8 @@
 # Messages API
 
-_Messages API is compatible to OpenAI Chat Completion API_
+Text Generation Inference (TGI) now supports the Messages API, which is fully compatible with the OpenAI Chat Completion API. This feature is available starting from version 1.4.0. You can use OpenAI's client libraries or third-party libraries expecting OpenAI schema to interact with TGI's Messages API. Below are some examples of how to utilize this compatibility.
 
-Text Generation Inference (TGI) now supports the Message API which is fully compatible with the OpenAI Chat Completion API. This means you can use OpenAI's client libraries to interact with TGI's Messages API. Below are some examples of how to utilize this compatibility.
+> **Note:** The Messages API is supported from TGI version 1.4.0 and above. Ensure you are using a compatible version to access this feature.
 
 ## Making a Request
 
@@ -87,7 +87,7 @@ TGI can be deployed on various cloud providers for scalable and robust text gene
 
 ## Amazon SageMaker
 
-To enable the Messages API in Amazon SageMaker you need to set the environment variable `MESSAGES_API_ENABLED=true`. 
+To enable the Messages API in Amazon SageMaker you need to set the environment variable `MESSAGES_API_ENABLED=true`.
 
 This will modify the `/invocations` route to accept Messages dictonaries consisting out of role and content. See the example below on how to deploy Llama with the new Messages API.
 
@@ -98,30 +98,30 @@ import boto3
 from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri
 
 try:
-	role = sagemaker.get_execution_role()
+ role = sagemaker.get_execution_role()
 except ValueError:
-	iam = boto3.client('iam')
-	role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']
+ iam = boto3.client('iam')
+ role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']
 
 # Hub Model configuration. https://huggingface.co/models
 hub = {
-	'HF_MODEL_ID':'HuggingFaceH4/zephyr-7b-beta',
-	'SM_NUM_GPUS': json.dumps(1),
-    'MESSAGES_API_ENABLED': True
+ 'HF_MODEL_ID':'HuggingFaceH4/zephyr-7b-beta',
+ 'SM_NUM_GPUS': json.dumps(1),
+ 'MESSAGES_API_ENABLED': True
 }
 
 # create Hugging Face Model Class
 huggingface_model = HuggingFaceModel(
-	image_uri=get_huggingface_llm_image_uri("huggingface",version="1.4.0"),
-	env=hub,
-	role=role, 
+ image_uri=get_huggingface_llm_image_uri("huggingface",version="1.4.0"),
+ env=hub,
+ role=role, 
 )
 
 # deploy model to SageMaker Inference
 predictor = huggingface_model.deploy(
-	initial_instance_count=1,
-	instance_type="ml.g5.2xlarge",
-	container_startup_health_check_timeout=300,
+ initial_instance_count=1,
+ instance_type="ml.g5.2xlarge",
+ container_startup_health_check_timeout=300,
   )
   
 # send request
@@ -131,4 +131,4 @@ predictor.predict({
         {"role": "user", "content": "What is deep learning?"}
     ]
 })
-```
\ No newline at end of file
+```

From 13c62be467953c4762eeb15fa418bc0f6c716acd Mon Sep 17 00:00:00 2001
From: Dean Wyatte <2512762+dwyatte@users.noreply.github.com>
Date: Thu, 1 Feb 2024 01:34:11 -0700
Subject: [PATCH 24/31] GPTNeoX: Use static rotary embedding (#1498)

# What does this PR do?

`transformers` 4.35 removed rotary embeddings from GPTNeoX's weights
([link to line
diff](https://github.com/huggingface/transformers/commit/253f9a3f9716d08a81fb305fe71f983122eb608b#diff-0e2a05d86c82e96f516db8c14070ceb36f53ca44c6bc21a9cd92ad2e777b9cf1R298)).
This applies the same fix as
https://github.com/huggingface/text-generation-inference/pull/793, which
generates them on the fly using the appropriate value from the config
file.

Fixes
https://github.com/huggingface/text-generation-inference/issues/1460

## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [x] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

@OlivierDehaene OR @Narsil
---
 .../models/custom_modeling/flash_neox_modeling.py        | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py b/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py
index eea5f787..3ee344e4 100644
--- a/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py
@@ -91,6 +91,8 @@ class FlashNeoxAttention(torch.nn.Module):
         self.hidden_size = hidden_size
         self.head_size = hidden_size // num_heads
 
+        self.rotary_dim = int(config.rotary_pct * self.head_size)
+
         if self.num_heads % weights.process_group.size() != 0:
             raise ValueError(
                 f"`num_heads` must be divisible by `num_shards` (got `num_heads`: {self.num_heads} "
@@ -98,8 +100,11 @@ class FlashNeoxAttention(torch.nn.Module):
             )
         self.num_heads = self.num_heads // weights.process_group.size()
 
-        self.rotary_emb = PositionRotaryEmbedding.load(
-            config=config, prefix=f"{prefix}.rotary_emb", weights=weights
+        self.rotary_emb = PositionRotaryEmbedding.static(
+            config=config,
+            dim=self.rotary_dim,
+            base=config.rotary_emb_base,
+            device=weights.device,
         )
 
         self.softmax_scale = self.head_size ** (-0.5)

From 94d243b3d7916879f1735d4a6f231e915765c1c4 Mon Sep 17 00:00:00 2001
From: Nicolas Patry <patry.nicolas@protonmail.com>
Date: Thu, 1 Feb 2024 10:23:37 +0100
Subject: [PATCH 25/31] Freshen up the README.

---
 README.md | 33 +++++++++++++++++++--------------
 1 file changed, 19 insertions(+), 14 deletions(-)

diff --git a/README.md b/README.md
index 5fdb9f14..c4d84efa 100644
--- a/README.md
+++ b/README.md
@@ -28,7 +28,7 @@ to power Hugging Chat, the Inference API and Inference Endpoint.
   - [Local Install](#local-install)
   - [CUDA Kernels](#cuda-kernels)
 - [Optimized architectures](#optimized-architectures)
-- [Run Falcon](#run-falcon)
+- [Run Mistral](#run-a-model)
   - [Run](#run)
   - [Quantization](#quantization)
 - [Develop](#develop)
@@ -42,7 +42,11 @@ Text Generation Inference (TGI) is a toolkit for deploying and serving Large Lan
 - Token streaming using Server-Sent Events (SSE)
 - Continuous batching of incoming requests for increased total throughput
 - Optimized transformers code for inference using [Flash Attention](https://github.com/HazyResearch/flash-attention) and [Paged Attention](https://github.com/vllm-project/vllm) on the most popular architectures
-- Quantization with [bitsandbytes](https://github.com/TimDettmers/bitsandbytes) and [GPT-Q](https://arxiv.org/abs/2210.17323)
+- Quantization with :
+  - [bitsandbytes](https://github.com/TimDettmers/bitsandbytes)
+  - [GPT-Q](https://arxiv.org/abs/2210.17323)
+  - [EETQ](https://github.com/NetEase-FuXi/EETQ)
+  - [AWQ](https://github.com/casper-hansen/AutoAWQ)
 - [Safetensors](https://github.com/huggingface/safetensors) weight loading
 - Watermarking with [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226)
 - Logits warper (temperature scaling, top-p, top-k, repetition penalty, more details see [transformers.LogitsProcessor](https://huggingface.co/docs/transformers/internal/generation_utils#transformers.LogitsProcessor))
@@ -51,6 +55,14 @@ Text Generation Inference (TGI) is a toolkit for deploying and serving Large Lan
 - Custom Prompt Generation: Easily generate text by providing custom prompts to guide the model's output
 - Fine-tuning Support: Utilize fine-tuned models for specific tasks to achieve higher accuracy and performance
 
+### Hardware support
+
+- [Nvidia](https://github.com/huggingface/text-generation-inference/pkgs/container/text-generation-inference)
+- [AMD](https://github.com/huggingface/text-generation-inference/pkgs/container/text-generation-inference) (-rocm)
+- [Inferentia](https://github.com/huggingface/optimum-neuron/tree/main/text-generation-inference)
+- [Intel GPU](https://github.com/huggingface/text-generation-inference/pull/1475)
+- [Gaudi](https://github.com/huggingface/tgi-gaudi)
+
 
 ## Get Started
 
@@ -154,7 +166,7 @@ Python 3.9, e.g. using `conda`:
 ```shell
 curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
 
-conda create -n text-generation-inference python=3.9
+conda create -n text-generation-inference python=3.11
 conda activate text-generation-inference
 ```
 
@@ -180,7 +192,7 @@ Then run:
 
 ```shell
 BUILD_EXTENSIONS=True make install # Install repository and HF/transformer fork with CUDA kernels
-make run-falcon-7b-instruct
+text-generation-launcher --model-id mistralai/Mistral-7B-Instruct-v0.2
 ```
 
 **Note:** on some machines, you may also need the OpenSSL libraries and gcc. On Linux machines, run:
@@ -189,13 +201,6 @@ make run-falcon-7b-instruct
 sudo apt-get install libssl-dev gcc -y
 ```
 
-### CUDA Kernels
-
-The custom CUDA kernels are only tested on NVIDIA A100, AMD MI210 and AMD MI250. If you have any installation or runtime issues, you can remove
-the kernels by using the `DISABLE_CUSTOM_KERNELS=True` environment variable.
-
-Be aware that the official Docker image has them enabled by default.
-
 ## Optimized architectures
 
 TGI works out of the box to serve optimized models for all modern models. They can be found in [this list](https://huggingface.co/docs/text-generation-inference/supported_models).
@@ -210,12 +215,12 @@ or
 
 
 
-## Run Falcon
+## Run locally
 
 ### Run
 
 ```shell
-make run-falcon-7b-instruct
+text-generation-launcher --model-id mistralai/Mistral-7B-Instruct-v0.2
 ```
 
 ### Quantization
@@ -223,7 +228,7 @@ make run-falcon-7b-instruct
 You can also quantize the weights with bitsandbytes to reduce the VRAM requirement:
 
 ```shell
-make run-falcon-7b-instruct-quantize
+text-generation-launcher --model-id mistralai/Mistral-7B-Instruct-v0.2 --quantize 
 ```
 
 4bit quantization is available using the [NF4 and FP4 data types from bitsandbytes](https://arxiv.org/pdf/2305.14314.pdf). It can be enabled by providing `--quantize bitsandbytes-nf4` or `--quantize bitsandbytes-fp4` as a command line argument to `text-generation-launcher`.

From 9ad7b6a1a12f8cd6b715be9f0ca85603e0a2b002 Mon Sep 17 00:00:00 2001
From: Nicolas Patry <patry.nicolas@protonmail.com>
Date: Thu, 1 Feb 2024 13:29:04 +0100
Subject: [PATCH 26/31] Hotfix the / health - route. (#1515)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
---
 router/src/server.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/router/src/server.rs b/router/src/server.rs
index 52ed03df..b4d26158 100644
--- a/router/src/server.rs
+++ b/router/src/server.rs
@@ -936,6 +936,7 @@ pub async fn run(
     // Define base and health routes
     let base_routes = Router::new()
         .route("/", post(compat_generate))
+        .route("/", get(health))
         .route("/info", get(get_model_info))
         .route("/generate", post(generate))
         .route("/generate_stream", post(generate_stream))

From 1e03b61b5c56e2ed5c723457df21cc18d48c1854 Mon Sep 17 00:00:00 2001
From: Nicolas Patry <patry.nicolas@protonmail.com>
Date: Thu, 1 Feb 2024 14:36:10 +0000
Subject: [PATCH 27/31] Revert "Modify default for max_new_tokens in python
 client (#1336)"

This reverts commit 2d56f106a60c7b698705494e7539f8a7e4c85dd9.

It causes breakage in our integration tests.
---
 clients/python/tests/test_client.py      | 16 ----------------
 clients/python/text_generation/client.py |  8 ++++----
 clients/python/text_generation/types.py  |  2 +-
 3 files changed, 5 insertions(+), 21 deletions(-)

diff --git a/clients/python/tests/test_client.py b/clients/python/tests/test_client.py
index 775e7a6c..1e25e1b1 100644
--- a/clients/python/tests/test_client.py
+++ b/clients/python/tests/test_client.py
@@ -21,22 +21,6 @@ def test_generate(flan_t5_xxl_url, hf_headers):
     assert not response.details.tokens[0].special
 
 
-def test_generate_max_new_tokens_not_set(flan_t5_xxl_url, hf_headers):
-    client = Client(flan_t5_xxl_url, hf_headers)
-    response = client.generate("test", decoder_input_details=True)
-
-    assert response.generated_text != ""
-    assert response.details.finish_reason == FinishReason.EndOfSequenceToken
-    assert response.details.generated_tokens > 1
-    assert response.details.seed is None
-    assert len(response.details.prefill) == 1
-    assert response.details.prefill[0] == InputToken(id=0, text="<pad>", logprob=None)
-    assert len(response.details.tokens) > 1
-    assert response.details.tokens[0].id == 3
-    assert response.details.tokens[0].text == " "
-    assert not response.details.tokens[0].special
-
-
 def test_generate_best_of(flan_t5_xxl_url, hf_headers):
     client = Client(flan_t5_xxl_url, hf_headers)
     response = client.generate(
diff --git a/clients/python/text_generation/client.py b/clients/python/text_generation/client.py
index 63b5258d..0bf80f8c 100644
--- a/clients/python/text_generation/client.py
+++ b/clients/python/text_generation/client.py
@@ -62,7 +62,7 @@ class Client:
         self,
         prompt: str,
         do_sample: bool = False,
-        max_new_tokens: Optional[int] = None,
+        max_new_tokens: int = 20,
         best_of: Optional[int] = None,
         repetition_penalty: Optional[float] = None,
         return_full_text: bool = False,
@@ -157,7 +157,7 @@ class Client:
         self,
         prompt: str,
         do_sample: bool = False,
-        max_new_tokens: Optional[int] = None,
+        max_new_tokens: int = 20,
         repetition_penalty: Optional[float] = None,
         return_full_text: bool = False,
         seed: Optional[int] = None,
@@ -312,7 +312,7 @@ class AsyncClient:
         self,
         prompt: str,
         do_sample: bool = False,
-        max_new_tokens: Optional[int] = None,
+        max_new_tokens: int = 20,
         best_of: Optional[int] = None,
         repetition_penalty: Optional[float] = None,
         return_full_text: bool = False,
@@ -405,7 +405,7 @@ class AsyncClient:
         self,
         prompt: str,
         do_sample: bool = False,
-        max_new_tokens: Optional[int] = None,
+        max_new_tokens: int = 20,
         repetition_penalty: Optional[float] = None,
         return_full_text: bool = False,
         seed: Optional[int] = None,
diff --git a/clients/python/text_generation/types.py b/clients/python/text_generation/types.py
index 7fa8033e..aa02d8d8 100644
--- a/clients/python/text_generation/types.py
+++ b/clients/python/text_generation/types.py
@@ -9,7 +9,7 @@ class Parameters(BaseModel):
     # Activate logits sampling
     do_sample: bool = False
     # Maximum number of generated tokens
-    max_new_tokens: Optional[int] = None
+    max_new_tokens: int = 20
     # The parameter for repetition penalty. 1.0 means no penalty.
     # See [this paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
     repetition_penalty: Optional[float] = None

From ee1cf51ce796e4b034eedaf3e909b4c902eae70c Mon Sep 17 00:00:00 2001
From: drbh <david.richard.holtz@gmail.com>
Date: Thu, 1 Feb 2024 09:39:32 -0500
Subject: [PATCH 28/31] fix: tokenizer config should use local model path when
 possible (#1518)

This PR fixes the issue with loading a local tokenizer config.
Previously, the default behavior was to look in the current working
directory. Now, if a local model path is specified, we check that
directory for the tokenizer_config.

## Examples of valid commands

use tokenizer_config from the hub
```
text-generation-launcher --model-id HuggingFaceH4/zephyr-7b-beta
```

use tokenizer_config from local model path
```
text-generation-launcher \
  --model-id ~/.cache/huggingface/hub/models--HuggingFaceH4--zephyr-7b-beta/snapshots/dc24cabd13eacd3ae3a5fe574bd645483a335a4a/
```

use specific tokenizer_config file
```
 text-generation-launcher \
  --model-id ~/.cache/huggingface/hub/models--HuggingFaceH4--zephyr-7b-beta/snapshots/dc24cabd13eacd3ae3a5fe574bd645483a335a4a/ \
  --tokenizer-config-path ~/.cache/huggingface/hub/models--HuggingFaceH4--zephyr-7b-beta/snapshots/dc24cabd13eacd3ae3a5fe574bd645483a335a4a/tokenizer_config.json


```

---------

Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>
---
 router/src/lib.rs  |  2 +-
 router/src/main.rs | 49 +++++++++++++++++++++++++---------------------
 2 files changed, 28 insertions(+), 23 deletions(-)

diff --git a/router/src/lib.rs b/router/src/lib.rs
index fc5670a0..07360e78 100644
--- a/router/src/lib.rs
+++ b/router/src/lib.rs
@@ -37,7 +37,7 @@ pub struct HubTokenizerConfig {
 }
 
 impl HubTokenizerConfig {
-    pub fn from_file(filename: &str) -> Self {
+    pub fn from_file(filename: &std::path::Path) -> Self {
         let content = std::fs::read_to_string(filename).unwrap();
         serde_json::from_str(&content).unwrap_or_default()
     }
diff --git a/router/src/main.rs b/router/src/main.rs
index 495fd5bc..2a080468 100644
--- a/router/src/main.rs
+++ b/router/src/main.rs
@@ -154,12 +154,6 @@ async fn main() -> Result<(), RouterError> {
     let local_path = Path::new(&tokenizer_name);
     let local_model = local_path.exists() && local_path.is_dir();
 
-    // Load tokenizer config
-    // This will be used to format the chat template
-    let local_tokenizer_config_path =
-        tokenizer_config_path.unwrap_or("tokenizer_config.json".to_string());
-    let local_tokenizer_config = Path::new(&local_tokenizer_config_path).exists();
-
     // Shared API builder initialization
     let api_builder = || {
         let mut builder = ApiBuilder::new()
@@ -230,24 +224,35 @@ async fn main() -> Result<(), RouterError> {
     };
 
     // Load tokenizer config if found locally, or check if we can get it from the API if needed
-    let tokenizer_config = if local_tokenizer_config {
+    let tokenizer_config = if let Some(path) = tokenizer_config_path {
+        tracing::info!("Using local tokenizer config from user specified path");
+        HubTokenizerConfig::from_file(&std::path::PathBuf::from(path))
+    } else if local_model {
         tracing::info!("Using local tokenizer config");
-        HubTokenizerConfig::from_file(&local_tokenizer_config_path)
-    } else if let Some(api) = api {
-        tracing::info!("Using the Hugging Face API to retrieve tokenizer config");
-        get_tokenizer_config(&api.repo(Repo::with_revision(
-            tokenizer_name.to_string(),
-            RepoType::Model,
-            revision.unwrap_or_else(|| "main".to_string()),
-        )))
-        .await
-        .unwrap_or_else(|| {
-            tracing::warn!("Could not retrieve tokenizer config from the Hugging Face hub.");
-            HubTokenizerConfig::default()
-        })
+        HubTokenizerConfig::from_file(&local_path.join("tokenizer_config.json"))
     } else {
-        tracing::warn!("Could not find tokenizer config locally and no revision specified");
-        HubTokenizerConfig::default()
+        match api {
+            Some(api) => {
+                tracing::info!("Using the Hugging Face API to retrieve tokenizer config");
+                let repo = Repo::with_revision(
+                    tokenizer_name.to_string(),
+                    RepoType::Model,
+                    revision.unwrap_or("main".to_string()),
+                );
+                get_tokenizer_config(&api.repo(repo))
+                    .await
+                    .unwrap_or_else(|| {
+                        tracing::warn!(
+                            "Could not retrieve tokenizer config from the Hugging Face hub."
+                        );
+                        HubTokenizerConfig::default()
+                    })
+            }
+            None => {
+                tracing::warn!("Could not find tokenizer config locally and no API specified");
+                HubTokenizerConfig::default()
+            }
+        }
     };
 
     if tokenizer.is_none() {

From 0e97af456af3102ed4f927f7b7e870ec976079ae Mon Sep 17 00:00:00 2001
From: Nicolas Patry <patry.nicolas@protonmail.com>
Date: Thu, 1 Feb 2024 16:26:48 +0100
Subject: [PATCH 29/31] Updating tokenizers. (#1517)

# What does this PR do?

<!--
Congratulations! You've made it this far! You're not quite done yet
though.

Once merged, your PR is going to appear in the release notes with the
title you set, so make sure it's a great title that fully reflects the
extent of your awesome contribution.

Then, please replace this with a description of the change and which
issue is fixed (if applicable). Please also include relevant motivation
and context. List any dependencies (if any) that are required for this
change.

Once you're done, someone will review your PR shortly (see the section
"Who can review?" below to tag some potential reviewers). They may
suggest changes to make the code even better. If no one reviewed your PR
after a week has passed, don't hesitate to post a new comment
@-mentioning the same persons---sometimes notifications get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)


## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?


## Who can review?

Anyone in the community is free to review the PR once the tests have
passed. Feel free to tag
members/contributors who may be interested in your PR.

<!-- Your PR will be replied to more quickly if you can figure out the
right person to tag with @


@OlivierDehaene OR @Narsil

 -->
---
 router/Cargo.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/router/Cargo.toml b/router/Cargo.toml
index f6f16dae..1a7ceb70 100644
--- a/router/Cargo.toml
+++ b/router/Cargo.toml
@@ -32,7 +32,7 @@ reqwest = { version = "0.11.20", features = [] }
 serde = "1.0.188"
 serde_json = "1.0.107"
 thiserror = "1.0.48"
-tokenizers = { version = "0.14.0", features = ["http"] }
+tokenizers = { version = "0.15.1", features = ["http"] }
 tokio = { version = "1.32.0", features = ["rt", "rt-multi-thread", "parking_lot", "signal", "sync"] }
 tokio-stream = "0.1.14"
 tower-http = { version = "0.4.4", features = ["cors"] }

From 3ab578b4160b200ad601bbd30bd8ecf39b979326 Mon Sep 17 00:00:00 2001
From: Pedro Cuenca <pedro@huggingface.co>
Date: Fri, 2 Feb 2024 14:05:30 +0100
Subject: [PATCH 30/31] [docs] Fix link to Install CLI (#1526)

# What does this PR do?

Attempts to fix a link from Using TGI CLI to Installation.


## Before submitting
- [x] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [x] Did you read the [contributor
guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests),
      Pull Request section?
- [ ] Was this discussed/approved via a Github issue or the
[forum](https://discuss.huggingface.co/)? Please add a link
      to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes?
Here are the
[documentation
guidelines](https://github.com/huggingface/transformers/tree/main/docs),
and
[here are tips on formatting
docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?
---
 docs/source/basic_tutorials/using_cli.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/basic_tutorials/using_cli.md b/docs/source/basic_tutorials/using_cli.md
index 82c10e6b..a3a65f60 100644
--- a/docs/source/basic_tutorials/using_cli.md
+++ b/docs/source/basic_tutorials/using_cli.md
@@ -1,6 +1,6 @@
 # Using TGI CLI
 
-You can use TGI command-line interface (CLI) to download weights, serve and quantize models, or get information on serving parameters. To install the CLI, please refer to [the installation section](./installation#install-cli).
+You can use TGI command-line interface (CLI) to download weights, serve and quantize models, or get information on serving parameters. To install the CLI, please refer to [the installation section](../installation#install-cli).
 
 `text-generation-server` lets you download the model with `download-weights` command like below 👇 
 

From 0da00be52c9e591f8890ab07eea05cc15b9b127b Mon Sep 17 00:00:00 2001
From: drbh <david.richard.holtz@gmail.com>
Date: Fri, 2 Feb 2024 10:31:11 -0500
Subject: [PATCH 31/31] feat: add ie update to message docs (#1523)

Update the Messages API docs and add a Hugging Face Inference Endpoints
integration section with instructions.

---------

Co-authored-by: Philipp Schmid <32632186+philschmid@users.noreply.github.com>
---
 docs/source/messages_api.md | 45 +++++++++++++++++++++++++++++++++++--
 1 file changed, 43 insertions(+), 2 deletions(-)

diff --git a/docs/source/messages_api.md b/docs/source/messages_api.md
index 1e342686..939850aa 100644
--- a/docs/source/messages_api.md
+++ b/docs/source/messages_api.md
@@ -4,6 +4,15 @@ Text Generation Inference (TGI) now supports the Messages API, which is fully co
 
 > **Note:** The Messages API is supported from TGI version 1.4.0 and above. Ensure you are using a compatible version to access this feature.
 
+#### Table of Contents
+
+- [Making a Request](#making-a-request)
+- [Streaming](#streaming)
+- [Synchronous](#synchronous)
+- [Hugging Face Inference Endpoints](#hugging-face-inference-endpoints)
+- [Cloud Providers](#cloud-providers)
+  - [Amazon SageMaker](#amazon-sagemaker)
+
 ## Making a Request
 
 You can make a request to TGI's Messages API using `curl`. Here's an example:
@@ -81,6 +90,38 @@ chat_completion = client.chat.completions.create(
 print(chat_completion)
 ```
 
+## Hugging Face Inference Endpoints
+
+The Messages API is integrated with [Inference Endpoints](https://huggingface.co/inference-endpoints/dedicated).  
+Every endpoint that uses "Text Generation Inference" with an LLM, which has a chat template can now be used. Below is an example of how to use IE with TGI using OpenAI's Python client library:
+
+> **Note:** Make sure to replace `base_url` with your endpoint URL and to include `v1/` at the end of the URL. The `api_key` should be replaced with your Hugging Face API key.
+
+```python
+from openai import OpenAI
+
+# init the client but point it to TGI
+client = OpenAI(
+    # replace with your endpoint url, make sure to include "v1/" at the end
+    base_url="https://vlzz10eq3fol3429.us-east-1.aws.endpoints.huggingface.cloud/v1/",
+    # replace with your API key
+    api_key="hf_XXX"
+)
+
+chat_completion = client.chat.completions.create(
+    model="tgi",
+    messages=[
+        {"role": "system", "content": "You are a helpful assistant." },
+        {"role": "user", "content": "What is deep learning?"}
+    ],
+    stream=True
+)
+
+# iterate and print stream
+for message in chat_completion:
+    print(message.choices[0].delta.content, end="")
+```
+
 ## Cloud Providers
 
 TGI can be deployed on various cloud providers for scalable and robust text generation. One such provider is Amazon SageMaker, which has recently added support for TGI. Here's how you can deploy TGI on Amazon SageMaker:
@@ -114,7 +155,7 @@ hub = {
 huggingface_model = HuggingFaceModel(
  image_uri=get_huggingface_llm_image_uri("huggingface",version="1.4.0"),
  env=hub,
- role=role, 
+ role=role,
 )
 
 # deploy model to SageMaker Inference
@@ -123,7 +164,7 @@ predictor = huggingface_model.deploy(
  instance_type="ml.g5.2xlarge",
  container_startup_health_check_timeout=300,
   )
-  
+
 # send request
 predictor.predict({
 "messages": [